Source file src/cmd/internal/obj/x86/obj6.go

     1  // Inferno utils/6l/pass.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/src"
    37  	"cmd/internal/sys"
    38  	"internal/abi"
    39  	"log"
    40  	"math"
    41  	"path"
    42  	"strings"
    43  )
    44  
    45  func CanUse1InsnTLS(ctxt *obj.Link) bool {
    46  	if isAndroid {
    47  		// Android uses a global variable for the tls offset.
    48  		return false
    49  	}
    50  
    51  	if ctxt.Arch.Family == sys.I386 {
    52  		switch ctxt.Headtype {
    53  		case objabi.Hlinux,
    54  			objabi.Hplan9,
    55  			objabi.Hwindows:
    56  			return false
    57  		}
    58  
    59  		return true
    60  	}
    61  
    62  	switch ctxt.Headtype {
    63  	case objabi.Hplan9, objabi.Hwindows:
    64  		return false
    65  	case objabi.Hlinux, objabi.Hfreebsd:
    66  		return !ctxt.Flag_shared
    67  	}
    68  
    69  	return true
    70  }
    71  
// progedit rewrites the single instruction p in place into the form the
// rest of the x86 back end expects. In order, it:
//
//   - converts TLS accesses between the 1-instruction and 2-instruction
//     forms, depending on CanUse1InsnTLS, and applies the Android/Windows
//     runtime.tls_g rewrite;
//   - turns a bare 0 in the last operand of CMPPD/CMPPS/CMPSD/CMPSS into $0;
//   - marks CALL/JMP/RET to a symbol as TYPE_BRANCH;
//   - turns MOVL/MOVQ of an address constant into LEAL/LEAQ;
//   - replaces floating-point constant operands with references to
//     constant symbols (or with XORPS for +0 loaded into an X register);
//   - applies the GOT rewrite when ctxt.Flag_dynlink is set and the
//     PC-relative rewrite when ctxt.Flag_shared is set on 386.
//
// Any additional instructions are allocated with newprog and linked in
// after p.
func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only works because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
			obj.Nopout(p)
		}
		// Source operand of the form off(reg)(TLS*1): make TLS the base
		// register and drop the index.
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		// Same rewrite for the destination operand.
		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			// q is the second instruction; it starts as a copy of p's
			// source (keeping its offset) and is re-based on the
			// destination register with TLS as the index.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			// p itself becomes the plain "MOVx TLS, reg" load.
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// Android and Windows use a tls offset determined at runtime. Rewrite
	//	MOVQ TLS, BX
	// to
	//	MOVQ runtime.tls_g(SB), BX
	if (isAndroid || ctxt.Headtype == objabi.Hwindows) &&
		(p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		p.From.Reg = REG_NONE
		p.From.Sym = ctxt.Lookup("runtime.tls_g")
		p.From.Index = REG_NONE
		if ctxt.Headtype == objabi.Hwindows {
			// Windows requires an additional indirection
			// to retrieve the TLS pointer,
			// as runtime.tls_g contains the TLS offset from GS or FS.
			// on AMD64 add
			//	MOVQ 0(BX)(GS*1), BX
			// on 386 add
			//	MOVQ 0(BX)(FS*1), BX
			// The added instruction uses the same opcode as p.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = obj.Addr{}
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			if ctxt.Arch.Family == sys.AMD64 {
				q.From.Index = REG_GS
			} else {
				q.From.Index = REG_FS
			}
			q.From.Scale = 1
			q.From.Offset = 0
			q.To = p.To
		}
	}

	// TODO: Remove.
	// On Windows/AMD64 and on Plan 9, a TLS index with scale 1 is changed
	// to scale 2 on both operands.
	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		// A memory operand with no name, base, index or symbol is just a
		// bare number, so retype it as a constant.
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	// On AMD64 this applies to every address operand; on other families
	// addresses of extern/static symbols are left as MOV.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					// Leave the switch; the fallthrough below is skipped.
					break
				}
			}
		}
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		// Single-precision operations: the constant is narrowed to float32
		// and replaced by a reference to the symbol from ctxt.Float32Sym.
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					// Leave the switch; the fallthrough below is skipped.
					break
				}
			}
		}
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		// Double-precision operations: the constant is replaced by a
		// reference to the symbol from ctxt.Float64Sym.
		if p.From.Type == obj.TYPE_FCONST {
			f64 := p.From.Val.(float64)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float64Sym(f64)
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p, newprog)
	}

	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
		rewriteToPcrel(ctxt, p, newprog)
	}
}
   321  
// rewriteToUseGot rewrites p, if necessary, to access global data via the
// global offset table. It is called from progedit when ctxt.Flag_dynlink
// is set.
//
// References to non-local NAME_EXTERN symbols are replaced by a load of the
// symbol's address from the GOT into a scratch register, followed by the
// original operation performed through that register. The scratch register
// is R15 on AMD64 and CX on other families, except that a LEAL whose
// destination differs from its source base and index registers uses its own
// destination. New instructions are allocated with newprog and linked in
// after p; where p is replaced outright it is turned into a NOP with
// obj.Nopout.
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// lea and mov are the pointer-sized LEA/MOV opcodes for the target;
	// reg is the scratch register that receives the GOT entry.
	var lea, mov obj.As
	var reg int16
	if ctxt.Arch.Family == sys.AMD64 {
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
			// Special case: clobber the destination register with
			// the PC so we don't have to clobber CX.
			// The SSA backend depends on CX not being clobbered across LEAL.
			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
			reg = p.To.Reg
		}
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//     ADUFFxxx $offset
		// becomes
		//     $MOV runtime.duffxxx@GOT, $reg
		//     $LEA $offset($reg), $reg
		//     CALL $reg
		// (we use LEAx rather than ADDx because ADDx clobbers
		// flags and duffzero on 386 does not otherwise do so).
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
		} else {
			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
		}
		// Save the offset before p.To is overwritten below.
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(p, newprog)
		p1.As = lea
		p1.From.Type = obj.TYPE_MEM
		p1.From.Offset = offset
		p1.From.Reg = reg
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(p1, newprog)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		// Remember the original opcode and destination so they can be
		// re-applied to the trailing instruction when the destination is
		// not a plain register.
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if ctxt.Arch.Family == sys.AMD64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = reg
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		// q tracks the last instruction of the replacement sequence.
		q := p
		if p.From.Offset != 0 {
			// The GOT entry holds the symbol address only; apply the
			// offset with a separate LEA.
			q = obj.Appendp(p, newprog)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			// Perform the original operation from the scratch register.
			q = obj.Appendp(q, newprog)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = reg
		}
	}
	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	// source is the operand of p that names the non-local global symbol.
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
		source = &p.To
	} else {
		// No operand refers to a non-local global; nothing more to do.
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		//
		// We disable open-coded defers in buildssa() on 386 ONLY with shared
		// libraries because of this extra code added before deferreturn calls.
		//
		// computeDeferReturn in cmd/link/internal/ld/pcln.go depends
		// on the size of these instructions.
		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
			return
		}
		// p1 loads the GOT address into BX; p2 is the replacement call.
		p1 := obj.Appendp(p, newprog)
		p2 := obj.Appendp(p1, newprog)

		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		if p.RestArgs != nil {
			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
		}
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		obj.Nopout(p)
		return

	}
	// These instructions are left referring to the symbol directly.
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	// p1 loads the symbol's GOT entry into reg; p2 repeats the original
	// operation with the symbol operand replaced by an access through reg.
	p1 := obj.Appendp(p, newprog)
	p2 := obj.Appendp(p1, newprog)

	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if from3 := p.GetFrom3(); from3 != nil {
		p2.AddRestSource(*from3)
	}
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}
	// p has been superseded by p1 and p2.
	obj.Nopout(p)
}
   520  
// rewriteToPcrel rewrites p, if it refers to a symbol by name, so that the
// reference is made relative to a register loaded by a call to
// __x86.get_pc_thunk.<reg>. It is called from progedit when
// ctxt.Flag_shared is set on 386.
//
// The result is a CALL to the thunk followed by a copy of p in which every
// named operand uses the thunk's register as its base; p itself is turned
// into a NOP with obj.Nopout. New instructions are allocated with newprog
// and linked in after p.
func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// RegTo2 is set on the instructions we insert here so they don't get
	// processed twice.
	if p.RegTo2 != 0 {
		return
	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	// Any Prog (aside from the above special cases) with an Addr with Name ==
	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
	// inserted before it.
	//
	// isName reports whether a is such an operand: a memory or address
	// operand with a symbol, no base register, and one of those names.
	// Symbols of type STLSBSS are excluded.
	isName := func(a *obj.Addr) bool {
		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
			return false
		}
		if a.Sym.Type == objabi.STLSBSS {
			return false
		}
		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
	}

	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
		// respectively.
		if p.To.Type != obj.TYPE_REG {
			// q performs the original operation from CX; p becomes the
			// load of the address into CX and is processed further below.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_CX
			q.To = p.To
			p.As = AMOVL
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_CX
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
	}

	// Nothing to do unless at least one operand names a symbol.
	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
		return
	}
	// dst is the register that receives the PC from the thunk.
	var dst int16 = REG_CX
	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
		dst = p.To.Reg
		// Why? See the comment near the top of rewriteToUseGot above.
		// AMOVLs might be introduced by the GOT rewrites.
	}
	// q is the thunk call and r is the re-based copy of p.
	q := obj.Appendp(p, newprog)
	q.RegTo2 = 1
	r := obj.Appendp(q, newprog)
	r.RegTo2 = 1
	q.As = obj.ACALL
	// The thunk name ends in the lower-cased name of dst.
	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
	q.To.Type = obj.TYPE_MEM
	q.To.Name = obj.NAME_EXTERN
	r.As = p.As
	r.Scond = p.Scond
	r.From = p.From
	r.RestArgs = p.RestArgs
	r.Reg = p.Reg
	r.To = p.To
	// Use dst as the base register of every named operand in the copy.
	if isName(&p.From) {
		r.From.Reg = dst
	}
	if isName(&p.To) {
		r.To.Reg = dst
	}
	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
		r.GetFrom3().Reg = dst
	}
	obj.Nopout(p)
}
   596  
   597  // Prog.mark
   598  const (
   599  	markBit = 1 << 0 // used in errorCheck to avoid duplicate work
   600  )
   601  
   602  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
   603  	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
   604  		return
   605  	}
   606  
   607  	p := cursym.Func().Text
   608  	autoffset := int32(p.To.Offset)
   609  	if autoffset < 0 {
   610  		autoffset = 0
   611  	}
   612  
   613  	hasCall := false
   614  	for q := p; q != nil; q = q.Link {
   615  		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
   616  			hasCall = true
   617  			break
   618  		}
   619  	}
   620  
   621  	var bpsize int
   622  	if ctxt.Arch.Family == sys.AMD64 &&
   623  		!p.From.Sym.NoFrame() && // (1) below
   624  		!(autoffset == 0 && !hasCall) { // (2) below
   625  		// Make room to save a base pointer.
   626  		// There are 2 cases we must avoid:
   627  		// 1) If noframe is set (which we do for functions which tail call).
   628  		// For performance, we also want to avoid:
   629  		// 2) Frameless leaf functions
   630  		bpsize = ctxt.Arch.PtrSize
   631  		autoffset += int32(bpsize)
   632  		p.To.Offset += int64(bpsize)
   633  	} else {
   634  		bpsize = 0
   635  		p.From.Sym.Set(obj.AttrNoFrame, true)
   636  	}
   637  
   638  	textarg := int64(p.To.Val.(int32))
   639  	cursym.Func().Args = int32(textarg)
   640  	cursym.Func().Locals = int32(p.To.Offset)
   641  
   642  	// TODO(rsc): Remove.
   643  	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
   644  		cursym.Func().Locals = 0
   645  	}
   646  
   647  	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
   648  	if ctxt.Arch.Family == sys.AMD64 && autoffset < abi.StackSmall && !p.From.Sym.NoSplit() {
   649  		leaf := true
   650  	LeafSearch:
   651  		for q := p; q != nil; q = q.Link {
   652  			switch q.As {
   653  			case obj.ACALL:
   654  				// Treat common runtime calls that take no arguments
   655  				// the same as duffcopy and duffzero.
   656  				if !isZeroArgRuntimeCall(q.To.Sym) {
   657  					leaf = false
   658  					break LeafSearch
   659  				}
   660  				fallthrough
   661  			case obj.ADUFFCOPY, obj.ADUFFZERO:
   662  				if autoffset >= abi.StackSmall-8 {
   663  					leaf = false
   664  					break LeafSearch
   665  				}
   666  			}
   667  		}
   668  
   669  		if leaf {
   670  			p.From.Sym.Set(obj.AttrNoSplit, true)
   671  		}
   672  	}
   673  
   674  	var regEntryTmp0, regEntryTmp1 int16
   675  	if ctxt.Arch.Family == sys.AMD64 {
   676  		regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1
   677  	} else {
   678  		regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI
   679  	}
   680  
   681  	var regg int16
   682  	if !p.From.Sym.NoSplit() {
   683  		// Emit split check and load G register
   684  		p, regg = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
   685  	} else if p.From.Sym.Wrapper() {
   686  		// Load G register for the wrapper code
   687  		p, regg = loadG(ctxt, cursym, p, newprog)
   688  	}
   689  
   690  	if bpsize > 0 {
   691  		// Save caller's BP
   692  		p = obj.Appendp(p, newprog)
   693  
   694  		p.As = APUSHQ
   695  		p.From.Type = obj.TYPE_REG
   696  		p.From.Reg = REG_BP
   697  
   698  		// Move current frame to BP
   699  		p = obj.Appendp(p, newprog)
   700  
   701  		p.As = AMOVQ
   702  		p.From.Type = obj.TYPE_REG
   703  		p.From.Reg = REG_SP
   704  		p.To.Type = obj.TYPE_REG
   705  		p.To.Reg = REG_BP
   706  	}
   707  
   708  	if autoffset%int32(ctxt.Arch.RegSize) != 0 {
   709  		ctxt.Diag("unaligned stack size %d", autoffset)
   710  	}
   711  
   712  	// localoffset is autoffset discounting the frame pointer,
   713  	// which has already been allocated in the stack.
   714  	localoffset := autoffset - int32(bpsize)
   715  	if localoffset != 0 {
   716  		p = obj.Appendp(p, newprog)
   717  		p.As = AADJSP
   718  		p.From.Type = obj.TYPE_CONST
   719  		p.From.Offset = int64(localoffset)
   720  		p.Spadj = localoffset
   721  	}
   722  
   723  	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
   724  	// TODO: are there other cases (e.g., wrapper functions) that need marking?
   725  	if autoffset != 0 {
   726  		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
   727  	}
   728  
   729  	if cursym.Func().Text.From.Sym.Wrapper() {
   730  		// if g._panic != nil && g._panic.argp == FP {
   731  		//   g._panic.argp = bottom-of-frame
   732  		// }
   733  		//
   734  		//	MOVQ g_panic(g), regEntryTmp0
   735  		//	TESTQ regEntryTmp0, regEntryTmp0
   736  		//	JNE checkargp
   737  		// end:
   738  		//	NOP
   739  		//  ... rest of function ...
   740  		// checkargp:
   741  		//	LEAQ (autoffset+8)(SP), regEntryTmp1
   742  		//	CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   743  		//	JNE end
   744  		//  MOVQ SP, panic_argp(regEntryTmp0)
   745  		//  JMP end
   746  		//
   747  		// The NOP is needed to give the jumps somewhere to land.
   748  		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
   749  		//
   750  		// The layout is chosen to help static branch prediction:
   751  		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.
   752  
   753  		// MOVQ g_panic(g), regEntryTmp0
   754  		p = obj.Appendp(p, newprog)
   755  		p.As = AMOVQ
   756  		p.From.Type = obj.TYPE_MEM
   757  		p.From.Reg = regg
   758  		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
   759  		p.To.Type = obj.TYPE_REG
   760  		p.To.Reg = regEntryTmp0
   761  		if ctxt.Arch.Family == sys.I386 {
   762  			p.As = AMOVL
   763  		}
   764  
   765  		// TESTQ regEntryTmp0, regEntryTmp0
   766  		p = obj.Appendp(p, newprog)
   767  		p.As = ATESTQ
   768  		p.From.Type = obj.TYPE_REG
   769  		p.From.Reg = regEntryTmp0
   770  		p.To.Type = obj.TYPE_REG
   771  		p.To.Reg = regEntryTmp0
   772  		if ctxt.Arch.Family == sys.I386 {
   773  			p.As = ATESTL
   774  		}
   775  
   776  		// JNE checkargp (checkargp to be resolved later)
   777  		jne := obj.Appendp(p, newprog)
   778  		jne.As = AJNE
   779  		jne.To.Type = obj.TYPE_BRANCH
   780  
   781  		// end:
   782  		//  NOP
   783  		end := obj.Appendp(jne, newprog)
   784  		end.As = obj.ANOP
   785  
   786  		// Fast forward to end of function.
   787  		var last *obj.Prog
   788  		for last = end; last.Link != nil; last = last.Link {
   789  		}
   790  
   791  		// LEAQ (autoffset+8)(SP), regEntryTmp1
   792  		p = obj.Appendp(last, newprog)
   793  		p.As = ALEAQ
   794  		p.From.Type = obj.TYPE_MEM
   795  		p.From.Reg = REG_SP
   796  		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
   797  		p.To.Type = obj.TYPE_REG
   798  		p.To.Reg = regEntryTmp1
   799  		if ctxt.Arch.Family == sys.I386 {
   800  			p.As = ALEAL
   801  		}
   802  
   803  		// Set jne branch target.
   804  		jne.To.SetTarget(p)
   805  
   806  		// CMPQ panic_argp(regEntryTmp0), regEntryTmp1
   807  		p = obj.Appendp(p, newprog)
   808  		p.As = ACMPQ
   809  		p.From.Type = obj.TYPE_MEM
   810  		p.From.Reg = regEntryTmp0
   811  		p.From.Offset = 0 // Panic.argp
   812  		p.To.Type = obj.TYPE_REG
   813  		p.To.Reg = regEntryTmp1
   814  		if ctxt.Arch.Family == sys.I386 {
   815  			p.As = ACMPL
   816  		}
   817  
   818  		// JNE end
   819  		p = obj.Appendp(p, newprog)
   820  		p.As = AJNE
   821  		p.To.Type = obj.TYPE_BRANCH
   822  		p.To.SetTarget(end)
   823  
   824  		// MOVQ SP, panic_argp(regEntryTmp0)
   825  		p = obj.Appendp(p, newprog)
   826  		p.As = AMOVQ
   827  		p.From.Type = obj.TYPE_REG
   828  		p.From.Reg = REG_SP
   829  		p.To.Type = obj.TYPE_MEM
   830  		p.To.Reg = regEntryTmp0
   831  		p.To.Offset = 0 // Panic.argp
   832  		if ctxt.Arch.Family == sys.I386 {
   833  			p.As = AMOVL
   834  		}
   835  
   836  		// JMP end
   837  		p = obj.Appendp(p, newprog)
   838  		p.As = obj.AJMP
   839  		p.To.Type = obj.TYPE_BRANCH
   840  		p.To.SetTarget(end)
   841  
   842  		// Reset p for following code.
   843  		p = end
   844  	}
   845  
   846  	var deltasp int32
   847  	for p = cursym.Func().Text; p != nil; p = p.Link {
   848  		pcsize := ctxt.Arch.RegSize
   849  		switch p.From.Name {
   850  		case obj.NAME_AUTO:
   851  			p.From.Offset += int64(deltasp) - int64(bpsize)
   852  		case obj.NAME_PARAM:
   853  			p.From.Offset += int64(deltasp) + int64(pcsize)
   854  		}
   855  		if p.GetFrom3() != nil {
   856  			switch p.GetFrom3().Name {
   857  			case obj.NAME_AUTO:
   858  				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
   859  			case obj.NAME_PARAM:
   860  				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
   861  			}
   862  		}
   863  		switch p.To.Name {
   864  		case obj.NAME_AUTO:
   865  			p.To.Offset += int64(deltasp) - int64(bpsize)
   866  		case obj.NAME_PARAM:
   867  			p.To.Offset += int64(deltasp) + int64(pcsize)
   868  		}
   869  
   870  		switch p.As {
   871  		default:
   872  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
   873  				f := cursym.Func()
   874  				if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
   875  					f.FuncFlag |= abi.FuncFlagSPWrite
   876  					if ctxt.Debugvlog || !ctxt.IsAsm {
   877  						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
   878  						if !ctxt.IsAsm {
   879  							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
   880  							ctxt.DiagFlush()
   881  							log.Fatalf("bad SPWRITE")
   882  						}
   883  					}
   884  				}
   885  			}
   886  			continue
   887  
   888  		case APUSHL, APUSHFL:
   889  			deltasp += 4
   890  			p.Spadj = 4
   891  			continue
   892  
   893  		case APUSHQ, APUSHFQ:
   894  			deltasp += 8
   895  			p.Spadj = 8
   896  			continue
   897  
   898  		case APUSHW, APUSHFW:
   899  			deltasp += 2
   900  			p.Spadj = 2
   901  			continue
   902  
   903  		case APOPL, APOPFL:
   904  			deltasp -= 4
   905  			p.Spadj = -4
   906  			continue
   907  
   908  		case APOPQ, APOPFQ:
   909  			deltasp -= 8
   910  			p.Spadj = -8
   911  			continue
   912  
   913  		case APOPW, APOPFW:
   914  			deltasp -= 2
   915  			p.Spadj = -2
   916  			continue
   917  
   918  		case AADJSP:
   919  			p.Spadj = int32(p.From.Offset)
   920  			deltasp += int32(p.From.Offset)
   921  			continue
   922  
   923  		case obj.ARET:
   924  			// do nothing
   925  		}
   926  
   927  		if autoffset != deltasp {
   928  			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
   929  		}
   930  
   931  		if autoffset != 0 {
   932  			to := p.To // Keep To attached to RET for retjmp below
   933  			p.To = obj.Addr{}
   934  			if localoffset != 0 {
   935  				p.As = AADJSP
   936  				p.From.Type = obj.TYPE_CONST
   937  				p.From.Offset = int64(-localoffset)
   938  				p.Spadj = -localoffset
   939  				p = obj.Appendp(p, newprog)
   940  			}
   941  
   942  			if bpsize > 0 {
   943  				// Restore caller's BP
   944  				p.As = APOPQ
   945  				p.To.Type = obj.TYPE_REG
   946  				p.To.Reg = REG_BP
   947  				p.Spadj = -int32(bpsize)
   948  				p = obj.Appendp(p, newprog)
   949  			}
   950  
   951  			p.As = obj.ARET
   952  			p.To = to
   953  
   954  			// If there are instructions following
   955  			// this ARET, they come from a branch
   956  			// with the same stackframe, so undo
   957  			// the cleanup.
   958  			p.Spadj = +autoffset
   959  		}
   960  
   961  		if p.To.Sym != nil { // retjmp
   962  			p.As = obj.AJMP
   963  		}
   964  	}
   965  }
   966  
   967  func isZeroArgRuntimeCall(s *obj.LSym) bool {
   968  	if s == nil {
   969  		return false
   970  	}
   971  	switch s.Name {
   972  	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
   973  		return true
   974  	}
   975  	if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
   976  		// These functions do take arguments (in registers),
   977  		// but use no stack before they do a stack check. We
   978  		// should include them. See issue 31219.
   979  		return true
   980  	}
   981  	return false
   982  }
   983  
   984  func indir_cx(ctxt *obj.Link, a *obj.Addr) {
   985  	a.Type = obj.TYPE_MEM
   986  	a.Reg = REG_CX
   987  }
   988  
   989  // loadG ensures the G is loaded into a register (either CX or REGG),
   990  // appending instructions to p if necessary. It returns the new last
   991  // instruction and the G register.
   992  func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
   993  	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
   994  		// Use the G register directly in ABIInternal
   995  		return p, REGG
   996  	}
   997  
   998  	var regg int16 = REG_CX
   999  	if ctxt.Arch.Family == sys.AMD64 {
  1000  		regg = REGG // == REG_R14
  1001  	}
  1002  
  1003  	p = obj.Appendp(p, newprog)
  1004  	p.As = AMOVQ
  1005  	if ctxt.Arch.PtrSize == 4 {
  1006  		p.As = AMOVL
  1007  	}
  1008  	p.From.Type = obj.TYPE_MEM
  1009  	p.From.Reg = REG_TLS
  1010  	p.From.Offset = 0
  1011  	p.To.Type = obj.TYPE_REG
  1012  	p.To.Reg = regg
  1013  
  1014  	// Rewrite TLS instruction if necessary.
  1015  	next := p.Link
  1016  	progedit(ctxt, p, newprog)
  1017  	for p.Link != next {
  1018  		p = p.Link
  1019  		progedit(ctxt, p, newprog)
  1020  	}
  1021  
  1022  	if p.From.Index == REG_TLS {
  1023  		p.From.Scale = 2
  1024  	}
  1025  
  1026  	return p, regg
  1027  }
  1028  
// stacksplit appends the stack-split check for cursym after p.
// It appends to (does not overwrite) p.
//
// If ctxt.Flag_maymorestack is set, a call to that function is emitted
// first, bracketed by register-argument spill/unspill and, when the
// function needs its context register, a push/pop of REGCTXT. G is then
// loaded with loadG (callers do not need to have loaded it), and SP is
// compared against the stack guard using one of three instruction
// sequences selected by framesize. The call to runtime.morestack (or
// morestackc / morestack_noctxt) is appended after the current last
// instruction of the function and ends with a jump back to the
// instruction following the optional maymorestack sequence.
//
// textarg is not referenced in the body.
//
// It returns the instruction produced by ctxt.EndUnsafePoint after the
// conditional jump to the morestack path, and the register holding G.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) (*obj.Prog, int16) {
	// Default to the 64-bit opcodes; switch to the 32-bit forms on 386.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	tmp := int16(REG_AX) // use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	if ctxt.Flag_maymorestack != "" {
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		// Save REGCTXT across the call when the function needs it.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		// Restore REGCTXT saved above.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// (The jump targets startPred.Link, the first instruction emitted below.)
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1 is the extra underflow branch emitted only for very large frames.
	var q1 *obj.Prog
	if framesize <= abi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= abi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - abi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > abi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - abi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		// The branch target is filled in at the end, once the
		// morestack path exists.
		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	}

	// common
	// Conditional branch to the morestack path; its target is set below.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Find the current last instruction of the function.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Choose the morestack variant: morestackc for C functions,
	// morestack_noctxt when no context register is needed.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	// The instructions which unspill regs should be preemptible.
	pcdata = ctxt.EndUnsafePoint(callend, newprog, -1)
	unspill := cursym.Func().UnspillRegisterArgs(pcdata, newprog)

	// Jump back to the instruction after startPred and undo the
	// Spadj recorded on spfix.
	jmp := obj.Appendp(unspill, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize

	// Both branches into the morestack path target spill, the
	// instruction returned by StartUnsafePoint above.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end, rg
}
  1249  
  1250  func isR15(r int16) bool {
  1251  	return r == REG_R15 || r == REG_R15B
  1252  }
  1253  func addrMentionsR15(a *obj.Addr) bool {
  1254  	if a == nil {
  1255  		return false
  1256  	}
  1257  	return isR15(a.Reg) || isR15(a.Index)
  1258  }
  1259  func progMentionsR15(p *obj.Prog) bool {
  1260  	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
  1261  }
  1262  
  1263  func addrUsesGlobal(a *obj.Addr) bool {
  1264  	if a == nil {
  1265  		return false
  1266  	}
  1267  	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
  1268  }
  1269  func progUsesGlobal(p *obj.Prog) bool {
  1270  	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
  1271  		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
  1272  		// or R15 would be dead at them anyway.
  1273  		return false
  1274  	}
  1275  	if p.As == ALEAQ {
  1276  		// The GOT entry is placed directly in the destination register; R15 is not used.
  1277  		return false
  1278  	}
  1279  	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
  1280  }
  1281  
  1282  type rwMask int
  1283  
  1284  const (
  1285  	readFrom rwMask = 1 << iota
  1286  	readTo
  1287  	readReg
  1288  	readFrom3
  1289  	writeFrom
  1290  	writeTo
  1291  	writeReg
  1292  	writeFrom3
  1293  )
  1294  
  1295  // progRW returns a mask describing the effects of the instruction p.
  1296  // Note: this isn't exhaustively accurate. It is only currently used for detecting
  1297  // reads/writes to R15, so SSE register behavior isn't fully correct, and
  1298  // other weird cases (e.g. writes to DX by CLD) also aren't captured.
  1299  func progRW(p *obj.Prog) rwMask {
  1300  	var m rwMask
  1301  	// Default for most instructions
  1302  	if p.From.Type != obj.TYPE_NONE {
  1303  		m |= readFrom
  1304  	}
  1305  	if p.To.Type != obj.TYPE_NONE {
  1306  		// Most x86 instructions update the To value
  1307  		m |= readTo | writeTo
  1308  	}
  1309  	if p.Reg != 0 {
  1310  		m |= readReg
  1311  	}
  1312  	if p.GetFrom3() != nil {
  1313  		m |= readFrom3
  1314  	}
  1315  
  1316  	// Lots of exceptions to the above defaults.
  1317  	name := p.As.String()
  1318  	if strings.HasPrefix(name, "MOV") || strings.HasPrefix(name, "PMOV") {
  1319  		// MOV instructions don't read To.
  1320  		m &^= readTo
  1321  	}
  1322  	switch p.As {
  1323  	case APOPW, APOPL, APOPQ,
  1324  		ALEAL, ALEAQ,
  1325  		AIMUL3W, AIMUL3L, AIMUL3Q,
  1326  		APEXTRB, APEXTRW, APEXTRD, APEXTRQ, AVPEXTRB, AVPEXTRW, AVPEXTRD, AVPEXTRQ, AEXTRACTPS,
  1327  		ABSFW, ABSFL, ABSFQ, ABSRW, ABSRL, ABSRQ, APOPCNTW, APOPCNTL, APOPCNTQ, ALZCNTW, ALZCNTL, ALZCNTQ,
  1328  		ASHLXL, ASHLXQ, ASHRXL, ASHRXQ, ASARXL, ASARXQ:
  1329  		// These instructions are pure writes to To. They don't use its old value.
  1330  		m &^= readTo
  1331  	case AXORL, AXORQ:
  1332  		// Register-clearing idiom doesn't read previous value.
  1333  		if p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG && p.From.Reg == p.To.Reg {
  1334  			m &^= readFrom | readTo
  1335  		}
  1336  	case AMULXL, AMULXQ:
  1337  		// These are write-only to both To and From3.
  1338  		m &^= readTo | readFrom3
  1339  		m |= writeFrom3
  1340  	}
  1341  	return m
  1342  }
  1343  
  1344  // progReadsR15 reports whether p reads the register R15.
  1345  func progReadsR15(p *obj.Prog) bool {
  1346  	m := progRW(p)
  1347  	if m&readFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1348  		return true
  1349  	}
  1350  	if m&readTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1351  		return true
  1352  	}
  1353  	if m&readReg != 0 && isR15(p.Reg) {
  1354  		return true
  1355  	}
  1356  	if m&readFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1357  		return true
  1358  	}
  1359  	// reads of the index registers
  1360  	if p.From.Type == obj.TYPE_MEM && (isR15(p.From.Reg) || isR15(p.From.Index)) {
  1361  		return true
  1362  	}
  1363  	if p.To.Type == obj.TYPE_MEM && (isR15(p.To.Reg) || isR15(p.To.Index)) {
  1364  		return true
  1365  	}
  1366  	if f3 := p.GetFrom3(); f3 != nil && f3.Type == obj.TYPE_MEM && (isR15(f3.Reg) || isR15(f3.Index)) {
  1367  		return true
  1368  	}
  1369  	return false
  1370  }
  1371  
  1372  // progWritesR15 reports whether p writes the register R15.
  1373  func progWritesR15(p *obj.Prog) bool {
  1374  	m := progRW(p)
  1375  	if m&writeFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1376  		return true
  1377  	}
  1378  	if m&writeTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1379  		return true
  1380  	}
  1381  	if m&writeReg != 0 && isR15(p.Reg) {
  1382  		return true
  1383  	}
  1384  	if m&writeFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1385  		return true
  1386  	}
  1387  	return false
  1388  }
  1389  
  1390  func errorCheck(ctxt *obj.Link, s *obj.LSym) {
  1391  	// When dynamic linking, R15 is used to access globals. Reject code that
  1392  	// uses R15 after a global variable access.
  1393  	if !ctxt.Flag_dynlink {
  1394  		return
  1395  	}
  1396  
  1397  	// Flood fill all the instructions where R15's value is junk.
  1398  	// If there are any uses of R15 in that set, report an error.
  1399  	var work []*obj.Prog
  1400  	var mentionsR15 bool
  1401  	for p := s.Func().Text; p != nil; p = p.Link {
  1402  		if progUsesGlobal(p) {
  1403  			work = append(work, p)
  1404  			p.Mark |= markBit
  1405  		}
  1406  		if progMentionsR15(p) {
  1407  			mentionsR15 = true
  1408  		}
  1409  	}
  1410  	if mentionsR15 {
  1411  		for len(work) > 0 {
  1412  			p := work[len(work)-1]
  1413  			work = work[:len(work)-1]
  1414  			if progReadsR15(p) {
  1415  				pos := ctxt.PosTable.Pos(p.Pos)
  1416  				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
  1417  				break // only report one error
  1418  			}
  1419  			if progWritesR15(p) {
  1420  				// R15 is overwritten by this instruction. Its value is not junk any more.
  1421  				continue
  1422  			}
  1423  			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
  1424  				q.Mark |= markBit
  1425  				work = append(work, q)
  1426  			}
  1427  			if p.As == obj.AJMP || p.As == obj.ARET {
  1428  				continue // no fallthrough
  1429  			}
  1430  			if q := p.Link; q != nil && q.Mark&markBit == 0 {
  1431  				q.Mark |= markBit
  1432  				work = append(work, q)
  1433  			}
  1434  		}
  1435  	}
  1436  
  1437  	// Clean up.
  1438  	for p := s.Func().Text; p != nil; p = p.Link {
  1439  		p.Mark &^= markBit
  1440  	}
  1441  }
  1442  
// unaryDst is the opcode set installed as the UnaryDst field of both
// Linkamd64 and Link386.
// NOTE(review): presumably these are the one-operand instructions whose
// single operand is a destination rather than a source; confirm against
// the obj.LinkArch documentation.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDPID:      true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
  1528  
// Linkamd64 is the obj.LinkArch for sys.ArchAMD64. It wires in this
// package's instinit, errorCheck, preprocess, span6, progedit and
// populateSeh hooks, the unaryDst table, and the AMD64 DWARF register
// mapping.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	ErrorCheck:     errorCheck,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	SEH:            populateSeh,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
  1540  
// Link386 is the obj.LinkArch for sys.Arch386. It wires in this
// package's instinit, preprocess, span6 and progedit hooks, the
// unaryDst table, and the x86 DWARF register mapping. It sets no
// ErrorCheck or SEH hook.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}
  1550  

View as plain text