Source file src/cmd/internal/obj/x86/obj6.go

     1  // Inferno utils/6l/pass.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/src"
    37  	"cmd/internal/sys"
    38  	"internal/abi"
    39  	"log"
    40  	"math"
    41  	"path"
    42  	"strings"
    43  )
    44  
    45  func CanUse1InsnTLS(ctxt *obj.Link) bool {
    46  	if isAndroid {
    47  		// Android uses a global variable for the tls offset.
    48  		return false
    49  	}
    50  
    51  	if ctxt.Arch.Family == sys.I386 {
    52  		switch ctxt.Headtype {
    53  		case objabi.Hlinux,
    54  			objabi.Hplan9,
    55  			objabi.Hwindows:
    56  			return false
    57  		}
    58  
    59  		return true
    60  	}
    61  
    62  	switch ctxt.Headtype {
    63  	case objabi.Hplan9, objabi.Hwindows:
    64  		return false
    65  	case objabi.Hlinux, objabi.Hfreebsd:
    66  		return !ctxt.Flag_shared
    67  	}
    68  
    69  	return true
    70  }
    71  
        // progedit rewrites the single instruction p in place into the forms the
        // rest of this back end expects. In order, it:
        //   - converts between the 1-instruction and 2-instruction TLS access
        //     sequences (see the long comment below),
        //   - redirects loads of the TLS base to runtime.tls_g on Android and Windows,
        //   - turns a bare 0 in the last operand of CMPPS-style instructions into $0,
        //   - retypes CALL/JMP/RET symbol operands as TYPE_BRANCH,
        //   - turns MOVL/MOVQ of an address into LEAL/LEAQ,
        //   - replaces floating-point constant operands with references to
        //     constant symbols (or with XORPS when loading +0 into an X register),
        //   - applies rewriteToUseGot when ctxt.Flag_dynlink is set and
        //     rewriteToPcrel when ctxt.Flag_shared is set on 386.
        // Any extra instructions are allocated with newprog and appended after p.
    72  func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
    73  	// Thread-local storage references use the TLS pseudo-register.
    74  	// As a register, TLS refers to the thread-local storage base, and it
    75  	// can only be loaded into another register:
    76  	//
    77  	//         MOVQ TLS, AX
    78  	//
    79  	// An offset from the thread-local storage base is written off(reg)(TLS*1).
    80  	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
    81  	// indexing from the loaded TLS base. This emits a relocation so that
    82  	// if the linker needs to adjust the offset, it can. For example:
    83  	//
    84  	//         MOVQ TLS, AX
    85  	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
    86  	//
    87  	// On systems that support direct access to the TLS memory, this
    88  	// pair of instructions can be reduced to a direct TLS memory reference:
    89  	//
    90  	//         MOVQ 0(TLS), CX // load g into CX
    91  	//
    92  	// The 2-instruction and 1-instruction forms correspond to the two code
    93  	// sequences for loading a TLS variable in the local exec model given in "ELF
    94  	// Handling For Thread-Local Storage".
    95  	//
    96  	// We apply this rewrite on systems that support the 1-instruction form.
    97  	// The decision is made using only the operating system and the -shared flag,
    98  	// not the link mode. If some link modes on a particular operating system
    99  	// require the 2-instruction form, then all builds for that operating system
   100  	// will use the 2-instruction form, so that the link mode decision can be
   101  	// delayed to link time.
   102  	//
   103  	// In this way, all supported systems use identical instructions to
   104  	// access TLS, and they are rewritten appropriately first here in
   105  	// liblink and then finally using relocations in the linker.
   106  	//
   107  	// When -shared is passed, we leave the code in the 2-instruction form but
   108  	// assemble (and relocate) them in different ways to generate the initial
   109  	// exec code sequence. It's a bit of a fluke that this is possible without
   110  	// rewriting the instructions more comprehensively, and it only works because
   111  	// we only support a single TLS variable (g).
   112
   113  	if CanUse1InsnTLS(ctxt) {
   114  		// Reduce 2-instruction sequence to 1-instruction sequence.
   115  		// Sequences like
   116  		//	MOVQ TLS, BX
   117  		//	... off(BX)(TLS*1) ...
   118  		// become
   119  		//	NOP
   120  		//	... off(TLS) ...
   121  		//
   122  		// TODO(rsc): Remove the Hsolaris special case. It exists only to
   123  		// guarantee we are producing byte-identical binaries as before this code.
   124  		// But it should be unnecessary.
   125  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
        		// The load of the TLS base into a general register is dropped.
   126  			obj.Nopout(p)
   127  		}
        		// A source operand of the form off(reg)(TLS*1) becomes off(TLS).
   128  		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
   129  			p.From.Reg = REG_TLS
   130  			p.From.Scale = 0
   131  			p.From.Index = REG_NONE
   132  		}
   133
        		// Same rewrite for a destination operand.
   134  		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   135  			p.To.Reg = REG_TLS
   136  			p.To.Scale = 0
   137  			p.To.Index = REG_NONE
   138  		}
   139  	} else {
   140  		// loadG, below, always inserts the 1-instruction sequence. Rewrite it
   141  		// as the 2-instruction sequence if necessary.
   142  		//	MOVQ 0(TLS), BX
   143  		// becomes
   144  		//	MOVQ TLS, BX
   145  		//	MOVQ 0(BX)(TLS*1), BX
   146  		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
        			// q is the second instruction: it keeps p's original offset and
        			// destination, but addresses through the destination register
        			// with TLS as the index.
   147  			q := obj.Appendp(p, newprog)
   148  			q.As = p.As
   149  			q.From = p.From
   150  			q.From.Type = obj.TYPE_MEM
   151  			q.From.Reg = p.To.Reg
   152  			q.From.Index = REG_TLS
   153  			q.From.Scale = 2 // TODO: use 1
   154  			q.To = p.To
        			// p itself becomes the first instruction: MOV TLS, reg.
   155  			p.From.Type = obj.TYPE_REG
   156  			p.From.Reg = REG_TLS
   157  			p.From.Index = REG_NONE
   158  			p.From.Offset = 0
   159  		}
   160  	}
   161
   162  	// Android and Windows use a tls offset determined at runtime. Rewrite
   163  	//	MOVQ TLS, BX
   164  	// to
   165  	//	MOVQ runtime.tls_g(SB), BX
   166  	if (isAndroid || ctxt.Headtype == objabi.Hwindows) &&
   167  		(p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
   168  		p.From.Type = obj.TYPE_MEM
   169  		p.From.Name = obj.NAME_EXTERN
   170  		p.From.Reg = REG_NONE
   171  		p.From.Sym = ctxt.Lookup("runtime.tls_g")
   172  		p.From.Index = REG_NONE
   173  		if ctxt.Headtype == objabi.Hwindows {
   174  			// Windows requires an additional indirection
   175  			// to retrieve the TLS pointer,
   176  			// as runtime.tls_g contains the TLS offset from GS or FS.
   177  			// On AMD64 add
   178  			//	MOVQ 0(BX)(GS*1), BX
   179  			// on 386 add
   180  			//	MOVL 0(BX)(FS*1), BX
        			// The appended instruction reuses p's opcode and destination.
   181  			q := obj.Appendp(p, newprog)
   182  			q.As = p.As
   183  			q.From = obj.Addr{}
   184  			q.From.Type = obj.TYPE_MEM
   185  			q.From.Reg = p.To.Reg
   186  			if ctxt.Arch.Family == sys.AMD64 {
   187  				q.From.Index = REG_GS
   188  			} else {
   189  				q.From.Index = REG_FS
   190  			}
   191  			q.From.Scale = 1
   192  			q.From.Offset = 0
   193  			q.To = p.To
   194  		}
   195  	}
   196
   197  	// TODO: Remove.
        	// On windows/amd64 and on plan9, an operand indexed by (TLS*1) is
        	// changed to use scale 2.
        	// NOTE(review): scale 2 looks like a marker value for TLS-indexed
        	// operands (compare "TODO: use 1" above) — confirm against the
        	// assembler before removing this.
   198  	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
   199  		if p.From.Scale == 1 && p.From.Index == REG_TLS {
   200  			p.From.Scale = 2
   201  		}
   202  		if p.To.Scale == 1 && p.To.Index == REG_TLS {
   203  			p.To.Scale = 2
   204  		}
   205  	}
   206
   207  	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
   208  	// That's what the tables expect.
   209  	switch p.As {
   210  	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
        		// A memory operand with no name, register, index or symbol is
        		// just a bare number; retype it as a constant.
   211  		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
   212  			p.To.Type = obj.TYPE_CONST
   213  		}
   214  	}
   215
   216  	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
   217  	switch p.As {
   218  	case obj.ACALL, obj.AJMP, obj.ARET:
   219  		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
   220  			p.To.Type = obj.TYPE_BRANCH
   221  		}
   222  	}
   223
   224  	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
        	// On AMD64 every address operand is converted; on 386 addresses of
        	// NAME_EXTERN and NAME_STATIC symbols are left as MOV.
   225  	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
   226  		switch p.As {
   227  		case AMOVL:
   228  			p.As = ALEAL
   229  			p.From.Type = obj.TYPE_MEM
   230  		case AMOVQ:
   231  			p.As = ALEAQ
   232  			p.From.Type = obj.TYPE_MEM
   233  		}
   234  	}
   235
   236  	// Rewrite float constants to values stored in memory.
   237  	switch p.As {
   238  	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
   239  	case AMOVSS, AVMOVSS:
   240  		if p.From.Type == obj.TYPE_FCONST {
   241  			//  f == 0 can't be used here due to -0, so use Float64bits
   242  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   243  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   244  					p.As = AXORPS
   245  					p.From = p.To
   246  					break
   247  				}
   248  			}
   249  		}
        		// Not a +0 load into X0..X15: handle like the other
        		// single-precision instructions below.
   250  		fallthrough
   251
   252  	case AFMOVF,
   253  		AFADDF,
   254  		AFSUBF,
   255  		AFSUBRF,
   256  		AFMULF,
   257  		AFDIVF,
   258  		AFDIVRF,
   259  		AFCOMF,
   260  		AFCOMFP,
   261  		AADDSS,
   262  		ASUBSS,
   263  		AMULSS,
   264  		ADIVSS,
   265  		ACOMISS,
   266  		AUCOMISS:
        		// Replace the constant with a reference to the symbol returned
        		// by ctxt.Float32Sym for its float32 value.
   267  		if p.From.Type == obj.TYPE_FCONST {
   268  			f32 := float32(p.From.Val.(float64))
   269  			p.From.Type = obj.TYPE_MEM
   270  			p.From.Name = obj.NAME_EXTERN
   271  			p.From.Sym = ctxt.Float32Sym(f32)
   272  			p.From.Offset = 0
   273  		}
   274
   275  	case AMOVSD, AVMOVSD:
   276  		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
   277  		if p.From.Type == obj.TYPE_FCONST {
   278  			//  f == 0 can't be used here due to -0, so use Float64bits
   279  			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
   280  				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
   281  					p.As = AXORPS
   282  					p.From = p.To
   283  					break
   284  				}
   285  			}
   286  		}
        		// Not a +0 load into X0..X15: handle like the other
        		// double-precision instructions below.
   287  		fallthrough
   288
   289  	case AFMOVD,
   290  		AFADDD,
   291  		AFSUBD,
   292  		AFSUBRD,
   293  		AFMULD,
   294  		AFDIVD,
   295  		AFDIVRD,
   296  		AFCOMD,
   297  		AFCOMDP,
   298  		AADDSD,
   299  		ASUBSD,
   300  		AMULSD,
   301  		ADIVSD,
   302  		ACOMISD,
   303  		AUCOMISD:
        		// Replace the constant with a reference to the symbol returned
        		// by ctxt.Float64Sym.
   304  		if p.From.Type == obj.TYPE_FCONST {
   305  			f64 := p.From.Val.(float64)
   306  			p.From.Type = obj.TYPE_MEM
   307  			p.From.Name = obj.NAME_EXTERN
   308  			p.From.Sym = ctxt.Float64Sym(f64)
   309  			p.From.Offset = 0
   310  		}
   311  	}
   312
        	// The GOT and PC-relative rewrites run last, on the instruction as
        	// rewritten above.
   313  	if ctxt.Flag_dynlink {
   314  		rewriteToUseGot(ctxt, p, newprog)
   315  	}
   316
   317  	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
   318  		rewriteToPcrel(ctxt, p, newprog)
   319  	}
   320  }
   321  
   322  // Rewrite p, if necessary, to access global data via the global offset table.
        //
        // A reference to a non-local NAME_EXTERN symbol is replaced by a load of
        // the symbol's GOT entry (NAME_GOTREF) into a scratch register, followed
        // where needed by a copy of the original instruction that goes through
        // that register. The scratch register is R15 on AMD64 and CX on 386,
        // except that a 386 LEAL reuses its own destination register when that
        // register is not also part of the source operand. Extra instructions
        // are allocated with newprog and appended after p; an instruction that
        // has been replaced is turned into a no-op with obj.Nopout.
   323  func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
        	// lea/mov are the pointer-sized LEA and MOV opcodes for the target;
        	// reg is the scratch register used for the GOT load.
   324  	var lea, mov obj.As
   325  	var reg int16
   326  	if ctxt.Arch.Family == sys.AMD64 {
   327  		lea = ALEAQ
   328  		mov = AMOVQ
   329  		reg = REG_R15
   330  	} else {
   331  		lea = ALEAL
   332  		mov = AMOVL
   333  		reg = REG_CX
   334  		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   335  			// Special case: clobber the destination register with
   336  			// the PC so we don't have to clobber CX.
   337  			// The SSA backend depends on CX not being clobbered across LEAL.
   338  			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
   339  			reg = p.To.Reg
   340  		}
   341  	}
   342
   343  	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
   344  		//     ADUFFxxx $offset
   345  		// becomes
   346  		//     $MOV runtime.duffxxx@GOT, $reg
   347  		//     $LEA $offset($reg), $reg
   348  		//     CALL $reg
   349  		// (we use LEAx rather than ADDx because ADDx clobbers
   350  		// flags and duffzero on 386 does not otherwise do so).
   351  		var sym *obj.LSym
   352  		if p.As == obj.ADUFFZERO {
   353  			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
   354  		} else {
   355  			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
   356  		}
        		// Save the entry offset before p.To is overwritten.
   357  		offset := p.To.Offset
   358  		p.As = mov
   359  		p.From.Type = obj.TYPE_MEM
   360  		p.From.Name = obj.NAME_GOTREF
   361  		p.From.Sym = sym
   362  		p.To.Type = obj.TYPE_REG
   363  		p.To.Reg = reg
   364  		p.To.Offset = 0
   365  		p.To.Sym = nil
   366  		p1 := obj.Appendp(p, newprog)
   367  		p1.As = lea
   368  		p1.From.Type = obj.TYPE_MEM
   369  		p1.From.Offset = offset
   370  		p1.From.Reg = reg
   371  		p1.To.Type = obj.TYPE_REG
   372  		p1.To.Reg = reg
   373  		p2 := obj.Appendp(p1, newprog)
   374  		p2.As = obj.ACALL
   375  		p2.To.Type = obj.TYPE_REG
   376  		p2.To.Reg = reg
        		// There is no return here: the checks below still run on the
        		// rewritten p.
   377  	}
   378
   379  	// We only care about global data: NAME_EXTERN means a global
   380  	// symbol in the Go sense, and Sym.Local() is true for a few
   381  	// internally defined symbols.
   382  	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   383  		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
   384  		p.As = mov
   385  		p.From.Type = obj.TYPE_ADDR
   386  	}
   387  	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   388  		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
   389  		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
   390  		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
        		// cmplxdest records that p was not a plain "MOV $sym, reg"; in
        		// that case the original opcode (pAs) and destination (dest) are
        		// replayed from the scratch register after the GOT load.
   391  		cmplxdest := false
   392  		pAs := p.As
   393  		var dest obj.Addr
   394  		if p.To.Type != obj.TYPE_REG || pAs != mov {
   395  			if ctxt.Arch.Family == sys.AMD64 {
   396  				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
   397  			}
   398  			cmplxdest = true
   399  			dest = p.To
   400  			p.As = mov
   401  			p.To.Type = obj.TYPE_REG
   402  			p.To.Reg = reg
   403  			p.To.Sym = nil
   404  			p.To.Name = obj.NAME_NONE
   405  		}
   406  		p.From.Type = obj.TYPE_MEM
   407  		p.From.Name = obj.NAME_GOTREF
        		// q tracks the last instruction emitted so far.
   408  		q := p
   409  		if p.From.Offset != 0 {
        			// Move the offset out of the GOT load into a following LEA.
   410  			q = obj.Appendp(p, newprog)
   411  			q.As = lea
   412  			q.From.Type = obj.TYPE_MEM
   413  			q.From.Reg = p.To.Reg
   414  			q.From.Offset = p.From.Offset
   415  			q.To = p.To
   416  			p.From.Offset = 0
   417  		}
   418  		if cmplxdest {
   419  			q = obj.Appendp(q, newprog)
   420  			q.As = pAs
   421  			q.To = dest
   422  			q.From.Type = obj.TYPE_REG
   423  			q.From.Reg = reg
   424  		}
   425  	}
        	// Of the extra operands in RestArgs, only the one returned by
        	// GetFrom3 may refer to a non-local global symbol.
   426  	from3 := p.GetFrom3()
   427  	for i := range p.RestArgs {
   428  		a := &p.RestArgs[i].Addr
   429  		if a != from3 && a.Name == obj.NAME_EXTERN && !a.Sym.Local() {
   430  			ctxt.Diag("don't know how to handle %v with -dynlink", p)
   431  		}
   432  	}
        	// source is the single operand that names a non-local global symbol;
        	// more than one such operand is diagnosed as an error.
   433  	var source *obj.Addr
   434  	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
   435  	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
   436  	// An addition may be inserted between the two MOVs if there is an offset.
        	// NOTE(review): no separate addition is emitted below; an offset on
        	// the operand is carried over unchanged into the copied instruction.
   437  	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
   438  		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   439  			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
   440  		}
   441  		if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() {
   442  			ctxt.Diag("cannot handle NAME_EXTERN on multiple operands in %v with -dynlink", p)
   443  		}
   444  		source = &p.From
   445  	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
   446  		if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() {
   447  			ctxt.Diag("cannot handle NAME_EXTERN on multiple operands in %v with -dynlink", p)
   448  		}
   449  		source = &p.To
   450  	} else if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() {
   451  		source = from3
   452  	} else {
        		// No operand refers to a non-local global symbol.
   453  		return
   454  	}
   455  	if p.As == obj.ACALL {
   456  		// When dynlinking on 386, almost any call might end up being a call
   457  		// to a PLT, so make sure the GOT pointer is loaded into BX.
   458  		// RegTo2 is set on the replacement call insn to stop it being
   459  		// processed when it is in turn passed to progedit.
   460  		//
   461  		// We disable open-coded defers in buildssa() on 386 ONLY with shared
   462  		// libraries because of this extra code added before deferreturn calls.
   463  		//
   464  		// computeDeferReturn in cmd/link/internal/ld/pcln.go depends
   465  		// on the size of these instructions.
   466  		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
   467  			return
   468  		}
        		// p1 loads the address of _GLOBAL_OFFSET_TABLE_ into BX; p2 is a
        		// copy of the original call. p itself becomes a no-op.
   469  		p1 := obj.Appendp(p, newprog)
   470  		p2 := obj.Appendp(p1, newprog)
   471
   472  		p1.As = ALEAL
   473  		p1.From.Type = obj.TYPE_MEM
   474  		p1.From.Name = obj.NAME_STATIC
   475  		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
   476  		p1.To.Type = obj.TYPE_REG
   477  		p1.To.Reg = REG_BX
   478
   479  		p2.As = p.As
   480  		p2.Scond = p.Scond
   481  		p2.From = p.From
   482  		if p.RestArgs != nil {
   483  			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
   484  		}
   485  		p2.Reg = p.Reg
   486  		p2.To = p.To
   487  		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
   488  		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
   489  		// itself gets passed to progedit.
   490  		p2.To.Type = obj.TYPE_MEM
   491  		p2.RegTo2 = 1
   492
   493  		obj.Nopout(p)
   494  		return
   495
   496  	}
        	// These instructions are left alone even though they name a global.
   497  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
   498  		return
   499  	}
   500  	if source.Type != obj.TYPE_MEM {
   501  		ctxt.Diag("don't know how to handle %v with -dynlink", p)
   502  	}
        	// General case: p1 loads the symbol's GOT entry into reg, and p2 is
        	// a copy of p whose symbol operand is replaced by a reference
        	// through reg.
   503  	p1 := obj.Appendp(p, newprog)
   504  	p2 := obj.Appendp(p1, newprog)
   505
   506  	p1.As = mov
   507  	p1.From.Type = obj.TYPE_MEM
   508  	p1.From.Sym = source.Sym
   509  	p1.From.Name = obj.NAME_GOTREF
   510  	p1.To.Type = obj.TYPE_REG
   511  	p1.To.Reg = reg
   512
   513  	p2.As = p.As
   514  	p2.From = p.From
   515  	p2.To = p.To
        	// p2 shares p's RestArgs slice rather than copying it.
   516  	p2.RestArgs = p.RestArgs
        	// Note that these tests look only at Name, in the same order in
        	// which source was chosen above; they do not re-check Sym.Local().
   517  	if p.From.Name == obj.NAME_EXTERN {
   518  		p2.From.Reg = reg
   519  		p2.From.Name = obj.NAME_NONE
   520  		p2.From.Sym = nil
   521  	} else if p.To.Name == obj.NAME_EXTERN {
   522  		p2.To.Reg = reg
   523  		p2.To.Name = obj.NAME_NONE
   524  		p2.To.Sym = nil
   525  	} else if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
   526  		from3 = p2.GetFrom3()
   527  		from3.Reg = reg
   528  		from3.Name = obj.NAME_NONE
   529  		from3.Sym = nil
   530  	} else {
   531  		return
   532  	}
   533  	obj.Nopout(p)
   534  }
   535  
   536  func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
   537  	// RegTo2 is set on the instructions we insert here so they don't get
   538  	// processed twice.
   539  	if p.RegTo2 != 0 {
   540  		return
   541  	}
   542  	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
   543  		return
   544  	}
   545  	// Any Prog (aside from the above special cases) with an Addr with Name ==
   546  	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
   547  	// inserted before it.
   548  	isName := func(a *obj.Addr) bool {
   549  		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
   550  			return false
   551  		}
   552  		if a.Sym.Type == objabi.STLSBSS {
   553  			return false
   554  		}
   555  		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
   556  	}
   557  
   558  	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
   559  		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
   560  		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
   561  		// respectively.
   562  		if p.To.Type != obj.TYPE_REG {
   563  			q := obj.Appendp(p, newprog)
   564  			q.As = p.As
   565  			q.From.Type = obj.TYPE_REG
   566  			q.From.Reg = REG_CX
   567  			q.To = p.To
   568  			p.As = AMOVL
   569  			p.To.Type = obj.TYPE_REG
   570  			p.To.Reg = REG_CX
   571  			p.To.Sym = nil
   572  			p.To.Name = obj.NAME_NONE
   573  		}
   574  	}
   575  
   576  	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
   577  		return
   578  	}
   579  	var dst int16 = REG_CX
   580  	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
   581  		dst = p.To.Reg
   582  		// Why? See the comment near the top of rewriteToUseGot above.
   583  		// AMOVLs might be introduced by the GOT rewrites.
   584  	}
   585  	q := obj.Appendp(p, newprog)
   586  	q.RegTo2 = 1
   587  	r := obj.Appendp(q, newprog)
   588  	r.RegTo2 = 1
   589  	q.As = obj.ACALL
   590  	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
   591  	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
   592  	q.To.Type = obj.TYPE_MEM
   593  	q.To.Name = obj.NAME_EXTERN
   594  	r.As = p.As
   595  	r.Scond = p.Scond
   596  	r.From = p.From
   597  	r.RestArgs = p.RestArgs
   598  	r.Reg = p.Reg
   599  	r.To = p.To
   600  	if isName(&p.From) {
   601  		r.From.Reg = dst
   602  	}
   603  	if isName(&p.To) {
   604  		r.To.Reg = dst
   605  	}
   606  	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
   607  		r.GetFrom3().Reg = dst
   608  	}
   609  	obj.Nopout(p)
   610  }
   611  
   612  // Flag bits for Prog.mark.
        // NOTE(review): presumably this refers to the Mark field of obj.Prog — confirm.
   613  const (
   614  	markBit = 1 << 0 // used in errorCheck to avoid duplicate work
   615  )
   616  
        // preprocess adds the function prologue and epilogue to cursym and fixes
        // up stack-relative operands. It:
        //   - decides whether to reserve a slot for the saved base pointer (AMD64),
        //   - records the argument and local sizes on the function,
        //   - marks small leaf-like AMD64 functions NOSPLIT,
        //   - emits the stack-split check unless the function is NOSPLIT,
        //   - emits the BP save and the SP adjustment,
        //   - walks every instruction, adjusting NAME_AUTO and NAME_PARAM offsets,
        //     recording Spadj on instructions that move SP, and expanding each RET
        //     into the frame teardown followed by RET (or JMP for a tail call).
        // Functions with no TEXT instruction, or nothing after it, are left alone.
        // New instructions are allocated with newprog.
   617  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
   618  	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
   619  		return
   620  	}
   621
        	// p starts at the TEXT instruction; its To.Offset holds the declared
        	// frame size. A negative size is treated as 0 here.
   622  	p := cursym.Func().Text
   623  	autoffset := int32(p.To.Offset)
   624  	if autoffset < 0 {
   625  		autoffset = 0
   626  	}
   627
        	// hasCall is true if the body contains a CALL, DUFFCOPY or DUFFZERO.
   628  	hasCall := false
   629  	for q := p; q != nil; q = q.Link {
   630  		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
   631  			hasCall = true
   632  			break
   633  		}
   634  	}
   635
        	// bpsize is the number of bytes reserved for the saved base pointer
        	// (0 when no frame pointer is kept).
   636  	var bpsize int
   637  	if ctxt.Arch.Family == sys.AMD64 &&
   638  		!p.From.Sym.NoFrame() && // (1) below
   639  		!(autoffset == 0 && !hasCall) { // (2) below
   640  		// Make room to save a base pointer.
   641  		// There is one case we must avoid:
   642  		// 1) If noframe is set (which we do for functions which tail call).
   643  		// For performance, we also want to avoid:
   644  		// 2) Frameless leaf functions
   645  		bpsize = ctxt.Arch.PtrSize
   646  		autoffset += int32(bpsize)
   647  		p.To.Offset += int64(bpsize)
   648  	} else {
   649  		bpsize = 0
   650  		p.From.Sym.Set(obj.AttrNoFrame, true)
   651  	}
   652
        	// The argument size comes from the TEXT instruction's To.Val; the
        	// local size is the (possibly enlarged) frame size.
   653  	textarg := int64(p.To.Val.(int32))
   654  	cursym.Func().Args = int32(textarg)
   655  	cursym.Func().Locals = int32(p.To.Offset)
   656
   657  	// TODO(rsc): Remove.
   658  	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
   659  		cursym.Func().Locals = 0
   660  	}
   661
   662  	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
        	// A function with a frame smaller than abi.StackSmall is marked
        	// NOSPLIT automatically if it makes no calls other than the ones
        	// accepted below.
   663  	if ctxt.Arch.Family == sys.AMD64 && autoffset < abi.StackSmall && !p.From.Sym.NoSplit() {
   664  		leaf := true
   665  	LeafSearch:
   666  		for q := p; q != nil; q = q.Link {
   667  			switch q.As {
   668  			case obj.ACALL:
   669  				// Treat common runtime calls that take no arguments
   670  				// the same as duffcopy and duffzero.
   671
   672  				// Note that of these functions, panicBounds does
   673  				// use some stack, but its stack together with the
   674  				// < StackSmall used by this function is still
   675  				// less than stackNosplit. See issue 31219.
   676  				if !isZeroArgRuntimeCall(q.To.Sym) {
   677  					leaf = false
   678  					break LeafSearch
   679  				}
   680  				fallthrough
   681  			case obj.ADUFFCOPY, obj.ADUFFZERO:
        				// NOTE(review): the 8 bytes of slack are presumably
        				// for the return address pushed by the call — confirm.
   682  				if autoffset >= abi.StackSmall-8 {
   683  					leaf = false
   684  					break LeafSearch
   685  				}
   686  			}
   687  		}
   688
   689  		if leaf {
   690  			p.From.Sym.Set(obj.AttrNoSplit, true)
   691  		}
   692  	}
   693
   694  	if !p.From.Sym.NoSplit() {
   695  		// Emit split check.
   696  		p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
   697  	}
   698
   699  	if bpsize > 0 {
   700  		// Save caller's BP
   701  		p = obj.Appendp(p, newprog)
   702
   703  		p.As = APUSHQ
   704  		p.From.Type = obj.TYPE_REG
   705  		p.From.Reg = REG_BP
   706
   707  		// Move current frame to BP
   708  		p = obj.Appendp(p, newprog)
   709
   710  		p.As = AMOVQ
   711  		p.From.Type = obj.TYPE_REG
   712  		p.From.Reg = REG_SP
   713  		p.To.Type = obj.TYPE_REG
   714  		p.To.Reg = REG_BP
   715  	}
   716
   717  	if autoffset%int32(ctxt.Arch.RegSize) != 0 {
   718  		ctxt.Diag("unaligned stack size %d", autoffset)
   719  	}
   720
   721  	// localoffset is autoffset discounting the frame pointer,
   722  	// which has already been allocated in the stack.
   723  	localoffset := autoffset - int32(bpsize)
   724  	if localoffset != 0 {
   725  		p = obj.Appendp(p, newprog)
   726  		p.As = AADJSP
   727  		p.From.Type = obj.TYPE_CONST
   728  		p.From.Offset = int64(localoffset)
   729  		p.Spadj = localoffset
   730  	}
   731
   732  	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
   733  	// TODO: are there other cases (e.g., wrapper functions) that need marking?
   734  	if autoffset != 0 {
   735  		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
   736  	}
   737
        	// Walk the whole function in program order. deltasp is the running
        	// total of SP adjustments made by the instructions seen so far
        	// (pushes, pops and ADJSP, including the prologue emitted above).
   738  	var deltasp int32
   739  	for p = cursym.Func().Text; p != nil; p = p.Link {
        		// NAME_AUTO operands are shifted by deltasp minus the saved-BP
        		// slot; NAME_PARAM operands by deltasp plus pcsize.
        		// NOTE(review): pcsize presumably accounts for the return
        		// address — confirm.
   740  		pcsize := ctxt.Arch.RegSize
   741  		switch p.From.Name {
   742  		case obj.NAME_AUTO:
   743  			p.From.Offset += int64(deltasp) - int64(bpsize)
   744  		case obj.NAME_PARAM:
   745  			p.From.Offset += int64(deltasp) + int64(pcsize)
   746  		}
   747  		if p.GetFrom3() != nil {
   748  			switch p.GetFrom3().Name {
   749  			case obj.NAME_AUTO:
   750  				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
   751  			case obj.NAME_PARAM:
   752  				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
   753  			}
   754  		}
   755  		switch p.To.Name {
   756  		case obj.NAME_AUTO:
   757  			p.To.Offset += int64(deltasp) - int64(bpsize)
   758  		case obj.NAME_PARAM:
   759  			p.To.Offset += int64(deltasp) + int64(pcsize)
   760  		}
   761
        		// Every case except RET ends in continue, so only RET reaches
        		// the epilogue code after this switch.
   762  		switch p.As {
   763  		default:
        			// Any other instruction that writes SP as a register
        			// destination (CMPL/CMPQ excluded) flags the function as
        			// SPWRITE. Outside assembly input this is a fatal error.
   764  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
   765  				f := cursym.Func()
   766  				if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
   767  					f.FuncFlag |= abi.FuncFlagSPWrite
   768  					if ctxt.Debugvlog || !ctxt.IsAsm {
   769  						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
   770  						if !ctxt.IsAsm {
   771  							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
   772  							ctxt.DiagFlush()
   773  							log.Fatalf("bad SPWRITE")
   774  						}
   775  					}
   776  				}
   777  			}
   778  			continue
   779
   780  		case APUSHL, APUSHFL:
   781  			deltasp += 4
   782  			p.Spadj = 4
   783  			continue
   784
   785  		case APUSHQ, APUSHFQ:
   786  			deltasp += 8
   787  			p.Spadj = 8
   788  			continue
   789
   790  		case APUSHW, APUSHFW:
   791  			deltasp += 2
   792  			p.Spadj = 2
   793  			continue
   794
   795  		case APOPL, APOPFL:
   796  			deltasp -= 4
   797  			p.Spadj = -4
   798  			continue
   799
   800  		case APOPQ, APOPFQ:
   801  			deltasp -= 8
   802  			p.Spadj = -8
   803  			continue
   804
   805  		case APOPW, APOPFW:
   806  			deltasp -= 2
   807  			p.Spadj = -2
   808  			continue
   809
   810  		case AADJSP:
   811  			p.Spadj = int32(p.From.Offset)
   812  			deltasp += int32(p.From.Offset)
   813  			continue
   814
   815  		case obj.ARET:
   816  			// do nothing
   817  		}
   818
        		// At a RET the accumulated adjustment must equal the frame size.
   819  		if autoffset != deltasp {
   820  			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
   821  		}
   822
        		// Expand RET into: ADJSP -localoffset (if any locals); POPQ BP
        		// (if BP was saved); RET. The original RET instruction is
        		// reused for the first of these, and the rest are appended.
   823  		if autoffset != 0 {
   824  			to := p.To // Keep To attached to RET for retjmp below
   825  			p.To = obj.Addr{}
   826  			if localoffset != 0 {
   827  				p.As = AADJSP
   828  				p.From.Type = obj.TYPE_CONST
   829  				p.From.Offset = int64(-localoffset)
   830  				p.Spadj = -localoffset
   831  				p = obj.Appendp(p, newprog)
   832  			}
   833
   834  			if bpsize > 0 {
   835  				// Restore caller's BP
   836  				p.As = APOPQ
   837  				p.To.Type = obj.TYPE_REG
   838  				p.To.Reg = REG_BP
   839  				p.Spadj = -int32(bpsize)
   840  				p = obj.Appendp(p, newprog)
   841  			}
   842
   843  			p.As = obj.ARET
   844  			p.To = to
   845
   846  			// If there are instructions following
   847  			// this ARET, they come from a branch
   848  			// with the same stackframe, so undo
   849  			// the cleanup.
   850  			p.Spadj = +autoffset
   851  		}
   852
        		// A RET with a target symbol is a tail call: emit it as JMP.
   853  		if p.To.Sym != nil { // retjmp
   854  			p.As = obj.AJMP
   855  		}
   856  	}
   857  }
   858  
   859  func isZeroArgRuntimeCall(s *obj.LSym) bool {
   860  	if s == nil {
   861  		return false
   862  	}
   863  	switch s.Name {
   864  	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift", "runtime.panicBounds", "runtime.panicExtend":
   865  		return true
   866  	}
   867  	return false
   868  }
   869  
   870  // loadG ensures the G is loaded into a register (either CX or REGG),
   871  // appending instructions to p if necessary. It returns the new last
   872  // instruction and the G register.
   873  func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
   874  	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
   875  		// Use the G register directly in ABIInternal
   876  		return p, REGG
   877  	}
   878  
   879  	var regg int16 = REG_CX
   880  	if ctxt.Arch.Family == sys.AMD64 {
   881  		regg = REGG // == REG_R14
   882  	}
   883  
   884  	p = obj.Appendp(p, newprog)
   885  	p.As = AMOVQ
   886  	if ctxt.Arch.PtrSize == 4 {
   887  		p.As = AMOVL
   888  	}
   889  	p.From.Type = obj.TYPE_MEM
   890  	p.From.Reg = REG_TLS
   891  	p.From.Offset = 0
   892  	p.To.Type = obj.TYPE_REG
   893  	p.To.Reg = regg
   894  
   895  	// Rewrite TLS instruction if necessary.
   896  	next := p.Link
   897  	progedit(ctxt, p, newprog)
   898  	for p.Link != next {
   899  		p = p.Link
   900  		progedit(ctxt, p, newprog)
   901  	}
   902  
   903  	if p.From.Index == REG_TLS {
   904  		p.From.Scale = 2
   905  	}
   906  
   907  	return p, regg
   908  }
   909  
// stacksplit appends code to p to check for stack split.
// It appends to (does not overwrite) p.
//
// The G pointer is obtained by calling loadG, which may itself append a
// load instruction. framesize selects one of three forms of the stack
// bound comparison (see the branches on abi.StackSmall and abi.StackBig
// below). textarg is not referenced in the body.
//
// The slow path (spill register arguments, call morestack, unspill, jump
// back to the check) is appended after the current last instruction of
// the function rather than after p.
//
// It returns the last instruction of the inline sequence, i.e. the value
// returned by EndUnsafePoint for the conditional jump.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) *obj.Prog {
	// Opcodes default to the 64-bit forms and are replaced by the
	// 32-bit forms on 386 below.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	tmp := int16(REG_AX) // use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	if ctxt.Flag_maymorestack != "" {
		// Emit a call to the hook named by Flag_maymorestack, with the
		// register arguments spilled around it and the context register
		// saved on the stack when the function needs it.
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// startPred is the instruction before the check; the retry jump at
	// the end of this function targets startPred.Link.
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1 is the extra overflow branch emitted only in the huge-frame
	// case; it stays nil otherwise.
	var q1 *obj.Prog
	if framesize <= abi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= abi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - abi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > abi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - abi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p // target is filled in once the slow path exists

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	}

	// common
	// Conditional branch to the slow path; its target is set at the
	// bottom of this function.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Find the current last instruction of the function.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	// Choose the morestack variant: morestackc for CFunc symbols,
	// morestack_noctxt when the function does not need a context
	// register, and plain morestack otherwise.
	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	// The instructions which unspill regs should be preemptible.
	pcdata = ctxt.EndUnsafePoint(callend, newprog, -1)
	unspill := cursym.Func().UnspillRegisterArgs(pcdata, newprog)

	// After morestack returns, jump back to the first instruction
	// emitted after startPred and redo the check. The Spadj here is the
	// opposite of the one recorded on spfix.
	jmp := obj.Appendp(unspill, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize

	// Both branches out of the inline check enter the slow path at the
	// start of the unsafe-point region that precedes the spill.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end
}
  1130  
  1131  func isR15(r int16) bool {
  1132  	return r == REG_R15 || r == REG_R15B
  1133  }
  1134  func addrMentionsR15(a *obj.Addr) bool {
  1135  	if a == nil {
  1136  		return false
  1137  	}
  1138  	return isR15(a.Reg) || isR15(a.Index)
  1139  }
  1140  func progMentionsR15(p *obj.Prog) bool {
  1141  	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
  1142  }
  1143  
  1144  func addrUsesGlobal(a *obj.Addr) bool {
  1145  	if a == nil {
  1146  		return false
  1147  	}
  1148  	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
  1149  }
  1150  func progUsesGlobal(p *obj.Prog) bool {
  1151  	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
  1152  		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
  1153  		// or R15 would be dead at them anyway.
  1154  		return false
  1155  	}
  1156  	if p.As == ALEAQ {
  1157  		// The GOT entry is placed directly in the destination register; R15 is not used.
  1158  		return false
  1159  	}
  1160  	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
  1161  }
  1162  
// rwMask is a bit set describing which operands of an instruction are
// read and which are written. It is produced by progRW.
type rwMask int

// Each bit refers to one operand slot of an obj.Prog: From, To, Reg, or
// the third operand returned by GetFrom3.
const (
	readFrom rwMask = 1 << iota // the From operand is read
	readTo                      // the To operand is read
	readReg                     // the Reg operand is read
	readFrom3                   // the third operand is read
	writeFrom                   // the From operand is written
	writeTo                     // the To operand is written
	writeReg                    // the Reg operand is written
	writeFrom3                  // the third operand is written
)
  1175  
  1176  // progRW returns a mask describing the effects of the instruction p.
  1177  // Note: this isn't exhaustively accurate. It is only currently used for detecting
  1178  // reads/writes to R15, so SSE register behavior isn't fully correct, and
  1179  // other weird cases (e.g. writes to DX by CLD) also aren't captured.
  1180  func progRW(p *obj.Prog) rwMask {
  1181  	var m rwMask
  1182  	// Default for most instructions
  1183  	if p.From.Type != obj.TYPE_NONE {
  1184  		m |= readFrom
  1185  	}
  1186  	if p.To.Type != obj.TYPE_NONE {
  1187  		// Most x86 instructions update the To value
  1188  		m |= readTo | writeTo
  1189  	}
  1190  	if p.Reg != 0 {
  1191  		m |= readReg
  1192  	}
  1193  	if p.GetFrom3() != nil {
  1194  		m |= readFrom3
  1195  	}
  1196  
  1197  	// Lots of exceptions to the above defaults.
  1198  	name := p.As.String()
  1199  	if strings.HasPrefix(name, "MOV") || strings.HasPrefix(name, "PMOV") {
  1200  		// MOV instructions don't read To.
  1201  		m &^= readTo
  1202  	}
  1203  	switch p.As {
  1204  	case APOPW, APOPL, APOPQ,
  1205  		ALEAL, ALEAQ,
  1206  		AIMUL3W, AIMUL3L, AIMUL3Q,
  1207  		APEXTRB, APEXTRW, APEXTRD, APEXTRQ, AVPEXTRB, AVPEXTRW, AVPEXTRD, AVPEXTRQ, AEXTRACTPS,
  1208  		ABSFW, ABSFL, ABSFQ, ABSRW, ABSRL, ABSRQ, APOPCNTW, APOPCNTL, APOPCNTQ, ALZCNTW, ALZCNTL, ALZCNTQ,
  1209  		ASHLXL, ASHLXQ, ASHRXL, ASHRXQ, ASARXL, ASARXQ:
  1210  		// These instructions are pure writes to To. They don't use its old value.
  1211  		m &^= readTo
  1212  	case AXORL, AXORQ:
  1213  		// Register-clearing idiom doesn't read previous value.
  1214  		if p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG && p.From.Reg == p.To.Reg {
  1215  			m &^= readFrom | readTo
  1216  		}
  1217  	case AMULXL, AMULXQ:
  1218  		// These are write-only to both To and From3.
  1219  		m &^= readTo | readFrom3
  1220  		m |= writeFrom3
  1221  	}
  1222  	return m
  1223  }
  1224  
  1225  // progReadsR15 reports whether p reads the register R15.
  1226  func progReadsR15(p *obj.Prog) bool {
  1227  	m := progRW(p)
  1228  	if m&readFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1229  		return true
  1230  	}
  1231  	if m&readTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1232  		return true
  1233  	}
  1234  	if m&readReg != 0 && isR15(p.Reg) {
  1235  		return true
  1236  	}
  1237  	if m&readFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1238  		return true
  1239  	}
  1240  	// reads of the index registers
  1241  	if p.From.Type == obj.TYPE_MEM && (isR15(p.From.Reg) || isR15(p.From.Index)) {
  1242  		return true
  1243  	}
  1244  	if p.To.Type == obj.TYPE_MEM && (isR15(p.To.Reg) || isR15(p.To.Index)) {
  1245  		return true
  1246  	}
  1247  	if f3 := p.GetFrom3(); f3 != nil && f3.Type == obj.TYPE_MEM && (isR15(f3.Reg) || isR15(f3.Index)) {
  1248  		return true
  1249  	}
  1250  	return false
  1251  }
  1252  
  1253  // progWritesR15 reports whether p writes the register R15.
  1254  func progWritesR15(p *obj.Prog) bool {
  1255  	m := progRW(p)
  1256  	if m&writeFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
  1257  		return true
  1258  	}
  1259  	if m&writeTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
  1260  		return true
  1261  	}
  1262  	if m&writeReg != 0 && isR15(p.Reg) {
  1263  		return true
  1264  	}
  1265  	if m&writeFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
  1266  		return true
  1267  	}
  1268  	return false
  1269  }
  1270  
  1271  func errorCheck(ctxt *obj.Link, s *obj.LSym) {
  1272  	// When dynamic linking, R15 is used to access globals. Reject code that
  1273  	// uses R15 after a global variable access.
  1274  	if !ctxt.Flag_dynlink {
  1275  		return
  1276  	}
  1277  
  1278  	// Flood fill all the instructions where R15's value is junk.
  1279  	// If there are any uses of R15 in that set, report an error.
  1280  	var work []*obj.Prog
  1281  	var mentionsR15 bool
  1282  	for p := s.Func().Text; p != nil; p = p.Link {
  1283  		if progUsesGlobal(p) {
  1284  			work = append(work, p)
  1285  			p.Mark |= markBit
  1286  		}
  1287  		if progMentionsR15(p) {
  1288  			mentionsR15 = true
  1289  		}
  1290  	}
  1291  	if mentionsR15 {
  1292  		for len(work) > 0 {
  1293  			p := work[len(work)-1]
  1294  			work = work[:len(work)-1]
  1295  			if progReadsR15(p) {
  1296  				pos := ctxt.PosTable.Pos(p.Pos)
  1297  				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
  1298  				break // only report one error
  1299  			}
  1300  			if progWritesR15(p) {
  1301  				// R15 is overwritten by this instruction. Its value is not junk any more.
  1302  				continue
  1303  			}
  1304  			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
  1305  				q.Mark |= markBit
  1306  				work = append(work, q)
  1307  			}
  1308  			if p.As == obj.AJMP || p.As == obj.ARET {
  1309  				continue // no fallthrough
  1310  			}
  1311  			if q := p.Link; q != nil && q.Mark&markBit == 0 {
  1312  				q.Mark |= markBit
  1313  				work = append(work, q)
  1314  			}
  1315  		}
  1316  	}
  1317  
  1318  	// Clean up.
  1319  	for p := s.Func().Text; p != nil; p = p.Link {
  1320  		p.Mark &^= markBit
  1321  	}
  1322  }
  1323  
// unaryDst is a set of opcodes, keyed by obj.As. It is installed as the
// UnaryDst field of both Linkamd64 and Link386.
// NOTE(review): presumably these are the single-operand instructions
// whose operand is a destination rather than a source; confirm against
// the obj.LinkArch documentation.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDPID:      true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
  1409  
// Linkamd64 is the obj.LinkArch for sys.ArchAMD64. In addition to the
// hooks it shares with Link386, it installs errorCheck as ErrorCheck and
// populateSeh as SEH.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	ErrorCheck:     errorCheck,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	SEH:            populateSeh,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
  1421  
// Link386 is the obj.LinkArch for sys.Arch386. It uses the same Init,
// Preprocess, Assemble, Progedit and UnaryDst hooks as Linkamd64 but
// sets no ErrorCheck or SEH hook.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}
  1431  

View as plain text