Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/reflectdata"
    12  	"cmd/compile/internal/rttype"
    13  	"cmd/compile/internal/typecheck"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/s390x"
    17  	"cmd/internal/objabi"
    18  	"cmd/internal/src"
    19  	"encoding/binary"
    20  	"fmt"
    21  	"internal/buildcfg"
    22  	"io"
    23  	"math"
    24  	"math/bits"
    25  	"os"
    26  	"path/filepath"
    27  	"strings"
    28  )
    29  
    30  type deadValueChoice bool
    31  
    32  const (
    33  	leaveDeadValues  deadValueChoice = false
    34  	removeDeadValues                 = true
    35  
    36  	repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
    37  	repMoveThreshold = 1408 // size beyond which we use REP MOVS for copying
    38  )
    39  
    40  // applyRewrite repeatedly applies rb and rv to f until no further rewrites occur.
        // deadcode indicates whether rewrite should try to remove any values that become dead.
    41  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    42  	// repeat rewrites until we find no more rewrites
    43  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    44  	pendingLines.clear()
    45  	debug := f.pass.debug
    46  	if debug > 1 {
    47  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    48  	}
    49  	// if the number of rewrite iterations reaches itersLimit we will
    50  	// at that point turn on cycle detection. Instead of a fixed limit,
    51  	// size the limit according to func size to allow for cases such
    52  	// as the one in issue #66773.
    53  	itersLimit := f.NumBlocks()
    54  	if itersLimit < 20 {
    55  		itersLimit = 20
    56  	}
    57  	var iters int
    58  	var states map[string]bool
    59  	for {
    60  		change := false
    61  		deadChange := false
    62  		for _, b := range f.Blocks {
    63  			var b0 *Block
    64  			if debug > 1 {
    65  				b0 = new(Block)
    66  				*b0 = *b
    67  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    68  			}
    69  			for i, c := range b.ControlValues() {
    70  				for c.Op == OpCopy {
    71  					c = c.Args[0]
    72  					b.ReplaceControl(i, c)
    73  				}
    74  			}
    75  			if rb(b) {
    76  				change = true
    77  				if debug > 1 {
    78  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    79  				}
    80  			}
    81  			for j, v := range b.Values {
    82  				var v0 *Value
    83  				if debug > 1 {
    84  					v0 = new(Value)
    85  					*v0 = *v
    86  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    87  				}
    88  				if v.Uses == 0 && v.removeable() {
    89  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    90  						// Reset any values that are now unused, so that we decrement
    91  					// the use counts of all of their arguments.
    92  						// Not quite a deadcode pass, because it does not handle cycles.
    93  						// But it should help Uses==1 rules to fire.
    94  						v.reset(OpInvalid)
    95  						deadChange = true
    96  					}
    97  					// No point rewriting values which aren't used.
    98  					continue
    99  				}
   100  
   101  				vchange := phielimValue(v)
   102  				if vchange && debug > 1 {
   103  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   104  				}
   105  
   106  				// Eliminate copy inputs.
   107  				// If any copy input becomes unused, mark it
   108  				// as invalid and discard its argument. Repeat
   109  				// recursively on the discarded argument.
   110  				// This phase helps remove phantom "dead copy" uses
   111  				// of a value so that a x.Uses==1 rule condition
   112  				// fires reliably.
   113  				for i, a := range v.Args {
   114  					if a.Op != OpCopy {
   115  						continue
   116  					}
   117  					aa := copySource(a)
   118  					v.SetArg(i, aa)
   119  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   120  					// to hold it.  The first candidate is the value that will replace a (aa),
   121  					// if it shares the same block and line and is eligible.
   122  					// The second option is v, which has a as an input.  Because aa is earlier in
   123  					// the data flow, it is the better choice.
   124  					if a.Pos.IsStmt() == src.PosIsStmt {
   125  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   126  							aa.Pos = aa.Pos.WithIsStmt()
   127  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   128  							v.Pos = v.Pos.WithIsStmt()
   129  						} else {
   130  							// Record the lost line and look for a new home after all rewrites are complete.
   131  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   132  							// line to appear in more than one block, but only one block is stored, so if both end
   133  							// up here, then one will be lost.
   134  							pendingLines.set(a.Pos, int32(a.Block.ID))
   135  						}
   136  						a.Pos = a.Pos.WithNotStmt()
   137  					}
   138  					vchange = true
   139  					for a.Uses == 0 {
   140  						b := a.Args[0]
   141  						a.reset(OpInvalid)
   142  						a = b
   143  					}
   144  				}
   145  				if vchange && debug > 1 {
   146  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   147  				}
   148  
   149  				// apply rewrite function
   150  				if rv(v) {
   151  					vchange = true
   152  					// If value changed to a poor choice for a statement boundary, move the boundary
   153  					if v.Pos.IsStmt() == src.PosIsStmt {
   154  						if k := nextGoodStatementIndex(v, j, b); k != j {
   155  							v.Pos = v.Pos.WithNotStmt()
   156  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   157  						}
   158  					}
   159  				}
   160  
   161  				change = change || vchange
   162  				if vchange && debug > 1 {
   163  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   164  				}
   165  			}
   166  		}
   167  		if !change && !deadChange {
   168  			break
   169  		}
   170  		iters++
   171  		if (iters > itersLimit || debug >= 2) && change {
   172  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   173  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   174  			// and the maximum value encountered during make.bash is 12.
   175  			// Start checking for cycles. (This is too expensive to do routinely.)
   176  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   177  			if states == nil {
   178  				states = make(map[string]bool)
   179  			}
   180  			h := f.rewriteHash()
   181  			if _, ok := states[h]; ok {
   182  				// We've found a cycle.
   183  				// To diagnose it, set debug to 2 and start again,
   184  				// so that we'll print all rules applied until we complete another cycle.
   185  				// If debug is already >= 2, we've already done that, so it's time to crash.
   186  				if debug < 2 {
   187  					debug = 2
   188  					states = make(map[string]bool)
   189  				} else {
   190  					f.Fatalf("rewrite cycle detected")
   191  				}
   192  			}
   193  			states[h] = true
   194  		}
   195  	}
   196  	// remove clobbered values
   197  	for _, b := range f.Blocks {
   198  		j := 0
   199  		for i, v := range b.Values {
   200  			vl := v.Pos
   201  			if v.Op == OpInvalid {
   202  				if v.Pos.IsStmt() == src.PosIsStmt {
   203  					pendingLines.set(vl, int32(b.ID))
   204  				}
   205  				f.freeValue(v)
   206  				continue
   207  			}
   208  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   209  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   210  					pendingLines.remove(vl)
   211  					v.Pos = v.Pos.WithIsStmt()
   212  				}
   213  			}
   214  			if i != j {
   215  				b.Values[j] = v
   216  			}
   217  			j++
   218  		}
   219  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   220  			b.Pos = b.Pos.WithIsStmt()
   221  			pendingLines.remove(b.Pos)
   222  		}
   223  		b.truncateValues(j)
   224  	}
   225  }
   226  
   227  // Common functions called from rewriting rules
   228  
   229  func is64BitFloat(t *types.Type) bool {
   230  	return t.Size() == 8 && t.IsFloat()
   231  }
   232  
   233  func is32BitFloat(t *types.Type) bool {
   234  	return t.Size() == 4 && t.IsFloat()
   235  }
   236  
   237  func is64BitInt(t *types.Type) bool {
   238  	return t.Size() == 8 && t.IsInteger()
   239  }
   240  
   241  func is32BitInt(t *types.Type) bool {
   242  	return t.Size() == 4 && t.IsInteger()
   243  }
   244  
   245  func is16BitInt(t *types.Type) bool {
   246  	return t.Size() == 2 && t.IsInteger()
   247  }
   248  
   249  func is8BitInt(t *types.Type) bool {
   250  	return t.Size() == 1 && t.IsInteger()
   251  }
   252  
   253  func isPtr(t *types.Type) bool {
   254  	return t.IsPtrShaped()
   255  }
   256  
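        // copyCompatibleType reports whether t1 and t2 have the same size and are
        // compatible for copying: any two integer types match, any two pointer-shaped
        // types match, and all other types must be identical.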
   257  func copyCompatibleType(t1, t2 *types.Type) bool {
   258  	if t1.Size() != t2.Size() {
   259  		return false
   260  	}
   261  	if t1.IsInteger() {
   262  		return t2.IsInteger()
   263  	}
   264  	if isPtr(t1) {
   265  		return isPtr(t2)
   266  	}
   267  	return t1.Compare(t2) == types.CMPeq
   268  }
   269  
   270  // mergeSym merges two symbolic offsets. There is no real merging of
   271  // offsets; we just pick the non-nil one.
   272  func mergeSym(x, y Sym) Sym {
   273  	if x == nil {
   274  		return y
   275  	}
   276  	if y == nil {
   277  		return x
   278  	}
   279  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   280  }
   281  
   282  func canMergeSym(x, y Sym) bool {
   283  	return x == nil || y == nil
   284  }
   285  
   286  // canMergeLoadClobber reports whether the load can be merged into target without
   287  // invalidating the schedule.
   288  // It also checks that the other non-load argument x is something we
   289  // are ok with clobbering.
   290  func canMergeLoadClobber(target, load, x *Value) bool {
   291  	// The register containing x is going to get clobbered.
   292  	// Don't merge if we still need the value of x.
   293  	// We don't have liveness information here, but we can
   294  	// approximate x dying with:
   295  	//  1) target is x's only use.
   296  	//  2) target is not in a deeper loop than x.
   297  	switch {
   298  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   299  		// This is a simple detector to determine that x is probably
   300  		// not live after target. (It does not need to be perfect,
   301  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   302  		// We have:
   303  		//   x = Phi(?, target)
   304  		//   target = Op(load, x)
   305  		// Because target has only one use as a Phi argument, we can schedule it
   306  		// very late. Hopefully, later than the other use of x. (The other use died
   307  		// between x and target, or exists on another branch entirely).
   308  	case x.Uses > 1:
   309  		return false
   310  	}
   311  	loopnest := x.Block.Func.loopnest()
   312  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   313  		return false
   314  	}
   315  	return canMergeLoad(target, load)
   316  }
   317  
   318  // canMergeLoad reports whether the load can be merged into target without
   319  // invalidating the schedule.
   320  func canMergeLoad(target, load *Value) bool {
   321  	if target.Block.ID != load.Block.ID {
   322  		// If the load is in a different block do not merge it.
   323  		return false
   324  	}
   325  
   326  	// We can't merge the load into the target if the load
   327  	// has more than one use.
   328  	if load.Uses != 1 {
   329  		return false
   330  	}
   331  
   332  	mem := load.MemoryArg()
   333  
   334  	// We need the load's memory arg to still be alive at target. That
   335  	// can't be the case if one of target's args depends on a memory
   336  	// state that is a successor of load's memory arg.
   337  	//
   338  	// For example, it would be invalid to merge load into target in
   339  	// the following situation because newmem has killed oldmem
   340  	// before target is reached:
   341  	//     load = read ... oldmem
   342  	//   newmem = write ... oldmem
   343  	//     arg0 = read ... newmem
   344  	//   target = add arg0 load
   345  	//
   346  	// If the argument comes from a different block then we can exclude
   347  	// it immediately because it must dominate load (which is in the
   348  	// same block as target).
   349  	var args []*Value
   350  	for _, a := range target.Args {
   351  		if a != load && a.Block.ID == target.Block.ID {
   352  			args = append(args, a)
   353  		}
   354  	}
   355  
   356  	// memPreds contains memory states known to be predecessors of load's
   357  	// memory state. It is lazily initialized.
   358  	var memPreds map[*Value]bool
   359  	for i := 0; len(args) > 0; i++ {
   360  		const limit = 100
   361  		if i >= limit {
   362  			// Give up if we have done a lot of iterations.
   363  			return false
   364  		}
   365  		v := args[len(args)-1]
   366  		args = args[:len(args)-1]
   367  		if target.Block.ID != v.Block.ID {
   368  			// Since target and load are in the same block
   369  			// we can stop searching when we leave the block.
   370  			continue
   371  		}
   372  		if v.Op == OpPhi {
   373  			// A Phi implies we have reached the top of the block.
   374  			// The memory phi, if it exists, is always
   375  			// the first logical store in the block.
   376  			continue
   377  		}
   378  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   379  			// We could handle this situation; however, it is likely
   380  			// to be very rare.
   381  			return false
   382  		}
   383  		if v.Op.SymEffect()&SymAddr != 0 {
   384  			// This case prevents an operation that calculates the
   385  			// address of a local variable from being forced to schedule
   386  			// before its corresponding VarDef.
   387  			// See issue 28445.
   388  			//   v1 = LOAD ...
   389  			//   v2 = VARDEF
   390  			//   v3 = LEAQ
   391  			//   v4 = CMPQ v1 v3
   392  			// We don't want to combine the CMPQ with the load, because
   393  			// that would force the CMPQ to schedule before the VARDEF, which
   394  			// in turn requires the LEAQ to schedule before the VARDEF.
   395  			return false
   396  		}
   397  		if v.Type.IsMemory() {
   398  			if memPreds == nil {
   399  				// Initialise a map containing memory states
   400  				// known to be predecessors of load's memory
   401  				// state.
   402  				memPreds = make(map[*Value]bool)
   403  				m := mem
   404  				const limit = 50
   405  				for i := 0; i < limit; i++ {
   406  					if m.Op == OpPhi {
   407  						// The memory phi, if it exists, is always
   408  						// the first logical store in the block.
   409  						break
   410  					}
   411  					if m.Block.ID != target.Block.ID {
   412  						break
   413  					}
   414  					if !m.Type.IsMemory() {
   415  						break
   416  					}
   417  					memPreds[m] = true
   418  					if len(m.Args) == 0 {
   419  						break
   420  					}
   421  					m = m.MemoryArg()
   422  				}
   423  			}
   424  
   425  			// We can merge if v is a predecessor of mem.
   426  			//
   427  			// For example, we can merge load into target in the
   428  			// following scenario:
   429  			//      x = read ... v
   430  			//    mem = write ... v
   431  			//   load = read ... mem
   432  			// target = add x load
   433  			if memPreds[v] {
   434  				continue
   435  			}
   436  			return false
   437  		}
   438  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   439  			// If v takes mem as an input then we know mem
   440  			// is valid at this point.
   441  			continue
   442  		}
   443  		for _, a := range v.Args {
   444  			if target.Block.ID == a.Block.ID {
   445  				args = append(args, a)
   446  			}
   447  		}
   448  	}
   449  
   450  	return true
   451  }
   452  
   453  // isSameCall reports whether aux describes a call to the function with the given name.
   454  func isSameCall(aux Aux, name string) bool {
   455  	fn := aux.(*AuxCall).Fn
   456  	return fn != nil && fn.String() == name
   457  }
   458  
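        // isMalloc reports whether aux describes a call to one of the runtime's
        // memory allocation entry points (runtime.newobject or a specialized
        // mallocgc variant, see isSpecializedMalloc below).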
   459  func isMalloc(aux Aux) bool {
   460  	return isNewObject(aux) || isSpecializedMalloc(aux)
   461  }
   462  
   463  func isNewObject(aux Aux) bool {
   464  	fn := aux.(*AuxCall).Fn
   465  	return fn != nil && fn.String() == "runtime.newobject"
   466  }
   467  
   468  func isSpecializedMalloc(aux Aux) bool {
   469  	fn := aux.(*AuxCall).Fn
   470  	if fn == nil {
   471  		return false
   472  	}
   473  	name := fn.String()
   474  	return strings.HasPrefix(name, "runtime.mallocgcSmallNoScanSC") ||
   475  		strings.HasPrefix(name, "runtime.mallocgcSmallScanNoHeaderSC") ||
   476  		strings.HasPrefix(name, "runtime.mallocTiny")
   477  }
   478  
   479  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   480  func canLoadUnaligned(c *Config) bool {
   481  	return c.ctxt.Arch.Alignment == 1
   482  }
   483  
   484  // nlzX returns the number of leading zeros.
   485  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   486  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   487  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   488  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   489  
   490  // ntzX returns the number of trailing zeros.
   491  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   492  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   493  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   494  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   495  
   496  // oneBit reports whether x contains exactly one set bit.
   497  func oneBit[T int8 | int16 | int32 | int64](x T) bool {
   498  	return x&(x-1) == 0 && x != 0
   499  }
   500  
   501  // nto returns the number of trailing ones.
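        // For example, nto(0b0111) == 3 and nto(-1) == 64.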
   502  func nto(x int64) int64 {
   503  	return int64(ntz64(^x))
   504  }
   505  
   506  // logX returns the base-2 logarithm of n.
   507  // n must be a positive power of 2 (isPowerOfTwo reports true).
   508  func log8(n int8) int64   { return log8u(uint8(n)) }
   509  func log16(n int16) int64 { return log16u(uint16(n)) }
   510  func log32(n int32) int64 { return log32u(uint32(n)) }
   511  func log64(n int64) int64 { return log64u(uint64(n)) }
   512  
   513  // logXu returns the base-2 logarithm of n.
   514  // n must be a power of 2 (isUnsignedPowerOfTwo reports true).
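        // For example, log8u(8) == 3 and log64u(1) == 0.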
   515  func log8u(n uint8) int64   { return int64(bits.Len8(n)) - 1 }
   516  func log16u(n uint16) int64 { return int64(bits.Len16(n)) - 1 }
   517  func log32u(n uint32) int64 { return int64(bits.Len32(n)) - 1 }
   518  func log64u(n uint64) int64 { return int64(bits.Len64(n)) - 1 }
   519  
   520  // isPowerOfTwo reports whether n is a power of 2.
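        // For example, isPowerOfTwo(int64(8)) is true, while isPowerOfTwo(int64(0))
        // and isPowerOfTwo(int64(-8)) are false.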
   521  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   522  	return n > 0 && n&(n-1) == 0
   523  }
   524  
   525  // isUnsignedPowerOfTwo reports whether n is an unsigned power of 2.
   526  func isUnsignedPowerOfTwo[T uint8 | uint16 | uint32 | uint64](n T) bool {
   527  	return n != 0 && n&(n-1) == 0
   528  }
   529  
   530  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   531  func is32Bit(n int64) bool {
   532  	return n == int64(int32(n))
   533  }
   534  
   535  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   536  func is16Bit(n int64) bool {
   537  	return n == int64(int16(n))
   538  }
   539  
   540  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   541  func is8Bit(n int64) bool {
   542  	return n == int64(int8(n))
   543  }
   544  
   545  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   546  func isU8Bit(n int64) bool {
   547  	return n == int64(uint8(n))
   548  }
   549  
   550  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   551  func is12Bit(n int64) bool {
   552  	return -(1<<11) <= n && n < (1<<11)
   553  }
   554  
   555  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   556  func isU12Bit(n int64) bool {
   557  	return 0 <= n && n < (1<<12)
   558  }
   559  
   560  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   561  func isU16Bit(n int64) bool {
   562  	return n == int64(uint16(n))
   563  }
   564  
   565  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   566  func isU32Bit(n int64) bool {
   567  	return n == int64(uint32(n))
   568  }
   569  
   570  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   571  func is20Bit(n int64) bool {
   572  	return -(1<<19) <= n && n < (1<<19)
   573  }
   574  
   575  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   576  func b2i(b bool) int64 {
   577  	if b {
   578  		return 1
   579  	}
   580  	return 0
   581  }
   582  
   583  // b2i32 translates a boolean value to 0 or 1.
   584  func b2i32(b bool) int32 {
   585  	if b {
   586  		return 1
   587  	}
   588  	return 0
   589  }
   590  
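        // canMulStrengthReduce reports whether config has a recipe for strength-reducing
        // a multiplication by the constant x (see mulStrengthReduce below).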
   591  func canMulStrengthReduce(config *Config, x int64) bool {
   592  	_, ok := config.mulRecipes[x]
   593  	return ok
   594  }
   595  func canMulStrengthReduce32(config *Config, x int32) bool {
   596  	_, ok := config.mulRecipes[int64(x)]
   597  	return ok
   598  }
   599  
   600  // mulStrengthReduce returns v*x evaluated at the location
   601  // (block and source position) of m.
   602  // canMulStrengthReduce must have returned true.
   603  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   604  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   605  }
   606  
   607  // mulStrengthReduce32 returns v*x evaluated at the location
   608  // (block and source position) of m.
   609  // canMulStrengthReduce32 must have returned true.
   610  // The upper 32 bits of m might be set to junk.
   611  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   612  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   613  }
   614  
   615  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   616  // A shift is bounded if it is shifting by less than the width of the shifted value.
   617  func shiftIsBounded(v *Value) bool {
   618  	return v.AuxInt != 0
   619  }
   620  
   621  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   622  // generated code as much as possible.
   623  func canonLessThan(x, y *Value) bool {
   624  	if x.Op != y.Op {
   625  		return x.Op < y.Op
   626  	}
   627  	if !x.Pos.SameFileAndLine(y.Pos) {
   628  		return x.Pos.Before(y.Pos)
   629  	}
   630  	return x.ID < y.ID
   631  }
   632  
   633  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   634  // of the mantissa. It will panic if the truncation results in lost information.
   635  func truncate64Fto32F(f float64) float32 {
   636  	if !isExactFloat32(f) {
   637  		panic("truncate64Fto32F: truncation is not exact")
   638  	}
   639  	if !math.IsNaN(f) {
   640  		return float32(f)
   641  	}
   642  	// NaN bit patterns aren't necessarily preserved across conversion
   643  	// instructions so we need to do the conversion manually.
   644  	b := math.Float64bits(f)
   645  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   646  	//          | sign                  | exponent   | mantissa       |
   647  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   648  	return math.Float32frombits(r)
   649  }
   650  
   651  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   652  func DivisionNeedsFixUp(v *Value) bool {
   653  	return v.AuxInt == 0
   654  }
   655  
   656  // auxTo32F decodes a float32 from the AuxInt value provided.
   657  func auxTo32F(i int64) float32 {
   658  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   659  }
   660  
   661  func auxIntToBool(i int64) bool {
   662  	if i == 0 {
   663  		return false
   664  	}
   665  	return true
   666  }
   667  func auxIntToInt8(i int64) int8 {
   668  	return int8(i)
   669  }
   670  func auxIntToInt16(i int64) int16 {
   671  	return int16(i)
   672  }
   673  func auxIntToInt32(i int64) int32 {
   674  	return int32(i)
   675  }
   676  func auxIntToInt64(i int64) int64 {
   677  	return i
   678  }
   679  func auxIntToUint8(i int64) uint8 {
   680  	return uint8(i)
   681  }
   682  func auxIntToFloat32(i int64) float32 {
   683  	return float32(math.Float64frombits(uint64(i)))
   684  }
   685  func auxIntToFloat64(i int64) float64 {
   686  	return math.Float64frombits(uint64(i))
   687  }
   688  func auxIntToValAndOff(i int64) ValAndOff {
   689  	return ValAndOff(i)
   690  }
   691  func auxIntToArm64BitField(i int64) arm64BitField {
   692  	return arm64BitField(i)
   693  }
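        // auxIntToArm64ConditionalParams unpacks an auxInt packed by
        // arm64ConditionalParamsToAuxInt: bits 0-15 hold cond, bits 16-19 nzcv,
        // bits 20-24 constValue, and bit 25 ind.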
   694  func auxIntToArm64ConditionalParams(i int64) arm64ConditionalParams {
   695  	var params arm64ConditionalParams
   696  	params.cond = Op(i & 0xffff)
   697  	i >>= 16
   698  	params.nzcv = uint8(i & 0x0f)
   699  	i >>= 4
   700  	params.constValue = uint8(i & 0x1f)
   701  	i >>= 5
   702  	params.ind = i == 1
   703  	return params
   704  }
   705  func auxIntToFlagConstant(x int64) flagConstant {
   706  	return flagConstant(x)
   707  }
   708  
   709  func auxIntToOp(cc int64) Op {
   710  	return Op(cc)
   711  }
   712  
   713  func boolToAuxInt(b bool) int64 {
   714  	if b {
   715  		return 1
   716  	}
   717  	return 0
   718  }
   719  func int8ToAuxInt(i int8) int64 {
   720  	return int64(i)
   721  }
   722  func int16ToAuxInt(i int16) int64 {
   723  	return int64(i)
   724  }
   725  func int32ToAuxInt(i int32) int64 {
   726  	return int64(i)
   727  }
   728  func int64ToAuxInt(i int64) int64 {
   729  	return i
   730  }
   731  func uint8ToAuxInt(i uint8) int64 {
   732  	return int64(int8(i))
   733  }
   734  func float32ToAuxInt(f float32) int64 {
   735  	return int64(math.Float64bits(float64(f)))
   736  }
   737  func float64ToAuxInt(f float64) int64 {
   738  	return int64(math.Float64bits(f))
   739  }
   740  func valAndOffToAuxInt(v ValAndOff) int64 {
   741  	return int64(v)
   742  }
   743  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   744  	return int64(v)
   745  }
   746  func arm64ConditionalParamsToAuxInt(v arm64ConditionalParams) int64 {
   747  	if v.cond&^0xffff != 0 {
   748  		panic("condition value exceeds 16 bits")
   749  	}
   750  
   751  	var i int64
   752  	if v.ind {
   753  		i = 1 << 25
   754  	}
   755  	i |= int64(v.constValue) << 20
   756  	i |= int64(v.nzcv) << 16
   757  	i |= int64(v.cond)
   758  	return i
   759  }
   760  
   761  func float64ExactBits(f float64, c float64) bool {
   762  	return math.Float64bits(f) == math.Float64bits(c)
   763  }
   764  
   765  func flagConstantToAuxInt(x flagConstant) int64 {
   766  	return int64(x)
   767  }
   768  
   769  func opToAuxInt(o Op) int64 {
   770  	return int64(o)
   771  }
   772  
   773  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   774  type Aux interface {
   775  	CanBeAnSSAAux()
   776  }
   777  
   778  // auxMark is a marker Aux value, for now only used to mark moves that need to avoid clobbering flags.
   779  type auxMark bool
   780  
   781  func (auxMark) CanBeAnSSAAux() {}
   782  
   783  var AuxMark auxMark
   784  
   785  // stringAux wraps string values for use in Aux.
   786  type stringAux string
   787  
   788  func (stringAux) CanBeAnSSAAux() {}
   789  
   790  func auxToString(i Aux) string {
   791  	return string(i.(stringAux))
   792  }
   793  func auxToSym(i Aux) Sym {
   794  	// TODO: kind of a hack - allows nil interface through
   795  	s, _ := i.(Sym)
   796  	return s
   797  }
   798  func auxToType(i Aux) *types.Type {
   799  	return i.(*types.Type)
   800  }
   801  func auxToCall(i Aux) *AuxCall {
   802  	return i.(*AuxCall)
   803  }
   804  func auxToS390xCCMask(i Aux) s390x.CCMask {
   805  	return i.(s390x.CCMask)
   806  }
   807  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   808  	return i.(s390x.RotateParams)
   809  }
   810  
   811  func StringToAux(s string) Aux {
   812  	return stringAux(s)
   813  }
   814  func symToAux(s Sym) Aux {
   815  	return s
   816  }
   817  func callToAux(s *AuxCall) Aux {
   818  	return s
   819  }
   820  func typeToAux(t *types.Type) Aux {
   821  	return t
   822  }
   823  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   824  	return c
   825  }
   826  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   827  	return r
   828  }
   829  
   830  // uaddOvf reports whether unsigned a+b would overflow.
   831  func uaddOvf(a, b int64) bool {
   832  	return uint64(a)+uint64(b) < uint64(a)
   833  }
   834  
   835  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   836  	v.Op = OpStaticLECall
   837  	auxcall := v.Aux.(*AuxCall)
   838  	auxcall.Fn = sym
   839  	// Remove first arg
   840  	v.Args[0].Uses--
   841  	copy(v.Args[0:], v.Args[1:])
   842  	v.Args[len(v.Args)-1] = nil // aid GC
   843  	v.Args = v.Args[:len(v.Args)-1]
   844  	if f := v.Block.Func; f.pass.debug > 0 {
   845  		f.Warnl(v.Pos, "de-virtualizing call")
   846  	}
   847  	return v
   848  }
   849  
   850  // isSamePtr reports whether p1 and p2 point to the same address.
   851  func isSamePtr(p1, p2 *Value) bool {
   852  	if p1 == p2 {
   853  		return true
   854  	}
   855  	if p1.Op != p2.Op {
   856  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   857  			p1 = p1.Args[0]
   858  		}
   859  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   860  			p2 = p2.Args[0]
   861  		}
   862  		if p1 == p2 {
   863  			return true
   864  		}
   865  		if p1.Op != p2.Op {
   866  			return false
   867  		}
   868  	}
   869  	switch p1.Op {
   870  	case OpOffPtr:
   871  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   872  	case OpAddr, OpLocalAddr:
   873  		return p1.Aux == p2.Aux
   874  	case OpAddPtr:
   875  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   876  	}
   877  	return false
   878  }
   879  
   880  func isStackPtr(v *Value) bool {
   881  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   882  		v = v.Args[0]
   883  	}
   884  	return v.Op == OpSP || v.Op == OpLocalAddr
   885  }
   886  
   887  // disjoint reports whether the memory region specified by [p1:p1+n1)
   888  // does not overlap with [p2:p2+n2).
   889  // A return value of false does not imply the regions overlap.
   890  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   891  	if n1 == 0 || n2 == 0 {
   892  		return true
   893  	}
   894  	if p1 == p2 {
   895  		return false
   896  	}
   897  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   898  		base, offset = ptr, 0
   899  		for base.Op == OpOffPtr {
   900  			offset += base.AuxInt
   901  			base = base.Args[0]
   902  		}
   903  		if opcodeTable[base.Op].nilCheck {
   904  			base = base.Args[0]
   905  		}
   906  		return base, offset
   907  	}
   908  
   909  	// Run types-based analysis
   910  	if disjointTypes(p1.Type, p2.Type) {
   911  		return true
   912  	}
   913  
   914  	p1, off1 := baseAndOffset(p1)
   915  	p2, off2 := baseAndOffset(p2)
   916  	if isSamePtr(p1, p2) {
   917  		return !overlap(off1, n1, off2, n2)
   918  	}
   919  	// p1 and p2 are not the same, so if they are both OpAddrs then
   920  	// they point to different variables.
   921  	// If one pointer is on the stack and the other is an argument
   922  	// then they can't overlap.
   923  	switch p1.Op {
   924  	case OpAddr, OpLocalAddr:
   925  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   926  			return true
   927  		}
   928  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   929  	case OpArg, OpArgIntReg:
   930  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   931  			return true
   932  		}
   933  	case OpSP:
   934  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   935  	}
   936  	return false
   937  }
   938  
   939  // disjointTypes reports whether a memory region pointed to by a pointer of type
   940  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   941  // based on type aliasing rules.
   942  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   943  	// Unsafe pointer can alias with anything.
   944  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   945  		return false
   946  	}
   947  
   948  	if !t1.IsPtr() || !t2.IsPtr() {
   949  		panic("disjointTypes: one of arguments is not a pointer")
   950  	}
   951  
   952  	t1 = t1.Elem()
   953  	t2 = t2.Elem()
   954  
   955  	// Not-in-heap types are not supported -- they are rare and unimportant; also,
   956  	// the type.HasPointers check doesn't work correctly for them.
   957  	if t1.NotInHeap() || t2.NotInHeap() {
   958  		return false
   959  	}
   960  
   961  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   962  
   963  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   964  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   965  		(isPtrShaped(t2) && !t1.HasPointers()) {
   966  		return true
   967  	}
   968  
   969  	return false
   970  }
   971  
   972  // moveSize returns the number of bytes an aligned MOV instruction moves.
   973  func moveSize(align int64, c *Config) int64 {
   974  	switch {
   975  	case align%8 == 0 && c.PtrSize == 8:
   976  		return 8
   977  	case align%4 == 0:
   978  		return 4
   979  	case align%2 == 0:
   980  		return 2
   981  	}
   982  	return 1
   983  }
   984  
   985  // mergePoint finds a block among a's blocks which dominates b and is itself
   986  // dominated by all of a's blocks. Returns nil if it can't find one.
   987  // Might return nil even if one does exist.
   988  func mergePoint(b *Block, a ...*Value) *Block {
   989  	// Walk backward from b looking for one of the a's blocks.
   990  
   991  	// Max distance
   992  	d := 100
   993  
   994  	for d > 0 {
   995  		for _, x := range a {
   996  			if b == x.Block {
   997  				goto found
   998  			}
   999  		}
  1000  		if len(b.Preds) > 1 {
  1001  			// Don't know which way to go back. Abort.
  1002  			return nil
  1003  		}
  1004  		b = b.Preds[0].b
  1005  		d--
  1006  	}
  1007  	return nil // too far away
  1008  found:
  1009  	// At this point, b is the block of the first value in a found by walking backwards.
  1010  	// If we return anything, it will be this block, r.
  1011  	r := b
  1012  
  1013  	// Keep going, counting the other a's that we find. They must all dominate r.
  1014  	na := 0
  1015  	for d > 0 {
  1016  		for _, x := range a {
  1017  			if b == x.Block {
  1018  				na++
  1019  			}
  1020  		}
  1021  		if na == len(a) {
  1022  			// Found all of a in a backwards walk. We can return r.
  1023  			return r
  1024  		}
  1025  		if len(b.Preds) > 1 {
  1026  			return nil
  1027  		}
  1028  		b = b.Preds[0].b
  1029  		d--
  1030  
  1031  	}
  1032  	return nil // too far away
  1033  }
  1034  
  1035  // clobber invalidates values. Returns true.
  1036  // clobber is used by rewrite rules to:
  1037  //
  1038  //	A) make sure the values are really dead and never used again.
  1039  //	B) decrement use counts of the values' args.
  1040  func clobber(vv ...*Value) bool {
  1041  	for _, v := range vv {
  1042  		v.reset(OpInvalid)
  1043  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1044  	}
  1045  	return true
  1046  }
  1047  
  1048  // resetCopy resets v to be a copy of arg.
  1049  // Always returns true.
  1050  func resetCopy(v *Value, arg *Value) bool {
  1051  	v.reset(OpCopy)
  1052  	v.AddArg(arg)
  1053  	return true
  1054  }
  1055  
  1056  // clobberIfDead resets v when use count is 1. Returns true.
  1057  // clobberIfDead is used by rewrite rules to decrement
  1058  // use counts of v's args when v is dead and never used.
  1059  func clobberIfDead(v *Value) bool {
  1060  	if v.Uses == 1 {
  1061  		v.reset(OpInvalid)
  1062  	}
  1063  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1064  	return true
  1065  }
  1066  
  1067  // noteRule is an easy way to track if a rule is matched when writing
  1068  // new ones.  Make the rule of interest also conditional on
  1069  //
  1070  //	noteRule("note to self: rule of interest matched")
  1071  //
  1072  // and that message will print when the rule matches.
  1073  func noteRule(s string) bool {
  1074  	fmt.Println(s)
  1075  	return true
  1076  }
  1077  
  1078  // countRule increments Func.ruleMatches[key].
  1079  // If Func.ruleMatches is non-nil at the end
  1080  // of compilation, it will be printed to stdout.
  1081  // This is intended to make it easier to find functions
  1082  // that contain lots of rule matches when developing new rules.
  1083  func countRule(v *Value, key string) bool {
  1084  	f := v.Block.Func
  1085  	if f.ruleMatches == nil {
  1086  		f.ruleMatches = make(map[string]int)
  1087  	}
  1088  	f.ruleMatches[key]++
  1089  	return true
  1090  }
  1091  
  1092  // warnRule generates compiler debug output with string s when
  1093  // v is not in autogenerated code, cond is true and the rule has fired.
  1094  func warnRule(cond bool, v *Value, s string) bool {
  1095  	if pos := v.Pos; pos.Line() > 1 && cond {
  1096  		v.Block.Func.Warnl(pos, s)
  1097  	}
  1098  	return true
  1099  }
  1100  
  1101  // for a pseudo-op like (LessThan x), extract x.
  1102  func flagArg(v *Value) *Value {
  1103  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1104  		return nil
  1105  	}
  1106  	return v.Args[0]
  1107  }
  1108  
  1109  // arm64Negate finds the complement to an ARM64 condition code,
  1110  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1111  //
  1112  // For floating point, it's more subtle because NaN is unordered. We do
  1113  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1114  func arm64Negate(op Op) Op {
  1115  	switch op {
  1116  	case OpARM64LessThan:
  1117  		return OpARM64GreaterEqual
  1118  	case OpARM64LessThanU:
  1119  		return OpARM64GreaterEqualU
  1120  	case OpARM64GreaterThan:
  1121  		return OpARM64LessEqual
  1122  	case OpARM64GreaterThanU:
  1123  		return OpARM64LessEqualU
  1124  	case OpARM64LessEqual:
  1125  		return OpARM64GreaterThan
  1126  	case OpARM64LessEqualU:
  1127  		return OpARM64GreaterThanU
  1128  	case OpARM64GreaterEqual:
  1129  		return OpARM64LessThan
  1130  	case OpARM64GreaterEqualU:
  1131  		return OpARM64LessThanU
  1132  	case OpARM64Equal:
  1133  		return OpARM64NotEqual
  1134  	case OpARM64NotEqual:
  1135  		return OpARM64Equal
  1136  	case OpARM64LessThanF:
  1137  		return OpARM64NotLessThanF
  1138  	case OpARM64NotLessThanF:
  1139  		return OpARM64LessThanF
  1140  	case OpARM64LessEqualF:
  1141  		return OpARM64NotLessEqualF
  1142  	case OpARM64NotLessEqualF:
  1143  		return OpARM64LessEqualF
  1144  	case OpARM64GreaterThanF:
  1145  		return OpARM64NotGreaterThanF
  1146  	case OpARM64NotGreaterThanF:
  1147  		return OpARM64GreaterThanF
  1148  	case OpARM64GreaterEqualF:
  1149  		return OpARM64NotGreaterEqualF
  1150  	case OpARM64NotGreaterEqualF:
  1151  		return OpARM64GreaterEqualF
  1152  	default:
  1153  		panic("unreachable")
  1154  	}
  1155  }
  1156  
  1157  // arm64Invert evaluates (InvertFlags op), which
  1158  // is the same as altering the condition codes such
  1159  // that the same result would be produced if the arguments
  1160  // to the flag-generating instruction were reversed, e.g.
  1161  // (InvertFlags (CMP x y)) -> (CMP y x)
  1162  func arm64Invert(op Op) Op {
  1163  	switch op {
  1164  	case OpARM64LessThan:
  1165  		return OpARM64GreaterThan
  1166  	case OpARM64LessThanU:
  1167  		return OpARM64GreaterThanU
  1168  	case OpARM64GreaterThan:
  1169  		return OpARM64LessThan
  1170  	case OpARM64GreaterThanU:
  1171  		return OpARM64LessThanU
  1172  	case OpARM64LessEqual:
  1173  		return OpARM64GreaterEqual
  1174  	case OpARM64LessEqualU:
  1175  		return OpARM64GreaterEqualU
  1176  	case OpARM64GreaterEqual:
  1177  		return OpARM64LessEqual
  1178  	case OpARM64GreaterEqualU:
  1179  		return OpARM64LessEqualU
  1180  	case OpARM64Equal, OpARM64NotEqual:
  1181  		return op
  1182  	case OpARM64LessThanF:
  1183  		return OpARM64GreaterThanF
  1184  	case OpARM64GreaterThanF:
  1185  		return OpARM64LessThanF
  1186  	case OpARM64LessEqualF:
  1187  		return OpARM64GreaterEqualF
  1188  	case OpARM64GreaterEqualF:
  1189  		return OpARM64LessEqualF
  1190  	case OpARM64NotLessThanF:
  1191  		return OpARM64NotGreaterThanF
  1192  	case OpARM64NotGreaterThanF:
  1193  		return OpARM64NotLessThanF
  1194  	case OpARM64NotLessEqualF:
  1195  		return OpARM64NotGreaterEqualF
  1196  	case OpARM64NotGreaterEqualF:
  1197  		return OpARM64NotLessEqualF
  1198  	default:
  1199  		panic("unreachable")
  1200  	}
  1201  }
  1202  
  1203  // evaluate an ARM64 op against a flags value
  1204  // that is potentially constant; return 1 for true,
  1205  // -1 for false, and 0 for not constant.
  1206  func ccARM64Eval(op Op, flags *Value) int {
  1207  	fop := flags.Op
  1208  	if fop == OpARM64InvertFlags {
  1209  		return -ccARM64Eval(op, flags.Args[0])
  1210  	}
  1211  	if fop != OpARM64FlagConstant {
  1212  		return 0
  1213  	}
  1214  	fc := flagConstant(flags.AuxInt)
  1215  	b2i := func(b bool) int {
  1216  		if b {
  1217  			return 1
  1218  		}
  1219  		return -1
  1220  	}
  1221  	switch op {
  1222  	case OpARM64Equal:
  1223  		return b2i(fc.eq())
  1224  	case OpARM64NotEqual:
  1225  		return b2i(fc.ne())
  1226  	case OpARM64LessThan:
  1227  		return b2i(fc.lt())
  1228  	case OpARM64LessThanU:
  1229  		return b2i(fc.ult())
  1230  	case OpARM64GreaterThan:
  1231  		return b2i(fc.gt())
  1232  	case OpARM64GreaterThanU:
  1233  		return b2i(fc.ugt())
  1234  	case OpARM64LessEqual:
  1235  		return b2i(fc.le())
  1236  	case OpARM64LessEqualU:
  1237  		return b2i(fc.ule())
  1238  	case OpARM64GreaterEqual:
  1239  		return b2i(fc.ge())
  1240  	case OpARM64GreaterEqualU:
  1241  		return b2i(fc.uge())
  1242  	}
  1243  	return 0
  1244  }
  1245  
  1246  // logRule logs the use of the rule s. This will only be enabled if
  1247  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1248  func logRule(s string) {
  1249  	if ruleFile == nil {
  1250  		// Open a log file to write log to. We open in append
  1251  		// mode because all.bash runs the compiler lots of times,
  1252  		// and we want the concatenation of all of those logs.
  1253  		// This means, of course, that users need to rm the old log
  1254  		// to get fresh data.
  1255  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1256  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1257  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1258  		if err != nil {
  1259  			panic(err)
  1260  		}
  1261  		ruleFile = w
  1262  	}
  1263  	_, err := fmt.Fprintln(ruleFile, s)
  1264  	if err != nil {
  1265  		panic(err)
  1266  	}
  1267  }
  1268  
  1269  var ruleFile io.Writer
  1270  
  1271  func isConstZero(v *Value) bool {
  1272  	switch v.Op {
  1273  	case OpConstNil:
  1274  		return true
  1275  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1276  		return v.AuxInt == 0
  1277  	case OpStringMake, OpIMake, OpComplexMake:
  1278  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1279  	case OpSliceMake:
  1280  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1281  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1282  		return isConstZero(v.Args[0])
  1283  	}
  1284  	return false
  1285  }
  1286  
  1287  // reciprocalExact64 reports whether 1/c is exactly representable.
  1288  func reciprocalExact64(c float64) bool {
  1289  	b := math.Float64bits(c)
  1290  	man := b & (1<<52 - 1)
  1291  	if man != 0 {
  1292  		return false // not a power of 2, denormal, or NaN
  1293  	}
  1294  	exp := b >> 52 & (1<<11 - 1)
  1295  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1296  	// changes the exponent to 0x7fe-exp.
  1297  	switch exp {
  1298  	case 0:
  1299  		return false // ±0
  1300  	case 0x7ff:
  1301  		return false // ±inf
  1302  	case 0x7fe:
  1303  		return false // exponent is not representable
  1304  	default:
  1305  		return true
  1306  	}
  1307  }
  1308  
  1309  // reciprocalExact32 reports whether 1/c is exactly representable.
  1310  func reciprocalExact32(c float32) bool {
  1311  	b := math.Float32bits(c)
  1312  	man := b & (1<<23 - 1)
  1313  	if man != 0 {
  1314  		return false // not a power of 2, denormal, or NaN
  1315  	}
  1316  	exp := b >> 23 & (1<<8 - 1)
  1317  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1318  	// changes the exponent to 0xfe-exp.
  1319  	switch exp {
  1320  	case 0:
  1321  		return false // ±0
  1322  	case 0xff:
  1323  		return false // ±inf
  1324  	case 0xfe:
  1325  		return false // exponent is not representable
  1326  	default:
  1327  		return true
  1328  	}
  1329  }
  1330  
  1331  // isARMImmRot reports whether the immediate v can be directly encoded into an
        // ARM instruction, i.e. as an 8-bit value rotated right by an even amount.
  1332  func isARMImmRot(v uint32) bool {
  1333  	for i := 0; i < 16; i++ {
  1334  		if v&^0xff == 0 {
  1335  			return true
  1336  		}
  1337  		v = v<<2 | v>>30
  1338  	}
  1339  
  1340  	return false
  1341  }
  1342  
  1343  // overlap reports whether the ranges specified by the given offset and
  1344  // size pairs overlap.
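        // For example, overlap(0, 8, 4, 8) == true and overlap(0, 4, 4, 4) == false.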
  1345  func overlap(offset1, size1, offset2, size2 int64) bool {
  1346  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1347  		return true
  1348  	}
  1349  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1350  		return true
  1351  	}
  1352  	return false
  1353  }
  1354  
  1355  // zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
  1356  // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
  1357  // because it catches the same number of cases as 4.
  1358  func zeroUpper32Bits(x *Value, depth int) bool {
  1359  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1360  		// If the value is signed, it might get re-sign-extended
  1361  		// during spill and restore. See issue 68227.
  1362  		return false
  1363  	}
  1364  	switch x.Op {
  1365  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1366  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1367  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1368  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1369  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1370  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1371  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1372  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1373  		OpAMD64SHLL, OpAMD64SHLLconst:
  1374  		return true
  1375  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1376  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1377  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1378  		return true
  1379  	case OpArg: // note: but not ArgIntReg
  1380  		// amd64 always loads args from the stack unsigned.
  1381  		// most other architectures load them sign/zero extended based on the type.
  1382  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1383  	case OpPhi, OpSelect0, OpSelect1:
  1384  		// Phis can use each other as arguments; instead of tracking visited values,
  1385  		// just limit the recursion depth.
  1386  		if depth <= 0 {
  1387  			return false
  1388  		}
  1389  		for i := range x.Args {
  1390  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1391  				return false
  1392  			}
  1393  		}
  1394  		return true
  1395  
  1396  	}
  1397  	return false
  1398  }
  1399  
  1400  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1401  func zeroUpper48Bits(x *Value, depth int) bool {
  1402  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1403  		return false
  1404  	}
  1405  	switch x.Op {
  1406  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1407  		return true
  1408  	case OpArg: // note: but not ArgIntReg
  1409  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1410  	case OpPhi, OpSelect0, OpSelect1:
  1411  		// Phis can use each other as arguments; instead of tracking visited values,
  1412  		// just limit the recursion depth.
  1413  		if depth <= 0 {
  1414  			return false
  1415  		}
  1416  		for i := range x.Args {
  1417  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1418  				return false
  1419  			}
  1420  		}
  1421  		return true
  1422  
  1423  	}
  1424  	return false
  1425  }
  1426  
  1427  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1428  func zeroUpper56Bits(x *Value, depth int) bool {
  1429  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1430  		return false
  1431  	}
  1432  	switch x.Op {
  1433  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1434  		return true
  1435  	case OpArg: // note: but not ArgIntReg
  1436  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1437  	case OpPhi, OpSelect0, OpSelect1:
  1438  		// Phis can use each other as arguments; instead of tracking visited values,
  1439  		// just limit the recursion depth.
  1440  		if depth <= 0 {
  1441  			return false
  1442  		}
  1443  		for i := range x.Args {
  1444  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1445  				return false
  1446  			}
  1447  		}
  1448  		return true
  1449  
  1450  	}
  1451  	return false
  1452  }
  1453  
  1454  func isInlinableMemclr(c *Config, sz int64) bool {
  1455  	if sz < 0 {
  1456  		return false
  1457  	}
  1458  	// TODO: expand this check to allow other architectures
  1459  	// see CL 454255 and issue 56997
  1460  	switch c.arch {
  1461  	case "amd64", "arm64":
  1462  		return true
  1463  	case "ppc64le", "ppc64", "loong64":
  1464  		return sz < 512
  1465  	}
  1466  	return false
  1467  }
  1468  
  1469  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1470  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1471  // safe, either because Move will do all of its loads before any of its stores, or
  1472  // because the arguments are known to be disjoint.
  1473  // This is used as a check for replacing memmove with Move ops.
  1474  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1475  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1476  	// Move ops may or may not be faster for large sizes depending on how the platform
  1477  	// lowers them, so we only perform this optimization on platforms that we know to
  1478  	// have fast Move ops.
  1479  	switch c.arch {
  1480  	case "amd64":
  1481  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1482  	case "arm64":
  1483  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1484  	case "386":
  1485  		return sz <= 8
  1486  	case "s390x", "ppc64", "ppc64le":
  1487  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1488  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1489  		return sz <= 4
  1490  	}
  1491  	return false
  1492  }
  1493  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1494  	return isInlinableMemmove(dst, src, sz, c)
  1495  }
  1496  
  1497  // logLargeCopy logs the occurrence of a large copy.
  1498  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1499  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1500  func logLargeCopy(v *Value, s int64) bool {
  1501  	if s < 128 {
  1502  		return true
  1503  	}
  1504  	if logopt.Enabled() {
  1505  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1506  	}
  1507  	return true
  1508  }
  1509  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1510  	if s < 128 {
  1511  		return
  1512  	}
  1513  	if logopt.Enabled() {
  1514  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1515  	}
  1516  }
  1517  
  1518  // hasSmallRotate reports whether the architecture has rotate instructions
  1519  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1520  func hasSmallRotate(c *Config) bool {
  1521  	switch c.arch {
  1522  	case "amd64", "386":
  1523  		return true
  1524  	default:
  1525  		return false
  1526  	}
  1527  }
  1528  
  1529  func supportsPPC64PCRel() bool {
  1530  	// PCRel is currently supported only for >= power10 on linux.
  1531  	// Internal and external linking support it on ppc64le; only internal linking supports it on ppc64.
  1532  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1533  }
  1534  
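        // newPPC64ShiftAuxInt packs a shift amount sh and mask boundaries mb and me
        // (each of which must lie in [0, sz)) into a single auxInt as sh<<16 | mb<<8 | me.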
  1535  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1536  	if sh < 0 || sh >= sz {
  1537  		panic("PPC64 shift arg sh out of range")
  1538  	}
  1539  	if mb < 0 || mb >= sz {
  1540  		panic("PPC64 shift arg mb out of range")
  1541  	}
  1542  	if me < 0 || me >= sz {
  1543  		panic("PPC64 shift arg me out of range")
  1544  	}
  1545  	return int32(sh<<16 | mb<<8 | me)
  1546  }
  1547  
  1548  func GetPPC64Shiftsh(auxint int64) int64 {
  1549  	return int64(int8(auxint >> 16))
  1550  }
  1551  
  1552  func GetPPC64Shiftmb(auxint int64) int64 {
  1553  	return int64(int8(auxint >> 8))
  1554  }
  1555  
  1556  // isPPC64WordRotateMask reports whether this value can be encoded as a mask for
  1557  // an rlwinm-like operation. Masks can also extend from the msb and wrap to
  1558  // the lsb. That is, the valid masks are 32-bit strings
  1559  // of the form 0..01..10..0, 1..10..01..1, or 1...1.
  1560  //
  1561  // Note: this ignores the upper 32 bits of the input. When a
  1562  // zero-extended result is desired (e.g. a 64-bit result), the
  1563  // user must verify the upper 32 bits are 0 and the mask is
  1564  // contiguous (that is, non-wrapping).
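        // For example, 0x000000F0 and 0xF000000F are valid masks, while 0x00F00F00 is not.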
  1565  func isPPC64WordRotateMask(v64 int64) bool {
  1566  	// Isolate rightmost 1 (if none 0) and add.
  1567  	v := uint32(v64)
  1568  	vp := (v & -v) + v
  1569  	// Likewise, for the wrapping case.
  1570  	vn := ^v
  1571  	vpn := (vn & -vn) + vn
  1572  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1573  }
  1574  
  1575  // isPPC64WordRotateMaskNonWrapping reports whether this mask is a valid,
  1576  // contiguous bitmask that can be represented by an RLWNM mask and also clears
  1577  // the upper 32 bits of the register.
  1578  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1579  	// Isolate rightmost 1 (if none 0) and add.
  1580  	v := uint32(v64)
  1581  	vp := (v & -v) + v
  1582  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1583  }
  1584  
  1585  // Compress mask and shift into single value of the form
  1586  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1587  // be used to regenerate the input mask.
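        // For example, encodePPC64RotateMask(0, 0x00FF0000, 32) encodes to 0x20000810
        // (nbits=32, rotate=0, mb=8, me=16), from which DecodePPC64RotateMask recovers
        // rotate=0, mb=8, me=15 and mask=0x00FF0000.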
  1588  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1589  	var mb, me, mbn, men int
  1590  
  1591  	// Determine boundaries and then decode them
  1592  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1593  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1594  	} else if nbits == 32 {
  1595  		mb = bits.LeadingZeros32(uint32(mask))
  1596  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1597  		mbn = bits.LeadingZeros32(^uint32(mask))
  1598  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1599  	} else {
  1600  		mb = bits.LeadingZeros64(uint64(mask))
  1601  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1602  		mbn = bits.LeadingZeros64(^uint64(mask))
  1603  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1604  	}
  1605  	// Check for a wrapping mask (e.g. bits at 0 and 63)
  1606  	if mb == 0 && me == int(nbits) {
  1607  		// swap the inverted values
  1608  		mb, me = men, mbn
  1609  	}
  1610  
  1611  	return int64(me) | int64(mb<<8) | rotate<<16 | nbits<<24
  1612  }
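// Worked example (editorial, not part of the original source): for a rotate
// of 4 and the 32-bit mask 0x00FFFF00, mb is 8 (leading zeros) and me is 24
// (32 minus trailing zeros), so
//
//	encodePPC64RotateMask(4, 0x00FFFF00, 32) // == 32<<24 | 4<<16 | 8<<8 | 24 == 0x20040818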
  1613  
  1614  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1615  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1616  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1617  // operations can be combined. This functions assumes the two opcodes can
  1618  // operations can be combined. This function assumes the two opcodes can
  1619  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1620  	mb := s
  1621  	r := 64 - s
  1622  	// A larger mb is a smaller mask.
  1623  	if (encoded>>8)&0xFF < mb {
  1624  		encoded = (encoded &^ 0xFF00) | mb<<8
  1625  	}
  1626  	// The rotate is expected to be 0.
  1627  	if (encoded & 0xFF0000) != 0 {
  1628  		panic("non-zero rotate")
  1629  	}
  1630  	return encoded | r<<16
  1631  }
  1632  
  1633  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1634  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1635  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1636  	auxint := uint64(sauxint)
  1637  	rotate = int64((auxint >> 16) & 0xFF)
  1638  	mb = int64((auxint >> 8) & 0xFF)
  1639  	me = int64((auxint >> 0) & 0xFF)
  1640  	nbits := int64((auxint >> 24) & 0xFF)
  1641  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1642  	if mb > me {
  1643  		mask = ^mask
  1644  	}
  1645  	if nbits == 32 {
  1646  		mask = uint64(uint32(mask))
  1647  	}
  1648  
  1649  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1650  	// is inclusive.
  1651  	me = (me - 1) & (nbits - 1)
  1652  	return
  1653  }
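// Round-trip example (editorial, not part of the original source), continuing
// the encoding example above:
//
//	r, mb, me, mask := DecodePPC64RotateMask(0x20040818)
//	// r == 4, mb == 8, mask == 0x00FFFF00
//	// me == 23, since the returned end bit is made inclusive to match MASK(mb,me)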
  1654  
  1655  // This verifies that the mask is a set of
  1656  // consecutive bits including the least
  1657  // significant bit.
  1658  func isPPC64ValidShiftMask(v int64) bool {
  1659  	if (v != 0) && ((v+1)&v) == 0 {
  1660  		return true
  1661  	}
  1662  	return false
  1663  }
  1664  
  1665  func getPPC64ShiftMaskLength(v int64) int64 {
  1666  	return int64(bits.Len64(uint64(v)))
  1667  }
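// Editorial examples (not part of the original source):
//
//	isPPC64ValidShiftMask(0x00FF)   // true:  the ones extend up from bit 0
//	getPPC64ShiftMaskLength(0x00FF) // == 8
//	isPPC64ValidShiftMask(0xFF00)   // false: the run does not include the lsb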
  1668  
  1669  // Decompose a shift right into an equivalent rotate/mask,
  1670  // and return mask & m.
  1671  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1672  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1673  	return m & int64(smask)
  1674  }
  1675  
  1676  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1677  func mergePPC64AndSrwi(m, s int64) int64 {
  1678  	mask := mergePPC64RShiftMask(m, s, 32)
  1679  	if !isPPC64WordRotateMask(mask) {
  1680  		return 0
  1681  	}
  1682  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1683  }
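// Worked example (editorial, not part of the original source):
// (ANDconst [0xFF] (SRWconst [8] x)) extracts bits 8..15 of the low word,
// which is the same as rotating left by 24 and masking with 0xFF:
//
//	mergePPC64AndSrwi(0xFF, 8) // == encodePPC64RotateMask(24, 0xFF, 32)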
  1684  
  1685  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1686  func mergePPC64AndSrdi(m, s int64) int64 {
  1687  	mask := mergePPC64RShiftMask(m, s, 64)
  1688  
  1689  	// Verify the rotate and mask result only uses the lower 32 bits.
  1690  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1691  	if rv&uint64(mask) != 0 {
  1692  		return 0
  1693  	}
  1694  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1695  		return 0
  1696  	}
  1697  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1698  }
  1699  
  1700  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1701  func mergePPC64AndSldi(m, s int64) int64 {
  1702  	mask := -1 << s & m
  1703  
  1704  	// Verify the rotate and mask result only uses the lower 32 bits.
  1705  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1706  	if rv&uint64(mask) != 0 {
  1707  		return 0
  1708  	}
  1709  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1710  		return 0
  1711  	}
  1712  	return encodePPC64RotateMask(s&31, mask, 32)
  1713  }
  1714  
  1715  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1716  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1717  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1718  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1719  	// For CLRLSLDI, it is more convenient to think of it as masking off the leading bits, then rotating left.
  1720  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1721  
  1722  	// Rewrite mask to apply after the final left shift.
  1723  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1724  
  1725  	r_1 := 32 - srw
  1726  	r_2 := GetPPC64Shiftsh(sld)
  1727  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1728  
  1729  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1730  		return 0
  1731  	}
  1732  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1733  }
  1734  
  1735  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1736  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1737  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1738  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1739  	// For CLRLSLDI, it is more convenient to think of it as masking off the leading bits, then rotating left.
  1740  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1741  
  1742  	// Rewrite mask to apply after the final left shift.
  1743  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1744  
  1745  	r_1 := 64 - srd
  1746  	r_2 := GetPPC64Shiftsh(sld)
  1747  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1748  
  1749  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1750  		return 0
  1751  	}
  1752  	// This combine only works when selecting and shifting the lower 32 bits.
  1753  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1754  	if v1&mask_3 != 0 {
  1755  		return 0
  1756  	}
  1757  	return encodePPC64RotateMask(r_3&31, int64(mask_3), 32)
  1758  }
  1759  
  1760  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1761  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1762  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1763  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1764  	// For CLRLSLDI, it is more convenient to think of it as masking off the leading bits, then rotating left.
  1765  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1766  
  1767  	// combine the masks, and adjust for the final left shift.
  1768  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1769  	r_2 := GetPPC64Shiftsh(int64(sld))
  1770  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1771  
  1772  	// Verify the result is still a valid bitmask of <= 32 bits.
  1773  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1774  		return 0
  1775  	}
  1776  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1777  }
  1778  
  1779  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1780  // or 0 if they cannot be merged.
  1781  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1782  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1783  	mask_out := (mask_rlw & uint64(mask))
  1784  
  1785  	// Verify the result is still a valid bitmask of <= 32 bits.
  1786  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1787  		return 0
  1788  	}
  1789  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1790  }
  1791  
  1792  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1793  // result. Return rlw if it does, 0 otherwise.
  1794  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1795  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1796  	if mb > me {
  1797  		return 0
  1798  	}
  1799  	return rlw
  1800  }
  1801  
  1802  // Test if an AND with constant mask feeding into a RLWINM can be merged. Return the encoded
  1803  // RLWINM constant, or 0 if they cannot be merged.
  1804  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1805  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1806  
  1807  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1808  	r_mask := bits.RotateLeft32(mask, int(r))
  1809  
  1810  	mask_out := (mask_rlw & uint64(r_mask))
  1811  
  1812  	// Verify the result is still a valid bitmask of <= 32 bits.
  1813  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1814  		return 0
  1815  	}
  1816  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1817  }
  1818  
  1819  // Test if RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
  1820  // or 0 if they cannot be merged.
  1821  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1822  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1823  	if mb > me || mb < sldi {
  1824  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1825  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1826  		return 0
  1827  	}
  1828  	// combine the masks, and adjust for the final left shift.
  1829  	mask_3 := mask_1 << sldi
  1830  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1831  
  1832  	// Verify the result is still a valid bitmask of <= 32 bits.
  1833  	if uint64(uint32(mask_3)) != mask_3 {
  1834  		return 0
  1835  	}
  1836  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1837  }
  1838  
  1839  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1840  // or return 0 if they cannot be combined.
  1841  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1842  	if sld > srw || srw >= 32 {
  1843  		return 0
  1844  	}
  1845  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1846  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1847  	mask := (mask_r & mask_l) << uint(sld)
  1848  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1849  }
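// Worked example (editorial, not part of the original source):
// (SLDconst [2] (SRWconst [8] x)) keeps word bits 8..31 of x and shifts them
// left by 2, i.e. a single RLWINM with rotate (32-8+2)&31 == 26 and mask
// 0x03FFFFFC:
//
//	mergePPC64SldiSrw(2, 8) // == encodePPC64RotateMask(26, 0x03FFFFFC, 32)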
  1850  
  1851  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1852  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1853  // of op.
  1854  //
  1855  // E.g. consider the case:
  1856  // a = (ADD x y)
  1857  // b = (CMPconst [0] a)
  1858  // c = (OR a z)
  1859  //
  1860  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1861  // would produce:
  1862  // a  = (ADD x y)
  1863  // a' = (ADDCC x y)
  1864  // a” = (Select0 a')
  1865  // b  = (CMPconst [0] a”)
  1866  // c  = (OR a z)
  1867  //
  1868  // which makes it impossible to rewrite the second user. Instead the result
  1869  // of this conversion is:
  1870  // a' = (ADDCC x y)
  1871  // a  = (Select0 a')
  1872  // b  = (CMPconst [0] a)
  1873  // c  = (OR a z)
  1874  //
  1875  // Which makes it trivial to rewrite b using a lowering rule.
  1876  func convertPPC64OpToOpCC(op *Value) *Value {
  1877  	ccOpMap := map[Op]Op{
  1878  		OpPPC64ADD:      OpPPC64ADDCC,
  1879  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1880  		OpPPC64AND:      OpPPC64ANDCC,
  1881  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1882  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1883  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1884  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1885  		OpPPC64NEG:      OpPPC64NEGCC,
  1886  		OpPPC64NOR:      OpPPC64NORCC,
  1887  		OpPPC64OR:       OpPPC64ORCC,
  1888  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1889  		OpPPC64SUB:      OpPPC64SUBCC,
  1890  		OpPPC64XOR:      OpPPC64XORCC,
  1891  	}
  1892  	b := op.Block
  1893  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1894  	opCC.AddArgs(op.Args...)
  1895  	op.reset(OpSelect0)
  1896  	op.AddArgs(opCC)
  1897  	return op
  1898  }
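// Usage sketch (editorial, not part of the original source): lowering rules
// invoke this helper directly on the matched argument, so every other user of
// the original op now refers to the Select0 of the flag-setting form. A rule
// of roughly this shape (hypothetical paraphrase):
//
//	(CMPconst [0] z:(ADD x y)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))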
  1899  
  1900  // Try converting a RLDICL to ANDCC. If successful, return the mask, otherwise 0.
  1901  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1902  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1903  	if r != 0 || mask&0xFFFF != mask {
  1904  		return 0
  1905  	}
  1906  	return int64(mask)
  1907  }
  1908  
  1909  // Convenience function to rotate a 32 bit constant value by another constant.
  1910  func rotateLeft32(v, rotate int64) int64 {
  1911  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1912  }
  1913  
  1914  func rotateRight64(v, rotate int64) int64 {
  1915  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1916  }
  1917  
  1918  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1919  func armBFAuxInt(lsb, width int64) arm64BitField {
  1920  	if lsb < 0 || lsb > 63 {
  1921  		panic("ARM(64) bit field lsb constant out of range")
  1922  	}
  1923  	if width < 1 || lsb+width > 64 {
  1924  		panic("ARM(64) bit field width constant out of range")
  1925  	}
  1926  	return arm64BitField(width | lsb<<8)
  1927  }
  1928  
  1929  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1930  func (bfc arm64BitField) lsb() int64 {
  1931  	return int64(uint64(bfc) >> 8)
  1932  }
  1933  
  1934  // returns the width part of the auxInt field of arm64 bitfield ops.
  1935  func (bfc arm64BitField) width() int64 {
  1936  	return int64(bfc) & 0xff
  1937  }
  1938  
  1939  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1940  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1941  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1942  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1943  }
  1944  
  1945  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1946  func arm64BFWidth(mask, rshift int64) int64 {
  1947  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1948  	if shiftedMask == 0 {
  1949  		panic("ARM64 BF mask is zero")
  1950  	}
  1951  	return nto(shiftedMask)
  1952  }
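// Editorial examples (not part of the original source):
//
//	bfc := armBFAuxInt(8, 16) // == arm64BitField(16 | 8<<8) == 0x810
//	bfc.lsb()                 // == 8
//	bfc.width()               // == 16
//
//	isARM64BFMask(8, 0xFF0, 4) // true: 0xFF0>>4 == 0xFF is contiguous and 8+8 < 64
//	arm64BFWidth(0xFF0, 4)     // == 8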
  1953  
  1954  // encodes condition code and NZCV flags into auxint.
  1955  func arm64ConditionalParamsAuxInt(cond Op, nzcv uint8) arm64ConditionalParams {
  1956  	if cond < OpARM64Equal || cond > OpARM64GreaterEqualU {
  1957  		panic("Wrong conditional operation")
  1958  	}
  1959  	if nzcv&0x0f != nzcv {
  1960  		panic("Wrong value of NZCV flag")
  1961  	}
  1962  	return arm64ConditionalParams{cond, nzcv, 0, false}
  1963  }
  1964  
  1965  // encodes condition code, NZCV flags and constant value into auxint.
  1966  func arm64ConditionalParamsAuxIntWithValue(cond Op, nzcv uint8, value uint8) arm64ConditionalParams {
  1967  	if value&0x1f != value {
  1968  		panic("Wrong value of constant")
  1969  	}
  1970  	params := arm64ConditionalParamsAuxInt(cond, nzcv)
  1971  	params.constValue = value
  1972  	params.ind = true
  1973  	return params
  1974  }
  1975  
  1976  // extracts condition code from auxint.
  1977  func (condParams arm64ConditionalParams) Cond() Op {
  1978  	return condParams.cond
  1979  }
  1980  
  1981  // extracts NZCV flags from auxint.
  1982  func (condParams arm64ConditionalParams) Nzcv() int64 {
  1983  	return int64(condParams.nzcv)
  1984  }
  1985  
  1986  // extracts constant value from auxint if present.
  1987  func (condParams arm64ConditionalParams) ConstValue() (int64, bool) {
  1988  	return int64(condParams.constValue), condParams.ind
  1989  }
  1990  
  1991  // registerizable reports whether typ is a primitive type that fits in
  1992  // a register. It assumes float64 values will always fit into registers
  1993  // even if that isn't strictly true.
  1994  func registerizable(b *Block, typ *types.Type) bool {
  1995  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1996  		return true
  1997  	}
  1998  	if typ.IsInteger() {
  1999  		return typ.Size() <= b.Func.Config.RegSize
  2000  	}
  2001  	return false
  2002  }
  2003  
  2004  // needRaceCleanup reports whether this call to racefuncenter/racefuncexit is unnecessary and can be removed.
  2005  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  2006  	f := v.Block.Func
  2007  	if !f.Config.Race {
  2008  		return false
  2009  	}
  2010  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  2011  		return false
  2012  	}
  2013  	for _, b := range f.Blocks {
  2014  		for _, v := range b.Values {
  2015  			switch v.Op {
  2016  			case OpStaticCall, OpStaticLECall:
  2017  				// Scanning for racefuncenter will also encounter racefuncexit and vice
  2018  				// versa; those calls, along with the panic* calls below, are allowed.
  2019  				s := v.Aux.(*AuxCall).Fn.String()
  2020  				switch s {
  2021  				case "runtime.racefuncenter", "runtime.racefuncexit",
  2022  					"runtime.panicdivide", "runtime.panicwrap",
  2023  					"runtime.panicshift":
  2024  					continue
  2025  				}
  2026  				// If we encountered any call, we need to keep racefunc*,
  2027  				// for accurate stacktraces.
  2028  				return false
  2029  			case OpPanicBounds, OpPanicExtend:
  2030  				// Note: these are panic generators that are ok (like the static calls above).
  2031  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2032  				// We must keep the race functions if there are any other call types.
  2033  				return false
  2034  			}
  2035  		}
  2036  	}
  2037  	if isSameCall(sym, "runtime.racefuncenter") {
  2038  		// TODO REGISTER ABI this needs to be cleaned up.
  2039  		// If we're removing racefuncenter, remove its argument as well.
  2040  		if v.Args[0].Op != OpStore {
  2041  			if v.Op == OpStaticLECall {
  2042  				// there is no store, yet.
  2043  				return true
  2044  			}
  2045  			return false
  2046  		}
  2047  		mem := v.Args[0].Args[2]
  2048  		v.Args[0].reset(OpCopy)
  2049  		v.Args[0].AddArg(mem)
  2050  	}
  2051  	return true
  2052  }
  2053  
  2054  // symIsRO reports whether sym is a read-only global.
  2055  func symIsRO(sym Sym) bool {
  2056  	lsym := sym.(*obj.LSym)
  2057  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2058  }
  2059  
  2060  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2061  func symIsROZero(sym Sym) bool {
  2062  	lsym := sym.(*obj.LSym)
  2063  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2064  		return false
  2065  	}
  2066  	for _, b := range lsym.P {
  2067  		if b != 0 {
  2068  			return false
  2069  		}
  2070  	}
  2071  	return true
  2072  }
  2073  
  2074  // isFixedLoad reports whether the load can be resolved to a fixed address or constant,
  2075  // and so can be rewritten by rewriteFixedLoad.
  2076  func isFixedLoad(v *Value, sym Sym, off int64) bool {
  2077  	lsym := sym.(*obj.LSym)
  2078  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2079  		for _, r := range lsym.R {
  2080  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2081  				return true
  2082  			}
  2083  		}
  2084  		return false
  2085  	}
  2086  
  2087  	if ti := lsym.TypeInfo(); ti != nil {
  2088  		// Type symbols do not contain information about their fields, unlike the cases above.
  2089  		// Hand-implement field accesses.
  2090  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2091  
  2092  		t := ti.Type.(*types.Type)
  2093  
  2094  		for _, f := range rttype.Type.Fields() {
  2095  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2096  				switch f.Sym.Name {
  2097  				case "Size_", "PtrBytes", "Hash", "Kind_", "GCData":
  2098  					return true
  2099  				default:
  2100  					// fmt.Println("unknown field", f.Sym.Name)
  2101  					return false
  2102  				}
  2103  			}
  2104  		}
  2105  
  2106  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2107  			return true
  2108  		}
  2109  
  2110  		return false
  2111  	}
  2112  
  2113  	return false
  2114  }
  2115  
  2116  // rewriteFixedLoad rewrites a load to a fixed address or constant, if isFixedLoad returns true.
  2117  func rewriteFixedLoad(v *Value, sym Sym, sb *Value, off int64) *Value {
  2118  	b := v.Block
  2119  	f := b.Func
  2120  
  2121  	lsym := sym.(*obj.LSym)
  2122  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2123  		for _, r := range lsym.R {
  2124  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2125  				if strings.HasPrefix(r.Sym.Name, "type:") {
  2126  					// In case we're loading a type out of a dictionary, we need to record
  2127  					// that the containing function might put that type in an interface.
  2128  					// That information is currently recorded in relocations in the dictionary,
  2129  					// but if we perform this load at compile time then the dictionary
  2130  					// might be dead.
  2131  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2132  				} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2133  					// Same, but if we're using an itab we need to record that the
  2134  					// itab._type might be put in an interface.
  2135  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2136  				}
  2137  				v.reset(OpAddr)
  2138  				v.Aux = symToAux(r.Sym)
  2139  				v.AddArg(sb)
  2140  				return v
  2141  			}
  2142  		}
  2143  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2144  	}
  2145  
  2146  	if ti := lsym.TypeInfo(); ti != nil {
  2147  		// Type symbols do not contain information about their fields, unlike the cases above.
  2148  		// Hand-implement field accesses.
  2149  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2150  
  2151  		t := ti.Type.(*types.Type)
  2152  
  2153  		ptrSizedOpConst := OpConst64
  2154  		if f.Config.PtrSize == 4 {
  2155  			ptrSizedOpConst = OpConst32
  2156  		}
  2157  
  2158  		for _, f := range rttype.Type.Fields() {
  2159  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2160  				switch f.Sym.Name {
  2161  				case "Size_":
  2162  					v.reset(ptrSizedOpConst)
  2163  					v.AuxInt = t.Size()
  2164  					return v
  2165  				case "PtrBytes":
  2166  					v.reset(ptrSizedOpConst)
  2167  					v.AuxInt = types.PtrDataSize(t)
  2168  					return v
  2169  				case "Hash":
  2170  					v.reset(OpConst32)
  2171  					v.AuxInt = int64(types.TypeHash(t))
  2172  					return v
  2173  				case "Kind_":
  2174  					v.reset(OpConst8)
  2175  					v.AuxInt = int64(reflectdata.ABIKindOfType(t))
  2176  					return v
  2177  				case "GCData":
  2178  					gcdata, _ := reflectdata.GCSym(t, true)
  2179  					v.reset(OpAddr)
  2180  					v.Aux = symToAux(gcdata)
  2181  					v.AddArg(sb)
  2182  					return v
  2183  				default:
  2184  					base.Fatalf("unknown field %s for fixedLoad of %s at offset %d", f.Sym.Name, lsym.Name, off)
  2185  				}
  2186  			}
  2187  		}
  2188  
  2189  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2190  			elemSym := reflectdata.TypeLinksym(t.Elem())
  2191  			reflectdata.MarkTypeSymUsedInInterface(elemSym, f.fe.Func().Linksym())
  2192  			v.reset(OpAddr)
  2193  			v.Aux = symToAux(elemSym)
  2194  			v.AddArg(sb)
  2195  			return v
  2196  		}
  2197  
  2198  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2199  	}
  2200  
  2201  	base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2202  	return nil
  2203  }
  2204  
  2205  // read8 reads one byte from the read-only global sym at offset off.
  2206  func read8(sym Sym, off int64) uint8 {
  2207  	lsym := sym.(*obj.LSym)
  2208  	if off >= int64(len(lsym.P)) || off < 0 {
  2209  		// Invalid index into the global sym.
  2210  		// This can happen in dead code, so we don't want to panic.
  2211  		// Just return any value, it will eventually get ignored.
  2212  		// See issue 29215.
  2213  		return 0
  2214  	}
  2215  	return lsym.P[off]
  2216  }
  2217  
  2218  // read16 reads two bytes from the read-only global sym at offset off.
  2219  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2220  	lsym := sym.(*obj.LSym)
  2221  	// lsym.P is written lazily.
  2222  	// Bytes requested after the end of lsym.P are 0.
  2223  	var src []byte
  2224  	if 0 <= off && off < int64(len(lsym.P)) {
  2225  		src = lsym.P[off:]
  2226  	}
  2227  	buf := make([]byte, 2)
  2228  	copy(buf, src)
  2229  	return byteorder.Uint16(buf)
  2230  }
  2231  
  2232  // read32 reads four bytes from the read-only global sym at offset off.
  2233  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2234  	lsym := sym.(*obj.LSym)
  2235  	var src []byte
  2236  	if 0 <= off && off < int64(len(lsym.P)) {
  2237  		src = lsym.P[off:]
  2238  	}
  2239  	buf := make([]byte, 4)
  2240  	copy(buf, src)
  2241  	return byteorder.Uint32(buf)
  2242  }
  2243  
  2244  // read64 reads eight bytes from the read-only global sym at offset off.
  2245  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2246  	lsym := sym.(*obj.LSym)
  2247  	var src []byte
  2248  	if 0 <= off && off < int64(len(lsym.P)) {
  2249  		src = lsym.P[off:]
  2250  	}
  2251  	buf := make([]byte, 8)
  2252  	copy(buf, src)
  2253  	return byteorder.Uint64(buf)
  2254  }
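// Editorial example (not part of the original source): the readN helpers
// zero-fill past the end of lsym.P, so a read that straddles the end of the
// written data is well defined. With a hypothetical sym whose lsym.P is
// []byte{1, 2, 3, 4, 5, 6}:
//
//	read16(sym, 4, binary.LittleEndian) // == 0x0605
//	read64(sym, 4, binary.BigEndian)    // == 0x0506000000000000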
  2255  
  2256  // sequentialAddresses reports whether it can prove that x + n == y.
  2257  func sequentialAddresses(x, y *Value, n int64) bool {
  2258  	if x == y && n == 0 {
  2259  		return true
  2260  	}
  2261  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2262  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2263  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2264  		return true
  2265  	}
  2266  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2267  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2268  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2269  		return true
  2270  	}
  2271  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2272  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2273  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2274  		return true
  2275  	}
  2276  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2277  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2278  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2279  		return true
  2280  	}
  2281  	return false
  2282  }
  2283  
  2284  // flagConstant represents the result of a compile-time comparison.
  2285  // The sense of these flags does not necessarily represent the hardware's notion
  2286  // of a flags register - these are just a compile-time construct.
  2287  // We happen to match the semantics to those of arm/arm64.
  2288  // Note that these semantics differ from x86: the carry flag has the opposite
  2289  // sense on a subtraction!
  2290  //
  2291  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2292  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2293  //	 (because it does x + ^y + C).
  2294  //
  2295  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2296  type flagConstant uint8
  2297  
  2298  // N reports whether the result of an operation is negative (high bit set).
  2299  func (fc flagConstant) N() bool {
  2300  	return fc&1 != 0
  2301  }
  2302  
  2303  // Z reports whether the result of an operation is 0.
  2304  func (fc flagConstant) Z() bool {
  2305  	return fc&2 != 0
  2306  }
  2307  
  2308  // C reports whether an unsigned add overflowed (carry), or an
  2309  // unsigned subtract did not underflow (borrow).
  2310  func (fc flagConstant) C() bool {
  2311  	return fc&4 != 0
  2312  }
  2313  
  2314  // V reports whether a signed operation overflowed or underflowed.
  2315  func (fc flagConstant) V() bool {
  2316  	return fc&8 != 0
  2317  }
  2318  
  2319  func (fc flagConstant) eq() bool {
  2320  	return fc.Z()
  2321  }
  2322  func (fc flagConstant) ne() bool {
  2323  	return !fc.Z()
  2324  }
  2325  func (fc flagConstant) lt() bool {
  2326  	return fc.N() != fc.V()
  2327  }
  2328  func (fc flagConstant) le() bool {
  2329  	return fc.Z() || fc.lt()
  2330  }
  2331  func (fc flagConstant) gt() bool {
  2332  	return !fc.Z() && fc.ge()
  2333  }
  2334  func (fc flagConstant) ge() bool {
  2335  	return fc.N() == fc.V()
  2336  }
  2337  func (fc flagConstant) ult() bool {
  2338  	return !fc.C()
  2339  }
  2340  func (fc flagConstant) ule() bool {
  2341  	return fc.Z() || fc.ult()
  2342  }
  2343  func (fc flagConstant) ugt() bool {
  2344  	return !fc.Z() && fc.uge()
  2345  }
  2346  func (fc flagConstant) uge() bool {
  2347  	return fc.C()
  2348  }
  2349  
  2350  func (fc flagConstant) ltNoov() bool {
  2351  	return fc.lt() && !fc.V()
  2352  }
  2353  func (fc flagConstant) leNoov() bool {
  2354  	return fc.le() && !fc.V()
  2355  }
  2356  func (fc flagConstant) gtNoov() bool {
  2357  	return fc.gt() && !fc.V()
  2358  }
  2359  func (fc flagConstant) geNoov() bool {
  2360  	return fc.ge() && !fc.V()
  2361  }
  2362  
  2363  func (fc flagConstant) String() string {
  2364  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2365  }
  2366  
  2367  type flagConstantBuilder struct {
  2368  	N bool
  2369  	Z bool
  2370  	C bool
  2371  	V bool
  2372  }
  2373  
  2374  func (fcs flagConstantBuilder) encode() flagConstant {
  2375  	var fc flagConstant
  2376  	if fcs.N {
  2377  		fc |= 1
  2378  	}
  2379  	if fcs.Z {
  2380  		fc |= 2
  2381  	}
  2382  	if fcs.C {
  2383  		fc |= 4
  2384  	}
  2385  	if fcs.V {
  2386  		fc |= 8
  2387  	}
  2388  	return fc
  2389  }
  2390  
  2391  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2392  //  - the results of the C flag are different
  2393  //  - the results of the V flag when y==minint are different
  2394  
  2395  // addFlags64 returns the flags that would be set from computing x+y.
  2396  func addFlags64(x, y int64) flagConstant {
  2397  	var fcb flagConstantBuilder
  2398  	fcb.Z = x+y == 0
  2399  	fcb.N = x+y < 0
  2400  	fcb.C = uint64(x+y) < uint64(x)
  2401  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2402  	return fcb.encode()
  2403  }
  2404  
  2405  // subFlags64 returns the flags that would be set from computing x-y.
  2406  func subFlags64(x, y int64) flagConstant {
  2407  	var fcb flagConstantBuilder
  2408  	fcb.Z = x-y == 0
  2409  	fcb.N = x-y < 0
  2410  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2411  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2412  	return fcb.encode()
  2413  }
  2414  
  2415  // addFlags32 returns the flags that would be set from computing x+y.
  2416  func addFlags32(x, y int32) flagConstant {
  2417  	var fcb flagConstantBuilder
  2418  	fcb.Z = x+y == 0
  2419  	fcb.N = x+y < 0
  2420  	fcb.C = uint32(x+y) < uint32(x)
  2421  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2422  	return fcb.encode()
  2423  }
  2424  
  2425  // subFlags32 returns the flags that would be set from computing x-y.
  2426  func subFlags32(x, y int32) flagConstant {
  2427  	var fcb flagConstantBuilder
  2428  	fcb.Z = x-y == 0
  2429  	fcb.N = x-y < 0
  2430  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2431  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2432  	return fcb.encode()
  2433  }
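// Worked example (editorial, not part of the original source): comparing 1
// against 2 as a 32-bit subtraction borrows, so in this arm-style model the
// carry is clear:
//
//	fc := subFlags32(1, 2)
//	// fc.N(), fc.Z(), fc.C(), fc.V() == true, false, false, false
//	// fc.lt() and fc.ult() are both true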
  2434  
  2435  // logicFlags64 returns flags set to the sign/zeroness of x.
  2436  // C and V are set to false.
  2437  func logicFlags64(x int64) flagConstant {
  2438  	var fcb flagConstantBuilder
  2439  	fcb.Z = x == 0
  2440  	fcb.N = x < 0
  2441  	return fcb.encode()
  2442  }
  2443  
  2444  // logicFlags32 returns flags set to the sign/zeroness of x.
  2445  // C and V are set to false.
  2446  func logicFlags32(x int32) flagConstant {
  2447  	var fcb flagConstantBuilder
  2448  	fcb.Z = x == 0
  2449  	fcb.N = x < 0
  2450  	return fcb.encode()
  2451  }
  2452  
  2453  func makeJumpTableSym(b *Block) *obj.LSym {
  2454  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2455  	// The jump table symbol is accessed only from the function symbol.
  2456  	s.Set(obj.AttrStatic, true)
  2457  	return s
  2458  }
  2459  
  2460  // canRotate reports whether the architecture supports
  2461  // rotates of integer registers with the given number of bits.
  2462  func canRotate(c *Config, bits int64) bool {
  2463  	if bits > c.PtrSize*8 {
  2464  		// Don't rewrite to rotates bigger than the machine word.
  2465  		return false
  2466  	}
  2467  	switch c.arch {
  2468  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2469  		return true
  2470  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2471  		return bits >= 32
  2472  	default:
  2473  		return false
  2474  	}
  2475  }
  2476  
  2477  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2478  func isARM64bitcon(x uint64) bool {
  2479  	if x == 1<<64-1 || x == 0 {
  2480  		return false
  2481  	}
  2482  	// determine the period and sign-extend a unit to 64 bits
  2483  	switch {
  2484  	case x != x>>32|x<<32:
  2485  		// period is 64
  2486  		// nothing to do
  2487  	case x != x>>16|x<<48:
  2488  		// period is 32
  2489  		x = uint64(int64(int32(x)))
  2490  	case x != x>>8|x<<56:
  2491  		// period is 16
  2492  		x = uint64(int64(int16(x)))
  2493  	case x != x>>4|x<<60:
  2494  		// period is 8
  2495  		x = uint64(int64(int8(x)))
  2496  	default:
  2497  		// period is 4 or 2, always true
  2498  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2499  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2500  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2501  		// 0101, 1010             -- 01   rotate, repeat
  2502  		return true
  2503  	}
  2504  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2505  }
  2506  
  2507  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2508  func sequenceOfOnes(x uint64) bool {
  2509  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2510  	y += x
  2511  	return (y-1)&y == 0
  2512  }
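// Editorial examples (not part of the original source):
//
//	isARM64bitcon(0x00FF00FF00FF00FF) // true:  repeats every 16 bits, each unit a single run of ones
//	isARM64bitcon(0x0000000000000005) // false: 101 is not a run of ones, nor is its complement
//	isARM64bitcon(0)                  // false: all-zeros and all-ones are rejected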
  2513  
  2514  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2515  func isARM64addcon(v int64) bool {
  2516  	/* uimm12 or uimm24? */
  2517  	if v < 0 {
  2518  		return false
  2519  	}
  2520  	if (v & 0xFFF) == 0 {
  2521  		v >>= 12
  2522  	}
  2523  	return v <= 0xFFF
  2524  }
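// Editorial examples (not part of the original source):
//
//	isARM64addcon(0xFFF)    // true:  fits the 12-bit immediate
//	isARM64addcon(0x123000) // true:  low 12 bits are zero and 0x123 fits the shifted immediate
//	isARM64addcon(0x1001)   // false: needs 13 significant bits and is not a shifted immediate
//	isARM64addcon(-1)       // false: negative values cannot be encoded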
  2525  
  2526  // setPos sets the position of v to pos, then returns true.
  2527  // Useful for setting the result of a rewrite's position to
  2528  // something other than the default.
  2529  func setPos(v *Value, pos src.XPos) bool {
  2530  	v.Pos = pos
  2531  	return true
  2532  }
  2533  
  2534  // isNonNegative reports whether v is known to be greater or equal to zero.
  2535  // Note that this is pretty simplistic. The prove pass generates more detailed
  2536  // nonnegative information about values.
  2537  func isNonNegative(v *Value) bool {
  2538  	if !v.Type.IsInteger() {
  2539  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2540  	}
  2541  	// TODO: return true if !v.Type.IsSigned()
  2542  	// SSA isn't type-safe enough to do that now (issue 37753).
  2543  	// The checks below depend only on the pattern of bits.
  2544  
  2545  	switch v.Op {
  2546  	case OpConst64:
  2547  		return v.AuxInt >= 0
  2548  
  2549  	case OpConst32:
  2550  		return int32(v.AuxInt) >= 0
  2551  
  2552  	case OpConst16:
  2553  		return int16(v.AuxInt) >= 0
  2554  
  2555  	case OpConst8:
  2556  		return int8(v.AuxInt) >= 0
  2557  
  2558  	case OpStringLen, OpSliceLen, OpSliceCap,
  2559  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2560  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2561  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2562  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2563  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2564  		return true
  2565  
  2566  	case OpRsh64Ux64, OpRsh32Ux64:
  2567  		by := v.Args[1]
  2568  		return by.Op == OpConst64 && by.AuxInt > 0
  2569  
  2570  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2571  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2572  		return isNonNegative(v.Args[0])
  2573  
  2574  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2575  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2576  
  2577  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2578  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2579  		OpOr64, OpOr32, OpOr16, OpOr8,
  2580  		OpXor64, OpXor32, OpXor16, OpXor8:
  2581  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2582  
  2583  		// We could handle OpPhi here, but the improvements from doing
  2584  		// so are very minor, and it is neither simple nor cheap.
  2585  	}
  2586  	return false
  2587  }
  2588  
  2589  func rewriteStructLoad(v *Value) *Value {
  2590  	b := v.Block
  2591  	ptr := v.Args[0]
  2592  	mem := v.Args[1]
  2593  
  2594  	t := v.Type
  2595  	args := make([]*Value, t.NumFields())
  2596  	for i := range args {
  2597  		ft := t.FieldType(i)
  2598  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2599  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2600  	}
  2601  
  2602  	v.reset(OpStructMake)
  2603  	v.AddArgs(args...)
  2604  	return v
  2605  }
  2606  
  2607  func rewriteStructStore(v *Value) *Value {
  2608  	b := v.Block
  2609  	dst := v.Args[0]
  2610  	x := v.Args[1]
  2611  	if x.Op != OpStructMake {
  2612  		base.Fatalf("invalid struct store: %v", x)
  2613  	}
  2614  	mem := v.Args[2]
  2615  
  2616  	t := x.Type
  2617  	for i, arg := range x.Args {
  2618  		ft := t.FieldType(i)
  2619  
  2620  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2621  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2622  	}
  2623  
  2624  	return mem
  2625  }
  2626  
  2627  // isDirectAndComparableType reports whether v represents a type
  2628  // (a *runtime._type) whose value is stored directly in an
  2629  // interface (i.e., is pointer or pointer-like) and is comparable.
  2630  func isDirectAndComparableType(v *Value) bool {
  2631  	return isDirectAndComparableType1(v)
  2632  }
  2633  
  2634  // v is a type
  2635  func isDirectAndComparableType1(v *Value) bool {
  2636  	switch v.Op {
  2637  	case OpITab:
  2638  		return isDirectAndComparableType2(v.Args[0])
  2639  	case OpAddr:
  2640  		lsym := v.Aux.(*obj.LSym)
  2641  		if ti := lsym.TypeInfo(); ti != nil {
  2642  			t := ti.Type.(*types.Type)
  2643  			return types.IsDirectIface(t) && types.IsComparable(t)
  2644  		}
  2645  	}
  2646  	return false
  2647  }
  2648  
  2649  // v is an empty interface
  2650  func isDirectAndComparableType2(v *Value) bool {
  2651  	switch v.Op {
  2652  	case OpIMake:
  2653  		return isDirectAndComparableType1(v.Args[0])
  2654  	}
  2655  	return false
  2656  }
  2657  
  2658  // isDirectAndComparableIface reports whether v represents an itab
  2659  // (a *runtime._itab) for a type whose value is stored directly
  2660  // in an interface (i.e., is pointer or pointer-like) and is comparable.
  2661  func isDirectAndComparableIface(v *Value) bool {
  2662  	return isDirectAndComparableIface1(v, 9)
  2663  }
  2664  
  2665  // v is an itab
  2666  func isDirectAndComparableIface1(v *Value, depth int) bool {
  2667  	if depth == 0 {
  2668  		return false
  2669  	}
  2670  	switch v.Op {
  2671  	case OpITab:
  2672  		return isDirectAndComparableIface2(v.Args[0], depth-1)
  2673  	case OpAddr:
  2674  		lsym := v.Aux.(*obj.LSym)
  2675  		if ii := lsym.ItabInfo(); ii != nil {
  2676  			t := ii.Type.(*types.Type)
  2677  			return types.IsDirectIface(t) && types.IsComparable(t)
  2678  		}
  2679  	case OpConstNil:
  2680  		// We can treat this as direct, because if the itab is
  2681  		// nil, the data field must be nil also.
  2682  		return true
  2683  	}
  2684  	return false
  2685  }
  2686  
  2687  // v is an interface
  2688  func isDirectAndComparableIface2(v *Value, depth int) bool {
  2689  	if depth == 0 {
  2690  		return false
  2691  	}
  2692  	switch v.Op {
  2693  	case OpIMake:
  2694  		return isDirectAndComparableIface1(v.Args[0], depth-1)
  2695  	case OpPhi:
  2696  		for _, a := range v.Args {
  2697  			if !isDirectAndComparableIface2(a, depth-1) {
  2698  				return false
  2699  			}
  2700  		}
  2701  		return true
  2702  	}
  2703  	return false
  2704  }
  2705  
  2706  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2707  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2708  	r.sum, r.carry = int64(s), int64(c)
  2709  	return
  2710  }
  2711  
  2712  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2713  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2714  	r.hi, r.lo = int64(hi), int64(lo)
  2715  	return
  2716  }
  2717  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2718  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2719  	r.hi, r.lo = int32(hi), int32(lo)
  2720  	return
  2721  }
  2722  
  2723  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2724  func flagify(v *Value) bool {
  2725  	var flagVersion Op
  2726  	switch v.Op {
  2727  	case OpAMD64ADDQconst:
  2728  		flagVersion = OpAMD64ADDQconstflags
  2729  	case OpAMD64ADDLconst:
  2730  		flagVersion = OpAMD64ADDLconstflags
  2731  	default:
  2732  		base.Fatalf("can't flagify op %s", v.Op)
  2733  	}
  2734  	inner := v.copyInto(v.Block)
  2735  	inner.Op = flagVersion
  2736  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2737  	v.reset(OpSelect0)
  2738  	v.AddArg(inner)
  2739  	return true
  2740  }
  2741  
  2742  // PanicBoundsC contains a constant for a bounds failure.
  2743  type PanicBoundsC struct {
  2744  	C int64
  2745  }
  2746  
  2747  // PanicBoundsCC contains 2 constants for a bounds failure.
  2748  type PanicBoundsCC struct {
  2749  	Cx int64
  2750  	Cy int64
  2751  }
  2752  
  2753  func (p PanicBoundsC) CanBeAnSSAAux() {
  2754  }
  2755  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2756  }
  2757  
  2758  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2759  	return i.(PanicBoundsC)
  2760  }
  2761  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2762  	return i.(PanicBoundsCC)
  2763  }
  2764  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2765  	return p
  2766  }
  2767  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2768  	return p
  2769  }
  2770  
  2771  func isDictArgSym(sym Sym) bool {
  2772  	return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName
  2773  }
  2774  
