Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  )
    33  
    34  // applyRewrite repeatedly applies rb and rv to f; deadcode indicates whether it should also try to remove any values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
    43  // If the number of rewrite iterations reaches itersLimit, turn on cycle
    44  // detection at that point. Instead of a fixed limit, size the limit
    45  // according to the func size to allow for cases such as the one in
    46  // issue #66773.
    47  	itersLimit := f.NumBlocks()
    48  	if itersLimit < 20 {
    49  		itersLimit = 20
    50  	}
    51  	var iters int
    52  	var states map[string]bool
    53  	for {
    54  		change := false
    55  		deadChange := false
    56  		for _, b := range f.Blocks {
    57  			var b0 *Block
    58  			if debug > 1 {
    59  				b0 = new(Block)
    60  				*b0 = *b
    61  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    62  			}
    63  			for i, c := range b.ControlValues() {
    64  				for c.Op == OpCopy {
    65  					c = c.Args[0]
    66  					b.ReplaceControl(i, c)
    67  				}
    68  			}
    69  			if rb(b) {
    70  				change = true
    71  				if debug > 1 {
    72  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    73  				}
    74  			}
    75  			for j, v := range b.Values {
    76  				var v0 *Value
    77  				if debug > 1 {
    78  					v0 = new(Value)
    79  					*v0 = *v
    80  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    81  				}
    82  				if v.Uses == 0 && v.removeable() {
    83  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    84  						// Reset any values that are now unused, so that we decrement
    85  						// the use counts of all of their arguments.
    86  						// Not quite a deadcode pass, because it does not handle cycles.
    87  						// But it should help Uses==1 rules to fire.
    88  						v.reset(OpInvalid)
    89  						deadChange = true
    90  					}
    91  					// No point rewriting values which aren't used.
    92  					continue
    93  				}
    94  
    95  				vchange := phielimValue(v)
    96  				if vchange && debug > 1 {
    97  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    98  				}
    99  
   100  				// Eliminate copy inputs.
   101  				// If any copy input becomes unused, mark it
   102  				// as invalid and discard its argument. Repeat
   103  				// recursively on the discarded argument.
   104  				// This phase helps remove phantom "dead copy" uses
   105  				// of a value so that an x.Uses==1 rule condition
   106  				// fires reliably.
   107  				for i, a := range v.Args {
   108  					if a.Op != OpCopy {
   109  						continue
   110  					}
   111  					aa := copySource(a)
   112  					v.SetArg(i, aa)
   113  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   114  					// to hold it.  The first candidate is the value that will replace a (aa),
   115  					// if it shares the same block and line and is eligible.
   116  					// The second option is v, which has a as an input.  Because aa is earlier in
   117  					// the data flow, it is the better choice.
   118  					if a.Pos.IsStmt() == src.PosIsStmt {
   119  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   120  							aa.Pos = aa.Pos.WithIsStmt()
   121  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   122  							v.Pos = v.Pos.WithIsStmt()
   123  						} else {
   124  							// Record the lost line and look for a new home after all rewrites are complete.
   125  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   126  							// line to appear in more than one block, but only one block is stored, so if both end
   127  							// up here, then one will be lost.
   128  							pendingLines.set(a.Pos, int32(a.Block.ID))
   129  						}
   130  						a.Pos = a.Pos.WithNotStmt()
   131  					}
   132  					vchange = true
   133  					for a.Uses == 0 {
   134  						b := a.Args[0]
   135  						a.reset(OpInvalid)
   136  						a = b
   137  					}
   138  				}
   139  				if vchange && debug > 1 {
   140  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   141  				}
   142  
   143  				// apply rewrite function
   144  				if rv(v) {
   145  					vchange = true
   146  					// If value changed to a poor choice for a statement boundary, move the boundary
   147  					if v.Pos.IsStmt() == src.PosIsStmt {
   148  						if k := nextGoodStatementIndex(v, j, b); k != j {
   149  							v.Pos = v.Pos.WithNotStmt()
   150  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   151  						}
   152  					}
   153  				}
   154  
   155  				change = change || vchange
   156  				if vchange && debug > 1 {
   157  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   158  				}
   159  			}
   160  		}
   161  		if !change && !deadChange {
   162  			break
   163  		}
   164  		iters++
   165  		if (iters > itersLimit || debug >= 2) && change {
   166  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   167  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   168  			// and the maximum value encountered during make.bash is 12.
   169  			// Start checking for cycles. (This is too expensive to do routinely.)
   170  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   171  			if states == nil {
   172  				states = make(map[string]bool)
   173  			}
   174  			h := f.rewriteHash()
   175  			if _, ok := states[h]; ok {
   176  				// We've found a cycle.
   177  				// To diagnose it, set debug to 2 and start again,
   178  				// so that we'll print all rules applied until we complete another cycle.
   179  				// If debug is already >= 2, we've already done that, so it's time to crash.
   180  				if debug < 2 {
   181  					debug = 2
   182  					states = make(map[string]bool)
   183  				} else {
   184  					f.Fatalf("rewrite cycle detected")
   185  				}
   186  			}
   187  			states[h] = true
   188  		}
   189  	}
   190  	// remove clobbered values
   191  	for _, b := range f.Blocks {
   192  		j := 0
   193  		for i, v := range b.Values {
   194  			vl := v.Pos
   195  			if v.Op == OpInvalid {
   196  				if v.Pos.IsStmt() == src.PosIsStmt {
   197  					pendingLines.set(vl, int32(b.ID))
   198  				}
   199  				f.freeValue(v)
   200  				continue
   201  			}
   202  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   203  				pendingLines.remove(vl)
   204  				v.Pos = v.Pos.WithIsStmt()
   205  			}
   206  			if i != j {
   207  				b.Values[j] = v
   208  			}
   209  			j++
   210  		}
   211  		if pendingLines.get(b.Pos) == int32(b.ID) {
   212  			b.Pos = b.Pos.WithIsStmt()
   213  			pendingLines.remove(b.Pos)
   214  		}
   215  		b.truncateValues(j)
   216  	}
   217  }
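        // Illustrative sketch (not part of the original source): a rewrite pass
        // wires its generated block and value rewriters through applyRewrite,
        // roughly like
        //
        //	applyRewrite(f, rewriteBlockgeneric, rewriteValuegeneric, removeDeadValues)
        //
        // where rewriteBlockgeneric and rewriteValuegeneric are produced by
        // _gen/rulegen.go.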
   218  
   219  // Common functions called from rewriting rules
   220  
   221  func is64BitFloat(t *types.Type) bool {
   222  	return t.Size() == 8 && t.IsFloat()
   223  }
   224  
   225  func is32BitFloat(t *types.Type) bool {
   226  	return t.Size() == 4 && t.IsFloat()
   227  }
   228  
   229  func is64BitInt(t *types.Type) bool {
   230  	return t.Size() == 8 && t.IsInteger()
   231  }
   232  
   233  func is32BitInt(t *types.Type) bool {
   234  	return t.Size() == 4 && t.IsInteger()
   235  }
   236  
   237  func is16BitInt(t *types.Type) bool {
   238  	return t.Size() == 2 && t.IsInteger()
   239  }
   240  
   241  func is8BitInt(t *types.Type) bool {
   242  	return t.Size() == 1 && t.IsInteger()
   243  }
   244  
   245  func isPtr(t *types.Type) bool {
   246  	return t.IsPtrShaped()
   247  }
   248  
   249  func copyCompatibleType(t1, t2 *types.Type) bool {
   250  	if t1.Size() != t2.Size() {
   251  		return false
   252  	}
   253  	if t1.IsInteger() {
   254  		return t2.IsInteger()
   255  	}
   256  	if isPtr(t1) {
   257  		return isPtr(t2)
   258  	}
   259  	return t1.Compare(t2) == types.CMPeq
   260  }
   261  
   262  // mergeSym merges two symbolic offsets. There is no real merging of
   263  // offsets; we just pick the non-nil one.
   264  func mergeSym(x, y Sym) Sym {
   265  	if x == nil {
   266  		return y
   267  	}
   268  	if y == nil {
   269  		return x
   270  	}
   271  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   272  }
   273  
   274  func canMergeSym(x, y Sym) bool {
   275  	return x == nil || y == nil
   276  }
   277  
   278  // canMergeLoadClobber reports whether the load can be merged into target without
   279  // invalidating the schedule.
   280  // It also checks that the other non-load argument x is something we
   281  // are ok with clobbering.
   282  func canMergeLoadClobber(target, load, x *Value) bool {
   283  	// The register containing x is going to get clobbered.
   284  	// Don't merge if we still need the value of x.
   285  	// We don't have liveness information here, but we can
   286  	// approximate x dying with:
   287  	//  1) target is x's only use.
   288  	//  2) target is not in a deeper loop than x.
   289  	if x.Uses != 1 {
   290  		return false
   291  	}
   292  	loopnest := x.Block.Func.loopnest()
   293  	loopnest.calculateDepths()
   294  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   295  		return false
   296  	}
   297  	return canMergeLoad(target, load)
   298  }
   299  
   300  // canMergeLoad reports whether the load can be merged into target without
   301  // invalidating the schedule.
   302  func canMergeLoad(target, load *Value) bool {
   303  	if target.Block.ID != load.Block.ID {
   304  		// If the load is in a different block do not merge it.
   305  		return false
   306  	}
   307  
   308  	// We can't merge the load into the target if the load
   309  	// has more than one use.
   310  	if load.Uses != 1 {
   311  		return false
   312  	}
   313  
   314  	mem := load.MemoryArg()
   315  
   316  	// We need the load's memory arg to still be alive at target. That
   317  	// can't be the case if one of target's args depends on a memory
   318  	// state that is a successor of load's memory arg.
   319  	//
   320  	// For example, it would be invalid to merge load into target in
   321  	// the following situation because newmem has killed oldmem
   322  	// before target is reached:
   323  	//     load = read ... oldmem
   324  	//   newmem = write ... oldmem
   325  	//     arg0 = read ... newmem
   326  	//   target = add arg0 load
   327  	//
   328  	// If the argument comes from a different block then we can exclude
   329  	// it immediately because it must dominate load (which is in the
   330  	// same block as target).
   331  	var args []*Value
   332  	for _, a := range target.Args {
   333  		if a != load && a.Block.ID == target.Block.ID {
   334  			args = append(args, a)
   335  		}
   336  	}
   337  
   338  	// memPreds contains memory states known to be predecessors of load's
   339  	// memory state. It is lazily initialized.
   340  	var memPreds map[*Value]bool
   341  	for i := 0; len(args) > 0; i++ {
   342  		const limit = 100
   343  		if i >= limit {
   344  			// Give up if we have done a lot of iterations.
   345  			return false
   346  		}
   347  		v := args[len(args)-1]
   348  		args = args[:len(args)-1]
   349  		if target.Block.ID != v.Block.ID {
   350  			// Since target and load are in the same block
   351  			// we can stop searching when we leave the block.
   352  			continue
   353  		}
   354  		if v.Op == OpPhi {
   355  			// A Phi implies we have reached the top of the block.
   356  			// The memory phi, if it exists, is always
   357  			// the first logical store in the block.
   358  			continue
   359  		}
   360  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   361  			// We could handle this situation however it is likely
   362  			// to be very rare.
   363  			return false
   364  		}
   365  		if v.Op.SymEffect()&SymAddr != 0 {
   366  			// This case prevents an operation that calculates the
   367  			// address of a local variable from being forced to schedule
   368  			// before its corresponding VarDef.
   369  			// See issue 28445.
   370  			//   v1 = LOAD ...
   371  			//   v2 = VARDEF
   372  			//   v3 = LEAQ
   373  			//   v4 = CMPQ v1 v3
   374  			// We don't want to combine the CMPQ with the load, because
   375  			// that would force the CMPQ to schedule before the VARDEF, which
   376  			// in turn requires the LEAQ to schedule before the VARDEF.
   377  			return false
   378  		}
   379  		if v.Type.IsMemory() {
   380  			if memPreds == nil {
   381  				// Initialise a map containing memory states
   382  				// known to be predecessors of load's memory
   383  				// state.
   384  				memPreds = make(map[*Value]bool)
   385  				m := mem
   386  				const limit = 50
   387  				for i := 0; i < limit; i++ {
   388  					if m.Op == OpPhi {
   389  						// The memory phi, if it exists, is always
   390  						// the first logical store in the block.
   391  						break
   392  					}
   393  					if m.Block.ID != target.Block.ID {
   394  						break
   395  					}
   396  					if !m.Type.IsMemory() {
   397  						break
   398  					}
   399  					memPreds[m] = true
   400  					if len(m.Args) == 0 {
   401  						break
   402  					}
   403  					m = m.MemoryArg()
   404  				}
   405  			}
   406  
   407  			// We can merge if v is a predecessor of mem.
   408  			//
   409  			// For example, we can merge load into target in the
   410  			// following scenario:
   411  			//      x = read ... v
   412  			//    mem = write ... v
   413  			//   load = read ... mem
   414  			// target = add x load
   415  			if memPreds[v] {
   416  				continue
   417  			}
   418  			return false
   419  		}
   420  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   421  			// If v takes mem as an input then we know mem
   422  			// is valid at this point.
   423  			continue
   424  		}
   425  		for _, a := range v.Args {
   426  			if target.Block.ID == a.Block.ID {
   427  				args = append(args, a)
   428  			}
   429  		}
   430  	}
   431  
   432  	return true
   433  }
   434  
   435  // isSameCall reports whether the call described by aux targets the function with the given name.
   436  func isSameCall(aux Aux, name string) bool {
   437  	fn := aux.(*AuxCall).Fn
   438  	return fn != nil && fn.String() == name
   439  }
   440  
   441  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   442  func canLoadUnaligned(c *Config) bool {
   443  	return c.ctxt.Arch.Alignment == 1
   444  }
   445  
   446  // nlzX returns the number of leading zeros.
   447  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   448  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   449  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   450  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   451  
   452  // ntzX returns the number of trailing zeros.
   453  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   454  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   455  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   456  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
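        // Illustrative examples (not part of the original source):
        //
        //	nlz64(1)  // 63
        //	nlz32(-1) // 0
        //	ntz64(8)  // 3
        //	ntz8(0)   // 8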
   457  
   458  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   459  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   460  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   461  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   462  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
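        // oneBitX reports whether x has exactly one bit set. Illustrative examples
        // (not part of the original source):
        //
        //	oneBit(8) // true
        //	oneBit(6) // false
        //	oneBit(0) // false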
   463  
   464  // nto returns the number of trailing ones.
   465  func nto(x int64) int64 {
   466  	return int64(ntz64(^x))
   467  }
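        // Illustrative examples (not part of the original source):
        //
        //	nto(0b0111) // 3
        //	nto(0)      // 0
        //	nto(-1)     // 64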
   468  
   469  // logX returns the base-2 logarithm of n.
   470  // n must be a positive power of 2 (isPowerOfTwo returns true).
   471  func log8(n int8) int64 {
   472  	return int64(bits.Len8(uint8(n))) - 1
   473  }
   474  func log16(n int16) int64 {
   475  	return int64(bits.Len16(uint16(n))) - 1
   476  }
   477  func log32(n int32) int64 {
   478  	return int64(bits.Len32(uint32(n))) - 1
   479  }
   480  func log64(n int64) int64 {
   481  	return int64(bits.Len64(uint64(n))) - 1
   482  }
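        // Illustrative examples (not part of the original source):
        //
        //	log8(8)    // 3
        //	log16(256) // 8
        //	log64(1)   // 0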
   483  
   484  // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
   485  // Rounds down.
   486  func log2uint32(n int64) int64 {
   487  	return int64(bits.Len32(uint32(n))) - 1
   488  }
   489  
   490  // isPowerOfTwo reports whether n is a power of 2.
   491  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   492  	return n > 0 && n&(n-1) == 0
   493  }
   494  
   495  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   496  func isUint64PowerOfTwo(in int64) bool {
   497  	n := uint64(in)
   498  	return n > 0 && n&(n-1) == 0
   499  }
   500  
   501  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   502  func isUint32PowerOfTwo(in int64) bool {
   503  	n := uint64(uint32(in))
   504  	return n > 0 && n&(n-1) == 0
   505  }
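        // Illustrative examples (not part of the original source):
        //
        //	isPowerOfTwo(int64(64))      // true
        //	isPowerOfTwo(int64(0))       // false
        //	isPowerOfTwo(int64(-8))      // false (sign matters)
        //	isUint64PowerOfTwo(-1 << 63) // true: uint64(-1<<63) == 1<<63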
   506  
   507  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   508  func is32Bit(n int64) bool {
   509  	return n == int64(int32(n))
   510  }
   511  
   512  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   513  func is16Bit(n int64) bool {
   514  	return n == int64(int16(n))
   515  }
   516  
   517  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   518  func is8Bit(n int64) bool {
   519  	return n == int64(int8(n))
   520  }
   521  
   522  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   523  func isU8Bit(n int64) bool {
   524  	return n == int64(uint8(n))
   525  }
   526  
   527  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   528  func isU12Bit(n int64) bool {
   529  	return 0 <= n && n < (1<<12)
   530  }
   531  
   532  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   533  func isU16Bit(n int64) bool {
   534  	return n == int64(uint16(n))
   535  }
   536  
   537  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   538  func isU32Bit(n int64) bool {
   539  	return n == int64(uint32(n))
   540  }
   541  
   542  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   543  func is20Bit(n int64) bool {
   544  	return -(1<<19) <= n && n < (1<<19)
   545  }
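        // Illustrative examples (not part of the original source):
        //
        //	is32Bit(1<<31 - 1)  // true
        //	is32Bit(1 << 31)    // false
        //	isU8Bit(255)        // true
        //	isU8Bit(256)        // false
        //	is20Bit(-(1 << 19)) // true
        //	is20Bit(1 << 19)    // false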
   546  
   547  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   548  func b2i(b bool) int64 {
   549  	if b {
   550  		return 1
   551  	}
   552  	return 0
   553  }
   554  
   555  // b2i32 translates a boolean value to 0 or 1.
   556  func b2i32(b bool) int32 {
   557  	if b {
   558  		return 1
   559  	}
   560  	return 0
   561  }
   562  
   563  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   564  // A shift is bounded if it is shifting by less than the width of the shifted value.
   565  func shiftIsBounded(v *Value) bool {
   566  	return v.AuxInt != 0
   567  }
   568  
   569  // canonLessThan reports whether x is "ordered" less than y, for purposes of normalizing
   570  // generated code as much as possible.
   571  func canonLessThan(x, y *Value) bool {
   572  	if x.Op != y.Op {
   573  		return x.Op < y.Op
   574  	}
   575  	if !x.Pos.SameFileAndLine(y.Pos) {
   576  		return x.Pos.Before(y.Pos)
   577  	}
   578  	return x.ID < y.ID
   579  }
   580  
   581  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   582  // of the mantissa. It will panic if the truncation results in lost information.
   583  func truncate64Fto32F(f float64) float32 {
   584  	if !isExactFloat32(f) {
   585  		panic("truncate64Fto32F: truncation is not exact")
   586  	}
   587  	if !math.IsNaN(f) {
   588  		return float32(f)
   589  	}
   590  	// NaN bit patterns aren't necessarily preserved across conversion
   591  	// instructions so we need to do the conversion manually.
   592  	b := math.Float64bits(f)
   593  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   594  	//          | sign                  | exponent   | mantissa       |
   595  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   596  	return math.Float32frombits(r)
   597  }
   598  
   599  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   600  // pattern of the mantissa.
   601  func extend32Fto64F(f float32) float64 {
   602  	if !math.IsNaN(float64(f)) {
   603  		return float64(f)
   604  	}
   605  	// NaN bit patterns aren't necessarily preserved across conversion
   606  	// instructions so we need to do the conversion manually.
   607  	b := uint64(math.Float32bits(f))
   608  	//   | sign                  | exponent      | mantissa                    |
   609  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   610  	return math.Float64frombits(r)
   611  }
   612  
   613  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   614  func DivisionNeedsFixUp(v *Value) bool {
   615  	return v.AuxInt == 0
   616  }
   617  
   618  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   619  func auxFrom64F(f float64) int64 {
   620  	if f != f {
   621  		panic("can't encode a NaN in AuxInt field")
   622  	}
   623  	return int64(math.Float64bits(f))
   624  }
   625  
   626  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   627  func auxFrom32F(f float32) int64 {
   628  	if f != f {
   629  		panic("can't encode a NaN in AuxInt field")
   630  	}
   631  	return int64(math.Float64bits(extend32Fto64F(f)))
   632  }
   633  
   634  // auxTo32F decodes a float32 from the AuxInt value provided.
   635  func auxTo32F(i int64) float32 {
   636  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   637  }
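        // Illustrative round trip (not part of the original source): for a non-NaN
        // float32 f, auxTo32F(auxFrom32F(f)) == f, e.g.
        //
        //	auxTo32F(auxFrom32F(1.5)) // 1.5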
   638  
   639  // auxTo64F decodes a float64 from the AuxInt value provided.
   640  func auxTo64F(i int64) float64 {
   641  	return math.Float64frombits(uint64(i))
   642  }
   643  
   644  func auxIntToBool(i int64) bool {
   645  	if i == 0 {
   646  		return false
   647  	}
   648  	return true
   649  }
   650  func auxIntToInt8(i int64) int8 {
   651  	return int8(i)
   652  }
   653  func auxIntToInt16(i int64) int16 {
   654  	return int16(i)
   655  }
   656  func auxIntToInt32(i int64) int32 {
   657  	return int32(i)
   658  }
   659  func auxIntToInt64(i int64) int64 {
   660  	return i
   661  }
   662  func auxIntToUint8(i int64) uint8 {
   663  	return uint8(i)
   664  }
   665  func auxIntToFloat32(i int64) float32 {
   666  	return float32(math.Float64frombits(uint64(i)))
   667  }
   668  func auxIntToFloat64(i int64) float64 {
   669  	return math.Float64frombits(uint64(i))
   670  }
   671  func auxIntToValAndOff(i int64) ValAndOff {
   672  	return ValAndOff(i)
   673  }
   674  func auxIntToArm64BitField(i int64) arm64BitField {
   675  	return arm64BitField(i)
   676  }
   677  func auxIntToInt128(x int64) int128 {
   678  	if x != 0 {
   679  		panic("nonzero int128 not allowed")
   680  	}
   681  	return 0
   682  }
   683  func auxIntToFlagConstant(x int64) flagConstant {
   684  	return flagConstant(x)
   685  }
   686  
   687  func auxIntToOp(cc int64) Op {
   688  	return Op(cc)
   689  }
   690  
   691  func boolToAuxInt(b bool) int64 {
   692  	if b {
   693  		return 1
   694  	}
   695  	return 0
   696  }
   697  func int8ToAuxInt(i int8) int64 {
   698  	return int64(i)
   699  }
   700  func int16ToAuxInt(i int16) int64 {
   701  	return int64(i)
   702  }
   703  func int32ToAuxInt(i int32) int64 {
   704  	return int64(i)
   705  }
   706  func int64ToAuxInt(i int64) int64 {
   707  	return int64(i)
   708  }
   709  func uint8ToAuxInt(i uint8) int64 {
   710  	return int64(int8(i))
   711  }
   712  func float32ToAuxInt(f float32) int64 {
   713  	return int64(math.Float64bits(float64(f)))
   714  }
   715  func float64ToAuxInt(f float64) int64 {
   716  	return int64(math.Float64bits(f))
   717  }
   718  func valAndOffToAuxInt(v ValAndOff) int64 {
   719  	return int64(v)
   720  }
   721  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   722  	return int64(v)
   723  }
   724  func int128ToAuxInt(x int128) int64 {
   725  	if x != 0 {
   726  		panic("nonzero int128 not allowed")
   727  	}
   728  	return 0
   729  }
   730  func flagConstantToAuxInt(x flagConstant) int64 {
   731  	return int64(x)
   732  }
   733  
   734  func opToAuxInt(o Op) int64 {
   735  	return int64(o)
   736  }
   737  
   738  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   739  type Aux interface {
   740  	CanBeAnSSAAux()
   741  }
   742  
   743  // auxMark is for now only used to mark moves that need to avoid clobbering flags.
   744  type auxMark bool
   745  
   746  func (auxMark) CanBeAnSSAAux() {}
   747  
   748  var AuxMark auxMark
   749  
   750  // stringAux wraps string values for use in Aux.
   751  type stringAux string
   752  
   753  func (stringAux) CanBeAnSSAAux() {}
   754  
   755  func auxToString(i Aux) string {
   756  	return string(i.(stringAux))
   757  }
   758  func auxToSym(i Aux) Sym {
   759  	// TODO: kind of a hack - allows nil interface through
   760  	s, _ := i.(Sym)
   761  	return s
   762  }
   763  func auxToType(i Aux) *types.Type {
   764  	return i.(*types.Type)
   765  }
   766  func auxToCall(i Aux) *AuxCall {
   767  	return i.(*AuxCall)
   768  }
   769  func auxToS390xCCMask(i Aux) s390x.CCMask {
   770  	return i.(s390x.CCMask)
   771  }
   772  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   773  	return i.(s390x.RotateParams)
   774  }
   775  
   776  func StringToAux(s string) Aux {
   777  	return stringAux(s)
   778  }
   779  func symToAux(s Sym) Aux {
   780  	return s
   781  }
   782  func callToAux(s *AuxCall) Aux {
   783  	return s
   784  }
   785  func typeToAux(t *types.Type) Aux {
   786  	return t
   787  }
   788  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   789  	return c
   790  }
   791  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   792  	return r
   793  }
   794  
   795  // uaddOvf reports whether unsigned a+b would overflow.
   796  func uaddOvf(a, b int64) bool {
   797  	return uint64(a)+uint64(b) < uint64(a)
   798  }
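        // Illustrative examples (not part of the original source):
        //
        //	uaddOvf(1, 2)  // false
        //	uaddOvf(-1, 1) // true: uint64(-1)+1 wraps to 0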
   799  
   800  // loadLSymOffset simulates reading a word at an offset into a
   801  // read-only symbol's runtime memory. If it would read a pointer to
   802  // another symbol, that symbol is returned. Otherwise, it returns nil.
   803  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   804  	if lsym.Type != objabi.SRODATA {
   805  		return nil
   806  	}
   807  
   808  	for _, r := range lsym.R {
   809  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   810  			return r.Sym
   811  		}
   812  	}
   813  
   814  	return nil
   815  }
   816  
   817  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   818  	v.Op = OpStaticLECall
   819  	auxcall := v.Aux.(*AuxCall)
   820  	auxcall.Fn = sym
   821  	// Remove first arg
   822  	v.Args[0].Uses--
   823  	copy(v.Args[0:], v.Args[1:])
   824  	v.Args[len(v.Args)-1] = nil // aid GC
   825  	v.Args = v.Args[:len(v.Args)-1]
   826  	if f := v.Block.Func; f.pass.debug > 0 {
   827  		f.Warnl(v.Pos, "de-virtualizing call")
   828  	}
   829  	return v
   830  }
   831  
   832  // isSamePtr reports whether p1 and p2 point to the same address.
   833  func isSamePtr(p1, p2 *Value) bool {
   834  	if p1 == p2 {
   835  		return true
   836  	}
   837  	if p1.Op != p2.Op {
   838  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   839  			p1 = p1.Args[0]
   840  		}
   841  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   842  			p2 = p2.Args[0]
   843  		}
   844  		if p1 == p2 {
   845  			return true
   846  		}
   847  		if p1.Op != p2.Op {
   848  			return false
   849  		}
   850  	}
   851  	switch p1.Op {
   852  	case OpOffPtr:
   853  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   854  	case OpAddr, OpLocalAddr:
   855  		return p1.Aux == p2.Aux
   856  	case OpAddPtr:
   857  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   858  	}
   859  	return false
   860  }
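        // Illustrative examples (not part of the original source): p and
        // (OffPtr [0] p) are reported as the same address, while (OffPtr [8] p)
        // and (OffPtr [16] p) are not.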
   861  
   862  func isStackPtr(v *Value) bool {
   863  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   864  		v = v.Args[0]
   865  	}
   866  	return v.Op == OpSP || v.Op == OpLocalAddr
   867  }
   868  
   869  // disjoint reports whether the memory region specified by [p1:p1+n1)
   870  // does not overlap with [p2:p2+n2).
   871  // A return value of false does not imply the regions overlap.
   872  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   873  	if n1 == 0 || n2 == 0 {
   874  		return true
   875  	}
   876  	if p1 == p2 {
   877  		return false
   878  	}
   879  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   880  		base, offset = ptr, 0
   881  		for base.Op == OpOffPtr {
   882  			offset += base.AuxInt
   883  			base = base.Args[0]
   884  		}
   885  		if opcodeTable[base.Op].nilCheck {
   886  			base = base.Args[0]
   887  		}
   888  		return base, offset
   889  	}
   890  
   891  	// Run types-based analysis
   892  	if disjointTypes(p1.Type, p2.Type) {
   893  		return true
   894  	}
   895  
   896  	p1, off1 := baseAndOffset(p1)
   897  	p2, off2 := baseAndOffset(p2)
   898  	if isSamePtr(p1, p2) {
   899  		return !overlap(off1, n1, off2, n2)
   900  	}
   901  	// p1 and p2 are not the same, so if they are both OpAddrs then
   902  	// they point to different variables.
   903  	// If one pointer is on the stack and the other is an argument
   904  	// then they can't overlap.
   905  	switch p1.Op {
   906  	case OpAddr, OpLocalAddr:
   907  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   908  			return true
   909  		}
   910  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   911  	case OpArg, OpArgIntReg:
   912  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   913  			return true
   914  		}
   915  	case OpSP:
   916  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   917  	}
   918  	return false
   919  }
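        // Illustrative example (not part of the original source): with a common
        // base pointer p, the 8-byte regions at (OffPtr [0] p) and (OffPtr [8] p)
        // are disjoint, but a 16-byte region at (OffPtr [0] p) is not disjoint
        // from the 8-byte region at (OffPtr [8] p).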
   920  
   921  // disjointTypes reports whether a memory region pointed to by a pointer of type
   922  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   923  // based on type aliasing rules.
   924  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   925  	// Unsafe pointer can alias with anything.
   926  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   927  		return false
   928  	}
   929  
   930  	if !t1.IsPtr() || !t2.IsPtr() {
   931  		panic("disjointTypes: one of arguments is not a pointer")
   932  	}
   933  
   934  	t1 = t1.Elem()
   935  	t2 = t2.Elem()
   936  
   937  	// Not-in-heap types are not supported -- they are rare and non-important; also,
   938  	// type.HasPointers check doesn't work for them correctly.
   939  	if t1.NotInHeap() || t2.NotInHeap() {
   940  		return false
   941  	}
   942  
   943  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   944  
   945  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   946  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   947  		(isPtrShaped(t2) && !t1.HasPointers()) {
   948  		return true
   949  	}
   950  
   951  	return false
   952  }
   953  
   954  // moveSize returns the number of bytes an aligned MOV instruction moves.
   955  func moveSize(align int64, c *Config) int64 {
   956  	switch {
   957  	case align%8 == 0 && c.PtrSize == 8:
   958  		return 8
   959  	case align%4 == 0:
   960  		return 4
   961  	case align%2 == 0:
   962  		return 2
   963  	}
   964  	return 1
   965  }
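        // Illustrative examples (not part of the original source), assuming
        // c.PtrSize == 8:
        //
        //	moveSize(8, c) // 8
        //	moveSize(6, c) // 2
        //	moveSize(3, c) // 1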
   966  
   967  // mergePoint finds a block among a's blocks which dominates b and is itself
   968  // dominated by all of a's blocks. Returns nil if it can't find one.
   969  // Might return nil even if one does exist.
   970  func mergePoint(b *Block, a ...*Value) *Block {
   971  	// Walk backward from b looking for one of the a's blocks.
   972  
   973  	// Max distance
   974  	d := 100
   975  
   976  	for d > 0 {
   977  		for _, x := range a {
   978  			if b == x.Block {
   979  				goto found
   980  			}
   981  		}
   982  		if len(b.Preds) > 1 {
   983  			// Don't know which way to go back. Abort.
   984  			return nil
   985  		}
   986  		b = b.Preds[0].b
   987  		d--
   988  	}
   989  	return nil // too far away
   990  found:
   991  	// At this point, r is the first value in a that we find by walking backwards.
   992  	// if we return anything, r will be it.
   993  	r := b
   994  
   995  	// Keep going, counting the other a's that we find. They must all dominate r.
   996  	na := 0
   997  	for d > 0 {
   998  		for _, x := range a {
   999  			if b == x.Block {
  1000  				na++
  1001  			}
  1002  		}
  1003  		if na == len(a) {
  1004  			// Found all of a in a backwards walk. We can return r.
  1005  			return r
  1006  		}
  1007  		if len(b.Preds) > 1 {
  1008  			return nil
  1009  		}
  1010  		b = b.Preds[0].b
  1011  		d--
  1012  
  1013  	}
  1014  	return nil // too far away
  1015  }
  1016  
  1017  // clobber invalidates values. Returns true.
  1018  // clobber is used by rewrite rules to:
  1019  //
  1020  //	A) make sure the values are really dead and never used again.
  1021  //	B) decrement use counts of the values' args.
  1022  func clobber(vv ...*Value) bool {
  1023  	for _, v := range vv {
  1024  		v.reset(OpInvalid)
  1025  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1026  	}
  1027  	return true
  1028  }
  1029  
  1030  // resetCopy resets v to be a copy of arg.
  1031  // Always returns true.
  1032  func resetCopy(v *Value, arg *Value) bool {
  1033  	v.reset(OpCopy)
  1034  	v.AddArg(arg)
  1035  	return true
  1036  }
  1037  
  1038  // clobberIfDead resets v when use count is 1. Returns true.
  1039  // clobberIfDead is used by rewrite rules to decrement
  1040  // use counts of v's args when v is dead and never used.
  1041  func clobberIfDead(v *Value) bool {
  1042  	if v.Uses == 1 {
  1043  		v.reset(OpInvalid)
  1044  	}
  1045  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1046  	return true
  1047  }
  1048  
  1049  // noteRule is an easy way to track if a rule is matched when writing
  1050  // new ones.  Make the rule of interest also conditional on
  1051  //
  1052  //	noteRule("note to self: rule of interest matched")
  1053  //
  1054  // and that message will print when the rule matches.
  1055  func noteRule(s string) bool {
  1056  	fmt.Println(s)
  1057  	return true
  1058  }
  1059  
  1060  // countRule increments Func.ruleMatches[key].
  1061  // If Func.ruleMatches is non-nil at the end
  1062  // of compilation, it will be printed to stdout.
  1063  // This is intended to make it easier to find which functions
  1064  // contain lots of rule matches when developing new rules.
  1065  func countRule(v *Value, key string) bool {
  1066  	f := v.Block.Func
  1067  	if f.ruleMatches == nil {
  1068  		f.ruleMatches = make(map[string]int)
  1069  	}
  1070  	f.ruleMatches[key]++
  1071  	return true
  1072  }
  1073  
  1074  // warnRule generates compiler debug output with string s when
  1075  // v is not in autogenerated code, cond is true and the rule has fired.
  1076  func warnRule(cond bool, v *Value, s string) bool {
  1077  	if pos := v.Pos; pos.Line() > 1 && cond {
  1078  		v.Block.Func.Warnl(pos, s)
  1079  	}
  1080  	return true
  1081  }
  1082  
  1083  // for a pseudo-op like (LessThan x), extract x.
  1084  func flagArg(v *Value) *Value {
  1085  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1086  		return nil
  1087  	}
  1088  	return v.Args[0]
  1089  }
  1090  
  1091  // arm64Negate finds the complement to an ARM64 condition code,
  1092  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1093  //
  1094  // For floating point, it's more subtle because NaN is unordered. We do
  1095  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1096  func arm64Negate(op Op) Op {
  1097  	switch op {
  1098  	case OpARM64LessThan:
  1099  		return OpARM64GreaterEqual
  1100  	case OpARM64LessThanU:
  1101  		return OpARM64GreaterEqualU
  1102  	case OpARM64GreaterThan:
  1103  		return OpARM64LessEqual
  1104  	case OpARM64GreaterThanU:
  1105  		return OpARM64LessEqualU
  1106  	case OpARM64LessEqual:
  1107  		return OpARM64GreaterThan
  1108  	case OpARM64LessEqualU:
  1109  		return OpARM64GreaterThanU
  1110  	case OpARM64GreaterEqual:
  1111  		return OpARM64LessThan
  1112  	case OpARM64GreaterEqualU:
  1113  		return OpARM64LessThanU
  1114  	case OpARM64Equal:
  1115  		return OpARM64NotEqual
  1116  	case OpARM64NotEqual:
  1117  		return OpARM64Equal
  1118  	case OpARM64LessThanF:
  1119  		return OpARM64NotLessThanF
  1120  	case OpARM64NotLessThanF:
  1121  		return OpARM64LessThanF
  1122  	case OpARM64LessEqualF:
  1123  		return OpARM64NotLessEqualF
  1124  	case OpARM64NotLessEqualF:
  1125  		return OpARM64LessEqualF
  1126  	case OpARM64GreaterThanF:
  1127  		return OpARM64NotGreaterThanF
  1128  	case OpARM64NotGreaterThanF:
  1129  		return OpARM64GreaterThanF
  1130  	case OpARM64GreaterEqualF:
  1131  		return OpARM64NotGreaterEqualF
  1132  	case OpARM64NotGreaterEqualF:
  1133  		return OpARM64GreaterEqualF
  1134  	default:
  1135  		panic("unreachable")
  1136  	}
  1137  }
  1138  
  1139  // arm64Invert evaluates (InvertFlags op), which
  1140  // is the same as altering the condition codes such
  1141  // that the same result would be produced if the arguments
  1142  // to the flag-generating instruction were reversed, e.g.
  1143  // (InvertFlags (CMP x y)) -> (CMP y x)
  1144  func arm64Invert(op Op) Op {
  1145  	switch op {
  1146  	case OpARM64LessThan:
  1147  		return OpARM64GreaterThan
  1148  	case OpARM64LessThanU:
  1149  		return OpARM64GreaterThanU
  1150  	case OpARM64GreaterThan:
  1151  		return OpARM64LessThan
  1152  	case OpARM64GreaterThanU:
  1153  		return OpARM64LessThanU
  1154  	case OpARM64LessEqual:
  1155  		return OpARM64GreaterEqual
  1156  	case OpARM64LessEqualU:
  1157  		return OpARM64GreaterEqualU
  1158  	case OpARM64GreaterEqual:
  1159  		return OpARM64LessEqual
  1160  	case OpARM64GreaterEqualU:
  1161  		return OpARM64LessEqualU
  1162  	case OpARM64Equal, OpARM64NotEqual:
  1163  		return op
  1164  	case OpARM64LessThanF:
  1165  		return OpARM64GreaterThanF
  1166  	case OpARM64GreaterThanF:
  1167  		return OpARM64LessThanF
  1168  	case OpARM64LessEqualF:
  1169  		return OpARM64GreaterEqualF
  1170  	case OpARM64GreaterEqualF:
  1171  		return OpARM64LessEqualF
  1172  	case OpARM64NotLessThanF:
  1173  		return OpARM64NotGreaterThanF
  1174  	case OpARM64NotGreaterThanF:
  1175  		return OpARM64NotLessThanF
  1176  	case OpARM64NotLessEqualF:
  1177  		return OpARM64NotGreaterEqualF
  1178  	case OpARM64NotGreaterEqualF:
  1179  		return OpARM64NotLessEqualF
  1180  	default:
  1181  		panic("unreachable")
  1182  	}
  1183  }
  1184  
  1185  // evaluate an ARM64 op against a flags value
  1186  // that is potentially constant; return 1 for true,
  1187  // -1 for false, and 0 for not constant.
  1188  func ccARM64Eval(op Op, flags *Value) int {
  1189  	fop := flags.Op
  1190  	if fop == OpARM64InvertFlags {
  1191  		return -ccARM64Eval(op, flags.Args[0])
  1192  	}
  1193  	if fop != OpARM64FlagConstant {
  1194  		return 0
  1195  	}
  1196  	fc := flagConstant(flags.AuxInt)
  1197  	b2i := func(b bool) int {
  1198  		if b {
  1199  			return 1
  1200  		}
  1201  		return -1
  1202  	}
  1203  	switch op {
  1204  	case OpARM64Equal:
  1205  		return b2i(fc.eq())
  1206  	case OpARM64NotEqual:
  1207  		return b2i(fc.ne())
  1208  	case OpARM64LessThan:
  1209  		return b2i(fc.lt())
  1210  	case OpARM64LessThanU:
  1211  		return b2i(fc.ult())
  1212  	case OpARM64GreaterThan:
  1213  		return b2i(fc.gt())
  1214  	case OpARM64GreaterThanU:
  1215  		return b2i(fc.ugt())
  1216  	case OpARM64LessEqual:
  1217  		return b2i(fc.le())
  1218  	case OpARM64LessEqualU:
  1219  		return b2i(fc.ule())
  1220  	case OpARM64GreaterEqual:
  1221  		return b2i(fc.ge())
  1222  	case OpARM64GreaterEqualU:
  1223  		return b2i(fc.uge())
  1224  	}
  1225  	return 0
  1226  }
  1227  
  1228  // logRule logs the use of the rule s. This will only be enabled if
  1229  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1230  func logRule(s string) {
  1231  	if ruleFile == nil {
  1232  		// Open a log file to write log to. We open in append
  1233  		// mode because all.bash runs the compiler lots of times,
  1234  		// and we want the concatenation of all of those logs.
  1235  		// This means, of course, that users need to rm the old log
  1236  		// to get fresh data.
  1237  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1238  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1239  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1240  		if err != nil {
  1241  			panic(err)
  1242  		}
  1243  		ruleFile = w
  1244  	}
  1245  	_, err := fmt.Fprintln(ruleFile, s)
  1246  	if err != nil {
  1247  		panic(err)
  1248  	}
  1249  }
  1250  
  1251  var ruleFile io.Writer
  1252  
  1253  func isConstZero(v *Value) bool {
  1254  	switch v.Op {
  1255  	case OpConstNil:
  1256  		return true
  1257  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1258  		return v.AuxInt == 0
  1259  	case OpStringMake, OpIMake, OpComplexMake:
  1260  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1261  	case OpSliceMake:
  1262  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1263  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1264  		return isConstZero(v.Args[0])
  1265  	}
  1266  	return false
  1267  }
  1268  
  1269  // reciprocalExact64 reports whether 1/c is exactly representable.
  1270  func reciprocalExact64(c float64) bool {
  1271  	b := math.Float64bits(c)
  1272  	man := b & (1<<52 - 1)
  1273  	if man != 0 {
  1274  		return false // not a power of 2, denormal, or NaN
  1275  	}
  1276  	exp := b >> 52 & (1<<11 - 1)
  1277  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1278  	// changes the exponent to 0x7fe-exp.
  1279  	switch exp {
  1280  	case 0:
  1281  		return false // ±0
  1282  	case 0x7ff:
  1283  		return false // ±inf
  1284  	case 0x7fe:
  1285  		return false // exponent is not representable
  1286  	default:
  1287  		return true
  1288  	}
  1289  }
  1290  
  1291  // reciprocalExact32 reports whether 1/c is exactly representable.
  1292  func reciprocalExact32(c float32) bool {
  1293  	b := math.Float32bits(c)
  1294  	man := b & (1<<23 - 1)
  1295  	if man != 0 {
  1296  		return false // not a power of 2, denormal, or NaN
  1297  	}
  1298  	exp := b >> 23 & (1<<8 - 1)
  1299  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1300  	// changes the exponent to 0xfe-exp.
  1301  	switch exp {
  1302  	case 0:
  1303  		return false // ±0
  1304  	case 0xff:
  1305  		return false // ±inf
  1306  	case 0xfe:
  1307  		return false // exponent is not representable
  1308  	default:
  1309  		return true
  1310  	}
  1311  }
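        // Illustrative examples (not part of the original source):
        //
        //	reciprocalExact64(2.0) // true:  1/2 is exact
        //	reciprocalExact64(3.0) // false: 1/3 is not exact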
  1312  
  1313  // isARMImmRot checks whether an immediate can be directly encoded into an ARM instruction.
  1314  func isARMImmRot(v uint32) bool {
  1315  	for i := 0; i < 16; i++ {
  1316  		if v&^0xff == 0 {
  1317  			return true
  1318  		}
  1319  		v = v<<2 | v>>30
  1320  	}
  1321  
  1322  	return false
  1323  }
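        // Illustrative examples (not part of the original source):
        //
        //	isARMImmRot(0xff)       // true
        //	isARMImmRot(0xff000000) // true  (8-bit value rotated by an even amount)
        //	isARMImmRot(0x1fe)      // false (would need an odd rotation)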
  1324  
  1325  // overlap reports whether the ranges given by the given offset and
  1326  // size pairs overlap.
  1327  func overlap(offset1, size1, offset2, size2 int64) bool {
  1328  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1329  		return true
  1330  	}
  1331  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1332  		return true
  1333  	}
  1334  	return false
  1335  }
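        // Illustrative examples (not part of the original source):
        //
        //	overlap(0, 8, 4, 4) // true
        //	overlap(0, 4, 4, 4) // false (adjacent, not overlapping)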
  1336  
  1337  // zeroUpper32Bits checks whether the value zeroes out the upper 32 bits of a 64-bit register.
  1338  // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
  1339  // because it catches the same number of cases as 4.
  1340  func zeroUpper32Bits(x *Value, depth int) bool {
  1341  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1342  		// If the value is signed, it might get re-sign-extended
  1343  		// during spill and restore. See issue 68227.
  1344  		return false
  1345  	}
  1346  	switch x.Op {
  1347  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1348  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1349  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1350  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1351  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1352  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1353  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1354  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1355  		OpAMD64SHLL, OpAMD64SHLLconst:
  1356  		return true
  1357  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1358  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1359  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1360  		return true
  1361  	case OpArg: // note: but not ArgIntReg
  1362  		// amd64 always loads args from the stack unsigned.
  1363  		// most other architectures load them sign/zero extended based on the type.
  1364  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1365  	case OpPhi, OpSelect0, OpSelect1:
  1366  		// Phis can use each other as arguments; instead of tracking visited values,
  1367  		// just limit the recursion depth.
  1368  		if depth <= 0 {
  1369  			return false
  1370  		}
  1371  		for i := range x.Args {
  1372  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1373  				return false
  1374  			}
  1375  		}
  1376  		return true
  1377  
  1378  	}
  1379  	return false
  1380  }
  1381  
  1382  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1383  func zeroUpper48Bits(x *Value, depth int) bool {
  1384  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1385  		return false
  1386  	}
  1387  	switch x.Op {
  1388  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1389  		return true
  1390  	case OpArg: // note: but not ArgIntReg
  1391  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1392  	case OpPhi, OpSelect0, OpSelect1:
  1393  		// Phis can use each other as arguments; instead of tracking visited values,
  1394  		// just limit the recursion depth.
  1395  		if depth <= 0 {
  1396  			return false
  1397  		}
  1398  		for i := range x.Args {
  1399  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1400  				return false
  1401  			}
  1402  		}
  1403  		return true
  1404  
  1405  	}
  1406  	return false
  1407  }
  1408  
  1409  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1410  func zeroUpper56Bits(x *Value, depth int) bool {
  1411  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1412  		return false
  1413  	}
  1414  	switch x.Op {
  1415  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1416  		return true
  1417  	case OpArg: // note: but not ArgIntReg
  1418  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1419  	case OpPhi, OpSelect0, OpSelect1:
  1420  		// Phis can use each other as arguments; instead of tracking visited values,
  1421  		// just limit the recursion depth.
  1422  		if depth <= 0 {
  1423  			return false
  1424  		}
  1425  		for i := range x.Args {
  1426  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1427  				return false
  1428  			}
  1429  		}
  1430  		return true
  1431  
  1432  	}
  1433  	return false
  1434  }
  1435  
  1436  func isInlinableMemclr(c *Config, sz int64) bool {
  1437  	if sz < 0 {
  1438  		return false
  1439  	}
  1440  	// TODO: expand this check to allow other architectures
  1441  	// see CL 454255 and issue 56997
  1442  	switch c.arch {
  1443  	case "amd64", "arm64":
  1444  		return true
  1445  	case "ppc64le", "ppc64", "loong64":
  1446  		return sz < 512
  1447  	}
  1448  	return false
  1449  }
  1450  
  1451  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1452  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1453  // safe, either because Move will do all of its loads before any of its stores, or
  1454  // because the arguments are known to be disjoint.
  1455  // This is used as a check for replacing memmove with Move ops.
  1456  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1457  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1458  	// Move ops may or may not be faster for large sizes depending on how the platform
  1459  	// lowers them, so we only perform this optimization on platforms that we know to
  1460  	// have fast Move ops.
  1461  	switch c.arch {
  1462  	case "amd64":
  1463  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1464  	case "386", "arm64":
  1465  		return sz <= 8
  1466  	case "s390x", "ppc64", "ppc64le":
  1467  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1468  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1469  		return sz <= 4
  1470  	}
  1471  	return false
  1472  }
  1473  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1474  	return isInlinableMemmove(dst, src, sz, c)
  1475  }
  1476  
  1477  // logLargeCopy logs the occurrence of a large copy.
  1478  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1479  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1480  func logLargeCopy(v *Value, s int64) bool {
  1481  	if s < 128 {
  1482  		return true
  1483  	}
  1484  	if logopt.Enabled() {
  1485  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1486  	}
  1487  	return true
  1488  }
  1489  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1490  	if s < 128 {
  1491  		return
  1492  	}
  1493  	if logopt.Enabled() {
  1494  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1495  	}
  1496  }
  1497  
  1498  // hasSmallRotate reports whether the architecture has rotate instructions
  1499  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1500  func hasSmallRotate(c *Config) bool {
  1501  	switch c.arch {
  1502  	case "amd64", "386":
  1503  		return true
  1504  	default:
  1505  		return false
  1506  	}
  1507  }
  1508  
  1509  func supportsPPC64PCRel() bool {
  1510  	// PCRel is currently supported for >= power10, linux only
  1511  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1512  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1513  }
  1514  
  1515  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1516  	if sh < 0 || sh >= sz {
  1517  		panic("PPC64 shift arg sh out of range")
  1518  	}
  1519  	if mb < 0 || mb >= sz {
  1520  		panic("PPC64 shift arg mb out of range")
  1521  	}
  1522  	if me < 0 || me >= sz {
  1523  		panic("PPC64 shift arg me out of range")
  1524  	}
  1525  	return int32(sh<<16 | mb<<8 | me)
  1526  }
  1527  
  1528  func GetPPC64Shiftsh(auxint int64) int64 {
  1529  	return int64(int8(auxint >> 16))
  1530  }
  1531  
  1532  func GetPPC64Shiftmb(auxint int64) int64 {
  1533  	return int64(int8(auxint >> 8))
  1534  }
  1535  
  1536  func GetPPC64Shiftme(auxint int64) int64 {
  1537  	return int64(int8(auxint))
  1538  }
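        // Illustrative round trip (not part of the original source):
        //
        //	aux := int64(newPPC64ShiftAuxInt(3, 4, 29, 32))
        //	GetPPC64Shiftsh(aux) // 3
        //	GetPPC64Shiftmb(aux) // 4
        //	GetPPC64Shiftme(aux) // 29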
  1539  
  1540  // isPPC64WordRotateMask tests whether this value can be encoded as a mask for
  1541  // an rlwinm-like operation. Masks can also extend from the msb and wrap to
  1542  // the lsb. That is, the valid masks are 32 bit strings
  1543  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1544  func isPPC64WordRotateMask(v64 int64) bool {
  1545  	// Isolate the rightmost 1 bit (0 if there is none) and add it to v.
  1546  	v := uint32(v64)
  1547  	vp := (v & -v) + v
  1548  	// Likewise, for the wrapping case.
  1549  	vn := ^v
  1550  	vpn := (vn & -vn) + vn
  1551  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1552  }
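        // Illustrative examples (not part of the original source):
        //
        //	isPPC64WordRotateMask(0x00FF0000) // true
        //	isPPC64WordRotateMask(0xFF0000FF) // true  (wrapping mask)
        //	isPPC64WordRotateMask(0x0FF00FF0) // false
        //	isPPC64WordRotateMask(0)          // false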
  1553  
  1554  // Compress the rotate and mask into a single value of the form
  1555  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1556  // be used to regenerate the input mask.
  1557  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1558  	var mb, me, mbn, men int
  1559  
  1560  	// Determine boundaries and then decode them
  1561  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1562  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1563  	} else if nbits == 32 {
  1564  		mb = bits.LeadingZeros32(uint32(mask))
  1565  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1566  		mbn = bits.LeadingZeros32(^uint32(mask))
  1567  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1568  	} else {
  1569  		mb = bits.LeadingZeros64(uint64(mask))
  1570  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1571  		mbn = bits.LeadingZeros64(^uint64(mask))
  1572  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1573  	}
  1574  	// Check for a wrapping mask (e.g bits at 0 and 63)
  1575  	if mb == 0 && me == int(nbits) {
  1576  		// swap the inverted values
  1577  		mb, me = men, mbn
  1578  	}
  1579  
  1580  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1581  }
  1582  
  1583  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1584  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1585  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1586  // operations can be combined. This function assumes the two opcodes can
  1587  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1588  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1589  	mb := s
  1590  	r := 64 - s
  1591  	// A larger mb is a smaller mask.
  1592  	if (encoded>>8)&0xFF < mb {
  1593  		encoded = (encoded &^ 0xFF00) | mb<<8
  1594  	}
  1595  	// The rotate is expected to be 0.
  1596  	if (encoded & 0xFF0000) != 0 {
  1597  		panic("non-zero rotate")
  1598  	}
  1599  	return encoded | r<<16
  1600  }
  1601  
  1602  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1603  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1604  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1605  	auxint := uint64(sauxint)
  1606  	rotate = int64((auxint >> 16) & 0xFF)
  1607  	mb = int64((auxint >> 8) & 0xFF)
  1608  	me = int64((auxint >> 0) & 0xFF)
  1609  	nbits := int64((auxint >> 24) & 0xFF)
  1610  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1611  	if mb > me {
  1612  		mask = ^mask
  1613  	}
  1614  	if nbits == 32 {
  1615  		mask = uint64(uint32(mask))
  1616  	}
  1617  
  1618  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1619  	// is inclusive.
  1620  	me = (me - 1) & (nbits - 1)
  1621  	return
  1622  }
  1623  
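        // exampleRotateMaskRoundTrip is an illustrative sketch, not part of the
        // original file: it encodes a (rotate, mask, nbits) triple and decodes it
        // again, with me adjusted to the inclusive ISA form. The constants are
        // arbitrary example values.
        func exampleRotateMaskRoundTrip() {
        	enc := encodePPC64RotateMask(0, 0x00000FF0, 32)
        	r, mb, me, mask := DecodePPC64RotateMask(enc)
        	// r == 0, mb == 20, me == 27 (inclusive), mask == 0xFF0; that is,
        	// MASK(20,27) selects bits 20..27 in the ISA's big-endian bit numbering.
        	_, _, _, _ = r, mb, me, mask
        }
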
  1624  // This verifies that the mask is a contiguous
  1625  // set of bits that includes the least
  1626  // significant bit.
  1627  func isPPC64ValidShiftMask(v int64) bool {
  1628  	if (v != 0) && ((v+1)&v) == 0 {
  1629  		return true
  1630  	}
  1631  	return false
  1632  }
  1633  
  1634  func getPPC64ShiftMaskLength(v int64) int64 {
  1635  	return int64(bits.Len64(uint64(v)))
  1636  }
  1637  
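        // exampleShiftMasks is an illustrative sketch, not part of the original file,
        // of the two helpers above: a valid shift mask is a run of ones that starts at
        // bit 0, and its length is the index of its highest set bit plus one. The
        // constants are arbitrary example values.
        func exampleShiftMasks() {
        	_ = isPPC64ValidShiftMask(0x7F)   // true: 0b0111_1111
        	_ = isPPC64ValidShiftMask(0x70)   // false: does not include bit 0
        	_ = getPPC64ShiftMaskLength(0x7F) // 7
        }
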
  1638  // Decompose a shift right into an equivalent rotate/mask,
  1639  // and return mask & m.
  1640  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1641  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1642  	return m & int64(smask)
  1643  }
  1644  
  1645  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1646  func mergePPC64AndSrwi(m, s int64) int64 {
  1647  	mask := mergePPC64RShiftMask(m, s, 32)
  1648  	if !isPPC64WordRotateMask(mask) {
  1649  		return 0
  1650  	}
  1651  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1652  }
  1653  
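        // exampleMergeAndSrwi is an illustrative sketch, not part of the original
        // file: extracting a byte via (ANDconst [0xFF] (SRWconst [8] x)) merges into a
        // single RLWINM that rotates left by 24 and masks with 0xFF. The operands are
        // arbitrary example values.
        func exampleMergeAndSrwi() {
        	enc := mergePPC64AndSrwi(0xFF, 8) // non-zero, so the pair can be merged
        	r, _, _, mask := DecodePPC64RotateMask(enc)
        	// r == 24, mask == 0xFF
        	_, _ = r, mask
        }
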
  1654  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1655  func mergePPC64AndSrdi(m, s int64) int64 {
  1656  	mask := mergePPC64RShiftMask(m, s, 64)
  1657  
  1658  	// Verify the rotate and mask result only uses the lower 32 bits.
  1659  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1660  	if rv&uint64(mask) != 0 {
  1661  		return 0
  1662  	}
  1663  	if !isPPC64WordRotateMask(mask) {
  1664  		return 0
  1665  	}
  1666  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1667  }
  1668  
  1669  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1670  func mergePPC64AndSldi(m, s int64) int64 {
  1671  	mask := -1 << s & m
  1672  
  1673  	// Verify the rotate and mask result only uses the lower 32 bits.
  1674  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1675  	if rv&uint64(mask) != 0 {
  1676  		return 0
  1677  	}
  1678  	if !isPPC64WordRotateMask(mask) {
  1679  		return 0
  1680  	}
  1681  	return encodePPC64RotateMask(s&31, mask, 32)
  1682  }
  1683  
  1684  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1685  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1686  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1687  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1688  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1689  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1690  
  1691  	// Rewrite mask to apply after the final left shift.
  1692  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1693  
  1694  	r_1 := 32 - srw
  1695  	r_2 := GetPPC64Shiftsh(sld)
  1696  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1697  
  1698  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1699  		return 0
  1700  	}
  1701  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1702  }
  1703  
  1704  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1705  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1706  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1707  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1708  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1709  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1710  
  1711  	// Rewrite mask to apply after the final left shift.
  1712  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1713  
  1714  	r_1 := 64 - srd
  1715  	r_2 := GetPPC64Shiftsh(sld)
  1716  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1717  
  1718  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1719  		return 0
  1720  	}
  1721  	// This combine only works when selecting and shifting the lower 32 bits.
  1722  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1723  	if v1&mask_3 != 0 {
  1724  		return 0
  1725  	}
  1726  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1727  }
  1728  
  1729  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1730  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1731  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1732  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1733  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1734  	// For CLRLSLDI, it's more convenient to think of it as masking the leftmost bits, then rotating left.
  1735  
  1736  	// combine the masks, and adjust for the final left shift.
  1737  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1738  	r_2 := GetPPC64Shiftsh(int64(sld))
  1739  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1740  
  1741  	// Verify the result is still a valid bitmask of <= 32 bits.
  1742  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1743  		return 0
  1744  	}
  1745  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1746  }
  1747  
  1748  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1749  // or 0 if they cannot be merged.
  1750  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1751  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1752  	mask_out := (mask_rlw & uint64(mask))
  1753  
  1754  	// Verify the result is still a valid bitmask of <= 32 bits.
  1755  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1756  		return 0
  1757  	}
  1758  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1759  }
  1760  
  1761  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1762  // result. Return rlw if it does, 0 otherwise.
  1763  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1764  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1765  	if mb > me {
  1766  		return 0
  1767  	}
  1768  	return rlw
  1769  }
  1770  
  1771  // Test if an ANDconst feeding into a RLWINM can be merged. Return the encoded RLWINM constant,
  1772  // or 0 if they cannot be merged.
  1773  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1774  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1775  
  1776  	// Rotate the input mask, combine with the rlwinm mask, and test if it is still a valid rlwinm mask.
  1777  	r_mask := bits.RotateLeft32(mask, int(r))
  1778  
  1779  	mask_out := (mask_rlw & uint64(r_mask))
  1780  
  1781  	// Verify the result is still a valid bitmask of <= 32 bits.
  1782  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1783  		return 0
  1784  	}
  1785  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1786  }
  1787  
  1788  // Test if a RLWINM feeding into a SLDconst can be merged. Return the encoded RLWINM constant,
  1789  // or 0 if they cannot be merged.
  1790  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1791  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1792  	if mb > me || mb < sldi {
  1793  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1794  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1795  		return 0
  1796  	}
  1797  	// combine the masks, and adjust for the final left shift.
  1798  	mask_3 := mask_1 << sldi
  1799  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1800  
  1801  	// Verify the result is still a valid bitmask of <= 32 bits.
  1802  	if uint64(uint32(mask_3)) != mask_3 {
  1803  		return 0
  1804  	}
  1805  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1806  }
  1807  
  1808  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1809  // or return 0 if they cannot be combined.
  1810  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1811  	if sld > srw || srw >= 32 {
  1812  		return 0
  1813  	}
  1814  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1815  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1816  	mask := (mask_r & mask_l) << uint(sld)
  1817  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1818  }
  1819  
  1820  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1821  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1822  // of op.
  1823  //
  1824  // E.g. consider the case:
  1825  // a = (ADD x y)
  1826  // b = (CMPconst [0] a)
  1827  // c = (OR a z)
  1828  //
  1829  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1830  // would produce:
  1831  // a  = (ADD x y)
  1832  // a' = (ADDCC x y)
  1833  // a” = (Select0 a')
  1834  // b  = (CMPconst [0] a”)
  1835  // c  = (OR a z)
  1836  //
  1837  // which makes it impossible to rewrite the second user. Instead the result
  1838  // of this conversion is:
  1839  // a' = (ADDCC x y)
  1840  // a  = (Select0 a')
  1841  // b  = (CMPconst [0] a)
  1842  // c  = (OR a z)
  1843  //
  1844  // This makes it trivial to rewrite b using a lowering rule.
  1845  func convertPPC64OpToOpCC(op *Value) *Value {
  1846  	ccOpMap := map[Op]Op{
  1847  		OpPPC64ADD:      OpPPC64ADDCC,
  1848  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1849  		OpPPC64AND:      OpPPC64ANDCC,
  1850  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1851  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1852  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1853  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1854  		OpPPC64NEG:      OpPPC64NEGCC,
  1855  		OpPPC64NOR:      OpPPC64NORCC,
  1856  		OpPPC64OR:       OpPPC64ORCC,
  1857  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1858  		OpPPC64SUB:      OpPPC64SUBCC,
  1859  		OpPPC64XOR:      OpPPC64XORCC,
  1860  	}
  1861  	b := op.Block
  1862  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1863  	opCC.AddArgs(op.Args...)
  1864  	op.reset(OpSelect0)
  1865  	op.AddArgs(opCC)
  1866  	return op
  1867  }
  1868  
  1869  // Try converting a RLDICL to ANDCC. If successful, return the mask; otherwise return 0.
  1870  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1871  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1872  	if r != 0 || mask&0xFFFF != mask {
  1873  		return 0
  1874  	}
  1875  	return int64(mask)
  1876  }
  1877  
  1878  // Convenience function to rotate a 32 bit constant value by another constant.
  1879  func rotateLeft32(v, rotate int64) int64 {
  1880  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1881  }
  1882  
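        // Convenience function to rotate a 64 bit constant value right by another constant.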
  1883  func rotateRight64(v, rotate int64) int64 {
  1884  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1885  }
  1886  
  1887  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1888  func armBFAuxInt(lsb, width int64) arm64BitField {
  1889  	if lsb < 0 || lsb > 63 {
  1890  		panic("ARM(64) bit field lsb constant out of range")
  1891  	}
  1892  	if width < 1 || lsb+width > 64 {
  1893  		panic("ARM(64) bit field width constant out of range")
  1894  	}
  1895  	return arm64BitField(width | lsb<<8)
  1896  }
  1897  
  1898  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1899  func (bfc arm64BitField) lsb() int64 {
  1900  	return int64(uint64(bfc) >> 8)
  1901  }
  1902  
  1903  // returns the width part of the auxInt field of arm64 bitfield ops.
  1904  func (bfc arm64BitField) width() int64 {
  1905  	return int64(bfc) & 0xff
  1906  }
  1907  
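        // exampleARM64BitField is an illustrative sketch, not part of the original
        // file, of the auxInt packing above: the width lives in the low 8 bits and
        // the lsb in the next 8. The constants are arbitrary example values.
        func exampleARM64BitField() {
        	bfc := armBFAuxInt(8, 16) // lsb=8, width=16
        	_ = bfc.lsb()             // 8
        	_ = bfc.width()           // 16
        }
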
  1908  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1909  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1910  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1911  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1912  }
  1913  
  1914  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1915  func arm64BFWidth(mask, rshift int64) int64 {
  1916  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1917  	if shiftedMask == 0 {
  1918  		panic("ARM64 BF mask is zero")
  1919  	}
  1920  	return nto(shiftedMask)
  1921  }
  1922  
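        // exampleARM64BFMask is an illustrative sketch, not part of the original
        // file: 0xFF00 shifted right by 8 becomes a run of ones starting at bit 0, so
        // it is a valid bitfield mask of width 8 as long as lsb plus that width stays
        // below 64. The constants are arbitrary example values.
        func exampleARM64BFMask() {
        	_ = isARM64BFMask(8, 0xFF00, 8) // true
        	_ = arm64BFWidth(0xFF00, 8)     // 8
        }
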
  1923  // registerizable reports whether t is a primitive type that fits in
  1924  // a register. It assumes float64 values will always fit into registers
  1925  // even if that isn't strictly true.
  1926  func registerizable(b *Block, typ *types.Type) bool {
  1927  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1928  		return true
  1929  	}
  1930  	if typ.IsInteger() {
  1931  		return typ.Size() <= b.Func.Config.RegSize
  1932  	}
  1933  	return false
  1934  }
  1935  
  1936  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1937  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1938  	f := v.Block.Func
  1939  	if !f.Config.Race {
  1940  		return false
  1941  	}
  1942  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1943  		return false
  1944  	}
  1945  	for _, b := range f.Blocks {
  1946  		for _, v := range b.Values {
  1947  			switch v.Op {
  1948  			case OpStaticCall, OpStaticLECall:
  1949  				// While checking for racefuncenter we will also encounter racefuncexit, and vice versa.
  1950  				// Allow those, as well as calls to panic*.
  1951  				s := v.Aux.(*AuxCall).Fn.String()
  1952  				switch s {
  1953  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1954  					"runtime.panicdivide", "runtime.panicwrap",
  1955  					"runtime.panicshift":
  1956  					continue
  1957  				}
  1958  				// If we encountered any call, we need to keep racefunc*,
  1959  				// for accurate stacktraces.
  1960  				return false
  1961  			case OpPanicBounds, OpPanicExtend:
  1962  				// Note: these are panic generators that are ok (like the static calls above).
  1963  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  1964  				// We must keep the race functions if there are any other call types.
  1965  				return false
  1966  			}
  1967  		}
  1968  	}
  1969  	if isSameCall(sym, "runtime.racefuncenter") {
  1970  		// TODO REGISTER ABI this needs to be cleaned up.
  1971  		// If we're removing racefuncenter, remove its argument as well.
  1972  		if v.Args[0].Op != OpStore {
  1973  			if v.Op == OpStaticLECall {
  1974  				// there is no store, yet.
  1975  				return true
  1976  			}
  1977  			return false
  1978  		}
  1979  		mem := v.Args[0].Args[2]
  1980  		v.Args[0].reset(OpCopy)
  1981  		v.Args[0].AddArg(mem)
  1982  	}
  1983  	return true
  1984  }
  1985  
  1986  // symIsRO reports whether sym is a read-only global.
  1987  func symIsRO(sym Sym) bool {
  1988  	lsym := sym.(*obj.LSym)
  1989  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1990  }
  1991  
  1992  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  1993  func symIsROZero(sym Sym) bool {
  1994  	lsym := sym.(*obj.LSym)
  1995  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  1996  		return false
  1997  	}
  1998  	for _, b := range lsym.P {
  1999  		if b != 0 {
  2000  			return false
  2001  		}
  2002  	}
  2003  	return true
  2004  }
  2005  
  2006  // isFixed32 returns true if the int32 at offset off in symbol sym
  2007  // is known and constant.
  2008  func isFixed32(c *Config, sym Sym, off int64) bool {
  2009  	return isFixed(c, sym, off, 4)
  2010  }
  2011  
  2012  // isFixed returns true if the range [off,off+size] of the symbol sym
  2013  // is known and constant.
  2014  func isFixed(c *Config, sym Sym, off, size int64) bool {
  2015  	lsym := sym.(*obj.LSym)
  2016  	if lsym.Extra == nil {
  2017  		return false
  2018  	}
  2019  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2020  		if off == 2*c.PtrSize && size == 4 {
  2021  			return true // type hash field
  2022  		}
  2023  	}
  2024  	return false
  2025  }
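        // fixed32 returns the value of the int32 at offset off in symbol sym.
        // It must only be called when isFixed32 reports that the value is known.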
  2026  func fixed32(c *Config, sym Sym, off int64) int32 {
  2027  	lsym := sym.(*obj.LSym)
  2028  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2029  		if off == 2*c.PtrSize {
  2030  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  2031  		}
  2032  	}
  2033  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  2034  	return 0
  2035  }
  2036  
  2037  // isFixedSym returns true if the contents of sym at the given offset
  2038  // is known and is the constant address of another symbol.
  2039  func isFixedSym(sym Sym, off int64) bool {
  2040  	lsym := sym.(*obj.LSym)
  2041  	switch {
  2042  	case lsym.Type == objabi.SRODATA:
  2043  		// itabs, dictionaries
  2044  	default:
  2045  		return false
  2046  	}
  2047  	for _, r := range lsym.R {
  2048  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2049  			return true
  2050  		}
  2051  	}
  2052  	return false
  2053  }
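        // fixedSym returns the symbol referenced at offset off in symbol sym.
        // It must only be called when isFixedSym reports that the reference is known.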
  2054  func fixedSym(f *Func, sym Sym, off int64) Sym {
  2055  	lsym := sym.(*obj.LSym)
  2056  	for _, r := range lsym.R {
  2057  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  2058  			if strings.HasPrefix(r.Sym.Name, "type:") {
  2059  				// In case we're loading a type out of a dictionary, we need to record
  2060  				// that the containing function might put that type in an interface.
  2061  				// That information is currently recorded in relocations in the dictionary,
  2062  				// but if we perform this load at compile time then the dictionary
  2063  				// might be dead.
  2064  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2065  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2066  				// Same, but if we're using an itab we need to record that the
  2067  				// itab._type might be put in an interface.
  2068  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2069  			}
  2070  			return r.Sym
  2071  		}
  2072  	}
  2073  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  2074  	return nil
  2075  }
  2076  
  2077  // read8 reads one byte from the read-only global sym at offset off.
  2078  func read8(sym Sym, off int64) uint8 {
  2079  	lsym := sym.(*obj.LSym)
  2080  	if off >= int64(len(lsym.P)) || off < 0 {
  2081  		// Invalid index into the global sym.
  2082  		// This can happen in dead code, so we don't want to panic.
  2083  		// Just return any value, it will eventually get ignored.
  2084  		// See issue 29215.
  2085  		return 0
  2086  	}
  2087  	return lsym.P[off]
  2088  }
  2089  
  2090  // read16 reads two bytes from the read-only global sym at offset off.
  2091  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2092  	lsym := sym.(*obj.LSym)
  2093  	// lsym.P is written lazily.
  2094  	// Bytes requested after the end of lsym.P are 0.
  2095  	var src []byte
  2096  	if 0 <= off && off < int64(len(lsym.P)) {
  2097  		src = lsym.P[off:]
  2098  	}
  2099  	buf := make([]byte, 2)
  2100  	copy(buf, src)
  2101  	return byteorder.Uint16(buf)
  2102  }
  2103  
  2104  // read32 reads four bytes from the read-only global sym at offset off.
  2105  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2106  	lsym := sym.(*obj.LSym)
  2107  	var src []byte
  2108  	if 0 <= off && off < int64(len(lsym.P)) {
  2109  		src = lsym.P[off:]
  2110  	}
  2111  	buf := make([]byte, 4)
  2112  	copy(buf, src)
  2113  	return byteorder.Uint32(buf)
  2114  }
  2115  
  2116  // read64 reads eight bytes from the read-only global sym at offset off.
  2117  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2118  	lsym := sym.(*obj.LSym)
  2119  	var src []byte
  2120  	if 0 <= off && off < int64(len(lsym.P)) {
  2121  		src = lsym.P[off:]
  2122  	}
  2123  	buf := make([]byte, 8)
  2124  	copy(buf, src)
  2125  	return byteorder.Uint64(buf)
  2126  }
  2127  
  2128  // sequentialAddresses reports true if it can prove that x + n == y
  2129  func sequentialAddresses(x, y *Value, n int64) bool {
  2130  	if x == y && n == 0 {
  2131  		return true
  2132  	}
  2133  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2134  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2135  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2136  		return true
  2137  	}
  2138  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2139  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2140  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2141  		return true
  2142  	}
  2143  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2144  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2145  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2146  		return true
  2147  	}
  2148  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2149  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2150  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2151  		return true
  2152  	}
  2153  	return false
  2154  }
  2155  
  2156  // flagConstant represents the result of a compile-time comparison.
  2157  // The sense of these flags does not necessarily represent the hardware's notion
  2158  // of a flags register - these are just a compile-time construct.
  2159  // We happen to match the semantics to those of arm/arm64.
  2160  // Note that these semantics differ from x86: the carry flag has the opposite
  2161  // sense on a subtraction!
  2162  //
  2163  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2164  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2165  //	 (because it does x + ^y + C).
  2166  //
  2167  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2168  type flagConstant uint8
  2169  
  2170  // N reports whether the result of an operation is negative (high bit set).
  2171  func (fc flagConstant) N() bool {
  2172  	return fc&1 != 0
  2173  }
  2174  
  2175  // Z reports whether the result of an operation is 0.
  2176  func (fc flagConstant) Z() bool {
  2177  	return fc&2 != 0
  2178  }
  2179  
  2180  // C reports whether an unsigned add overflowed (carry), or an
  2181  // unsigned subtract did not underflow (borrow).
  2182  func (fc flagConstant) C() bool {
  2183  	return fc&4 != 0
  2184  }
  2185  
  2186  // V reports whether a signed operation overflowed or underflowed.
  2187  func (fc flagConstant) V() bool {
  2188  	return fc&8 != 0
  2189  }
  2190  
  2191  func (fc flagConstant) eq() bool {
  2192  	return fc.Z()
  2193  }
  2194  func (fc flagConstant) ne() bool {
  2195  	return !fc.Z()
  2196  }
  2197  func (fc flagConstant) lt() bool {
  2198  	return fc.N() != fc.V()
  2199  }
  2200  func (fc flagConstant) le() bool {
  2201  	return fc.Z() || fc.lt()
  2202  }
  2203  func (fc flagConstant) gt() bool {
  2204  	return !fc.Z() && fc.ge()
  2205  }
  2206  func (fc flagConstant) ge() bool {
  2207  	return fc.N() == fc.V()
  2208  }
  2209  func (fc flagConstant) ult() bool {
  2210  	return !fc.C()
  2211  }
  2212  func (fc flagConstant) ule() bool {
  2213  	return fc.Z() || fc.ult()
  2214  }
  2215  func (fc flagConstant) ugt() bool {
  2216  	return !fc.Z() && fc.uge()
  2217  }
  2218  func (fc flagConstant) uge() bool {
  2219  	return fc.C()
  2220  }
  2221  
  2222  func (fc flagConstant) ltNoov() bool {
  2223  	return fc.lt() && !fc.V()
  2224  }
  2225  func (fc flagConstant) leNoov() bool {
  2226  	return fc.le() && !fc.V()
  2227  }
  2228  func (fc flagConstant) gtNoov() bool {
  2229  	return fc.gt() && !fc.V()
  2230  }
  2231  func (fc flagConstant) geNoov() bool {
  2232  	return fc.ge() && !fc.V()
  2233  }
  2234  
  2235  func (fc flagConstant) String() string {
  2236  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2237  }
  2238  
  2239  type flagConstantBuilder struct {
  2240  	N bool
  2241  	Z bool
  2242  	C bool
  2243  	V bool
  2244  }
  2245  
  2246  func (fcs flagConstantBuilder) encode() flagConstant {
  2247  	var fc flagConstant
  2248  	if fcs.N {
  2249  		fc |= 1
  2250  	}
  2251  	if fcs.Z {
  2252  		fc |= 2
  2253  	}
  2254  	if fcs.C {
  2255  		fc |= 4
  2256  	}
  2257  	if fcs.V {
  2258  		fc |= 8
  2259  	}
  2260  	return fc
  2261  }
  2262  
  2263  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2264  //  - the results of the C flag are different
  2265  //  - the results of the V flag when y==minint are different
  2266  
  2267  // addFlags64 returns the flags that would be set from computing x+y.
  2268  func addFlags64(x, y int64) flagConstant {
  2269  	var fcb flagConstantBuilder
  2270  	fcb.Z = x+y == 0
  2271  	fcb.N = x+y < 0
  2272  	fcb.C = uint64(x+y) < uint64(x)
  2273  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2274  	return fcb.encode()
  2275  }
  2276  
  2277  // subFlags64 returns the flags that would be set from computing x-y.
  2278  func subFlags64(x, y int64) flagConstant {
  2279  	var fcb flagConstantBuilder
  2280  	fcb.Z = x-y == 0
  2281  	fcb.N = x-y < 0
  2282  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2283  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2284  	return fcb.encode()
  2285  }
  2286  
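        // exampleFlagConstants is an illustrative sketch, not part of the original
        // file, of the arm-style carry convention used above: an unsigned subtract
        // that does not borrow sets C, so unsigned-less-than is !C. The operands are
        // arbitrary example values.
        func exampleFlagConstants() {
        	_ = subFlags64(1, 2).ult()           // true: borrow occurred, C is clear
        	_ = subFlags64(2, 1).uge()           // true: no borrow, C is set
        	_ = addFlags64(math.MaxInt64, 1).V() // true: signed overflow
        }
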
  2287  // addFlags32 returns the flags that would be set from computing x+y.
  2288  func addFlags32(x, y int32) flagConstant {
  2289  	var fcb flagConstantBuilder
  2290  	fcb.Z = x+y == 0
  2291  	fcb.N = x+y < 0
  2292  	fcb.C = uint32(x+y) < uint32(x)
  2293  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2294  	return fcb.encode()
  2295  }
  2296  
  2297  // subFlags32 returns the flags that would be set from computing x-y.
  2298  func subFlags32(x, y int32) flagConstant {
  2299  	var fcb flagConstantBuilder
  2300  	fcb.Z = x-y == 0
  2301  	fcb.N = x-y < 0
  2302  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2303  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2304  	return fcb.encode()
  2305  }
  2306  
  2307  // logicFlags64 returns flags set to the sign/zeroness of x.
  2308  // C and V are set to false.
  2309  func logicFlags64(x int64) flagConstant {
  2310  	var fcb flagConstantBuilder
  2311  	fcb.Z = x == 0
  2312  	fcb.N = x < 0
  2313  	return fcb.encode()
  2314  }
  2315  
  2316  // logicFlags32 returns flags set to the sign/zeroness of x.
  2317  // C and V are set to false.
  2318  func logicFlags32(x int32) flagConstant {
  2319  	var fcb flagConstantBuilder
  2320  	fcb.Z = x == 0
  2321  	fcb.N = x < 0
  2322  	return fcb.encode()
  2323  }
  2324  
  2325  func makeJumpTableSym(b *Block) *obj.LSym {
  2326  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2327  	// The jump table symbol is accessed only from the function symbol.
  2328  	s.Set(obj.AttrStatic, true)
  2329  	return s
  2330  }
  2331  
  2332  // canRotate reports whether the architecture supports
  2333  // rotates of integer registers with the given number of bits.
  2334  func canRotate(c *Config, bits int64) bool {
  2335  	if bits > c.PtrSize*8 {
  2336  		// Don't rewrite to rotates bigger than the machine word.
  2337  		return false
  2338  	}
  2339  	switch c.arch {
  2340  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2341  		return true
  2342  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2343  		return bits >= 32
  2344  	default:
  2345  		return false
  2346  	}
  2347  }
  2348  
  2349  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2350  func isARM64bitcon(x uint64) bool {
  2351  	if x == 1<<64-1 || x == 0 {
  2352  		return false
  2353  	}
  2354  	// determine the period and sign-extend a unit to 64 bits
  2355  	switch {
  2356  	case x != x>>32|x<<32:
  2357  		// period is 64
  2358  		// nothing to do
  2359  	case x != x>>16|x<<48:
  2360  		// period is 32
  2361  		x = uint64(int64(int32(x)))
  2362  	case x != x>>8|x<<56:
  2363  		// period is 16
  2364  		x = uint64(int64(int16(x)))
  2365  	case x != x>>4|x<<60:
  2366  		// period is 8
  2367  		x = uint64(int64(int8(x)))
  2368  	default:
  2369  		// period is 4 or 2, always true
  2370  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2371  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2372  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2373  		// 0101, 1010             -- 01   rotate, repeat
  2374  		return true
  2375  	}
  2376  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2377  }
  2378  
  2379  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2380  func sequenceOfOnes(x uint64) bool {
  2381  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2382  	y += x
  2383  	return (y-1)&y == 0
  2384  }
  2385  
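        // exampleSequenceOfOnes is an illustrative sketch, not part of the original
        // file, of the test above: adding the lowest set bit to a single run of ones
        // yields a power of two. The constants are arbitrary example values.
        func exampleSequenceOfOnes() {
        	_ = sequenceOfOnes(0x0FF0) // true: one contiguous run
        	_ = sequenceOfOnes(0x0F0F) // false: two runs
        }
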
  2386  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2387  func isARM64addcon(v int64) bool {
  2388  	/* uimm12 or uimm24? */
  2389  	if v < 0 {
  2390  		return false
  2391  	}
  2392  	if (v & 0xFFF) == 0 {
  2393  		v >>= 12
  2394  	}
  2395  	return v <= 0xFFF
  2396  }
  2397  
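        // exampleARM64Addcon is an illustrative sketch, not part of the original
        // file: ADD/SUB immediates are an unsigned 12-bit value, optionally shifted
        // left by 12. The constants are arbitrary example values.
        func exampleARM64Addcon() {
        	_ = isARM64addcon(0xFFF)    // true: fits in uimm12
        	_ = isARM64addcon(0x123000) // true: uimm12 shifted left by 12
        	_ = isARM64addcon(0x1234)   // false: needs more than 12 bits unshifted
        }
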
  2398  // setPos sets the position of v to pos, then returns true.
  2399  // Useful for setting the result of a rewrite's position to
  2400  // something other than the default.
  2401  func setPos(v *Value, pos src.XPos) bool {
  2402  	v.Pos = pos
  2403  	return true
  2404  }
  2405  
  2406  // isNonNegative reports whether v is known to be greater or equal to zero.
  2407  // Note that this is pretty simplistic. The prove pass generates more detailed
  2408  // nonnegative information about values.
  2409  func isNonNegative(v *Value) bool {
  2410  	if !v.Type.IsInteger() {
  2411  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2412  	}
  2413  	// TODO: return true if !v.Type.IsSigned()
  2414  	// SSA isn't type-safe enough to do that now (issue 37753).
  2415  	// The checks below depend only on the pattern of bits.
  2416  
  2417  	switch v.Op {
  2418  	case OpConst64:
  2419  		return v.AuxInt >= 0
  2420  
  2421  	case OpConst32:
  2422  		return int32(v.AuxInt) >= 0
  2423  
  2424  	case OpConst16:
  2425  		return int16(v.AuxInt) >= 0
  2426  
  2427  	case OpConst8:
  2428  		return int8(v.AuxInt) >= 0
  2429  
  2430  	case OpStringLen, OpSliceLen, OpSliceCap,
  2431  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2432  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2433  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2434  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2435  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2436  		return true
  2437  
  2438  	case OpRsh64Ux64, OpRsh32Ux64:
  2439  		by := v.Args[1]
  2440  		return by.Op == OpConst64 && by.AuxInt > 0
  2441  
  2442  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2443  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2444  		return isNonNegative(v.Args[0])
  2445  
  2446  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2447  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2448  
  2449  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2450  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2451  		OpOr64, OpOr32, OpOr16, OpOr8,
  2452  		OpXor64, OpXor32, OpXor16, OpXor8:
  2453  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2454  
  2455  		// We could handle OpPhi here, but the improvements from doing
  2456  		// so are very minor, and it is neither simple nor cheap.
  2457  	}
  2458  	return false
  2459  }
  2460  
  2461  func rewriteStructLoad(v *Value) *Value {
  2462  	b := v.Block
  2463  	ptr := v.Args[0]
  2464  	mem := v.Args[1]
  2465  
  2466  	t := v.Type
  2467  	args := make([]*Value, t.NumFields())
  2468  	for i := range args {
  2469  		ft := t.FieldType(i)
  2470  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2471  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2472  	}
  2473  
  2474  	v.reset(OpStructMake)
  2475  	v.AddArgs(args...)
  2476  	return v
  2477  }
  2478  
  2479  func rewriteStructStore(v *Value) *Value {
  2480  	b := v.Block
  2481  	dst := v.Args[0]
  2482  	x := v.Args[1]
  2483  	if x.Op != OpStructMake {
  2484  		base.Fatalf("invalid struct store: %v", x)
  2485  	}
  2486  	mem := v.Args[2]
  2487  
  2488  	t := x.Type
  2489  	for i, arg := range x.Args {
  2490  		ft := t.FieldType(i)
  2491  
  2492  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2493  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2494  	}
  2495  
  2496  	return mem
  2497  }
  2498  
  2499  // isDirectType reports whether v represents a type
  2500  // (a *runtime._type) whose value is stored directly in an
  2501  // interface (i.e., is pointer or pointer-like).
  2502  func isDirectType(v *Value) bool {
  2503  	return isDirectType1(v)
  2504  }
  2505  
  2506  // v is a type
  2507  func isDirectType1(v *Value) bool {
  2508  	switch v.Op {
  2509  	case OpITab:
  2510  		return isDirectType2(v.Args[0])
  2511  	case OpAddr:
  2512  		lsym := v.Aux.(*obj.LSym)
  2513  		if lsym.Extra == nil {
  2514  			return false
  2515  		}
  2516  		if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  2517  			return types.IsDirectIface(ti.Type.(*types.Type))
  2518  		}
  2519  	}
  2520  	return false
  2521  }
  2522  
  2523  // v is an empty interface
  2524  func isDirectType2(v *Value) bool {
  2525  	switch v.Op {
  2526  	case OpIMake:
  2527  		return isDirectType1(v.Args[0])
  2528  	}
  2529  	return false
  2530  }
  2531  
  2532  // isDirectIface reports whether v represents an itab
  2533  // (a *runtime._itab) for a type whose value is stored directly
  2534  // in an interface (i.e., is pointer or pointer-like).
  2535  func isDirectIface(v *Value) bool {
  2536  	return isDirectIface1(v, 9)
  2537  }
  2538  
  2539  // v is an itab
  2540  func isDirectIface1(v *Value, depth int) bool {
  2541  	if depth == 0 {
  2542  		return false
  2543  	}
  2544  	switch v.Op {
  2545  	case OpITab:
  2546  		return isDirectIface2(v.Args[0], depth-1)
  2547  	case OpAddr:
  2548  		lsym := v.Aux.(*obj.LSym)
  2549  		if lsym.Extra == nil {
  2550  			return false
  2551  		}
  2552  		if ii, ok := (*lsym.Extra).(*obj.ItabInfo); ok {
  2553  			return types.IsDirectIface(ii.Type.(*types.Type))
  2554  		}
  2555  	case OpConstNil:
  2556  		// We can treat this as direct, because if the itab is
  2557  		// nil, the data field must be nil also.
  2558  		return true
  2559  	}
  2560  	return false
  2561  }
  2562  
  2563  // v is an interface
  2564  func isDirectIface2(v *Value, depth int) bool {
  2565  	if depth == 0 {
  2566  		return false
  2567  	}
  2568  	switch v.Op {
  2569  	case OpIMake:
  2570  		return isDirectIface1(v.Args[0], depth-1)
  2571  	case OpPhi:
  2572  		for _, a := range v.Args {
  2573  			if !isDirectIface2(a, depth-1) {
  2574  				return false
  2575  			}
  2576  		}
  2577  		return true
  2578  	}
  2579  	return false
  2580  }
  2581  
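        // bitsAdd64 returns the 64-bit sum and carry-out of x+y+carry.
        // The carry input must be 0 or 1; the carry output is 0 or 1.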
  2582  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2583  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2584  	r.sum, r.carry = int64(s), int64(c)
  2585  	return
  2586  }
  2587  }