Source file src/cmd/compile/internal/ssa/stackalloc.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // TODO: live at start of block instead?
     6  
     7  package ssa
     8  
     9  import (
    10  	"cmd/compile/internal/ir"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/src"
    13  	"fmt"
    14  )
    15  
    16  type stackAllocState struct {
    17  	f *Func
    18  
    19  	// live is the output of stackalloc.
    20  	// live[b.id] = live values at the end of block b.
    21  	live [][]ID
    22  
    23  	// The following slices are reused across multiple users
    24  	// of stackAllocState.
    25  	values    []stackValState
    26  	interfere [][]ID // interfere[v.id] = values that interfere with v.
    27  	names     []LocalSlot
    28  
    29  	nArgSlot, // Number of Values sourced to arg slot
    30  	nNotNeed, // Number of Values not needing a stack slot
    31  	nNamedSlot, // Number of Values using a named stack slot
    32  	nReuse, // Number of Values reusing a stack slot
    33  	nAuto, // Number of autos allocated for stack slots
    34  	nSelfInterfere int32 // Number of self-interferences
    35  }
    36  
    37  func newStackAllocState(f *Func) *stackAllocState {
    38  	s := f.Cache.stackAllocState
    39  	if s == nil {
    40  		return new(stackAllocState)
    41  	}
    42  	if s.f != nil {
    43  		f.fe.Fatalf(src.NoXPos, "newStackAllocState called without previous free")
    44  	}
    45  	return s
    46  }
    47  
    48  func putStackAllocState(s *stackAllocState) {
    49  	clear(s.values)
    50  	clear(s.interfere)
    51  	clear(s.names)
    52  	s.f.Cache.stackAllocState = s
    53  	s.f = nil
    54  	s.live = nil
    55  	s.nArgSlot, s.nNotNeed, s.nNamedSlot, s.nReuse, s.nAuto, s.nSelfInterfere = 0, 0, 0, 0, 0, 0
    56  }
    57  
    58  type stackValState struct {
    59  	typ       *types.Type
    60  	spill     *Value
    61  	needSlot  bool
    62  	isArg     bool
    63  	defBlock  ID
    64  	useBlocks []stackUseBlock
    65  }
    66  
    67  // addUseBlock adds a block to the set of blocks that use this value.
    68  // Note that we only loosely enforce the set property: we check just the last
    69  // block that was appended to the list, so duplicates may occur.
    70  // Because we add uses block by block (barring phi-nodes), the number of duplicates is
    71  // small, and we deduplicate as part of the liveness algorithm later anyway.
    72  func (sv *stackValState) addUseBlock(b *Block, liveout bool) {
    73  	entry := stackUseBlock{
    74  		b:       b,
    75  		liveout: liveout,
    76  	}
    77  	if sv.useBlocks == nil || sv.useBlocks[len(sv.useBlocks)-1] != entry {
    78  		sv.useBlocks = append(sv.useBlocks, stackUseBlock{
    79  			b:       b,
    80  			liveout: liveout,
    81  		})
    82  	}
    83  }
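
        // For illustration only (hypothetical blocks b1, b2): addUseBlock drops an
        // entry only when it equals the most recently appended one, so consecutive
        // duplicates are suppressed but non-adjacent repeats can remain:
        //
        //	addUseBlock(b1, false) // useBlocks = [{b1,false}]
        //	addUseBlock(b1, false) // unchanged: equals the last entry
        //	addUseBlock(b2, true)  // useBlocks = [{b1,false}, {b2,true}]
        //	addUseBlock(b1, false) // useBlocks = [{b1,false}, {b2,true}, {b1,false}]
        //
        // The leftover duplicates are harmless; computeLive's seen set and its push
        // helper deduplicate when the use blocks are consumed.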
    84  
    85  type stackUseBlock struct {
    86  	b       *Block
    87  	liveout bool
    88  }
    89  
    90  // stackalloc allocates storage in the stack frame for
    91  // all Values that did not get a register.
    92  // Returns a map from block ID to the stack values live at the end of that block.
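        //
        // For illustration only (hypothetical block and value IDs): for blocks b1
        // and b2 the result might look like
        //
        //	live[b1.ID] = []ID{7, 9} // values 7 and 9 need slots and are live out of b1
        //	live[b2.ID] = nil        // nothing slot-needing is live out of b2
        //
        // Only values that need a stack slot ever appear in these lists.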
    93  func stackalloc(f *Func, spillLive [][]ID) [][]ID {
    94  	if f.pass.debug > stackDebug {
    95  		fmt.Println("before stackalloc")
    96  		fmt.Println(f.String())
    97  	}
    98  	s := newStackAllocState(f)
    99  	s.init(f, spillLive)
   100  	defer putStackAllocState(s)
   101  
   102  	s.stackalloc()
   103  	if f.pass.stats > 0 {
   104  		f.LogStat("stack_alloc_stats",
   105  			s.nArgSlot, "arg_slots", s.nNotNeed, "slot_not_needed",
   106  			s.nNamedSlot, "named_slots", s.nAuto, "auto_slots",
   107  			s.nReuse, "reused_slots", s.nSelfInterfere, "self_interfering")
   108  	}
   109  
   110  	return s.live
   111  }
   112  
   113  func (s *stackAllocState) init(f *Func, spillLive [][]ID) {
   114  	s.f = f
   115  
   116  	// Initialize value information.
   117  	if n := f.NumValues(); cap(s.values) >= n {
   118  		s.values = s.values[:n]
   119  	} else {
   120  		s.values = make([]stackValState, n)
   121  	}
   122  	for _, b := range f.Blocks {
   123  		for _, v := range b.Values {
   124  			s.values[v.ID].typ = v.Type
   125  			s.values[v.ID].needSlot = !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && f.getHome(v.ID) == nil && !v.rematerializeable() && !v.OnWasmStack
   126  			s.values[v.ID].isArg = hasAnyArgOp(v)
   127  			s.values[v.ID].defBlock = b.ID
   128  			if f.pass.debug > stackDebug && s.values[v.ID].needSlot {
   129  				fmt.Printf("%s needs a stack slot\n", v)
   130  			}
   131  			if v.Op == OpStoreReg {
   132  				s.values[v.Args[0].ID].spill = v
   133  			}
   134  		}
   135  	}
   136  
   137  	// Compute liveness info for values needing a slot.
   138  	s.computeLive(spillLive)
   139  
   140  	// Build interference graph among values needing a slot.
   141  	s.buildInterferenceGraph()
   142  }
   143  
   144  func (s *stackAllocState) stackalloc() {
   145  	f := s.f
   146  
   147  	// Build map from values to their names, if any.
   148  	// A value may be associated with more than one name (e.g. after
   149  	// the assignment i=j). This step picks one name per value arbitrarily.
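        	//
        	// For illustration only (hypothetical variables i and j): after "i = j",
        	// the same value can appear under both names, i.e. in f.NamedValues for
        	// i's slot and for j's slot. Whichever name comes first in the f.Names
        	// walk below claims the value:
        	//
        	//	names[v.ID] = *name // only set for the first matching name seen for v
        	//
        	// Either binding is valid; iterating f.Names (rather than ranging over the
        	// NamedValues map) just keeps the choice deterministic between builds.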
   150  	if n := f.NumValues(); cap(s.names) >= n {
   151  		s.names = s.names[:n]
   152  	} else {
   153  		s.names = make([]LocalSlot, n)
   154  	}
   155  	names := s.names
   156  	empty := LocalSlot{}
   157  	for _, name := range f.Names {
   158  		// Note: not "range f.NamedValues" above, because
   159  		// that would be nondeterministic.
   160  		for _, v := range f.NamedValues[*name] {
   161  			if v.Op == OpArgIntReg || v.Op == OpArgFloatReg {
   162  				aux := v.Aux.(*AuxNameOffset)
   163  				// Never let an arg be bound to a differently named thing.
   164  				if name.N != aux.Name || name.Off != aux.Offset {
   165  					if f.pass.debug > stackDebug {
   166  						fmt.Printf("stackalloc register arg %s skipping name %s\n", v, name)
   167  					}
   168  					continue
   169  				}
   170  			} else if name.N.Class == ir.PPARAM && v.Op != OpArg {
    171  				// PPARAMs only bind to OpArg.
   172  				if f.pass.debug > stackDebug {
   173  					fmt.Printf("stackalloc PPARAM name %s skipping non-Arg %s\n", name, v)
   174  				}
   175  				continue
   176  			}
   177  
   178  			if names[v.ID] == empty {
   179  				if f.pass.debug > stackDebug {
   180  					fmt.Printf("stackalloc value %s to name %s\n", v, *name)
   181  				}
   182  				names[v.ID] = *name
   183  			}
   184  		}
   185  	}
   186  
   187  	// Allocate args to their assigned locations.
   188  	for _, v := range f.Entry.Values {
   189  		if !hasAnyArgOp(v) {
   190  			continue
   191  		}
   192  		if v.Aux == nil {
   193  			f.Fatalf("%s has nil Aux\n", v.LongString())
   194  		}
   195  		if v.Op == OpArg {
   196  			loc := LocalSlot{N: v.Aux.(*ir.Name), Type: v.Type, Off: v.AuxInt}
   197  			if f.pass.debug > stackDebug {
   198  				fmt.Printf("stackalloc OpArg %s to %s\n", v, loc)
   199  			}
   200  			f.setHome(v, loc)
   201  			continue
   202  		}
   203  		// You might think this below would be the right idea, but you would be wrong.
   204  		// It almost works; as of 105a6e9518 - 2021-04-23,
   205  		// GOSSAHASH=11011011001011111 == cmd/compile/internal/noder.(*noder).embedded
   206  		// is compiled incorrectly.  I believe the cause is one of those SSA-to-registers
   207  		// puzzles that the register allocator untangles; in the event that a register
   208  		// parameter does not end up bound to a name, "fixing" it is a bad idea.
   209  		//
   210  		//if f.DebugTest {
   211  		//	if v.Op == OpArgIntReg || v.Op == OpArgFloatReg {
   212  		//		aux := v.Aux.(*AuxNameOffset)
   213  		//		loc := LocalSlot{N: aux.Name, Type: v.Type, Off: aux.Offset}
   214  		//		if f.pass.debug > stackDebug {
   215  		//			fmt.Printf("stackalloc Op%s %s to %s\n", v.Op, v, loc)
   216  		//		}
   217  		//		names[v.ID] = loc
   218  		//		continue
   219  		//	}
   220  		//}
   221  
   222  	}
   223  
   224  	// For each type, we keep track of all the stack slots we
   225  	// have allocated for that type. This map is keyed by
    226  	// strings returned by types.LinkString. Equal keys guarantee that the
    227  	// types are identical, and they also let us match the same type when it
    228  	// is represented by two different types.Type structures. See issue 65783.
   229  	locations := map[string][]LocalSlot{}
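
        	// For illustration only (hypothetical values v7 and v9): two distinct
        	// *types.Type structures can describe the same Go type, so keying this
        	// map by pointer identity would wrongly keep their slots apart, while the
        	// LinkString key unifies them:
        	//
        	//	locations[v7.Type.LinkString()] // same bucket ...
        	//	locations[v9.Type.LinkString()] // ... as this one, if the types are identical
        	//
        	// Values whose types share a LinkString can therefore draw from the same
        	// pool of stack slots.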
   230  
   231  	// Each time we assign a stack slot to a value v, we remember
   232  	// the slot we used via an index into locations[v.Type].
   233  	slots := f.Cache.allocIntSlice(f.NumValues())
   234  	defer f.Cache.freeIntSlice(slots)
   235  	for i := range slots {
   236  		slots[i] = -1
   237  	}
   238  
   239  	// Pick a stack slot for each value needing one.
   240  	used := f.Cache.allocBoolSlice(f.NumValues())
   241  	defer f.Cache.freeBoolSlice(used)
   242  	for _, b := range f.Blocks {
   243  		for _, v := range b.Values {
   244  			if !s.values[v.ID].needSlot {
   245  				s.nNotNeed++
   246  				continue
   247  			}
   248  			if hasAnyArgOp(v) {
   249  				s.nArgSlot++
   250  				continue // already picked
   251  			}
   252  
   253  			// If this is a named value, try to use the name as
   254  			// the spill location.
   255  			var name LocalSlot
   256  			if v.Op == OpStoreReg {
   257  				name = names[v.Args[0].ID]
   258  			} else {
   259  				name = names[v.ID]
   260  			}
   261  			if name.N != nil && v.Type.Compare(name.Type) == types.CMPeq {
   262  				for _, id := range s.interfere[v.ID] {
   263  					h := f.getHome(id)
   264  					if h != nil && h.(LocalSlot).N == name.N && h.(LocalSlot).Off == name.Off {
   265  						// A variable can interfere with itself.
   266  						// It is rare, but it can happen.
   267  						s.nSelfInterfere++
   268  						goto noname
   269  					}
   270  				}
   271  				if f.pass.debug > stackDebug {
   272  					fmt.Printf("stackalloc %s to %s\n", v, name)
   273  				}
   274  				s.nNamedSlot++
   275  				f.setHome(v, name)
   276  				continue
   277  			}
   278  
   279  		noname:
   280  			// Set of stack slots we could reuse.
   281  			typeKey := v.Type.LinkString()
   282  			locs := locations[typeKey]
   283  			// Mark all positions in locs used by interfering values.
   284  			for i := 0; i < len(locs); i++ {
   285  				used[i] = false
   286  			}
   287  			for _, xid := range s.interfere[v.ID] {
   288  				slot := slots[xid]
   289  				if slot >= 0 {
   290  					used[slot] = true
   291  				}
   292  			}
   293  			// Find an unused stack slot.
   294  			var i int
   295  			for i = 0; i < len(locs); i++ {
   296  				if !used[i] {
   297  					s.nReuse++
   298  					break
   299  				}
   300  			}
   301  			// If there is no unused stack slot, allocate a new one.
   302  			if i == len(locs) {
   303  				s.nAuto++
   304  				locs = append(locs, LocalSlot{N: f.NewLocal(v.Pos, v.Type), Type: v.Type, Off: 0})
   305  				locations[typeKey] = locs
   306  			}
   307  			// Use the stack variable at that index for v.
   308  			loc := locs[i]
   309  			if f.pass.debug > stackDebug {
   310  				fmt.Printf("stackalloc %s to %s\n", v, loc)
   311  			}
   312  			f.setHome(v, loc)
   313  			slots[v.ID] = i
   314  		}
   315  	}
   316  }
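
        // The loop above performs a greedy first-fit scan: for each value it marks
        // the per-type slots already claimed by interfering values, takes the first
        // unmarked slot, and allocates a new auto only when every existing slot of
        // that type is taken. A minimal standalone sketch of the same idea, using
        // plain ints for slot indices (not the compiler's LocalSlot machinery):
        //
        //	func firstFit(numSlots int, interferingSlots []int) int {
        //		used := make([]bool, numSlots)
        //		for _, s := range interferingSlots {
        //			if s >= 0 && s < numSlots {
        //				used[s] = true
        //			}
        //		}
        //		for i, u := range used {
        //			if !u {
        //				return i // reuse an existing slot
        //			}
        //		}
        //		return numSlots // caller must allocate a fresh slot at this index
        //	}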
   317  
   318  // computeLive computes a map from block ID to a list of
   319  // stack-slot-needing value IDs live at the end of that block.
   320  func (s *stackAllocState) computeLive(spillLive [][]ID) {
   321  
    322  	// Because values using stack slots are few and far between
    323  	// (compared to the set of all values), we use a path exploration
    324  	// algorithm to calculate liveness here.
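        	//
        	// For illustration only (hypothetical blocks): suppose a value v is
        	// defined in b1 and used in b4, with control flow
        	//
        	//	b1 -> b2 -> b4
        	//	b1 -> b3 -> b4
        	//
        	// Starting from the use block b4 and walking predecessor edges until the
        	// defining block is reached records v as live out of b1, b2, and b3, but
        	// not out of b4 (a phi use in a successor, or a spill listed in spillLive,
        	// would force liveout of the corresponding block instead). Only blocks on
        	// def-to-use paths are visited, so the cost scales with the uses of
        	// slot-needing values rather than with the size of the whole function.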
   325  	f := s.f
   326  	for _, b := range f.Blocks {
   327  		for _, spillvid := range spillLive[b.ID] {
   328  			val := &s.values[spillvid]
   329  			val.addUseBlock(b, true)
   330  		}
   331  		for _, v := range b.Values {
   332  			for i, a := range v.Args {
   333  				val := &s.values[a.ID]
   334  				useBlock := b
   335  				forceLiveout := false
   336  				if v.Op == OpPhi {
   337  					useBlock = b.Preds[i].b
   338  					forceLiveout = true
   339  					if spill := val.spill; spill != nil {
   340  						//TODO: remove?  Subsumed by SpillUse?
   341  						s.values[spill.ID].addUseBlock(useBlock, true)
   342  					}
   343  				}
   344  				if !val.needSlot {
   345  					continue
   346  				}
   347  				val.addUseBlock(useBlock, forceLiveout)
   348  			}
   349  		}
   350  	}
   351  
   352  	s.live = make([][]ID, f.NumBlocks())
   353  	push := func(bid, vid ID) {
   354  		l := s.live[bid]
   355  		if l == nil || l[len(l)-1] != vid {
   356  			l = append(l, vid)
   357  			s.live[bid] = l
   358  		}
   359  	}
   360  	// TODO: If we can help along the interference graph by calculating livein sets,
   361  	// we can do so trivially by turning this sparse set into an array of arrays
   362  	// and checking the top for the current value instead of inclusion in the sparse set.
   363  	seen := f.newSparseSet(f.NumBlocks())
   364  	defer f.retSparseSet(seen)
    365  	// Instead of pruning out duplicate blocks when we build the useBlocks slices
    366  	// or when we add them to the queue, we rely on the seen set to stop considering
    367  	// them. This is slightly faster than building the work queues as sets.
   368  	//
   369  	// However, this means that the queue can grow larger than the number of blocks,
   370  	// usually in very short functions. Returning a slice with values appended beyond the
   371  	// original allocation can corrupt the allocator state, so cap the queue and return
   372  	// the originally allocated slice regardless.
   373  	allocedBqueue := f.Cache.allocBlockSlice(f.NumBlocks())
   374  	defer f.Cache.freeBlockSlice(allocedBqueue)
   375  	bqueue := allocedBqueue[:0:f.NumBlocks()]
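        	// For illustration only: the three-index slice above pins bqueue's
        	// capacity, so an append past f.NumBlocks() allocates fresh backing
        	// storage rather than writing beyond the original allocation:
        	//
        	//	buf := make([]*Block, 8)
        	//	q := buf[:0:8]
        	//	q = append(q, make([]*Block, 9)...) // len(q) == 9; q no longer aliases buf
        	//
        	// The deferred freeBlockSlice still receives allocedBqueue itself, with
        	// its original length and capacity intact.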
   376  
   377  	for vid, v := range s.values {
   378  		if !v.needSlot {
   379  			continue
   380  		}
   381  		seen.clear()
   382  		bqueue = bqueue[:0]
   383  		for _, b := range v.useBlocks {
   384  			if b.liveout {
   385  				push(b.b.ID, ID(vid))
   386  			}
   387  			bqueue = append(bqueue, b.b)
   388  		}
   389  		for len(bqueue) > 0 {
   390  			work := bqueue[len(bqueue)-1]
   391  			bqueue = bqueue[:len(bqueue)-1]
   392  			if seen.contains(work.ID) || work.ID == v.defBlock {
   393  				continue
   394  			}
   395  			seen.add(work.ID)
   396  			for _, e := range work.Preds {
   397  				push(e.b.ID, ID(vid))
   398  				bqueue = append(bqueue, e.b)
   399  			}
   400  		}
   401  	}
   402  
   403  	if s.f.pass.debug > stackDebug {
   404  		for _, b := range s.f.Blocks {
   405  			fmt.Printf("stacklive %s %v\n", b, s.live[b.ID])
   406  		}
   407  	}
   408  }
   409  
   410  func (f *Func) getHome(vid ID) Location {
   411  	if int(vid) >= len(f.RegAlloc) {
   412  		return nil
   413  	}
   414  	return f.RegAlloc[vid]
   415  }
   416  
   417  func (f *Func) setHome(v *Value, loc Location) {
   418  	for v.ID >= ID(len(f.RegAlloc)) {
   419  		f.RegAlloc = append(f.RegAlloc, nil)
   420  	}
   421  	f.RegAlloc[v.ID] = loc
   422  }
   423  
   424  func (s *stackAllocState) buildInterferenceGraph() {
   425  	f := s.f
   426  	if n := f.NumValues(); cap(s.interfere) >= n {
   427  		s.interfere = s.interfere[:n]
   428  	} else {
   429  		s.interfere = make([][]ID, n)
   430  	}
   431  	live := f.newSparseSet(f.NumValues())
   432  	defer f.retSparseSet(live)
   433  	for _, b := range f.Blocks {
   434  		// Propagate liveness backwards to the start of the block.
   435  		// Two values interfere if one is defined while the other is live.
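        		// For illustration only (hypothetical values, all slot-needing and of
        		// equal type):
        		//
        		//	v1 = ...          // v1 defined here
        		//	v2 = ...          // v1 still live (used below), so v1 and v2 interfere
        		//	v3 = ... v1 ...   // last use of v1
        		//
        		// v1 and v2 must not share a slot. v1 and v3 may: the backward walk
        		// handles v3's definition before v1 enters the live set (v1 is added
        		// only when its use at v3 is scanned), so no edge is recorded.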
   436  		live.clear()
   437  		live.addAll(s.live[b.ID])
   438  		for i := len(b.Values) - 1; i >= 0; i-- {
   439  			v := b.Values[i]
   440  			if s.values[v.ID].needSlot {
   441  				live.remove(v.ID)
   442  				for _, id := range live.contents() {
   443  					// Note: args can have different types and still interfere
   444  					// (with each other or with other values). See issue 23522.
   445  					if s.values[v.ID].typ.Compare(s.values[id].typ) == types.CMPeq || hasAnyArgOp(v) || s.values[id].isArg {
   446  						s.interfere[v.ID] = append(s.interfere[v.ID], id)
   447  						s.interfere[id] = append(s.interfere[id], v.ID)
   448  					}
   449  				}
   450  			}
   451  			for _, a := range v.Args {
   452  				if s.values[a.ID].needSlot {
   453  					live.add(a.ID)
   454  				}
   455  			}
   456  			if hasAnyArgOp(v) && s.values[v.ID].needSlot {
   457  				// OpArg is an input argument which is pre-spilled.
   458  				// We add back v.ID here because we want this value
   459  				// to appear live even before this point. Being live
   460  				// all the way to the start of the entry block prevents other
   461  				// values from being allocated to the same slot and clobbering
   462  				// the input value before we have a chance to load it.
   463  
   464  				// TODO(register args) this is apparently not wrong for register args -- is it necessary?
   465  				live.add(v.ID)
   466  			}
   467  		}
   468  	}
   469  	if f.pass.debug > stackDebug {
   470  		for vid, i := range s.interfere {
   471  			if len(i) > 0 {
   472  				fmt.Printf("v%d interferes with", vid)
   473  				for _, x := range i {
   474  					fmt.Printf(" v%d", x)
   475  				}
   476  				fmt.Println()
   477  			}
   478  		}
   479  	}
   480  }
   481  
   482  func hasAnyArgOp(v *Value) bool {
   483  	return v.Op == OpArg || v.Op == OpArgIntReg || v.Op == OpArgFloatReg
   484  }
   485  
