// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// TODO: live at start of block instead?

package ssa

import (
	"cmd/compile/internal/ir"
	"cmd/compile/internal/types"
	"cmd/internal/src"
	"fmt"
)

type stackAllocState struct {
	f *Func

	// live is the output of stackalloc.
	// live[b.id] = live values at the end of block b.
	live [][]ID

	// The following slices are reused across multiple users
	// of stackAllocState.
	values    []stackValState
	interfere [][]ID // interfere[v.id] = values that interfere with v.
	names     []LocalSlot

	nArgSlot, // Number of Values sourced to arg slot
	nNotNeed, // Number of Values not needing a stack slot
	nNamedSlot, // Number of Values using a named stack slot
	nReuse, // Number of Values reusing a stack slot
	nAuto, // Number of autos allocated for stack slots
	nSelfInterfere int32 // Number of self-interferences
}

func newStackAllocState(f *Func) *stackAllocState {
	s := f.Cache.stackAllocState
	if s == nil {
		return new(stackAllocState)
	}
	if s.f != nil {
		f.fe.Fatalf(src.NoXPos, "newStackAllocState called without previous free")
	}
	return s
}

func putStackAllocState(s *stackAllocState) {
	clear(s.values)
	clear(s.interfere)
	clear(s.names)
	s.f.Cache.stackAllocState = s
	s.f = nil
	s.live = nil
	s.nArgSlot, s.nNotNeed, s.nNamedSlot, s.nReuse, s.nAuto, s.nSelfInterfere = 0, 0, 0, 0, 0, 0
}

type stackValState struct {
	typ       *types.Type
	spill     *Value
	needSlot  bool
	isArg     bool
	defBlock  ID
	useBlocks []stackUseBlock
}

// addUseBlock adds a block to the set of blocks that use this value.
// Note that we only loosely enforce the set property: we check only the last
// block appended to the list, so duplicates may occur.
// Because we add values block by block (barring phi nodes), the number of
// duplicates is small, and the liveness algorithm deduplicates them later anyway.
func (sv *stackValState) addUseBlock(b *Block, liveout bool) {
	entry := stackUseBlock{
		b:       b,
		liveout: liveout,
	}
	if sv.useBlocks == nil || sv.useBlocks[len(sv.useBlocks)-1] != entry {
		sv.useBlocks = append(sv.useBlocks, entry)
	}
}

type stackUseBlock struct {
	b       *Block
	liveout bool
}

// stackalloc allocates storage in the stack frame for
// all Values that did not get a register.
// Returns a map from block ID to the stack values live at the end of that block.
func stackalloc(f *Func, spillLive [][]ID) [][]ID {
	if f.pass.debug > stackDebug {
		fmt.Println("before stackalloc")
		fmt.Println(f.String())
	}
	s := newStackAllocState(f)
	s.init(f, spillLive)
	defer putStackAllocState(s)

	s.stackalloc()
	if f.pass.stats > 0 {
		f.LogStat("stack_alloc_stats",
			s.nArgSlot, "arg_slots", s.nNotNeed, "slot_not_needed",
			s.nNamedSlot, "named_slots", s.nAuto, "auto_slots",
			s.nReuse, "reused_slots", s.nSelfInterfere, "self_interfering")
	}

	return s.live
}

func (s *stackAllocState) init(f *Func, spillLive [][]ID) {
	s.f = f

	// Initialize value information.
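	// s.values is indexed by value ID; reuse the cached backing slice
	// when it is already large enough, otherwise allocate a fresh one.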
	if n := f.NumValues(); cap(s.values) >= n {
		s.values = s.values[:n]
	} else {
		s.values = make([]stackValState, n)
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			s.values[v.ID].typ = v.Type
			s.values[v.ID].needSlot = !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && f.getHome(v.ID) == nil && !v.rematerializeable() && !v.OnWasmStack
			s.values[v.ID].isArg = hasAnyArgOp(v)
			s.values[v.ID].defBlock = b.ID
			if f.pass.debug > stackDebug && s.values[v.ID].needSlot {
				fmt.Printf("%s needs a stack slot\n", v)
			}
			if v.Op == OpStoreReg {
				s.values[v.Args[0].ID].spill = v
			}
		}
	}

	// Compute liveness info for values needing a slot.
	s.computeLive(spillLive)

	// Build interference graph among values needing a slot.
	s.buildInterferenceGraph()
}

func (s *stackAllocState) stackalloc() {
	f := s.f

	// Build map from values to their names, if any.
	// A value may be associated with more than one name (e.g. after
	// the assignment i=j). This step picks one name per value arbitrarily.
	if n := f.NumValues(); cap(s.names) >= n {
		s.names = s.names[:n]
	} else {
		s.names = make([]LocalSlot, n)
	}
	names := s.names
	empty := LocalSlot{}
	for _, name := range f.Names {
		// Note: not "range f.NamedValues" above, because
		// that would be nondeterministic.
		for _, v := range f.NamedValues[*name] {
			if v.Op == OpArgIntReg || v.Op == OpArgFloatReg {
				aux := v.Aux.(*AuxNameOffset)
				// Never let an arg be bound to a differently named thing.
				if name.N != aux.Name || name.Off != aux.Offset {
					if f.pass.debug > stackDebug {
						fmt.Printf("stackalloc register arg %s skipping name %s\n", v, name)
					}
					continue
				}
			} else if name.N.Class == ir.PPARAM && v.Op != OpArg {
				// PPARAM's only bind to OpArg
				if f.pass.debug > stackDebug {
					fmt.Printf("stackalloc PPARAM name %s skipping non-Arg %s\n", name, v)
				}
				continue
			}
			if names[v.ID] == empty {
				if f.pass.debug > stackDebug {
					fmt.Printf("stackalloc value %s to name %s\n", v, *name)
				}
				names[v.ID] = *name
			}
		}
	}

	// Allocate args to their assigned locations.
	for _, v := range f.Entry.Values {
		if !hasAnyArgOp(v) {
			continue
		}
		if v.Aux == nil {
			f.Fatalf("%s has nil Aux\n", v.LongString())
		}
		if v.Op == OpArg {
			loc := LocalSlot{N: v.Aux.(*ir.Name), Type: v.Type, Off: v.AuxInt}
			if f.pass.debug > stackDebug {
				fmt.Printf("stackalloc OpArg %s to %s\n", v, loc)
			}
			f.setHome(v, loc)
			continue
		}
		// You might think this below would be the right idea, but you would be wrong.
		// It almost works; as of 105a6e9518 - 2021-04-23,
		// GOSSAHASH=11011011001011111 == cmd/compile/internal/noder.(*noder).embedded
		// is compiled incorrectly. I believe the cause is one of those SSA-to-registers
		// puzzles that the register allocator untangles; in the event that a register
		// parameter does not end up bound to a name, "fixing" it is a bad idea.
		//
		//if f.DebugTest {
		//	if v.Op == OpArgIntReg || v.Op == OpArgFloatReg {
		//		aux := v.Aux.(*AuxNameOffset)
		//		loc := LocalSlot{N: aux.Name, Type: v.Type, Off: aux.Offset}
		//		if f.pass.debug > stackDebug {
		//			fmt.Printf("stackalloc Op%s %s to %s\n", v.Op, v, loc)
		//		}
		//		names[v.ID] = loc
		//		continue
		//	}
		//}
	}

	// For each type, we keep track of all the stack slots we
	// have allocated for that type. This map is keyed by
	// strings returned by types.LinkString. This guarantees
	// type equality, but also lets us match the same type represented
	// by two different types.Type structures. See issue 65783.
	locations := map[string][]LocalSlot{}

	// Each time we assign a stack slot to a value v, we remember
	// the slot we used via an index into locations[v.Type].
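	// slots[v.ID] holds that index, or -1 if v has not been assigned a slot yet.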
	slots := f.Cache.allocIntSlice(f.NumValues())
	defer f.Cache.freeIntSlice(slots)
	for i := range slots {
		slots[i] = -1
	}

	// Pick a stack slot for each value needing one.
	used := f.Cache.allocBoolSlice(f.NumValues())
	defer f.Cache.freeBoolSlice(used)
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if !s.values[v.ID].needSlot {
				s.nNotNeed++
				continue
			}
			if hasAnyArgOp(v) {
				s.nArgSlot++
				continue // already picked
			}

			// If this is a named value, try to use the name as
			// the spill location.
			var name LocalSlot
			if v.Op == OpStoreReg {
				name = names[v.Args[0].ID]
			} else {
				name = names[v.ID]
			}
			if name.N != nil && v.Type.Compare(name.Type) == types.CMPeq {
				for _, id := range s.interfere[v.ID] {
					h := f.getHome(id)
					if h != nil && h.(LocalSlot).N == name.N && h.(LocalSlot).Off == name.Off {
						// A variable can interfere with itself.
						// It is rare, but it can happen.
						s.nSelfInterfere++
						goto noname
					}
				}
				if f.pass.debug > stackDebug {
					fmt.Printf("stackalloc %s to %s\n", v, name)
				}
				s.nNamedSlot++
				f.setHome(v, name)
				continue
			}

		noname:
			// Set of stack slots we could reuse.
			typeKey := v.Type.LinkString()
			locs := locations[typeKey]
			// Mark all positions in locs used by interfering values.
			for i := 0; i < len(locs); i++ {
				used[i] = false
			}
			for _, xid := range s.interfere[v.ID] {
				slot := slots[xid]
				if slot >= 0 {
					used[slot] = true
				}
			}
			// Find an unused stack slot.
			var i int
			for i = 0; i < len(locs); i++ {
				if !used[i] {
					s.nReuse++
					break
				}
			}
			// If there is no unused stack slot, allocate a new one.
			if i == len(locs) {
				s.nAuto++
				locs = append(locs, LocalSlot{N: f.NewLocal(v.Pos, v.Type), Type: v.Type, Off: 0})
				locations[typeKey] = locs
			}
			// Use the stack variable at that index for v.
			loc := locs[i]
			if f.pass.debug > stackDebug {
				fmt.Printf("stackalloc %s to %s\n", v, loc)
			}
			f.setHome(v, loc)
			slots[v.ID] = i
		}
	}
}

// computeLive computes a map from block ID to a list of
// stack-slot-needing value IDs live at the end of that block.
func (s *stackAllocState) computeLive(spillLive [][]ID) {
	// Because values using stack slots are few and far between
	// (compared to the set of all values), we use a path exploration
	// algorithm to calculate liveness here.
	f := s.f
	for _, b := range f.Blocks {
		for _, spillvid := range spillLive[b.ID] {
			val := &s.values[spillvid]
			val.addUseBlock(b, true)
		}
		for _, v := range b.Values {
			for i, a := range v.Args {
				val := &s.values[a.ID]
				useBlock := b
				forceLiveout := false
				if v.Op == OpPhi {
					useBlock = b.Preds[i].b
					forceLiveout = true
					if spill := val.spill; spill != nil {
						//TODO: remove? Subsumed by SpillUse?
						s.values[spill.ID].addUseBlock(useBlock, true)
					}
				}
				if !val.needSlot {
					continue
				}
				val.addUseBlock(useBlock, forceLiveout)
			}
		}
	}

	s.live = make([][]ID, f.NumBlocks())
	push := func(bid, vid ID) {
		l := s.live[bid]
		if l == nil || l[len(l)-1] != vid {
			l = append(l, vid)
			s.live[bid] = l
		}
	}

	// TODO: If we can help along the interference graph by calculating livein sets,
	// we can do so trivially by turning this sparse set into an array of arrays
	// and checking the top for the current value instead of inclusion in the sparse set.
	seen := f.newSparseSet(f.NumBlocks())
	defer f.retSparseSet(seen)

	// Instead of pruning out duplicate blocks when we build the useBlocks slices
	// or when we add them to the queue, rely on the seen set to stop considering
	// them. This is slightly faster than building the work queues as sets.
	//
	// However, this means that the queue can grow larger than the number of blocks,
	// usually in very short functions.
	// Returning a slice with values appended beyond the original allocation
	// can corrupt the allocator state, so cap the queue and return the
	// originally allocated slice regardless.
	allocedBqueue := f.Cache.allocBlockSlice(f.NumBlocks())
	defer f.Cache.freeBlockSlice(allocedBqueue)
	bqueue := allocedBqueue[:0:f.NumBlocks()]

	for vid, v := range s.values {
		if !v.needSlot {
			continue
		}
		seen.clear()
		bqueue = bqueue[:0]
		for _, b := range v.useBlocks {
			if b.liveout {
				push(b.b.ID, ID(vid))
			}
			bqueue = append(bqueue, b.b)
		}

		for len(bqueue) > 0 {
			work := bqueue[len(bqueue)-1]
			bqueue = bqueue[:len(bqueue)-1]
			if seen.contains(work.ID) || work.ID == v.defBlock {
				continue
			}
			seen.add(work.ID)
			for _, e := range work.Preds {
				push(e.b.ID, ID(vid))
				bqueue = append(bqueue, e.b)
			}
		}
	}

	if s.f.pass.debug > stackDebug {
		for _, b := range s.f.Blocks {
			fmt.Printf("stacklive %s %v\n", b, s.live[b.ID])
		}
	}
}

func (f *Func) getHome(vid ID) Location {
	if int(vid) >= len(f.RegAlloc) {
		return nil
	}
	return f.RegAlloc[vid]
}

func (f *Func) setHome(v *Value, loc Location) {
	for v.ID >= ID(len(f.RegAlloc)) {
		f.RegAlloc = append(f.RegAlloc, nil)
	}
	f.RegAlloc[v.ID] = loc
}

func (s *stackAllocState) buildInterferenceGraph() {
	f := s.f
	if n := f.NumValues(); cap(s.interfere) >= n {
		s.interfere = s.interfere[:n]
	} else {
		s.interfere = make([][]ID, n)
	}
	live := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(live)
	for _, b := range f.Blocks {
		// Propagate liveness backwards to the start of the block.
		// Two values interfere if one is defined while the other is live.
		live.clear()
		live.addAll(s.live[b.ID])
		for i := len(b.Values) - 1; i >= 0; i-- {
			v := b.Values[i]
			if s.values[v.ID].needSlot {
				live.remove(v.ID)
				for _, id := range live.contents() {
					// Note: args can have different types and still interfere
					// (with each other or with other values). See issue 23522.
					if s.values[v.ID].typ.Compare(s.values[id].typ) == types.CMPeq || hasAnyArgOp(v) || s.values[id].isArg {
						s.interfere[v.ID] = append(s.interfere[v.ID], id)
						s.interfere[id] = append(s.interfere[id], v.ID)
					}
				}
			}
			for _, a := range v.Args {
				if s.values[a.ID].needSlot {
					live.add(a.ID)
				}
			}
			if hasAnyArgOp(v) && s.values[v.ID].needSlot {
				// OpArg is an input argument which is pre-spilled.
				// We add back v.ID here because we want this value
				// to appear live even before this point. Being live
				// all the way to the start of the entry block prevents other
				// values from being allocated to the same slot and clobbering
				// the input value before we have a chance to load it.
				// TODO(register args) this is apparently not wrong for register args -- is it necessary?
				live.add(v.ID)
			}
		}
	}
	if f.pass.debug > stackDebug {
		for vid, i := range s.interfere {
			if len(i) > 0 {
				fmt.Printf("v%d interferes with", vid)
				for _, x := range i {
					fmt.Printf(" v%d", x)
				}
				fmt.Println()
			}
		}
	}
}

func hasAnyArgOp(v *Value) bool {
	return v.Op == OpArg || v.Op == OpArgIntReg || v.Op == OpArgFloatReg
}