1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "cmp"
123 "fmt"
124 "internal/buildcfg"
125 "math"
126 "math/bits"
127 "slices"
128 "unsafe"
129 )
130
// Debug-output verbosity thresholds, compared against f.pass.debug
// (e.g. "s.f.pass.debug > regDebug" gates the verbose register traces).
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
137
138
139
// Distance weights for crossing block edges when estimating how far
// away a value's next use is: crossing a likely edge is cheap (1),
// a normal edge costs 10, and an unlikely edge costs 100.
// NOTE(review): consumed by the liveness computation (computeLive),
// which is outside this chunk — confirm usage there.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
145
146
147
// regalloc performs register allocation on f, rewriting it in place.
// It drives a regAllocState through its three phases: init (set up
// per-function state), regalloc (the main allocation loop), and close
// (release cached resources).
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
	s.close()
}
154
// A register is a small integer index into the machine's register
// table (Config.registers).
type register uint8

// noRegister is the sentinel "no register assigned" value.
const noRegister register = 255

// noRegisters is an array whose every entry is noRegister, useful as a
// default value for fixed-size register tuples.
var noRegisters = func() (a [32]register) {
	for i := range a {
		a[i] = noRegister
	}
	return
}()

// A regMask is a bit set of register numbers.
type regMask uint64

// String returns the mask's members as a space-separated list of
// generic register names, e.g. "r0 r2 r4".
func (m regMask) String() string {
	s := ""
	for r := register(0); r < 64; r++ {
		if !m.contains(r) {
			continue
		}
		if s != "" {
			s += " "
		}
		s += fmt.Sprintf("r%d", r)
	}
	return s
}

// contains reports whether register r is a member of the mask.
func (m regMask) contains(r register) bool {
	return (m >> r) & 1 == 1
}
189
190 func (s *regAllocState) RegMaskString(m regMask) string {
191 str := ""
192 for r := register(0); m != 0; r++ {
193 if m>>r&1 == 0 {
194 continue
195 }
196 m &^= regMask(1) << r
197 if str != "" {
198 str += " "
199 }
200 str += s.registers[r].String()
201 }
202 return str
203 }
204
205
// countRegs returns the number of registers in the mask r.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
209
210
// pickReg picks an arbitrary register from the mask — the
// lowest-numbered one. It panics if the mask is empty.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}

	return register(bits.TrailingZeros64(uint64(r)))
}
218
// A use records a single upcoming use of a value. The use records for
// a value form a singly linked list sorted by increasing dist
// (addUse enforces the ordering).
type use struct {
	// dist is the distance (in instruction slots from the start of the
	// current block, plus cross-block weights for liveness entries) to
	// this use; see addUse callers in regalloc.
	dist int32
	pos  src.XPos // source position of this use
	next *use     // next use of the same value, farther in the future
}

// valState records the allocation state of a single value.
type valState struct {
	regs              regMask // registers currently holding a copy of this value
	uses              *use    // remaining uses of this value, nearest first
	spill             *Value  // spilled copy of this value, if any (created by makeSpill)
	restoreMin        int32   // sdom entry/exit bounds of the subtree where restores occur
	restoreMax        int32
	needReg           bool // value needs a register (not memory/void/flags/tuple; see needRegister)
	rematerializeable bool // value can be recomputed instead of spilled/reloaded
}

// regState records what a single machine register currently holds.
type regState struct {
	v *Value // original (pre-regalloc) value held in this register
	c *Value // cached copy of v that actually lives in the register (may equal v)
}
246
// regAllocState holds all working state for register-allocating one
// function.
type regAllocState struct {
	f *Func

	sdom        SparseTree // dominator tree (used to bound spill/restore placement)
	registers   []Register // machine register descriptions, indexed by register number
	numRegs     register   // len(registers)
	SPReg       register   // stack pointer register
	SBReg       register   // static base register
	GReg        register   // g register, if f.Config.hasGReg
	ZeroIntReg  register   // always-zero integer register, or noRegister
	allocatable regMask    // registers the allocator may hand out

	// live[b.ID] lists the values live at the end of block b, with
	// their distances to the next use and the position of that use.
	// NOTE(review): filled in by computeLive, outside this chunk.
	live [][]liveInfo

	// desired[b.ID] records desired register preferences at the end of
	// each block.
	desired []desiredState

	// values[id] is the current allocation state of the value with that ID.
	values []valState

	// IDs of the SP and SB values, recorded when their ops are seen.
	sp, sb ID

	// orig[id] is the original (pre-regalloc) value for the value with
	// the given ID; copies made by regalloc map back through it
	// (see setOrig).
	orig []*Value

	// regs[r] is the current state of machine register r.
	regs []regState

	// nospill is the set of registers holding values that must not be
	// kicked out until the current instruction is done with them.
	nospill regMask

	// used is the set of registers currently holding some value.
	used regMask

	// usedSinceBlockStart is the set of registers touched since the
	// start of the current block (used to prune startRegs; see allocReg).
	usedSinceBlockStart regMask

	// tmpused is the set of registers claimed as temporaries by the
	// current instruction.
	tmpused regMask

	// curBlock is the block currently being processed.
	curBlock *Block

	// freeUseRecords is a free list of use records for reuse by addUse.
	freeUseRecords *use

	// endRegs[b.ID] is the register state at the end of block b, kept
	// so successors can start from it.
	endRegs [][]endReg

	// startRegs[b.ID] is the register state required at the start of
	// merge block b: {register, value, copy} triples.
	startRegs [][]startReg

	// startRegsMask is the set of registers in startRegs for the block
	// currently being processed; entries may be dropped from it while
	// the block is processed (see allocReg).
	startRegsMask regMask

	// spillLive[b.ID] is the set of live spill value IDs at the end of
	// block b.
	spillLive [][]ID

	// copies records the OpCopy values created by regalloc; the bool
	// is set to false here — presumably a later cleanup decides whether
	// each copy was actually needed (TODO confirm; resolution is
	// outside this chunk).
	copies map[*Value]bool

	loopnest *loopnest

	// visitOrder is the order in which blocks are processed.
	visitOrder []*Block

	// blockOrder[b.ID] is the index of block b within visitOrder.
	blockOrder []int32

	// doClobber enables clobbering of dead registers at call sites as a
	// debugging aid (base.Flag.ClobberDeadReg; see init).
	doClobber bool

	// nextCall[i] is the index of the next call instruction at or after
	// instruction i in the current block (math.MaxInt32 if none).
	nextCall []int32

	// curIdx is the index of the instruction currently being processed
	// within the current block.
	curIdx int
}
343
// endReg records that, at the end of a block, register r holds the
// (pre-regalloc) value v via the cached copy c.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register
	c *Value // cached copy of v that lives in the register
}

// startReg records that register r must hold value v (via cached copy
// c) at the start of a block; pos is the source position of the use
// that wants it there.
type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached copy of the value
	pos src.XPos // source position of the use of this register
}
356
357
358 func (s *regAllocState) freeReg(r register) {
359 if !s.allocatable.contains(r) && !s.isGReg(r) {
360 return
361 }
362 v := s.regs[r].v
363 if v == nil {
364 s.f.Fatalf("tried to free an already free register %d\n", r)
365 }
366
367
368 if s.f.pass.debug > regDebug {
369 fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
370 }
371 s.regs[r] = regState{}
372 s.values[v.ID].regs &^= regMask(1) << r
373 s.used &^= regMask(1) << r
374 }
375
376
377 func (s *regAllocState) freeRegs(m regMask) {
378 for m&s.used != 0 {
379 s.freeReg(pickReg(m & s.used))
380 }
381 }
382
383
// clobberRegs inserts OpClobberReg instructions for the allocatable
// general-purpose registers in m. Used as a debugging aid when
// doClobber is set, to poison registers that are dead across calls.
func (s *regAllocState) clobberRegs(m regMask) {
	// Only general-purpose registers are clobbered (they are the ones
	// that can hold pointers).
	m &= s.allocatable & s.f.Config.gpRegMask
	for m != 0 {
		r := pickReg(m)
		m &^= 1 << r
		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
		s.f.setHome(x, &s.registers[r])
	}
}
393
394
395
// setOrig records that c's original value is the same as v's original
// value. Note that it chases through s.orig[v.ID] rather than storing
// v directly: v may itself be a regalloc-created copy.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	if int(c.ID) >= cap(s.orig) {
		// Grow the cache-backed slice to cover c.ID.
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	s.orig[c.ID] = s.orig[v.ID]
}
411
412
413
// assignReg assigns register r to hold c, a cached copy of v.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	// Update the value's register set and record c's home regardless of
	// whether r itself is tracked below.
	s.values[v.ID].regs |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])

	// Only allocatable registers (and the g register) get per-register
	// occupancy tracking; fixed registers such as SP/SB do not.
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}
	s.regs[r] = regState{v, c}
	s.used |= regMask(1) << r
}
432
433
434
435
436 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
437 if v.OnWasmStack {
438 return noRegister
439 }
440
441 mask &= s.allocatable
442 mask &^= s.nospill
443 if mask == 0 {
444 s.f.Fatalf("no register available for %s", v.LongString())
445 }
446
447
448 if mask&^s.used != 0 {
449 r := pickReg(mask &^ s.used)
450 s.usedSinceBlockStart |= regMask(1) << r
451 return r
452 }
453
454
455
456
457
458
459
460
461
462
463
464 var r register
465 maxuse := int32(-1)
466 for t := register(0); t < s.numRegs; t++ {
467 if mask>>t&1 == 0 {
468 continue
469 }
470 v := s.regs[t].v
471 if n := s.values[v.ID].uses.dist; n > maxuse {
472
473
474 r = t
475 maxuse = n
476 }
477 }
478 if maxuse == -1 {
479 s.f.Fatalf("couldn't find register to spill")
480 }
481
482 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
483
484
485
486 s.freeReg(r)
487 return r
488 }
489
490
491
492 v2 := s.regs[r].v
493 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
494 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
495 s.usedSinceBlockStart |= regMask(1) << r
496 r2 := pickReg(m)
497 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
498 s.copies[c] = false
499 if s.f.pass.debug > regDebug {
500 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
501 }
502 s.setOrig(c, v2)
503 s.assignReg(r2, v2, c)
504 }
505
506
507
508
509 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
510 if s.startRegsMask&(regMask(1)<<r) == 1 {
511 if s.f.pass.debug > regDebug {
512 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
513 }
514 s.startRegsMask &^= regMask(1) << r
515 }
516 }
517
518 s.freeReg(r)
519 s.usedSinceBlockStart |= regMask(1) << r
520 return r
521 }
522
523
524
// makeSpill returns a Value which represents the spilled value of v.
// b is the block in which the spill is used.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// The spill's final location isn't known yet — widen the
		// dominator-tree interval in which restores occur.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a spill for v. We don't know where to put it yet, so it is
	// left blockless for now; its argument is also filled in later.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// The spill is a copy of v, so map it back to v's original.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
544
545
546
547
548
549
550
// allocValToReg allocates v to a register selected from mask and
// returns the register copy of v (a Value placed at the current point
// in curBlock). If nospill is set, the register is marked nospill so
// the assignment cannot be undone until the caller clears the flag.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		// On wasm, rematerialize instead of copying so the value can
		// live on the wasm stack.
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		mask &= vi.regs
		r := pickReg(mask)
		if mask.contains(s.SPReg) {
			// Prefer SP when it is allowed.
			// NOTE(review): rationale not visible here — presumably SP
			// itself must be used rather than a copy; confirm.
			r = s.SPReg
		}
		if !s.allocatable.contains(r) {
			return v // v is in a fixed (untracked) register
		}
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register
	// If nospill is set on wasm, the value is used immediately and can
	// live on the wasm stack instead of a register.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register.
		r = s.allocReg(mask, v)
	}

	// Allocate v to the new register.
	var c *Value
	if vi.regs != 0 {
		// Copy from a register that v is already in.
		r2 := pickReg(vi.regs)
		var current *Value
		if !s.allocatable.contains(r2) {
			// v is in a fixed register; copy from v itself.
			current = v
		} else {
			if s.regs[r2].v != v {
				panic("bad register state")
			}
			current = s.regs[r2].c
		}
		s.usedSinceBlockStart |= regMask(1) << r2
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
		// If the rematerialized op's result must live in specific
		// registers (sourceMask) and none of those are in mask, first
		// materialize into an allowed source register, then copy into
		// the register we actually want.
		sourceMask := s.regspec(c).outputs[0].regs
		if mask&sourceMask == 0 && !onWasmStack {
			s.setOrig(c, v)
			s.assignReg(s.allocReg(sourceMask, v), v, c)
			// c below becomes the copy in the requested register; the
			// setOrig/assignReg after this if handles the copy, while
			// the rematerialized value keeps its source register.
			c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
		}
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
662
663
664 func isLeaf(f *Func) bool {
665 for _, b := range f.Blocks {
666 for _, v := range b.Values {
667 if v.Op.IsCall() && !v.Op.IsTailCall() {
668
669 return false
670 }
671 }
672 }
673 return true
674 }
675
676
677 func (v *Value) needRegister() bool {
678 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
679 }
680
// init prepares the allocator for function f: it identifies the special
// registers, computes the allocatable set, chooses the block visit
// order, and sizes the per-value and per-block side tables.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// The register count must fit in a register index and in a regMask.
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, g, and zero registers by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	s.ZeroIntReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		case "ZERO":
			s.ZeroIntReg = r
		}
	}
	// Make sure the required special registers were all found.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.ZeroIntReg != noRegister {
		s.allocatable &^= 1 << s.ZeroIntReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register, so
			// it stays live and cannot be allocated.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// Dynamic linking reserves an extra register on some arches.
		switch s.f.Config.arch {
		case "386":
			// nothing to do
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing to do
		case "loong64":
			// nothing to do
		case "ppc64le":
			// nothing to do
		case "riscv64":
			// nothing to do
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Linear scan register allocation can be influenced by the order in
	// which blocks appear. Decouple the register allocation order from
	// the generated block order.
	s.visitOrder = layoutRegallocOrder(f)

	// Compute block order. This array lets us distinguish forward edges
	// from backward edges and compute how far they go.
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	// Reuse the cached valState slice if it is big enough.
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needRegister is false for tuple-typed values; their
			// Select ops are marked instead.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// wasm: mark values that can live entirely on the WebAssembly stack.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block: reset candidate set.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walk backwards through the block.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Value cannot live on the stack; values must not
					// be reordered, so reset the candidate set.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// An arg can live on the stack if it is used only
					// once, in the same block, is not a memory value,
					// and is a non-generic (wasm) op.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// The clobberdeadreg mode inserts code to clobber dead registers at
	// call sites; skip huge functions to bound the extra work.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		s.doClobber = true
	}
}
848
// close releases cached resources back to the Func's cache.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
852
853
854
855 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
856 r := s.freeUseRecords
857 if r != nil {
858 s.freeUseRecords = r.next
859 } else {
860 r = &use{}
861 }
862 r.dist = dist
863 r.pos = pos
864 r.next = s.values[id].uses
865 s.values[id].uses = r
866 if r.next != nil && dist > r.next.dist {
867 s.f.Fatalf("uses added in wrong order")
868 }
869 }
870
871
872
// advanceUses advances the uses of v's args from the state before v to
// the state after v. Any arg with no further uses — or whose next use
// is after the next call, so it would have to be spilled anyway — is
// freed from its registers.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		// Pop the nearest use record (it is the use by v).
		r := ai.uses
		ai.uses = r.next
		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
			// Value is dead (or not used again before a call); free all
			// registers holding it. Fixed-register values are exempt.
			s.freeRegs(ai.regs)
		}
		// Return the use record to the free list.
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
890
891
892
// dropIfUnused frees v's registers if v has no remaining uses, or if
// its next use comes after the next call (it would have to be spilled
// across the call anyway). Fixed-register values are exempt from the
// call-distance rule.
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
		s.freeRegs(vi.regs)
	}
}
903
904
905
906
907 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
908 u := s.values[v.ID].uses
909 if u == nil {
910 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
911 }
912 d := u.dist
913 for u != nil && u.dist == d {
914 u = u.next
915 }
916 return u != nil && u.dist > d
917 }
918
919
920 func (s *regAllocState) setState(regs []endReg) {
921 s.freeRegs(s.used)
922 for _, x := range regs {
923 s.assignReg(x.r, x.v, x.c)
924 }
925 }
926
927
// compatRegs returns the subset of allocatable registers that can hold
// a value of type t. Tuple and flags values get no registers.
func (s *regAllocState) compatRegs(t *types.Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsFloat() || t == types.TypeInt128 {
		// NOTE(review): Int128 values are routed to the FP mask here —
		// presumably they live in (pairs of) float registers on the
		// relevant architectures; confirm against the arch configs.
		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
			m = s.f.Config.fp32RegMask
		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
			m = s.f.Config.fp64RegMask
		} else {
			m = s.f.Config.fpRegMask
		}
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}
946
947
// regspec returns the register constraints (inputs/outputs/clobbers)
// for v, handling the ops whose constraints are not fully captured by
// the static opcode table.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert is a generic op; it has no entry in the static
		// table. It can use any allocatable general-purpose register
		// for both its input and output.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		// Result is pinned to the specific integer parameter register.
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		// Result is pinned to the specific float parameter register.
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		// Calls carrying register-ABI info supply their own regInfo.
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
975
976 func (s *regAllocState) isGReg(r register) bool {
977 return s.f.Config.hasGReg && s.GReg == r
978 }
979
980
// tmpVal is a dummy Value used as a placeholder for a temporary
// register's contents. NOTE(review): its uses lie outside this chunk
// (see needIntTemp handling in regalloc) — confirm.
var tmpVal Value
982
983 func (s *regAllocState) regalloc(f *Func) {
984 regValLiveSet := f.newSparseSet(f.NumValues())
985 defer f.retSparseSet(regValLiveSet)
986 var oldSched []*Value
987 var phis []*Value
988 var phiRegs []register
989 var args []*Value
990
991
992 var desired desiredState
993 desiredSecondReg := map[ID][4]register{}
994
995
996 type dentry struct {
997 out [4]register
998 in [3][4]register
999 }
1000 var dinfo []dentry
1001
1002 if f.Entry != f.Blocks[0] {
1003 f.Fatalf("entry block must be first")
1004 }
1005
1006 for _, b := range s.visitOrder {
1007 if s.f.pass.debug > regDebug {
1008 fmt.Printf("Begin processing block %v\n", b)
1009 }
1010 s.curBlock = b
1011 s.startRegsMask = 0
1012 s.usedSinceBlockStart = 0
1013 clear(desiredSecondReg)
1014
1015
1016
1017 regValLiveSet.clear()
1018 if s.live != nil {
1019 for _, e := range s.live[b.ID] {
1020 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
1021 regValLiveSet.add(e.ID)
1022 }
1023 }
1024 for _, v := range b.ControlValues() {
1025 if s.values[v.ID].needReg {
1026 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
1027 regValLiveSet.add(v.ID)
1028 }
1029 }
1030 if len(s.nextCall) < len(b.Values) {
1031 s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
1032 }
1033 var nextCall int32 = math.MaxInt32
1034 for i := len(b.Values) - 1; i >= 0; i-- {
1035 v := b.Values[i]
1036 regValLiveSet.remove(v.ID)
1037 if v.Op == OpPhi {
1038
1039
1040
1041 s.nextCall[i] = nextCall
1042 continue
1043 }
1044 if opcodeTable[v.Op].call {
1045
1046 regValLiveSet.clear()
1047 if s.sp != 0 && s.values[s.sp].uses != nil {
1048 regValLiveSet.add(s.sp)
1049 }
1050 if s.sb != 0 && s.values[s.sb].uses != nil {
1051 regValLiveSet.add(s.sb)
1052 }
1053 nextCall = int32(i)
1054 }
1055 for _, a := range v.Args {
1056 if !s.values[a.ID].needReg {
1057 continue
1058 }
1059 s.addUse(a.ID, int32(i), v.Pos)
1060 regValLiveSet.add(a.ID)
1061 }
1062 s.nextCall[i] = nextCall
1063 }
1064 if s.f.pass.debug > regDebug {
1065 fmt.Printf("use distances for %s\n", b)
1066 for i := range s.values {
1067 vi := &s.values[i]
1068 u := vi.uses
1069 if u == nil {
1070 continue
1071 }
1072 fmt.Printf(" v%d:", i)
1073 for u != nil {
1074 fmt.Printf(" %d", u.dist)
1075 u = u.next
1076 }
1077 fmt.Println()
1078 }
1079 }
1080
1081
1082
1083 nphi := 0
1084 for _, v := range b.Values {
1085 if v.Op != OpPhi {
1086 break
1087 }
1088 nphi++
1089 }
1090 phis = append(phis[:0], b.Values[:nphi]...)
1091 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1092 b.Values = b.Values[:0]
1093
1094
1095 if b == f.Entry {
1096
1097 if nphi > 0 {
1098 f.Fatalf("phis in entry block")
1099 }
1100 } else if len(b.Preds) == 1 {
1101
1102 s.setState(s.endRegs[b.Preds[0].b.ID])
1103 if nphi > 0 {
1104 f.Fatalf("phis in single-predecessor block")
1105 }
1106
1107
1108
1109 for r := register(0); r < s.numRegs; r++ {
1110 v := s.regs[r].v
1111 if v != nil && !regValLiveSet.contains(v.ID) {
1112 s.freeReg(r)
1113 }
1114 }
1115 } else {
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129 idx := -1
1130 for i, p := range b.Preds {
1131
1132
1133 pb := p.b
1134 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1135 continue
1136 }
1137 if idx == -1 {
1138 idx = i
1139 continue
1140 }
1141 pSel := b.Preds[idx].b
1142 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1143 idx = i
1144 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1156 idx = i
1157 }
1158 }
1159 }
1160 if idx < 0 {
1161 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1162 }
1163 p := b.Preds[idx].b
1164 s.setState(s.endRegs[p.ID])
1165
1166 if s.f.pass.debug > regDebug {
1167 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1168 for _, x := range s.endRegs[p.ID] {
1169 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1170 }
1171 }
1172
1173
1174
1175
1176
1177 phiRegs = phiRegs[:0]
1178 var phiUsed regMask
1179
1180 for _, v := range phis {
1181 if !s.values[v.ID].needReg {
1182 phiRegs = append(phiRegs, noRegister)
1183 continue
1184 }
1185 a := v.Args[idx]
1186
1187
1188 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1189 if m != 0 {
1190 r := pickReg(m)
1191 phiUsed |= regMask(1) << r
1192 phiRegs = append(phiRegs, r)
1193 } else {
1194 phiRegs = append(phiRegs, noRegister)
1195 }
1196 }
1197
1198
1199 for i, v := range phis {
1200 if !s.values[v.ID].needReg {
1201 continue
1202 }
1203 a := v.Args[idx]
1204 r := phiRegs[i]
1205 if r == noRegister {
1206 continue
1207 }
1208 if regValLiveSet.contains(a.ID) {
1209
1210
1211
1212
1213
1214
1215
1216
1217 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1218 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1219 r2 := pickReg(m)
1220 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1221 s.copies[c] = false
1222 if s.f.pass.debug > regDebug {
1223 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1224 }
1225 s.setOrig(c, a)
1226 s.assignReg(r2, a, c)
1227 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1228 }
1229 }
1230 s.freeReg(r)
1231 }
1232
1233
1234 b.Values = append(b.Values, phis...)
1235
1236
1237
1238 for i, v := range phis {
1239 if !s.values[v.ID].needReg {
1240 continue
1241 }
1242 if phiRegs[i] != noRegister {
1243 continue
1244 }
1245 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1246
1247
1248 for i, pe := range b.Preds {
1249 if i == idx {
1250 continue
1251 }
1252 ri := noRegister
1253 for _, er := range s.endRegs[pe.b.ID] {
1254 if er.v == s.orig[v.Args[i].ID] {
1255 ri = er.r
1256 break
1257 }
1258 }
1259 if ri != noRegister && m>>ri&1 != 0 {
1260 m = regMask(1) << ri
1261 break
1262 }
1263 }
1264 if m != 0 {
1265 r := pickReg(m)
1266 phiRegs[i] = r
1267 phiUsed |= regMask(1) << r
1268 }
1269 }
1270
1271
1272 for i, v := range phis {
1273 if !s.values[v.ID].needReg {
1274 continue
1275 }
1276 r := phiRegs[i]
1277 if r == noRegister {
1278
1279
1280 s.values[v.ID].spill = v
1281 continue
1282 }
1283
1284 s.assignReg(r, v, v)
1285 }
1286
1287
1288 for r := register(0); r < s.numRegs; r++ {
1289 if phiUsed>>r&1 != 0 {
1290 continue
1291 }
1292 v := s.regs[r].v
1293 if v != nil && !regValLiveSet.contains(v.ID) {
1294 s.freeReg(r)
1295 }
1296 }
1297
1298
1299
1300
1301
1302 regList := make([]startReg, 0, 32)
1303 for r := register(0); r < s.numRegs; r++ {
1304 v := s.regs[r].v
1305 if v == nil {
1306 continue
1307 }
1308 if phiUsed>>r&1 != 0 {
1309
1310
1311 continue
1312 }
1313 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1314 s.startRegsMask |= regMask(1) << r
1315 }
1316 s.startRegs[b.ID] = make([]startReg, len(regList))
1317 copy(s.startRegs[b.ID], regList)
1318
1319 if s.f.pass.debug > regDebug {
1320 fmt.Printf("after phis\n")
1321 for _, x := range s.startRegs[b.ID] {
1322 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1323 }
1324 }
1325 }
1326
1327
1328 for i, v := range phis {
1329 s.curIdx = i
1330 s.dropIfUnused(v)
1331 }
1332
1333
1334 if l := len(oldSched); cap(dinfo) < l {
1335 dinfo = make([]dentry, l)
1336 } else {
1337 dinfo = dinfo[:l]
1338 clear(dinfo)
1339 }
1340
1341
1342 if s.desired != nil {
1343 desired.copy(&s.desired[b.ID])
1344 }
1345
1346
1347
1348
1349
1350
1351 for _, e := range b.Succs {
1352 succ := e.b
1353
1354 for _, x := range s.startRegs[succ.ID] {
1355 desired.add(x.v.ID, x.r)
1356 }
1357
1358 pidx := e.i
1359 for _, v := range succ.Values {
1360 if v.Op != OpPhi {
1361 break
1362 }
1363 if !s.values[v.ID].needReg {
1364 continue
1365 }
1366 rp, ok := s.f.getHome(v.ID).(*Register)
1367 if !ok {
1368
1369
1370
1371
1372 for _, a := range v.Args {
1373 rp, ok = s.f.getHome(a.ID).(*Register)
1374 if ok {
1375 break
1376 }
1377 }
1378 if !ok {
1379 continue
1380 }
1381 }
1382 desired.add(v.Args[pidx].ID, register(rp.num))
1383 }
1384 }
1385
1386
1387 for i := len(oldSched) - 1; i >= 0; i-- {
1388 v := oldSched[i]
1389 prefs := desired.remove(v.ID)
1390 regspec := s.regspec(v)
1391 desired.clobber(regspec.clobbers)
1392 for _, j := range regspec.inputs {
1393 if countRegs(j.regs) != 1 {
1394 continue
1395 }
1396 desired.clobber(j.regs)
1397 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1398 }
1399 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1400 if opcodeTable[v.Op].commutative {
1401 desired.addList(v.Args[1].ID, prefs)
1402 }
1403 desired.addList(v.Args[0].ID, prefs)
1404 }
1405
1406 dinfo[i].out = prefs
1407 for j, a := range v.Args {
1408 if j >= len(dinfo[i].in) {
1409 break
1410 }
1411 dinfo[i].in[j] = desired.get(a.ID)
1412 }
1413 if v.Op == OpSelect1 && prefs[0] != noRegister {
1414
1415
1416 desiredSecondReg[v.Args[0].ID] = prefs
1417 }
1418 }
1419
1420
1421 for idx, v := range oldSched {
1422 s.curIdx = nphi + idx
1423 tmpReg := noRegister
1424 if s.f.pass.debug > regDebug {
1425 fmt.Printf(" processing %s\n", v.LongString())
1426 }
1427 regspec := s.regspec(v)
1428 if v.Op == OpPhi {
1429 f.Fatalf("phi %s not at start of block", v)
1430 }
1431 if opcodeTable[v.Op].fixedReg {
1432 switch v.Op {
1433 case OpSP:
1434 s.assignReg(s.SPReg, v, v)
1435 s.sp = v.ID
1436 case OpSB:
1437 s.assignReg(s.SBReg, v, v)
1438 s.sb = v.ID
1439 case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
1440 s.assignReg(s.ZeroIntReg, v, v)
1441 default:
1442 f.Fatalf("unknown fixed-register op %s", v)
1443 }
1444 b.Values = append(b.Values, v)
1445 s.advanceUses(v)
1446 continue
1447 }
1448 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1449 if s.values[v.ID].needReg {
1450 if v.Op == OpSelectN {
1451 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1452 } else {
1453 var i = 0
1454 if v.Op == OpSelect1 {
1455 i = 1
1456 }
1457 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1458 }
1459 }
1460 b.Values = append(b.Values, v)
1461 s.advanceUses(v)
1462 continue
1463 }
1464 if v.Op == OpGetG && s.f.Config.hasGReg {
1465
1466 if s.regs[s.GReg].v != nil {
1467 s.freeReg(s.GReg)
1468 }
1469 s.assignReg(s.GReg, v, v)
1470 b.Values = append(b.Values, v)
1471 s.advanceUses(v)
1472 continue
1473 }
1474 if v.Op == OpArg {
1475
1476
1477
1478 s.values[v.ID].spill = v
1479 b.Values = append(b.Values, v)
1480 s.advanceUses(v)
1481 continue
1482 }
1483 if v.Op == OpKeepAlive {
1484
1485 s.advanceUses(v)
1486 a := v.Args[0]
1487 vi := &s.values[a.ID]
1488 if vi.regs == 0 && !vi.rematerializeable {
1489
1490
1491
1492 v.SetArg(0, s.makeSpill(a, b))
1493 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1494
1495
1496
1497 v.Op = OpVarLive
1498 v.SetArgs1(v.Args[1])
1499 v.Aux = a.Aux
1500 } else {
1501
1502
1503
1504 v.Op = OpCopy
1505 v.SetArgs1(v.Args[1])
1506 }
1507 b.Values = append(b.Values, v)
1508 continue
1509 }
1510 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1511
1512 if s.doClobber && v.Op.IsCall() {
1513 s.clobberRegs(regspec.clobbers)
1514 }
1515 s.freeRegs(regspec.clobbers)
1516 b.Values = append(b.Values, v)
1517 s.advanceUses(v)
1518 continue
1519 }
1520
1521 if s.values[v.ID].rematerializeable {
1522
1523
1524
1525 for _, a := range v.Args {
1526 a.Uses--
1527 }
1528 s.advanceUses(v)
1529 continue
1530 }
1531
1532 if s.f.pass.debug > regDebug {
1533 fmt.Printf("value %s\n", v.LongString())
1534 fmt.Printf(" out:")
1535 for _, r := range dinfo[idx].out {
1536 if r != noRegister {
1537 fmt.Printf(" %s", &s.registers[r])
1538 }
1539 }
1540 fmt.Println()
1541 for i := 0; i < len(v.Args) && i < 3; i++ {
1542 fmt.Printf(" in%d:", i)
1543 for _, r := range dinfo[idx].in[i] {
1544 if r != noRegister {
1545 fmt.Printf(" %s", &s.registers[r])
1546 }
1547 }
1548 fmt.Println()
1549 }
1550 }
1551
1552
1553
1554
1555 args = append(args[:0], make([]*Value, len(v.Args))...)
1556 for i, a := range v.Args {
1557 if !s.values[a.ID].needReg {
1558 args[i] = a
1559 }
1560 }
1561 for _, i := range regspec.inputs {
1562 mask := i.regs
1563 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1564 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1565 }
1566 }
1567
1568
1569
1570
1571
1572
1573 for {
1574 freed := false
1575 for _, i := range regspec.inputs {
1576 if args[i.idx] != nil {
1577 continue
1578 }
1579 mask := i.regs
1580 if countRegs(mask) == 1 && mask&^s.used != 0 {
1581 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1582
1583
1584
1585 oldregs := s.values[v.Args[i.idx].ID].regs
1586 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1587 s.freeRegs(oldregs &^ mask &^ s.nospill)
1588 freed = true
1589 }
1590 }
1591 }
1592 if !freed {
1593 break
1594 }
1595 }
1596
1597
1598 for _, i := range regspec.inputs {
1599 if args[i.idx] != nil {
1600 continue
1601 }
1602 mask := i.regs
1603 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1604
1605 mask &= s.allocatable
1606 mask &^= s.nospill
1607
1608 if i.idx < 3 {
1609 for _, r := range dinfo[idx].in[i.idx] {
1610 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1611
1612 mask = regMask(1) << r
1613 break
1614 }
1615 }
1616 }
1617
1618 if mask&^desired.avoid != 0 {
1619 mask &^= desired.avoid
1620 }
1621 }
1622 if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
1623
1624
1625
1626 mask = 1 << s.SPReg
1627 }
1628 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1629 }
1630
1631
1632
1633
1634 if opcodeTable[v.Op].resultInArg0 {
1635 var m regMask
1636 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1637
1638 goto ok
1639 }
1640 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1641 args[0], args[1] = args[1], args[0]
1642 goto ok
1643 }
1644 if s.values[v.Args[0].ID].rematerializeable {
1645
1646 goto ok
1647 }
1648 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1649 args[0], args[1] = args[1], args[0]
1650 goto ok
1651 }
1652 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1653
1654 goto ok
1655 }
1656 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1657 args[0], args[1] = args[1], args[0]
1658 goto ok
1659 }
1660
1661
1662
1663
1664
1665 m = s.compatRegs(v.Args[0].Type) &^ s.used
1666 if m == 0 {
1667
1668
1669
1670
1671 goto ok
1672 }
1673
1674
1675 for _, r := range dinfo[idx].out {
1676 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1677 m = regMask(1) << r
1678 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1679
1680
1681 goto ok
1682 }
1683 }
1684
1685
1686 for _, r := range dinfo[idx].in[0] {
1687 if r != noRegister && m>>r&1 != 0 {
1688 m = regMask(1) << r
1689 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1690 s.copies[c] = false
1691
1692
1693 goto ok
1694 }
1695 }
1696 if opcodeTable[v.Op].commutative {
1697 for _, r := range dinfo[idx].in[1] {
1698 if r != noRegister && m>>r&1 != 0 {
1699 m = regMask(1) << r
1700 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1701 s.copies[c] = false
1702 args[0], args[1] = args[1], args[0]
1703 goto ok
1704 }
1705 }
1706 }
1707
1708
1709 if m&^desired.avoid != 0 {
1710 m &^= desired.avoid
1711 }
1712
1713 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1714 s.copies[c] = false
1715
1716
1717
1718
1719 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1720 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1721 r := register(rp.num)
1722 for _, r2 := range dinfo[idx].in[0] {
1723 if r == r2 {
1724 args[0] = c
1725 break
1726 }
1727 }
1728 }
1729 }
1730 }
1731 ok:
1732 for i := 0; i < 2; i++ {
1733 if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
1734 continue
1735 }
1736 if !s.liveAfterCurrentInstruction(v.Args[i]) {
1737
1738 continue
1739 }
1740 if s.values[v.Args[i].ID].rematerializeable {
1741
1742 continue
1743 }
1744 if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
1745
1746 continue
1747 }
1748
1749 m := s.compatRegs(v.Args[i].Type) &^ s.used
1750 if m == 0 {
1751
1752
1753
1754
1755 continue
1756 }
1757
1758 c := s.allocValToReg(v.Args[i], m, true, v.Pos)
1759 s.copies[c] = false
1760 }
1761
1762
1763
1764
1765
1766
1767
1768 if opcodeTable[v.Op].needIntTemp {
1769 m := s.allocatable & s.f.Config.gpRegMask
1770 for _, out := range regspec.outputs {
1771 if countRegs(out.regs) == 1 {
1772 m &^= out.regs
1773 }
1774 }
1775 if m&^desired.avoid&^s.nospill != 0 {
1776 m &^= desired.avoid
1777 }
1778 tmpReg = s.allocReg(m, &tmpVal)
1779 s.nospill |= regMask(1) << tmpReg
1780 s.tmpused |= regMask(1) << tmpReg
1781 }
1782
1783 if regspec.clobbersArg0 {
1784 s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
1785 }
1786 if regspec.clobbersArg1 {
1787 s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
1788 }
1789
1790
1791
1792
1793
1794 if !opcodeTable[v.Op].resultNotInArgs {
1795 s.tmpused = s.nospill
1796 s.nospill = 0
1797 s.advanceUses(v)
1798 }
1799
1800
1801 if s.doClobber && v.Op.IsCall() {
1802
1803
1804 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1805 }
1806 s.freeRegs(regspec.clobbers)
1807 s.tmpused |= regspec.clobbers
1808
1809
1810 {
1811 outRegs := noRegisters
1812 maxOutIdx := -1
1813 var used regMask
1814 if tmpReg != noRegister {
1815
1816
1817 used |= regMask(1) << tmpReg
1818 }
1819 for _, out := range regspec.outputs {
1820 if out.regs == 0 {
1821 continue
1822 }
1823 mask := out.regs & s.allocatable &^ used
1824 if mask == 0 {
1825 s.f.Fatalf("can't find any output register %s", v.LongString())
1826 }
1827 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1828 if !opcodeTable[v.Op].commutative {
1829
1830 r := register(s.f.getHome(args[0].ID).(*Register).num)
1831 if mask>>r&1 == 0 {
1832 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1833 }
1834 mask = regMask(1) << r
1835 } else {
1836
1837 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1838 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1839
1840 found := false
1841 for _, r := range dinfo[idx].out {
1842 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1843 mask = regMask(1) << r
1844 found = true
1845 if r == r1 {
1846 args[0], args[1] = args[1], args[0]
1847 }
1848 break
1849 }
1850 }
1851 if !found {
1852
1853 mask = regMask(1) << r0
1854 }
1855 }
1856 }
1857 if out.idx == 0 {
1858 for _, r := range dinfo[idx].out {
1859 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1860
1861 mask = regMask(1) << r
1862 break
1863 }
1864 }
1865 }
1866 if out.idx == 1 {
1867 if prefs, ok := desiredSecondReg[v.ID]; ok {
1868 for _, r := range prefs {
1869 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1870
1871 mask = regMask(1) << r
1872 break
1873 }
1874 }
1875 }
1876 }
1877
1878 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1879 mask &^= desired.avoid
1880 }
1881 r := s.allocReg(mask, v)
1882 if out.idx > maxOutIdx {
1883 maxOutIdx = out.idx
1884 }
1885 outRegs[out.idx] = r
1886 used |= regMask(1) << r
1887 s.tmpused |= regMask(1) << r
1888 }
1889
1890 if v.Type.IsTuple() {
1891 var outLocs LocPair
1892 if r := outRegs[0]; r != noRegister {
1893 outLocs[0] = &s.registers[r]
1894 }
1895 if r := outRegs[1]; r != noRegister {
1896 outLocs[1] = &s.registers[r]
1897 }
1898 s.f.setHome(v, outLocs)
1899
1900 } else if v.Type.IsResults() {
1901
1902 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1903 for i := 0; i <= maxOutIdx; i++ {
1904 if r := outRegs[i]; r != noRegister {
1905 outLocs[i] = &s.registers[r]
1906 }
1907 }
1908 s.f.setHome(v, outLocs)
1909 } else {
1910 if r := outRegs[0]; r != noRegister {
1911 s.assignReg(r, v, v)
1912 }
1913 }
1914 if tmpReg != noRegister {
1915
1916 if s.f.tempRegs == nil {
1917 s.f.tempRegs = map[ID]*Register{}
1918 }
1919 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1920 }
1921 }
1922
1923
1924 if opcodeTable[v.Op].resultNotInArgs {
1925 s.nospill = 0
1926 s.advanceUses(v)
1927 }
1928 s.tmpused = 0
1929
1930
1931 for i, a := range args {
1932 v.SetArg(i, a)
1933 }
1934 b.Values = append(b.Values, v)
1935 s.dropIfUnused(v)
1936 }
1937
1938
1939
1940 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1941
1942
1943 for i, v := range b.ControlValues() {
1944 if !s.values[v.ID].needReg {
1945 continue
1946 }
1947 if s.f.pass.debug > regDebug {
1948 fmt.Printf(" processing control %s\n", v.LongString())
1949 }
1950
1951
1952
1953 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1954 }
1955
1956
1957
1958 for _, v := range controls {
1959 vi := &s.values[v.ID]
1960 if !vi.needReg {
1961 continue
1962 }
1963
1964 u := vi.uses
1965 vi.uses = u.next
1966 if u.next == nil {
1967 s.freeRegs(vi.regs)
1968 }
1969 u.next = s.freeUseRecords
1970 s.freeUseRecords = u
1971 }
1972
1973
1974
1975
1976 if len(b.Succs) == 1 {
1977 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1978 s.freeReg(s.GReg)
1979 }
1980 if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
1981
1982 goto badloop
1983 }
1984
1985 top := b.Succs[0].b
1986 loop := s.loopnest.b2l[top.ID]
1987 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1988 goto badloop
1989 }
1990
1991
1992 for _, live := range s.live[b.ID] {
1993 if live.dist >= unlikelyDistance {
1994
1995 continue
1996 }
1997 vid := live.ID
1998 vi := &s.values[vid]
1999 if vi.regs != 0 {
2000 continue
2001 }
2002 if vi.rematerializeable {
2003 continue
2004 }
2005 v := s.orig[vid]
2006 m := s.compatRegs(v.Type) &^ s.used
2007
2008 outerloop:
2009 for _, e := range desired.entries {
2010 if e.ID != v.ID {
2011 continue
2012 }
2013 for _, r := range e.regs {
2014 if r != noRegister && m>>r&1 != 0 {
2015 m = regMask(1) << r
2016 break outerloop
2017 }
2018 }
2019 }
2020 if m&^desired.avoid != 0 {
2021 m &^= desired.avoid
2022 }
2023 if m != 0 {
2024 s.allocValToReg(v, m, false, b.Pos)
2025 }
2026 }
2027 }
2028 badloop:
2029 ;
2030
2031
2032
2033 k := 0
2034 for r := register(0); r < s.numRegs; r++ {
2035 v := s.regs[r].v
2036 if v == nil {
2037 continue
2038 }
2039 k++
2040 }
2041 regList := make([]endReg, 0, k)
2042 for r := register(0); r < s.numRegs; r++ {
2043 v := s.regs[r].v
2044 if v == nil {
2045 continue
2046 }
2047 regList = append(regList, endReg{r, v, s.regs[r].c})
2048 }
2049 s.endRegs[b.ID] = regList
2050
2051 if checkEnabled {
2052 regValLiveSet.clear()
2053 if s.live != nil {
2054 for _, x := range s.live[b.ID] {
2055 regValLiveSet.add(x.ID)
2056 }
2057 }
2058 for r := register(0); r < s.numRegs; r++ {
2059 v := s.regs[r].v
2060 if v == nil {
2061 continue
2062 }
2063 if !regValLiveSet.contains(v.ID) {
2064 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
2065 }
2066 }
2067 }
2068
2069
2070
2071
2072
2073 if s.live != nil {
2074 for _, e := range s.live[b.ID] {
2075 vi := &s.values[e.ID]
2076 if vi.regs != 0 {
2077
2078 continue
2079 }
2080 if vi.rematerializeable {
2081
2082 continue
2083 }
2084 if s.f.pass.debug > regDebug {
2085 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
2086 }
2087 spill := s.makeSpill(s.orig[e.ID], b)
2088 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
2089 }
2090
2091
2092
2093
2094 for _, e := range s.live[b.ID] {
2095 u := s.values[e.ID].uses
2096 if u == nil {
2097 f.Fatalf("live at end, no uses v%d", e.ID)
2098 }
2099 if u.next != nil {
2100 f.Fatalf("live at end, too many uses v%d", e.ID)
2101 }
2102 s.values[e.ID].uses = nil
2103 u.next = s.freeUseRecords
2104 s.freeUseRecords = u
2105 }
2106 }
2107
2108
2109
2110
2111
2112
2113
2114 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
2115 regs := make([]startReg, 0, c)
2116 for _, sr := range s.startRegs[b.ID] {
2117 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
2118 continue
2119 }
2120 regs = append(regs, sr)
2121 }
2122 s.startRegs[b.ID] = regs
2123 }
2124 }
2125
2126
2127 s.placeSpills()
2128
2129
2130
2131 stacklive := stackalloc(s.f, s.spillLive)
2132
2133
2134 s.shuffle(stacklive)
2135
2136
2137
2138
2139 for {
2140 progress := false
2141 for c, used := range s.copies {
2142 if !used && c.Uses == 0 {
2143 if s.f.pass.debug > regDebug {
2144 fmt.Printf("delete copied value %s\n", c.LongString())
2145 }
2146 c.resetArgs()
2147 f.freeValue(c)
2148 delete(s.copies, c)
2149 progress = true
2150 }
2151 }
2152 if !progress {
2153 break
2154 }
2155 }
2156
2157 for _, b := range s.visitOrder {
2158 i := 0
2159 for _, v := range b.Values {
2160 if v.Op == OpInvalid {
2161 continue
2162 }
2163 b.Values[i] = v
2164 i++
2165 }
2166 b.Values = b.Values[:i]
2167 }
2168 }
2169
// placeSpills picks a block for each still-unplaced spill (those with a nil
// Block) and splices the spill values into the blocks' value lists.
// Spills are sunk down the dominator tree toward their restores, as long as
// that does not push them deeper into a loop.
func (s *regAllocState) placeSpills() {
	// mustBeFirst reports whether op must appear in the leading run of a
	// block's values (closure pointer loads, phis, and register args).
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// start[b.ID] = spills to be placed at the start of block b
	// (after the mustBeFirst values).
	start := map[ID][]*Value{}
	// after[v.ID] = spills to be placed immediately after value v.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			// Value was never spilled.
			continue
		}
		if spill.Block != nil {
			// Spill was already placed.
			continue
		}
		v := s.orig[i]
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		// Start the search at the definition of v: spilling right after the
		// def is always correct.
		best := v.Block
		bestArg := v
		var bestDepth int16
		if s.loopnest != nil && s.loopnest.b2l[best.ID] != nil {
			bestDepth = s.loopnest.b2l[best.ID].depth
		}
		b := best
		const maxSpillSearch = 100
		// Walk down the dominator tree looking for a child that still
		// dominates all restores of v ([restoreMin, restoreMax] range in the
		// sdom numbering), bounded to maxSpillSearch steps total.
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if s.loopnest != nil && s.loopnest.b2l[b.ID] != nil {
				depth = s.loopnest.b2l[b.ID].depth
			}
			if depth > bestDepth {
				// Don't push the spill deeper into a loop than where it
				// currently is.
				continue
			}

			// If v is in a register at the start of b, this block is a
			// better (later) place to put the spill; remember the cached
			// register copy to use as the spill's argument.
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the chosen block.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place the spill immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place the spill at the start of the best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2292
2293
2294 func (s *regAllocState) shuffle(stacklive [][]ID) {
2295 var e edgeState
2296 e.s = s
2297 e.cache = map[ID][]*Value{}
2298 e.contents = map[Location]contentRecord{}
2299 if s.f.pass.debug > regDebug {
2300 fmt.Printf("shuffle %s\n", s.f.Name)
2301 fmt.Println(s.f.String())
2302 }
2303
2304 for _, b := range s.visitOrder {
2305 if len(b.Preds) <= 1 {
2306 continue
2307 }
2308 e.b = b
2309 for i, edge := range b.Preds {
2310 p := edge.b
2311 e.p = p
2312 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2313 e.process()
2314 }
2315 }
2316
2317 if s.f.pass.debug > regDebug {
2318 fmt.Printf("post shuffle %s\n", s.f.Name)
2319 fmt.Println(s.f.String())
2320 }
2321 }
2322
// edgeState tracks the set of locations holding each value while moves are
// generated along a single control-flow edge p->b.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p to b

	// cache maps each value ID to the list of cached copies (register or
	// stack-resident Values) currently holding that value.
	cache      map[ID][]*Value
	cachedVals []ID // IDs with a nonempty entry in cache, for iteration

	// contents records which value each location currently holds.
	contents map[Location]contentRecord

	// destinations yet to be satisfied on this edge; extra collects
	// destinations generated while processing (e.g. by erase).
	destinations []dstRecord
	extra        []dstRecord

	usedRegs              regMask // registers currently holding a cached copy
	uniqueRegs            regMask // registers holding the only copy of some value
	finalRegs             regMask // registers holding a value in a final (destination) position
	rematerializeableRegs regMask // registers holding a rematerializeable value
}
2343
// contentRecord describes what is stored in a location during shuffling.
type contentRecord struct {
	vid   ID       // pre-regalloc ID of the value stored here
	c     *Value   // the cached copy of the value at this location
	final bool     // this location is a destination for vid on this edge
	pos   src.XPos // source position to attribute moves of this value to
}
2350
// dstRecord describes one required placement of a value on an edge.
type dstRecord struct {
	loc    Location // location the value must end up in
	vid    ID       // pre-regalloc ID of the value that should land in loc
	splice **Value  // if non-nil, a phi-argument slot to redirect to the placed copy
	pos    src.XPos // source position of the use of this value
}
2357
2358
// setup initializes the edge state for shuffling along one p->b edge.
// idx is the index of this predecessor edge in b.Preds; srcReg/dstReg are
// the register contents at the end of p and required at the start of b;
// stacklive lists spill IDs live on the stack at the end of p.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state; this edgeState is reused across edges.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers at the end of the predecessor are sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
	}
	// Live spill slots at the end of the predecessor are also sources.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// The spill instruction does not dominate this edge, so the
			// stack copy is not actually available here; skip it.
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
	}

	// Figure out all the destinations we need: registers live at the start
	// of b, plus the arguments of b's phis along this edge.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis need their args to end up in a specific location.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2425
2426
// process generates code to move all the values on this edge into their
// required destinations, breaking move cycles when necessary.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extras destinations we generated.
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress, which means every remaining destination is
		// blocked: it is occupied by the sole copy of some other value that
		// is itself waiting to move — a cycle (e.g. a <-> b swap). Break it
		// by copying the occupant of the first stuck destination into a
		// scratch register; that frees the destination and lets the next
		// pass make progress.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2493
2494
2495
2496 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
2497 pos = pos.WithNotStmt()
2498 occupant := e.contents[loc]
2499 if occupant.vid == vid {
2500
2501 e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
2502 if splice != nil {
2503 (*splice).Uses--
2504 *splice = occupant.c
2505 occupant.c.Uses++
2506 }
2507
2508
2509
2510 if _, ok := e.s.copies[occupant.c]; ok {
2511
2512 e.s.copies[occupant.c] = true
2513 }
2514 return true
2515 }
2516
2517
2518 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
2519
2520
2521 return false
2522 }
2523
2524
2525 v := e.s.orig[vid]
2526 var c *Value
2527 var src Location
2528 if e.s.f.pass.debug > regDebug {
2529 fmt.Printf("moving v%d to %s\n", vid, loc)
2530 fmt.Printf("sources of v%d:", vid)
2531 }
2532 if opcodeTable[v.Op].fixedReg {
2533 c = v
2534 src = e.s.f.getHome(v.ID)
2535 } else {
2536 for _, w := range e.cache[vid] {
2537 h := e.s.f.getHome(w.ID)
2538 if e.s.f.pass.debug > regDebug {
2539 fmt.Printf(" %s:%s", h, w)
2540 }
2541 _, isreg := h.(*Register)
2542 if src == nil || isreg {
2543 c = w
2544 src = h
2545 }
2546 }
2547 }
2548 if e.s.f.pass.debug > regDebug {
2549 if src != nil {
2550 fmt.Printf(" [use %s]\n", src)
2551 } else {
2552 fmt.Printf(" [no source]\n")
2553 }
2554 }
2555 _, dstReg := loc.(*Register)
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567 e.erase(loc)
2568 var x *Value
2569 if c == nil || e.s.values[vid].rematerializeable {
2570 if !e.s.values[vid].rematerializeable {
2571 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
2572 }
2573 if dstReg {
2574
2575
2576
2577
2578 if e.s.regspec(v).outputs[0].regs®Mask(1<<register(loc.(*Register).num)) == 0 {
2579 _, srcReg := src.(*Register)
2580 if srcReg {
2581
2582
2583 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2584 } else {
2585
2586 x = v.copyInto(e.p)
2587 r := e.findRegFor(x.Type)
2588 e.erase(r)
2589
2590 e.set(r, vid, x, false, pos)
2591
2592 x = e.p.NewValue1(pos, OpCopy, x.Type, x)
2593 }
2594 } else {
2595 x = v.copyInto(e.p)
2596 }
2597 } else {
2598
2599
2600 r := e.findRegFor(v.Type)
2601 e.erase(r)
2602 x = v.copyIntoWithXPos(e.p, pos)
2603 e.set(r, vid, x, false, pos)
2604
2605
2606
2607 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
2608 }
2609 } else {
2610
2611 _, srcReg := src.(*Register)
2612 if srcReg {
2613 if dstReg {
2614 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2615 } else {
2616 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
2617 }
2618 } else {
2619 if dstReg {
2620 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2621 } else {
2622
2623 r := e.findRegFor(c.Type)
2624 e.erase(r)
2625 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2626 e.set(r, vid, t, false, pos)
2627 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
2628 }
2629 }
2630 }
2631 e.set(loc, vid, x, true, pos)
2632 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
2633 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
2634 }
2635 if splice != nil {
2636 (*splice).Uses--
2637 *splice = x
2638 x.Uses++
2639 }
2640 return true
2641 }
2642
2643
// set changes the contents of location loc to hold the given value and its
// cached representative c, updating the cache and all register bitmasks.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First copy of this value: track its ID for iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			// This register holds the only copy of the value.
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			// A second copy now exists; if the first copy was in a
			// register, that register is no longer unique.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
2678
2679
// erase removes any user of loc, updating the cache and register bitmasks.
// If the erased copy was in its final position, the destination is re-queued
// via e.extra so it gets satisfied again later.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Make sure it gets added to the tail of the destination queue
		// so we make progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove c from the list of cached values.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		// The remaining single copy, if it is in a register, is now unique.
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2721
2722
// findRegFor finds a register usable as a scratch location for a value of
// the given type, spilling an occupant to a new stack slot if necessary.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Which registers are possibilities.
	m := e.s.compatRegs(typ)

	// Pick a register, in decreasing order of preference:
	// 1) an unused register
	// 2) a non-unique register not holding a final value
	// 3) a non-unique register
	// 4) a register holding a rematerializeable value
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is obviously free. Spill some cached value held in a
	// compatible register to a fresh stack slot, then reuse its register.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a fresh local slot for the spilled copy.
					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, c.Type), Type: c.Type}
					// The value is now available on the stack as well.
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r will now be overwritten by the caller. At some point
				// later, the newly saved value will be moved back to its
				// final destination in processDest.
				return r
			}
		}
	}

	// Nothing workable: dump state and abort.
	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
2783
2784
2785
2786 func (v *Value) rematerializeable() bool {
2787 if !opcodeTable[v.Op].rematerializeable {
2788 return false
2789 }
2790 for _, a := range v.Args {
2791
2792
2793 if !opcodeTable[a.Op].fixedReg {
2794 return false
2795 }
2796 }
2797 return true
2798 }
2799
// liveInfo records that a value is live at the end of some block.
type liveInfo struct {
	ID   ID       // ID of the live value
	dist int32    // approximate distance to next use (instruction counts plus branch/call penalties)
	pos  src.XPos // source position of next use
}
2805
2806
2807
2808
// computeLive computes, for each block, the set of values live at the end of
// the block, together with an approximate distance to each value's next use
// (s.live), then triggers computation of desired register preferences
// (s.desired). Distances are measured in instructions, weighted by branch
// likelihood and by a penalty for crossing calls.
func (s *regAllocState) computeLive() {
	f := s.f
	// A single-block function has no liveness across edges to compute.
	if len(f.Blocks) == 1 {
		return
	}
	po := f.postorder()
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	s.loopnest = f.loopnest()

	// Scratch buffer for IDs of rematerializeable values dropped at calls.
	rematIDs := make([]ID, 0, 64)

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	s.loopnest.computeUnavoidableCalls()

	// For reducible CFGs with loops, a single backward pass plus explicit
	// loop live-in propagation (below) is enough; loopLiveIn/numCalls are
	// only allocated in that case. Otherwise we iterate the backward
	// dataflow to a fixed point.
	var loopLiveIn map[*loop][]liveInfo
	var numCalls []int32
	if len(s.loopnest.loops) > 0 && !s.loopnest.hasIrreducible {
		loopLiveIn = make(map[*loop][]liveInfo)
		numCalls = f.Cache.allocInt32Slice(f.NumBlocks())
		defer f.Cache.freeInt32Slice(numCalls)
	}

	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist, e.pos)
			}
			update := false

			// Phi args in successors are live at the end of this block.
			for _, e := range b.Succs {
				succ := e.b
				delta := branchDistance(b, succ)
				for _, v := range succ.Values {
					if v.Op != OpPhi {
						break
					}
					arg := v.Args[e.i]
					if s.values[arg.ID].needReg && (!live.contains(arg.ID) || delta < live.get(arg.ID)) {
						live.set(arg.ID, delta, v.Pos)
						update = true
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}

			// Everything live at block end is at least len(b.Values)
			// instructions from its use once we reach the block top.
			c := live.contents()
			for i := range c {
				c[i].val += int32(len(b.Values))
			}

			// Control values are used at the end of the block.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Walk the block's values backward, updating liveness.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Phi args are handled on the predecessor side (above).
					continue
				}
				if opcodeTable[v.Op].call {
					if numCalls != nil {
						numCalls[b.ID]++
					}
					// Penalize values live across a call, and drop
					// rematerializeable ones entirely — they are cheaper
					// to regenerate than to keep across the call.
					rematIDs = rematIDs[:0]
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
						vid := c[i].key
						if s.values[vid].rematerializeable {
							rematIDs = append(rematIDs, vid)
						}
					}
					for _, r := range rematIDs {
						live.remove(r)
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}

			// Record loop-header live-in sets for the loop propagation pass.
			if loopLiveIn != nil {
				loop := s.loopnest.b2l[b.ID]
				if loop != nil && loop.header.ID == b.ID {
					loopLiveIn[loop] = updateLive(live, nil)
				}
			}

			// Propagate this block's live-in set to its predecessors.
			for _, e := range b.Preds {
				p := e.b
				delta := branchDistance(p, b)

				// Start with the existing live-out set of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values, or shorter distances for known ones.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}

				if !update {
					continue
				}
				s.live[p.ID] = updateLive(t, s.live[p.ID])
				changed = true
			}
		}

		// Fixed point reached.
		if !changed {
			break
		}

		// With reducible loops we finish liveness via the loop propagation
		// below instead of iterating to a fixed point.
		if loopLiveIn != nil {
			break
		}

		// Loop-free CFGs need only one backward pass.
		if len(s.loopnest.loops) == 0 {
			break
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after dfs walk", f, s.live, s.desired)
	}

	// Irreducible or loop-free case: liveness is already complete.
	if loopLiveIn == nil {
		s.computeDesired()
		return
	}

	// Propagate loop-header live-in sets from outer loops into inner loops,
	// processing loops outermost-first so inner loops see their outers'
	// already-augmented sets.
	loops := slices.Clone(s.loopnest.loops)
	slices.SortFunc(loops, func(a, b *loop) int {
		return cmp.Compare(a.depth, b.depth)
	})

	loopset := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(loopset)
	for _, loop := range loops {
		if loop.outer == nil {
			continue
		}
		livein := loopLiveIn[loop]
		loopset.clear()
		for _, l := range livein {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range loopLiveIn[loop.outer] {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, l.dist, l.pos)
				update = true
			}
		}
		if update {
			loopLiveIn[loop] = updateLive(loopset, livein)
		}
	}

	// Marker for values known live but whose distance is not yet computed.
	const unknownDistance = -1

	// Anything live-in at a loop's header is live throughout the loop:
	// add it (with unknown distance) to every block of the loop.
	for _, b := range po {
		loop := s.loopnest.b2l[b.ID]
		if loop == nil {
			continue
		}
		headerLive := loopLiveIn[loop]
		loopset.clear()
		for _, l := range s.live[b.ID] {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range headerLive {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, unknownDistance, src.NoXPos)
				update = true
			}
		}
		if update {
			s.live[b.ID] = updateLive(loopset, s.live[b.ID])
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after live loop prop", f, s.live, s.desired)
	}

	// Resolve the unknown distances by repeatedly pulling minimum distances
	// back from successors until every value in every block has one.
	unfinishedBlocks := f.Cache.allocBlockSlice(len(po))
	defer f.Cache.freeBlockSlice(unfinishedBlocks)
	copy(unfinishedBlocks, po)

	for len(unfinishedBlocks) > 0 {
		n := 0
		for _, b := range unfinishedBlocks {
			live.clear()
			unfinishedValues := 0
			for _, l := range s.live[b.ID] {
				if l.dist == unknownDistance {
					unfinishedValues++
				}
				live.set(l.ID, l.dist, l.pos)
			}
			update := false
			for _, e := range b.Succs {
				succ := e.b
				for _, l := range s.live[succ.ID] {
					if !live.contains(l.ID) || l.dist == unknownDistance {
						continue
					}
					// Distance through succ: succ's length, the edge cost,
					// and a penalty per call in succ.
					dist := int32(len(succ.Values)) + l.dist + branchDistance(b, succ)
					dist += numCalls[succ.ID] * unlikelyDistance
					val := live.get(l.ID)
					switch {
					case val == unknownDistance:
						unfinishedValues--
						fallthrough
					case dist < val:
						update = true
						live.set(l.ID, dist, l.pos)
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}
			if unfinishedValues > 0 {
				// Still has unknown distances; revisit next round.
				unfinishedBlocks[n] = b
				n++
			}
		}
		unfinishedBlocks = unfinishedBlocks[:n]
	}

	s.computeDesired()

	if f.pass.debug > regDebug {
		s.debugPrintLive("final", f, s.live, s.desired)
	}
}
3111
3112
3113
3114
// computeDesired computes the desired register assignments for each block
// (s.desired) by walking blocks backward and propagating register
// preferences across predecessors until a fixed point is reached.
func (s *regAllocState) computeDesired() {
	// desired accumulates the preferences for the block being scanned.
	var desired desiredState
	f := s.f
	po := f.postorder()
	for {
		changed := false
		for _, b := range po {
			// Start from the preferences known at the end of b.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// Phis are special: they are handled at merge-edge
					// shuffle time, not here.
					continue
				}
				regspec := s.regspec(v)
				// Cancel desired registers that this instruction clobbers.
				desired.clobber(regspec.clobbers)
				// Single-register input constraints become preferences for
				// the corresponding argument.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// For ops whose result shares a register with arg0 (and the
				// listed special cases), forward v's own preferences to its
				// input(s); for commutative ops, arg1 too (arg0 last so it
				// ends up most preferred).
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}
			// Merge this block's live-in preferences into its predecessors.
			for _, e := range b.Preds {
				p := e.b
				changed = s.desired[p.ID].merge(&desired) || changed
			}
		}
		// Loop-free, reducible CFGs converge in one pass; otherwise iterate
		// until no predecessor's state changes.
		if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
			break
		}
	}
}
3171
3172
3173 func updateLive(t *sparseMapPos, live []liveInfo) []liveInfo {
3174 live = live[:0]
3175 if cap(live) < t.size() {
3176 live = make([]liveInfo, 0, t.size())
3177 }
3178 for _, e := range t.contents() {
3179 live = append(live, liveInfo{e.key, e.val, e.pos})
3180 }
3181 return live
3182 }
3183
3184
3185
3186
3187 func branchDistance(b *Block, s *Block) int32 {
3188 if len(b.Succs) == 2 {
3189 if b.Succs[0].b == s && b.Likely == BranchLikely ||
3190 b.Succs[1].b == s && b.Likely == BranchUnlikely {
3191 return likelyDistance
3192 }
3193 if b.Succs[0].b == s && b.Likely == BranchUnlikely ||
3194 b.Succs[1].b == s && b.Likely == BranchLikely {
3195 return unlikelyDistance
3196 }
3197 }
3198
3199
3200 return normalDistance
3201 }
3202
3203 func (s *regAllocState) debugPrintLive(stage string, f *Func, live [][]liveInfo, desired []desiredState) {
3204 fmt.Printf("%s: live values at end of each block: %s\n", stage, f.Name)
3205 for _, b := range f.Blocks {
3206 s.debugPrintLiveBlock(b, live[b.ID], &desired[b.ID])
3207 }
3208 }
3209
3210 func (s *regAllocState) debugPrintLiveBlock(b *Block, live []liveInfo, desired *desiredState) {
3211 fmt.Printf(" %s:", b)
3212 slices.SortFunc(live, func(a, b liveInfo) int {
3213 return cmp.Compare(a.ID, b.ID)
3214 })
3215 for _, x := range live {
3216 fmt.Printf(" v%d(%d)", x.ID, x.dist)
3217 for _, e := range desired.entries {
3218 if e.ID != x.ID {
3219 continue
3220 }
3221 fmt.Printf("[")
3222 first := true
3223 for _, r := range e.regs {
3224 if r == noRegister {
3225 continue
3226 }
3227 if !first {
3228 fmt.Printf(",")
3229 }
3230 fmt.Print(&s.registers[r])
3231 first = false
3232 }
3233 fmt.Printf("]")
3234 }
3235 }
3236 if avoid := desired.avoid; avoid != 0 {
3237 fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
3238 }
3239 fmt.Println()
3240 }
3241
3242
// A desiredState records which registers live values would like to be in.
type desiredState struct {
	// Desired assignments are expected to be few, so a flat list of
	// (value ID, registers) entries is used instead of a map.
	entries []desiredStateEntry

	// Union of all registers named in entries. It is maintained as a
	// superset: clobber removes bits, but entries dropped by remove may
	// leave stale bits behind, so a set bit does not guarantee a live
	// entry wants that register.
	avoid regMask
}
type desiredStateEntry struct {
	// ID of the (pre-regalloc) value this entry describes.
	ID ID

	// Registers this value would like to be in, in priority order
	// (regs[0] strongest). Unused slots hold noRegister; at most four
	// preferences are tracked per value.
	regs [4]register
}
3263
3264
3265 func (d *desiredState) get(vid ID) [4]register {
3266 for _, e := range d.entries {
3267 if e.ID == vid {
3268 return e.regs
3269 }
3270 }
3271 return [4]register{noRegister, noRegister, noRegister, noRegister}
3272 }
3273
3274
// add records that value vid would like to be in register r, making r the
// new highest-priority preference for vid.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// r is already the top preference; nothing to do.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// r is already present at a lower priority: shift the
				// higher-priority entries down one slot and move r to
				// the front.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		// r is new for vid: shift everything down one slot (dropping the
		// lowest-priority register) and put r at the front.
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	// No entry for vid yet; create one with r as the sole preference.
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
3300
3301 func (d *desiredState) addList(vid ID, regs [4]register) {
3302
3303 for i := len(regs) - 1; i >= 0; i-- {
3304 r := regs[i]
3305 if r != noRegister {
3306 d.add(vid, r)
3307 }
3308 }
3309 }
3310
3311
// clobber erases any desired registers in the set m.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		// Compact e.regs in place, keeping only registers that are set
		// and not in the clobber mask; j counts survivors.
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No desired registers left for this value: swap-remove the
			// entry and re-examine index i (now holding the former last
			// entry).
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		// Pad the vacated tail slots with noRegister.
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	// Clobbered registers no longer need to be avoided.
	d.avoid &^= m
}
3335
3336
3337 func (d *desiredState) copy(x *desiredState) {
3338 d.entries = append(d.entries[:0], x.entries...)
3339 d.avoid = x.avoid
3340 }
3341
3342
3343 func (d *desiredState) remove(vid ID) [4]register {
3344 for i := range d.entries {
3345 if d.entries[i].ID == vid {
3346 regs := d.entries[i].regs
3347 d.entries[i] = d.entries[len(d.entries)-1]
3348 d.entries = d.entries[:len(d.entries)-1]
3349 return regs
3350 }
3351 }
3352 return [4]register{noRegister, noRegister, noRegister, noRegister}
3353 }
3354
3355
3356
// merge merges the desired state x into d and reports whether d's avoid
// mask grew. Change detection is based solely on the avoid mask: since it
// is a superset of the registers named in entries, an unchanged mask means
// no new register preferences were introduced (priority reordering alone
// does not count as a change).
func (d *desiredState) merge(x *desiredState) bool {
	oldAvoid := d.avoid
	d.avoid |= x.avoid
	// Merge all of x's per-value preferences into d.
	for _, e := range x.entries {
		d.addList(e.ID, e.regs)
	}
	return oldAvoid != d.avoid
}
3367
3368
// computeUnavoidableCalls sets l.containsUnavoidableCall for every loop l
// whose every path from the loop header back to itself passes through a
// block containing a call.
func (loopnest *loopnest) computeUnavoidableCalls() {
	f := loopnest.f

	// hasCall[b.ID] records whether block b contains a call.
	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
	defer f.Cache.freeBoolSlice(hasCall)
	for _, b := range f.Blocks {
		if b.containsCall() {
			hasCall[b.ID] = true
		}
	}
	// found tracks blocks already pushed for the current loop's search,
	// so each block is visited at most once per loop.
	found := f.Cache.allocSparseSet(f.NumBlocks())
	defer f.Cache.freeSparseSet(found)

	// For each loop, run a depth-first search from the header, pruning at
	// any block that contains a call and at any block outside the loop.
	// If the search reaches the header again, there is a call-free path
	// around the loop and its calls are avoidable; otherwise every cycle
	// through the header hits a call.
loopLoop:
	for _, l := range loopnest.loops {
		found.clear()
		tovisit := make([]*Block, 0, 8)
		tovisit = append(tovisit, l.header)
		for len(tovisit) > 0 {
			cur := tovisit[len(tovisit)-1]
			tovisit = tovisit[:len(tovisit)-1]
			if hasCall[cur.ID] {
				// Any loop path through cur hits a call; don't explore
				// its successors.
				continue
			}
			for _, s := range cur.Succs {
				nb := s.Block()
				if nb == l.header {
					// Found a call-free path back to the header: this
					// loop's calls are avoidable; move to the next loop.
					continue loopLoop
				}
				if found.contains(nb.ID) {
					// Already queued or visited for this loop.
					continue
				}
				nl := loopnest.b2l[nb.ID]
				if nl == nil || (nl.depth <= l.depth && nl != l) {
					// nb lies outside loop l (no loop, or a loop that is
					// not l and not nested deeper than l); paths leaving
					// the loop are not cycles through the header.
					continue
				}
				tovisit = append(tovisit, nb)
				found.add(nb.ID)
			}
		}
		// The search exhausted all call-free paths without reaching the
		// header again: every trip around the loop makes a call.
		l.containsUnavoidableCall = true
	}
}
3424
3425 func (b *Block) containsCall() bool {
3426 if b.Kind == BlockDefer {
3427 return true
3428 }
3429 for _, v := range b.Values {
3430 if opcodeTable[v.Op].call {
3431 return true
3432 }
3433 }
3434 return false
3435 }
3436