1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "cmp"
123 "fmt"
124 "internal/buildcfg"
125 "math"
126 "math/bits"
127 "slices"
128 "unsafe"
129 )
130
// Debug-output verbosity thresholds for this pass, in increasing order
// of verbosity; they are compared against s.f.pass.debug
// (e.g. "debug > regDebug" gates the per-register trace printing below).
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
137
138
139
// Distance weights for crossing a branch edge, used when measuring how
// far away a use is: a use beyond an unlikely branch counts as much
// farther than one beyond a likely branch, biasing the allocator toward
// keeping likely-path values in registers. (Presumably consumed by
// computeLive, which is not visible in this chunk — confirm there.)
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
145
146
147
148 func regalloc(f *Func) {
149 var s regAllocState
150 s.init(f)
151 s.regalloc(f)
152 s.close()
153 }
154
// register is the index of a machine register; it is used to index
// regAllocState.registers.
type register uint8

// noRegister is the sentinel meaning "no register assigned".
const noRegister register = 255

// noRegisters is a convenient all-sentinel array for initializing
// per-value register tables.
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}

// A regMask encodes a set of machine registers, one bit per register
// number.
type regMask uint64

// String renders the set as space-separated generic register names,
// e.g. "r0 r2 r31", in increasing register order.
func (m regMask) String() string {
	out := ""
	for m != 0 {
		r := register(bits.TrailingZeros64(uint64(m)))
		m &= m - 1 // clear the lowest set bit
		if out != "" {
			out += " "
		}
		out += fmt.Sprintf("r%d", r)
	}
	return out
}

// contains reports whether register r is a member of the set.
func (m regMask) contains(r register) bool {
	return m&(regMask(1)<<r) != 0
}
189
190 func (s *regAllocState) RegMaskString(m regMask) string {
191 str := ""
192 for r := register(0); m != 0; r++ {
193 if m>>r&1 == 0 {
194 continue
195 }
196 m &^= regMask(1) << r
197 if str != "" {
198 str += " "
199 }
200 str += s.registers[r].String()
201 }
202 return str
203 }
204
205
// countRegs returns the number of registers in the set r.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
209
210
// pickReg returns the lowest-numbered register in the set r.
// It panics if r is empty.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	// The index of the lowest set bit is the lowest register number.
	return register(bits.TrailingZeros64(uint64(r)))
}
218
// A use records one upcoming use of a value. Uses for a value form a
// linked list sorted by increasing dist (addUse enforces the order), so
// the head of the list is always the value's next use.
type use struct {
	dist int32    // distance (in instruction slots) to this use
	pos  src.XPos // source position of the use
	next *use     // next (farther) use of the same value; nodes are recycled via freeUseRecords
}
229
230
// A valState records the allocator's per-value state; regAllocState.values
// is indexed by value ID.
type valState struct {
	regs              regMask // registers currently holding a cached copy of this value
	uses              *use    // list of upcoming uses, nearest first
	spill             *Value  // spilled copy of the value (StoreReg), if one exists
	restoreMin        int32   // sdom entry bound of blocks that restore from the spill (see makeSpill)
	restoreMax        int32   // sdom exit bound of blocks that restore from the spill
	needReg           bool    // value needs a register (not memory/void/flags/tuple; see needRegister)
	rematerializeable bool    // value can be cheaply recomputed instead of spilled/copied
}
240
// A regState records what one machine register currently holds.
type regState struct {
	v *Value // original (pre-regalloc) value whose contents are in the register
	c *Value // cached copy of v actually placed in the register (may be v itself)
}
246
// regAllocState carries all the state for one register-allocation run
// over a single function.
type regAllocState struct {
	f *Func

	sdom        SparseTree // dominator tree; bounds spill-restore placement (see makeSpill)
	registers   []Register // machine register descriptions, from the config
	numRegs     register   // len(registers)
	SPReg       register   // register named "SP" (stack pointer)
	SBReg       register   // register named "SB" (static base)
	GReg        register   // register named "g", if the configuration has one
	ZeroIntReg  register   // register named "ZERO" (hard-wired zero), or noRegister
	allocatable regMask    // registers we are allowed to allocate from

	// live[b.ID] is presumably the per-block liveness info with use
	// distances, filled in by computeLive (not visible in this chunk) —
	// confirm there.
	live [][]liveInfo

	// desired[b.ID] holds desired-register preferences for the end of
	// each block (copied into a scratch desiredState while processing).
	desired []desiredState

	// values[id] is the current state of each value, indexed by value ID.
	values []valState

	// sp and sb are the IDs of the OpSP / OpSB values once seen.
	sp, sb ID

	// orig[id] maps a regalloc-created copy/spill/restore back to the
	// original pre-regalloc value it caches (see setOrig).
	orig []*Value

	// regs[r] is the current contents of each machine register.
	regs []regState

	// nospill is the set of registers holding values that must not be
	// kicked out during the current instruction.
	nospill regMask

	// used is the set of registers currently holding some value.
	used regMask

	// usedSinceBlockStart is the set of registers touched since the start
	// of the current block (used to prune startRegs; see allocReg).
	usedSinceBlockStart regMask

	// tmpused is the set of registers temporarily reserved; allocReg
	// avoids them when relocating an evicted value.
	tmpused regMask

	// curBlock is the block currently being processed.
	curBlock *Block

	// freeUseRecords is a free list of use records, recycled by
	// addUse/advanceUses.
	freeUseRecords *use

	// endRegs[b.ID] records the register contents at the end of each
	// block, consumed when its successors are started.
	endRegs [][]endReg

	// startRegs[b.ID] records the register contents required on entry to
	// each merge block; predecessors must arrange this state.
	startRegs [][]startReg

	// startRegsMask is the set of registers in startRegs for the block
	// currently being processed; bits can be dropped again if the
	// incoming value is never actually used (see allocReg).
	startRegsMask regMask

	// spillLive[b.ID] is presumably the spill value IDs live out of each
	// block — populated later in the pass than this chunk shows; confirm
	// against the rest of the file.
	spillLive [][]ID

	// copies tracks regalloc-introduced OpCopy values; entries mapped to
	// false are not yet known to be needed and may be removed later.
	copies map[*Value]bool

	loopnest *loopnest

	// visitOrder lists the blocks in the order they will be processed.
	visitOrder []*Block

	// blockOrder[b.ID] is the position of block b within visitOrder.
	blockOrder []int32

	// doClobber enables insertion of ClobberReg ops over dead registers
	// (debugging aid, driven by base.Flag.ClobberDeadReg; see init).
	doClobber bool

	// nextCall[i] is the index of the next call at or after instruction i
	// in the current block, or MaxInt32 if there is none.
	nextCall []int32

	// curIdx is the index of the instruction currently being processed in
	// the current block.
	curIdx int
}
343
// An endReg records one register's contents at the end of a block.
type endReg struct {
	r register
	v *Value // original (pre-regalloc) value held in r
	c *Value // cached copy of v actually in r (may be v itself)
}
349
// A startReg records a register that must hold a particular value on
// entry to a block.
type startReg struct {
	r   register
	v   *Value   // original (pre-regalloc) value needed in r
	c   *Value   // cached copy of v to be in r at block start
	pos src.XPos // source position of the value's next use, for diagnostics
}
356
357
// freeReg frees up register r, disassociating it from whatever value it
// currently holds: the register table entry is cleared and r is removed
// from the value's register set and from the used mask. Registers that
// are neither allocatable nor the g register carry no tracked state and
// are ignored.
func (s *regAllocState) freeReg(r register) {
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}
375
376
377 func (s *regAllocState) freeRegs(m regMask) {
378 for m&s.used != 0 {
379 s.freeReg(pickReg(m & s.used))
380 }
381 }
382
383
// clobberRegs inserts ClobberReg ops for every allocatable
// general-purpose register in m, so that stale register contents are
// visibly destroyed (debugging aid; see doClobber).
func (s *regAllocState) clobberRegs(m regMask) {
	m &= s.allocatable & s.f.Config.gpRegMask // only integer (GP) registers are clobbered
	for m != 0 {
		r := pickReg(m)
		m &^= 1 << r
		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
		s.f.setHome(x, &s.registers[r])
	}
}
393
394
395
// setOrig records that c is a regalloc-generated cached copy of v,
// growing the orig table as needed. It is a fatal error to set the
// original for the same copy twice.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	if int(c.ID) >= cap(s.orig) {
		// Grow via the function cache so the slice can be recycled.
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	// Store orig[v.ID] rather than v itself: v may already be a copy, and
	// every copy must point back to the true original value.
	s.orig[c.ID] = s.orig[v.ID]
}
411
412
413
// assignReg records that register r now holds value v, cached as c:
// r is added to v's register set and r becomes c's home location.
// Registers that are neither allocatable nor the g register get the
// home assignment but no occupancy tracking.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}

	s.values[v.ID].regs |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])

	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}
	s.regs[r] = regState{v, c}
	s.used |= regMask(1) << r
}
432
433
434
435
436 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
437 if v.OnWasmStack {
438 return noRegister
439 }
440
441 mask &= s.allocatable
442 mask &^= s.nospill
443 if mask == 0 {
444 s.f.Fatalf("no register available for %s", v.LongString())
445 }
446
447
448 if mask&^s.used != 0 {
449 r := pickReg(mask &^ s.used)
450 s.usedSinceBlockStart |= regMask(1) << r
451 return r
452 }
453
454
455
456
457
458
459
460
461
462
463
464 var r register
465 maxuse := int32(-1)
466 for t := register(0); t < s.numRegs; t++ {
467 if mask>>t&1 == 0 {
468 continue
469 }
470 v := s.regs[t].v
471 if n := s.values[v.ID].uses.dist; n > maxuse {
472
473
474 r = t
475 maxuse = n
476 }
477 }
478 if maxuse == -1 {
479 s.f.Fatalf("couldn't find register to spill")
480 }
481
482 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
483
484
485
486 s.freeReg(r)
487 return r
488 }
489
490
491
492 v2 := s.regs[r].v
493 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
494 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
495 s.usedSinceBlockStart |= regMask(1) << r
496 r2 := pickReg(m)
497 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
498 s.copies[c] = false
499 if s.f.pass.debug > regDebug {
500 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
501 }
502 s.setOrig(c, v2)
503 s.assignReg(r2, v2, c)
504 }
505
506
507
508
509 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
510 if s.startRegsMask&(regMask(1)<<r) == 1 {
511 if s.f.pass.debug > regDebug {
512 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
513 }
514 s.startRegsMask &^= regMask(1) << r
515 }
516 }
517
518 s.freeReg(r)
519 s.usedSinceBlockStart |= regMask(1) << r
520 return r
521 }
522
523
524
// makeSpill returns a spill (StoreReg) value for v, creating one on
// first call. The spill has no block yet — its final placement happens
// later and must dominate every restore, so restoreMin/restoreMax track
// the sdom entry/exit range of blocks that load from it.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Spill already exists; just widen the restore range to cover b.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}

	// Make a spill for v. We don't know where we want to put it yet, so
	// leave it blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)

	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
544
545
546
547
548
549
550
// allocValToReg returns a cached copy of v that lives in some register
// of mask, materializing it if necessary: it reuses an existing copy,
// or copies from another register, or rematerializes v, or loads it
// from its spill slot. If nospill is set, the chosen register is
// protected from eviction until the current instruction is done.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		// Rematerialize directly onto the wasm stack; no register needed.
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt() // copies/loads are not statement boundaries

	// Fast path: v is already in an acceptable register.
	if mask&vi.regs != 0 {
		mask &= vi.regs
		r := pickReg(mask)
		if mask.contains(s.SPReg) {
			// Prefer SP if it is acceptable — it is always valid and
			// costs nothing to keep.
			r = s.SPReg
		}
		if !s.allocatable.contains(r) {
			// Fixed (non-allocatable) register: no cached copy tracked.
			return v
		}
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register

	// On wasm, a nospill operand can live on the implicit operand stack
	// instead of occupying a register.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register (possibly evicting something).
		r = s.allocReg(mask, v)
	}

	// Produce a copy of v into the chosen location.
	var c *Value
	if vi.regs != 0 {
		// v is in some (unacceptable) register; copy from it.
		var current *Value
		if vi.regs&^s.allocatable != 0 {
			// v lives in a fixed register with no tracked cache; copy
			// from the original value directly.
			current = v
		} else {
			r2 := pickReg(vi.regs)
			if s.regs[r2].v != v {
				panic("bad register state")
			}
			current = s.regs[r2].c
			s.usedSinceBlockStart |= regMask(1) << r2
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)

		// The rematerialized op may only be able to target registers
		// outside mask; if so, emit it into one of its own registers and
		// then copy the result into the register we actually allocated.
		sourceMask := s.regspec(c).outputs[0].regs
		if mask&sourceMask == 0 && !onWasmStack {
			s.setOrig(c, v)
			s.assignReg(s.allocReg(sourceMask, v), v, c)

			c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
		}
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
663
664
665 func isLeaf(f *Func) bool {
666 for _, b := range f.Blocks {
667 for _, v := range b.Values {
668 if v.Op.IsCall() && !v.Op.IsTailCall() {
669
670 return false
671 }
672 }
673 }
674 return true
675 }
676
677
678 func (v *Value) needRegister() bool {
679 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
680 }
681
// init prepares the allocator state for function f: it discovers the
// machine's special registers, computes the allocatable register set,
// chooses the block visit order, sizes the per-value/per-block tables,
// and computes liveness.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// regMask is 64 bits wide, so at most 64 registers are representable,
	// and noRegister (255) must stay out of range of real registers.
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}

	// Locate SP, SB, g, and the hard-wired zero register by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	s.ZeroIntReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		case "ZERO":
			s.ZeroIntReg = r
		}
	}

	// Fatal if a required register was not found. (Switching on the
	// noRegister sentinel reports only the first missing one.)
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use: everything the
	// config offers minus the fixed-purpose registers.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.ZeroIntReg != noRegister {
		s.allocatable &^= 1 << s.ZeroIntReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register, so it
			// must stay live for the eventual return.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// Dynamic linking reserves an extra register on some
		// architectures (register numbers are arch-specific).
		switch s.f.Config.arch {
		case "386":
			// no reservation needed on 386
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing to do
		case "loong64":
			// nothing to do
		case "ppc64le":
			// nothing to do
		case "riscv64":
			// nothing to do
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Decide the order in which to visit blocks.
	s.visitOrder = layoutRegallocOrder(f)

	// Compute the inverse mapping: block ID -> position in visitOrder.
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	// Reuse the cached per-value slice when it is big enough.
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: values without a register are left with orig == nil.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// Wasm: mark single-use, same-block values that can live on the
	// implicit operand stack instead of in registers.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block: reset; a control value may stay on the stack.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walk backwards so a value is visited before its args.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Any value not consumed immediately breaks the chain
					// of stack-resident values.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// An arg can live on the stack only if it is used
					// exactly once, in the same block, is not memory, and
					// is a machine (non-generic) op.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// Enable dead-register clobbering when requested, but only for
	// reasonably sized functions (bounded by block count).
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		s.doClobber = true
	}
}
849
// close releases the buffers borrowed from the function cache in init.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
853
854
855
// addUse pushes a use record for value id at distance dist onto the
// front of that value's use list, reusing a record from the free list
// when possible. Because blocks are scanned backwards, uses arrive in
// decreasing-dist order, which keeps each list sorted nearest-first;
// adding out of order is a fatal error.
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
	r := s.freeUseRecords
	if r != nil {
		s.freeUseRecords = r.next
	} else {
		r = &use{}
	}
	r.dist = dist
	r.pos = pos
	r.next = s.values[id].uses
	s.values[id].uses = r
	if r.next != nil && dist > r.next.dist {
		s.f.Fatalf("uses added in wrong order")
	}
}
871
872
873
// advanceUses pops the just-consumed use record of each argument of v,
// recycling the records onto the free list. An argument's registers are
// freed when it has no more uses, or (for non-fixed-register values)
// when its next use lies beyond the next call — a call would clobber
// the registers anyway. Finally v's own registers are dropped if v
// itself is unused (see dropIfUnused).
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
			// Value is dead (or is not usefully kept in a register), free all registers.
			s.freeRegs(ai.regs)
		}
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
891
892
893
// dropIfUnused frees the registers holding v when v has no remaining
// uses, or when its next use lies beyond the next call (so it could not
// stay in a register across that call anyway). Fixed-register values
// are always kept. If v itself is a call, "next call" means the call
// after v, not v itself.
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	nextCall := s.nextCall[s.curIdx]
	if opcodeTable[v.Op].call {
		// v is itself a call; look past it for the following call.
		if s.curIdx == len(s.nextCall)-1 {
			nextCall = math.MaxInt32
		} else {
			nextCall = s.nextCall[s.curIdx+1]
		}
	}
	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > nextCall) {
		s.freeRegs(vi.regs)
	}
}
912
913
914
915
916 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
917 u := s.values[v.ID].uses
918 if u == nil {
919 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
920 }
921 d := u.dist
922 for u != nil && u.dist == d {
923 u = u.next
924 }
925 return u != nil && u.dist > d
926 }
927
928
// setState resets the register contents to exactly regs (typically an
// endRegs snapshot from a predecessor block): everything currently held
// is freed, then each snapshot entry is re-assigned.
func (s *regAllocState) setState(regs []endReg) {
	s.freeRegs(s.used)
	for _, x := range regs {
		s.assignReg(x.r, x.v, x.c)
	}
}
935
936
937 func (s *regAllocState) compatRegs(t *types.Type) regMask {
938 var m regMask
939 if t.IsTuple() || t.IsFlags() {
940 return 0
941 }
942 if t.IsSIMD() {
943 if t.Size() > 8 {
944 return s.f.Config.fpRegMask & s.allocatable
945 } else {
946
947 return s.f.Config.gpRegMask & s.allocatable
948 }
949 }
950 if t.IsFloat() || t == types.TypeInt128 {
951 if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
952 m = s.f.Config.fp32RegMask
953 } else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
954 m = s.f.Config.fp64RegMask
955 } else {
956 m = s.f.Config.fpRegMask
957 }
958 } else {
959 m = s.f.Config.gpRegMask
960 }
961 return m & s.allocatable
962 }
963
964
// regspec returns the register constraints for v. Most ops use the
// static opcode table, but a few are special-cased: OpConvert may use
// any allocatable GP register, register-passed arguments are pinned to
// their assigned parameter register, and calls/results carrying
// register-ABI info use the ABI-derived constraints.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert is a generic op: any allocatable integer register
		// works for both its input and output.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		// Pinned to the integer parameter register selected by AuxInt8.
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		// Pinned to the float parameter register selected by AuxInt8.
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			// Register-ABI call: constraints come from the AuxCall.
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		// Register-ABI results of the current function.
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
992
993 func (s *regAllocState) isGReg(r register) bool {
994 return s.f.Config.hasGReg && s.GReg == r
995 }
996
997
998 var tmpVal Value
999
1000 func (s *regAllocState) regalloc(f *Func) {
1001 regValLiveSet := f.newSparseSet(f.NumValues())
1002 defer f.retSparseSet(regValLiveSet)
1003 var oldSched []*Value
1004 var phis []*Value
1005 var phiRegs []register
1006 var args []*Value
1007
1008
1009 var desired desiredState
1010 desiredSecondReg := map[ID][4]register{}
1011
1012
1013 type dentry struct {
1014 out [4]register
1015 in [3][4]register
1016 }
1017 var dinfo []dentry
1018
1019 if f.Entry != f.Blocks[0] {
1020 f.Fatalf("entry block must be first")
1021 }
1022
1023 for _, b := range s.visitOrder {
1024 if s.f.pass.debug > regDebug {
1025 fmt.Printf("Begin processing block %v\n", b)
1026 }
1027 s.curBlock = b
1028 s.startRegsMask = 0
1029 s.usedSinceBlockStart = 0
1030 clear(desiredSecondReg)
1031
1032
1033
1034 regValLiveSet.clear()
1035 if s.live != nil {
1036 for _, e := range s.live[b.ID] {
1037 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
1038 regValLiveSet.add(e.ID)
1039 }
1040 }
1041 for _, v := range b.ControlValues() {
1042 if s.values[v.ID].needReg {
1043 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
1044 regValLiveSet.add(v.ID)
1045 }
1046 }
1047 if cap(s.nextCall) < len(b.Values) {
1048 c := cap(s.nextCall)
1049 s.nextCall = append(s.nextCall[:c], make([]int32, len(b.Values)-c)...)
1050 } else {
1051 s.nextCall = s.nextCall[:len(b.Values)]
1052 }
1053 var nextCall int32 = math.MaxInt32
1054 for i := len(b.Values) - 1; i >= 0; i-- {
1055 v := b.Values[i]
1056 regValLiveSet.remove(v.ID)
1057 if v.Op == OpPhi {
1058
1059
1060
1061 s.nextCall[i] = nextCall
1062 continue
1063 }
1064 if opcodeTable[v.Op].call {
1065
1066 regValLiveSet.clear()
1067 if s.sp != 0 && s.values[s.sp].uses != nil {
1068 regValLiveSet.add(s.sp)
1069 }
1070 if s.sb != 0 && s.values[s.sb].uses != nil {
1071 regValLiveSet.add(s.sb)
1072 }
1073 nextCall = int32(i)
1074 }
1075 for _, a := range v.Args {
1076 if !s.values[a.ID].needReg {
1077 continue
1078 }
1079 s.addUse(a.ID, int32(i), v.Pos)
1080 regValLiveSet.add(a.ID)
1081 }
1082 s.nextCall[i] = nextCall
1083 }
1084 if s.f.pass.debug > regDebug {
1085 fmt.Printf("use distances for %s\n", b)
1086 for i := range s.values {
1087 vi := &s.values[i]
1088 u := vi.uses
1089 if u == nil {
1090 continue
1091 }
1092 fmt.Printf(" v%d:", i)
1093 for u != nil {
1094 fmt.Printf(" %d", u.dist)
1095 u = u.next
1096 }
1097 fmt.Println()
1098 }
1099 }
1100
1101
1102
1103 nphi := 0
1104 for _, v := range b.Values {
1105 if v.Op != OpPhi {
1106 break
1107 }
1108 nphi++
1109 }
1110 phis = append(phis[:0], b.Values[:nphi]...)
1111 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1112 b.Values = b.Values[:0]
1113
1114
1115 if b == f.Entry {
1116
1117 if nphi > 0 {
1118 f.Fatalf("phis in entry block")
1119 }
1120 } else if len(b.Preds) == 1 {
1121
1122 s.setState(s.endRegs[b.Preds[0].b.ID])
1123 if nphi > 0 {
1124 f.Fatalf("phis in single-predecessor block")
1125 }
1126
1127
1128
1129 for r := register(0); r < s.numRegs; r++ {
1130 v := s.regs[r].v
1131 if v != nil && !regValLiveSet.contains(v.ID) {
1132 s.freeReg(r)
1133 }
1134 }
1135 } else {
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149 idx := -1
1150 for i, p := range b.Preds {
1151
1152
1153 pb := p.b
1154 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1155 continue
1156 }
1157 if idx == -1 {
1158 idx = i
1159 continue
1160 }
1161 pSel := b.Preds[idx].b
1162 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1163 idx = i
1164 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1176 idx = i
1177 }
1178 }
1179 }
1180 if idx < 0 {
1181 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1182 }
1183 p := b.Preds[idx].b
1184 s.setState(s.endRegs[p.ID])
1185
1186 if s.f.pass.debug > regDebug {
1187 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1188 for _, x := range s.endRegs[p.ID] {
1189 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1190 }
1191 }
1192
1193
1194
1195
1196
1197 phiRegs = phiRegs[:0]
1198 var phiUsed regMask
1199
1200 for _, v := range phis {
1201 if !s.values[v.ID].needReg {
1202 phiRegs = append(phiRegs, noRegister)
1203 continue
1204 }
1205 a := v.Args[idx]
1206
1207
1208 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1209 if m != 0 {
1210 r := pickReg(m)
1211 phiUsed |= regMask(1) << r
1212 phiRegs = append(phiRegs, r)
1213 } else {
1214 phiRegs = append(phiRegs, noRegister)
1215 }
1216 }
1217
1218
1219 for i, v := range phis {
1220 if !s.values[v.ID].needReg {
1221 continue
1222 }
1223 a := v.Args[idx]
1224 r := phiRegs[i]
1225 if r == noRegister {
1226 continue
1227 }
1228 if regValLiveSet.contains(a.ID) {
1229
1230
1231
1232
1233
1234
1235
1236
1237 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1238 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1239 r2 := pickReg(m)
1240 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1241 s.copies[c] = false
1242 if s.f.pass.debug > regDebug {
1243 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1244 }
1245 s.setOrig(c, a)
1246 s.assignReg(r2, a, c)
1247 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1248 }
1249 }
1250 s.freeReg(r)
1251 }
1252
1253
1254 b.Values = append(b.Values, phis...)
1255
1256
1257
1258 for i, v := range phis {
1259 if !s.values[v.ID].needReg {
1260 continue
1261 }
1262 if phiRegs[i] != noRegister {
1263 continue
1264 }
1265 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1266
1267
1268 for i, pe := range b.Preds {
1269 if i == idx {
1270 continue
1271 }
1272 ri := noRegister
1273 for _, er := range s.endRegs[pe.b.ID] {
1274 if er.v == s.orig[v.Args[i].ID] {
1275 ri = er.r
1276 break
1277 }
1278 }
1279 if ri != noRegister && m>>ri&1 != 0 {
1280 m = regMask(1) << ri
1281 break
1282 }
1283 }
1284 if m != 0 {
1285 r := pickReg(m)
1286 phiRegs[i] = r
1287 phiUsed |= regMask(1) << r
1288 }
1289 }
1290
1291
1292 for i, v := range phis {
1293 if !s.values[v.ID].needReg {
1294 continue
1295 }
1296 r := phiRegs[i]
1297 if r == noRegister {
1298
1299
1300 s.values[v.ID].spill = v
1301 continue
1302 }
1303
1304 s.assignReg(r, v, v)
1305 }
1306
1307
1308 for r := register(0); r < s.numRegs; r++ {
1309 if phiUsed>>r&1 != 0 {
1310 continue
1311 }
1312 v := s.regs[r].v
1313 if v != nil && !regValLiveSet.contains(v.ID) {
1314 s.freeReg(r)
1315 }
1316 }
1317
1318
1319
1320
1321
1322 regList := make([]startReg, 0, 32)
1323 for r := register(0); r < s.numRegs; r++ {
1324 v := s.regs[r].v
1325 if v == nil {
1326 continue
1327 }
1328 if phiUsed>>r&1 != 0 {
1329
1330
1331 continue
1332 }
1333 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1334 s.startRegsMask |= regMask(1) << r
1335 }
1336 s.startRegs[b.ID] = make([]startReg, len(regList))
1337 copy(s.startRegs[b.ID], regList)
1338
1339 if s.f.pass.debug > regDebug {
1340 fmt.Printf("after phis\n")
1341 for _, x := range s.startRegs[b.ID] {
1342 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1343 }
1344 }
1345 }
1346
1347
1348 for i, v := range phis {
1349 s.curIdx = i
1350 s.dropIfUnused(v)
1351 }
1352
1353
1354 if l := len(oldSched); cap(dinfo) < l {
1355 dinfo = make([]dentry, l)
1356 } else {
1357 dinfo = dinfo[:l]
1358 clear(dinfo)
1359 }
1360
1361
1362 if s.desired != nil {
1363 desired.copy(&s.desired[b.ID])
1364 }
1365
1366
1367
1368
1369
1370
1371 for _, e := range b.Succs {
1372 succ := e.b
1373
1374 for _, x := range s.startRegs[succ.ID] {
1375 desired.add(x.v.ID, x.r)
1376 }
1377
1378 pidx := e.i
1379 for _, v := range succ.Values {
1380 if v.Op != OpPhi {
1381 break
1382 }
1383 if !s.values[v.ID].needReg {
1384 continue
1385 }
1386 rp, ok := s.f.getHome(v.ID).(*Register)
1387 if !ok {
1388
1389
1390
1391
1392 for _, a := range v.Args {
1393 rp, ok = s.f.getHome(a.ID).(*Register)
1394 if ok {
1395 break
1396 }
1397 }
1398 if !ok {
1399 continue
1400 }
1401 }
1402 desired.add(v.Args[pidx].ID, register(rp.num))
1403 }
1404 }
1405
1406
1407 for i := len(oldSched) - 1; i >= 0; i-- {
1408 v := oldSched[i]
1409 prefs := desired.remove(v.ID)
1410 regspec := s.regspec(v)
1411 desired.clobber(regspec.clobbers)
1412 for _, j := range regspec.inputs {
1413 if countRegs(j.regs) != 1 {
1414 continue
1415 }
1416 desired.clobber(j.regs)
1417 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1418 }
1419 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1420 if opcodeTable[v.Op].commutative {
1421 desired.addList(v.Args[1].ID, prefs)
1422 }
1423 desired.addList(v.Args[0].ID, prefs)
1424 }
1425
1426 dinfo[i].out = prefs
1427 for j, a := range v.Args {
1428 if j >= len(dinfo[i].in) {
1429 break
1430 }
1431 dinfo[i].in[j] = desired.get(a.ID)
1432 }
1433 if v.Op == OpSelect1 && prefs[0] != noRegister {
1434
1435
1436 desiredSecondReg[v.Args[0].ID] = prefs
1437 }
1438 }
1439
1440
1441 for idx, v := range oldSched {
1442 s.curIdx = nphi + idx
1443 tmpReg := noRegister
1444 if s.f.pass.debug > regDebug {
1445 fmt.Printf(" processing %s\n", v.LongString())
1446 }
1447 regspec := s.regspec(v)
1448 if v.Op == OpPhi {
1449 f.Fatalf("phi %s not at start of block", v)
1450 }
1451 if opcodeTable[v.Op].fixedReg {
1452 switch v.Op {
1453 case OpSP:
1454 s.assignReg(s.SPReg, v, v)
1455 s.sp = v.ID
1456 case OpSB:
1457 s.assignReg(s.SBReg, v, v)
1458 s.sb = v.ID
1459 case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
1460 s.assignReg(s.ZeroIntReg, v, v)
1461 case OpAMD64Zero128, OpAMD64Zero256, OpAMD64Zero512:
1462 regspec := s.regspec(v)
1463 m := regspec.outputs[0].regs
1464 if countRegs(m) != 1 {
1465 f.Fatalf("bad fixed-register op %s", v)
1466 }
1467 s.assignReg(pickReg(m), v, v)
1468 default:
1469 f.Fatalf("unknown fixed-register op %s", v)
1470 }
1471 b.Values = append(b.Values, v)
1472 s.advanceUses(v)
1473 continue
1474 }
1475 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1476 if s.values[v.ID].needReg {
1477 if v.Op == OpSelectN {
1478 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1479 } else {
1480 var i = 0
1481 if v.Op == OpSelect1 {
1482 i = 1
1483 }
1484 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1485 }
1486 }
1487 b.Values = append(b.Values, v)
1488 s.advanceUses(v)
1489 continue
1490 }
1491 if v.Op == OpGetG && s.f.Config.hasGReg {
1492
1493 if s.regs[s.GReg].v != nil {
1494 s.freeReg(s.GReg)
1495 }
1496 s.assignReg(s.GReg, v, v)
1497 b.Values = append(b.Values, v)
1498 s.advanceUses(v)
1499 continue
1500 }
1501 if v.Op == OpArg {
1502
1503
1504
1505 s.values[v.ID].spill = v
1506 b.Values = append(b.Values, v)
1507 s.advanceUses(v)
1508 continue
1509 }
1510 if v.Op == OpKeepAlive {
1511
1512 s.advanceUses(v)
1513 a := v.Args[0]
1514 vi := &s.values[a.ID]
1515 if vi.regs == 0 && !vi.rematerializeable {
1516
1517
1518
1519 v.SetArg(0, s.makeSpill(a, b))
1520 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1521
1522
1523
1524 v.Op = OpVarLive
1525 v.SetArgs1(v.Args[1])
1526 v.Aux = a.Aux
1527 } else {
1528
1529
1530
1531 v.Op = OpCopy
1532 v.SetArgs1(v.Args[1])
1533 }
1534 b.Values = append(b.Values, v)
1535 continue
1536 }
1537 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1538
1539 if s.doClobber && v.Op.IsCall() {
1540 s.clobberRegs(regspec.clobbers)
1541 }
1542 s.freeRegs(regspec.clobbers)
1543 b.Values = append(b.Values, v)
1544 s.advanceUses(v)
1545 continue
1546 }
1547
1548 if s.values[v.ID].rematerializeable {
1549
1550
1551
1552 for _, a := range v.Args {
1553 a.Uses--
1554 }
1555 s.advanceUses(v)
1556 continue
1557 }
1558
1559 if s.f.pass.debug > regDebug {
1560 fmt.Printf("value %s\n", v.LongString())
1561 fmt.Printf(" out:")
1562 for _, r := range dinfo[idx].out {
1563 if r != noRegister {
1564 fmt.Printf(" %s", &s.registers[r])
1565 }
1566 }
1567 fmt.Println()
1568 for i := 0; i < len(v.Args) && i < 3; i++ {
1569 fmt.Printf(" in%d:", i)
1570 for _, r := range dinfo[idx].in[i] {
1571 if r != noRegister {
1572 fmt.Printf(" %s", &s.registers[r])
1573 }
1574 }
1575 fmt.Println()
1576 }
1577 }
1578
1579
1580
1581
1582 args = append(args[:0], make([]*Value, len(v.Args))...)
1583 for i, a := range v.Args {
1584 if !s.values[a.ID].needReg {
1585 args[i] = a
1586 }
1587 }
1588 for _, i := range regspec.inputs {
1589 mask := i.regs
1590 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1591 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1592 }
1593 }
1594
1595
1596
1597
1598
1599
1600 for {
1601 freed := false
1602 for _, i := range regspec.inputs {
1603 if args[i.idx] != nil {
1604 continue
1605 }
1606 mask := i.regs
1607 if countRegs(mask) == 1 && mask&^s.used != 0 {
1608 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1609
1610
1611
1612 oldregs := s.values[v.Args[i.idx].ID].regs
1613 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1614 s.freeRegs(oldregs &^ mask &^ s.nospill)
1615 freed = true
1616 }
1617 }
1618 }
1619 if !freed {
1620 break
1621 }
1622 }
1623
1624
1625 for _, i := range regspec.inputs {
1626 if args[i.idx] != nil {
1627 continue
1628 }
1629 mask := i.regs
1630 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1631
1632 mask &= s.allocatable
1633 mask &^= s.nospill
1634
1635 if i.idx < 3 {
1636 for _, r := range dinfo[idx].in[i.idx] {
1637 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1638
1639 mask = regMask(1) << r
1640 break
1641 }
1642 }
1643 }
1644
1645 if mask&^desired.avoid != 0 {
1646 mask &^= desired.avoid
1647 }
1648 }
1649 if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
1650
1651
1652
1653 mask = 1 << s.SPReg
1654 }
1655 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1656 }
1657
1658
1659
1660
1661 if opcodeTable[v.Op].resultInArg0 {
1662 var m regMask
1663 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1664
1665 goto ok
1666 }
1667 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1668 args[0], args[1] = args[1], args[0]
1669 goto ok
1670 }
1671 if s.values[v.Args[0].ID].rematerializeable {
1672
1673 goto ok
1674 }
1675 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1676 args[0], args[1] = args[1], args[0]
1677 goto ok
1678 }
1679 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1680
1681 goto ok
1682 }
1683 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1684 args[0], args[1] = args[1], args[0]
1685 goto ok
1686 }
1687
1688
1689
1690
1691
1692 m = s.compatRegs(v.Args[0].Type) &^ s.used
1693 if m == 0 {
1694
1695
1696
1697
1698 goto ok
1699 }
1700
1701
1702 for _, r := range dinfo[idx].out {
1703 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1704 m = regMask(1) << r
1705 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1706
1707
1708 goto ok
1709 }
1710 }
1711
1712
1713 for _, r := range dinfo[idx].in[0] {
1714 if r != noRegister && m>>r&1 != 0 {
1715 m = regMask(1) << r
1716 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1717 s.copies[c] = false
1718
1719
1720 goto ok
1721 }
1722 }
1723 if opcodeTable[v.Op].commutative {
1724 for _, r := range dinfo[idx].in[1] {
1725 if r != noRegister && m>>r&1 != 0 {
1726 m = regMask(1) << r
1727 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1728 s.copies[c] = false
1729 args[0], args[1] = args[1], args[0]
1730 goto ok
1731 }
1732 }
1733 }
1734
1735
1736 if m&^desired.avoid != 0 {
1737 m &^= desired.avoid
1738 }
1739
1740 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1741 s.copies[c] = false
1742
1743
1744
1745
1746 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1747 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1748 r := register(rp.num)
1749 for _, r2 := range dinfo[idx].in[0] {
1750 if r == r2 {
1751 args[0] = c
1752 break
1753 }
1754 }
1755 }
1756 }
1757 }
1758 ok:
1759 for i := 0; i < 2; i++ {
1760 if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
1761 continue
1762 }
1763 if !s.liveAfterCurrentInstruction(v.Args[i]) {
1764
1765 continue
1766 }
1767 if s.values[v.Args[i].ID].rematerializeable {
1768
1769 continue
1770 }
1771 if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
1772
1773 continue
1774 }
1775
1776 m := s.compatRegs(v.Args[i].Type) &^ s.used
1777 if m == 0 {
1778
1779
1780
1781
1782 continue
1783 }
1784
1785 c := s.allocValToReg(v.Args[i], m, true, v.Pos)
1786 s.copies[c] = false
1787 }
1788
1789
1790
1791
1792
1793
1794
1795 if opcodeTable[v.Op].needIntTemp {
1796 m := s.allocatable & s.f.Config.gpRegMask
1797 for _, out := range regspec.outputs {
1798 if countRegs(out.regs) == 1 {
1799 m &^= out.regs
1800 }
1801 }
1802 if m&^desired.avoid&^s.nospill != 0 {
1803 m &^= desired.avoid
1804 }
1805 tmpReg = s.allocReg(m, &tmpVal)
1806 s.nospill |= regMask(1) << tmpReg
1807 s.tmpused |= regMask(1) << tmpReg
1808 }
1809
1810 if regspec.clobbersArg0 {
1811 s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
1812 }
1813 if regspec.clobbersArg1 {
1814 s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
1815 }
1816
1817
1818
1819
1820
1821 if !opcodeTable[v.Op].resultNotInArgs {
1822 s.tmpused = s.nospill
1823 s.nospill = 0
1824 s.advanceUses(v)
1825 }
1826
1827
1828 if s.doClobber && v.Op.IsCall() {
1829
1830
1831 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1832 }
1833 s.freeRegs(regspec.clobbers)
1834 s.tmpused |= regspec.clobbers
1835
1836
1837 {
1838 outRegs := noRegisters
1839 maxOutIdx := -1
1840 var used regMask
1841 if tmpReg != noRegister {
1842
1843
1844 used |= regMask(1) << tmpReg
1845 }
1846 for _, out := range regspec.outputs {
1847 if out.regs == 0 {
1848 continue
1849 }
1850 mask := out.regs & s.allocatable &^ used
1851 if mask == 0 {
1852 s.f.Fatalf("can't find any output register %s", v.LongString())
1853 }
1854 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1855 if !opcodeTable[v.Op].commutative {
1856
1857 r := register(s.f.getHome(args[0].ID).(*Register).num)
1858 if mask>>r&1 == 0 {
1859 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1860 }
1861 mask = regMask(1) << r
1862 } else {
1863
1864 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1865 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1866
1867 found := false
1868 for _, r := range dinfo[idx].out {
1869 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1870 mask = regMask(1) << r
1871 found = true
1872 if r == r1 {
1873 args[0], args[1] = args[1], args[0]
1874 }
1875 break
1876 }
1877 }
1878 if !found {
1879
1880 mask = regMask(1) << r0
1881 }
1882 }
1883 }
1884 if out.idx == 0 {
1885 for _, r := range dinfo[idx].out {
1886 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1887
1888 mask = regMask(1) << r
1889 break
1890 }
1891 }
1892 }
1893 if out.idx == 1 {
1894 if prefs, ok := desiredSecondReg[v.ID]; ok {
1895 for _, r := range prefs {
1896 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1897
1898 mask = regMask(1) << r
1899 break
1900 }
1901 }
1902 }
1903 }
1904
1905 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1906 mask &^= desired.avoid
1907 }
1908 r := s.allocReg(mask, v)
1909 if out.idx > maxOutIdx {
1910 maxOutIdx = out.idx
1911 }
1912 outRegs[out.idx] = r
1913 used |= regMask(1) << r
1914 s.tmpused |= regMask(1) << r
1915 }
1916
1917 if v.Type.IsTuple() {
1918 var outLocs LocPair
1919 if r := outRegs[0]; r != noRegister {
1920 outLocs[0] = &s.registers[r]
1921 }
1922 if r := outRegs[1]; r != noRegister {
1923 outLocs[1] = &s.registers[r]
1924 }
1925 s.f.setHome(v, outLocs)
1926
1927 } else if v.Type.IsResults() {
1928
1929 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1930 for i := 0; i <= maxOutIdx; i++ {
1931 if r := outRegs[i]; r != noRegister {
1932 outLocs[i] = &s.registers[r]
1933 }
1934 }
1935 s.f.setHome(v, outLocs)
1936 } else {
1937 if r := outRegs[0]; r != noRegister {
1938 s.assignReg(r, v, v)
1939 }
1940 }
1941 if tmpReg != noRegister {
1942
1943 if s.f.tempRegs == nil {
1944 s.f.tempRegs = map[ID]*Register{}
1945 }
1946 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1947 }
1948 }
1949
1950
1951 if opcodeTable[v.Op].resultNotInArgs {
1952 s.nospill = 0
1953 s.advanceUses(v)
1954 }
1955 s.tmpused = 0
1956
1957
1958 for i, a := range args {
1959 v.SetArg(i, a)
1960 }
1961 b.Values = append(b.Values, v)
1962 s.dropIfUnused(v)
1963 }
1964
1965
1966
1967 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1968
1969
1970 for i, v := range b.ControlValues() {
1971 if !s.values[v.ID].needReg {
1972 continue
1973 }
1974 if s.f.pass.debug > regDebug {
1975 fmt.Printf(" processing control %s\n", v.LongString())
1976 }
1977
1978
1979
1980 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1981 }
1982
1983
1984
1985 for _, v := range controls {
1986 vi := &s.values[v.ID]
1987 if !vi.needReg {
1988 continue
1989 }
1990
1991 u := vi.uses
1992 vi.uses = u.next
1993 if u.next == nil {
1994 s.freeRegs(vi.regs)
1995 }
1996 u.next = s.freeUseRecords
1997 s.freeUseRecords = u
1998 }
1999
2000
2001
2002
2003 if len(b.Succs) == 1 {
2004 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
2005 s.freeReg(s.GReg)
2006 }
2007 if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
2008
2009 goto badloop
2010 }
2011
2012 top := b.Succs[0].b
2013 loop := s.loopnest.b2l[top.ID]
2014 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
2015 goto badloop
2016 }
2017
2018
2019 for _, live := range s.live[b.ID] {
2020 if live.dist >= unlikelyDistance {
2021
2022 continue
2023 }
2024 vid := live.ID
2025 vi := &s.values[vid]
2026 if vi.regs != 0 {
2027 continue
2028 }
2029 if vi.rematerializeable {
2030 continue
2031 }
2032 v := s.orig[vid]
2033 m := s.compatRegs(v.Type) &^ s.used
2034
2035 outerloop:
2036 for _, e := range desired.entries {
2037 if e.ID != v.ID {
2038 continue
2039 }
2040 for _, r := range e.regs {
2041 if r != noRegister && m>>r&1 != 0 {
2042 m = regMask(1) << r
2043 break outerloop
2044 }
2045 }
2046 }
2047 if m&^desired.avoid != 0 {
2048 m &^= desired.avoid
2049 }
2050 if m != 0 {
2051 s.allocValToReg(v, m, false, b.Pos)
2052 }
2053 }
2054 }
2055 badloop:
2056 ;
2057
2058
2059
2060 k := 0
2061 for r := register(0); r < s.numRegs; r++ {
2062 v := s.regs[r].v
2063 if v == nil {
2064 continue
2065 }
2066 k++
2067 }
2068 regList := make([]endReg, 0, k)
2069 for r := register(0); r < s.numRegs; r++ {
2070 v := s.regs[r].v
2071 if v == nil {
2072 continue
2073 }
2074 regList = append(regList, endReg{r, v, s.regs[r].c})
2075 }
2076 s.endRegs[b.ID] = regList
2077
2078 if checkEnabled {
2079 regValLiveSet.clear()
2080 if s.live != nil {
2081 for _, x := range s.live[b.ID] {
2082 regValLiveSet.add(x.ID)
2083 }
2084 }
2085 for r := register(0); r < s.numRegs; r++ {
2086 v := s.regs[r].v
2087 if v == nil {
2088 continue
2089 }
2090 if !regValLiveSet.contains(v.ID) {
2091 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
2092 }
2093 }
2094 }
2095
2096
2097
2098
2099
2100 if s.live != nil {
2101 for _, e := range s.live[b.ID] {
2102 vi := &s.values[e.ID]
2103 if vi.regs != 0 {
2104
2105 continue
2106 }
2107 if vi.rematerializeable {
2108
2109 continue
2110 }
2111 if s.f.pass.debug > regDebug {
2112 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
2113 }
2114 spill := s.makeSpill(s.orig[e.ID], b)
2115 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
2116 }
2117
2118
2119
2120
2121 for _, e := range s.live[b.ID] {
2122 u := s.values[e.ID].uses
2123 if u == nil {
2124 f.Fatalf("live at end, no uses v%d", e.ID)
2125 }
2126 if u.next != nil {
2127 f.Fatalf("live at end, too many uses v%d", e.ID)
2128 }
2129 s.values[e.ID].uses = nil
2130 u.next = s.freeUseRecords
2131 s.freeUseRecords = u
2132 }
2133 }
2134
2135
2136
2137
2138
2139
2140
2141 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
2142 regs := make([]startReg, 0, c)
2143 for _, sr := range s.startRegs[b.ID] {
2144 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
2145 continue
2146 }
2147 regs = append(regs, sr)
2148 }
2149 s.startRegs[b.ID] = regs
2150 }
2151 }
2152
2153
2154 s.placeSpills()
2155
2156
2157
2158 stacklive := stackalloc(s.f, s.spillLive)
2159
2160
2161 s.shuffle(stacklive)
2162
2163
2164
2165
2166 for {
2167 progress := false
2168 for c, used := range s.copies {
2169 if !used && c.Uses == 0 {
2170 if s.f.pass.debug > regDebug {
2171 fmt.Printf("delete copied value %s\n", c.LongString())
2172 }
2173 c.resetArgs()
2174 f.freeValue(c)
2175 delete(s.copies, c)
2176 progress = true
2177 }
2178 }
2179 if !progress {
2180 break
2181 }
2182 }
2183
2184 for _, b := range s.visitOrder {
2185 i := 0
2186 for _, v := range b.Values {
2187 if v.Op == OpInvalid {
2188 continue
2189 }
2190 b.Values[i] = v
2191 i++
2192 }
2193 b.Values = b.Values[:i]
2194 }
2195 }
2196
// placeSpills decides where in the CFG each pending spill (one per
// spilled value, created earlier with no block assigned) should live,
// then splices the spills into the blocks' value lists. Each spill is
// sunk from its value's definition block down the dominator tree toward
// the subtree containing all of the value's restores, as long as that
// does not move the spill into a more deeply nested loop.
func (s *regAllocState) placeSpills() {
	// Ops that must appear at the very start of their block; a spill of
	// such a value cannot be placed immediately after it in-line.
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// start maps block ID -> spills to insert at the start of that block
	// (after any must-be-first ops).
	start := map[ID][]*Value{}

	// after maps value ID -> spills to insert immediately after that value.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Spill already has a home (was placed when it was created);
			// nothing to decide for it here.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree from v's block looking for the best
		// placement. "best" is the best block found so far; "bestArg" is the
		// value (original or an in-register copy) the spill should store.
		// vi.restoreMin/restoreMax bound the dominator-tree interval that
		// contains every restore of this value.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block
		bestArg := v
		var bestDepth int16
		if s.loopnest != nil && s.loopnest.b2l[best.ID] != nil {
			bestDepth = s.loopnest.b2l[best.ID].depth
		}
		b := best
		const maxSpillSearch = 100 // cap on dominator-tree nodes examined per spill
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of p in the dominator tree whose subtree
			// still contains all of the restores.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c dominates all restores; descend into it.
					b = c
					break
				}
			}
			if b == nil {
				// No child dominates all restores; can't sink further.
				break
			}

			var depth int16
			if s.loopnest != nil && s.loopnest.b2l[b.ID] != nil {
				depth = s.loopnest.b2l[b.ID].depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop; keep looking
				// further down in case a shallower block appears.
				continue
			}

			// Only move the spill here if v is available in a register at
			// the top of b (so the spill can store from that register).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Record the chosen placement.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place the spill immediately after v in its own block.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place the spill at the start of the chosen block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Splice the spills into the block schedules, preserving the
	// must-be-first prefix of each block. oldSched is reused scratch.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2319
2320
2321 func (s *regAllocState) shuffle(stacklive [][]ID) {
2322 var e edgeState
2323 e.s = s
2324 e.cache = map[ID][]*Value{}
2325 e.contents = map[Location]contentRecord{}
2326 if s.f.pass.debug > regDebug {
2327 fmt.Printf("shuffle %s\n", s.f.Name)
2328 fmt.Println(s.f.String())
2329 }
2330
2331 for _, b := range s.visitOrder {
2332 if len(b.Preds) <= 1 {
2333 continue
2334 }
2335 e.b = b
2336 for i, edge := range b.Preds {
2337 p := edge.b
2338 e.p = p
2339 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2340 e.process()
2341 }
2342 }
2343
2344 if s.f.pass.debug > regDebug {
2345 fmt.Printf("post shuffle %s\n", s.f.Name)
2346 fmt.Println(s.f.String())
2347 }
2348 }
2349
// edgeState tracks the in-flight state while fixing up a single CFG
// edge p->b in the shuffle pass: where each value currently lives and
// where it still needs to go.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p to b

	// cache maps a pre-regalloc value ID to all cached copies of that
	// value (in registers or spill slots) currently available on this edge.
	cache      map[ID][]*Value
	cachedVals []ID // keys of cache, kept for deterministic iteration order

	// contents records which value currently occupies each location.
	contents map[Location]contentRecord

	// destinations are the (location, value) pairs still to be satisfied;
	// extra collects destinations generated while processing (see erase).
	destinations []dstRecord
	extra        []dstRecord

	usedRegs              regMask // registers currently holding a cached value
	uniqueRegs            regMask // registers holding the only cached copy of some value
	finalRegs             regMask // registers holding a value in its final (destination) place
	rematerializeableRegs regMask // registers holding rematerializeable values
}
2370
// contentRecord records what a location (register or spill slot) holds.
type contentRecord struct {
	vid   ID       // pre-regalloc ID of the value stored here
	c     *Value   // the cached copy of that value at this location
	final bool     // true if this location is the value's final destination on this edge
	pos   src.XPos // source position associated with the value's placement
}
2377
// dstRecord describes one destination that still needs to be satisfied
// on the current edge.
type dstRecord struct {
	loc    Location // register or stack slot the value must end up in
	vid    ID       // pre-regalloc ID of the value that must go there
	splice **Value  // if non-nil, phi argument slot to redirect to the moved value
	pos    src.XPos // source position for any move instructions generated
}
2384
2385
// setup initializes the edge state for the edge from e.p to e.b.
// idx is e.b's predecessor index for this edge (used to pick phi args),
// srcReg describes register contents at the end of e.p, dstReg the
// registers required at the start of e.b, and stacklive the spill slots
// live out of e.p.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state left over from the previous edge.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers at the end of p can be used as sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
	}
	// So can live spill slots.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// The spill instruction does not dominate this edge, so its
			// slot contents cannot be relied on here. Skip it.
			// (Spills can be sunk below where the value was live-out;
			// see placeSpills.)
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
	}

	// Collect all destinations: registers required at the start of b...
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// ...plus the locations of b's phis, which must receive the phi
	// argument coming in along this edge (predecessor index idx).
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2452
2453
// process generates code to move all the values into their destination
// locations for the current edge, iterating until every destination is
// satisfied and breaking move cycles when no progress can be made.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Couldn't be satisfied this round (its location is
				// occupied by a value we must not lose) - retry later.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made progress. Go around again, including any extra
			// destinations generated by evictions during this round.
			dsts = dsts[:i]
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// No progress: the remaining destinations form one or more move
		// cycles (e.g. swap r1<->r2). Break a cycle by copying the value
		// occupying the first destination into a scratch register (or
		// loading it there if it lives in a stack slot); that frees the
		// destination so the next round can make progress.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2520
2521
2522
2523 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
2524 pos = pos.WithNotStmt()
2525 occupant := e.contents[loc]
2526 if occupant.vid == vid {
2527
2528 e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
2529 if splice != nil {
2530 (*splice).Uses--
2531 *splice = occupant.c
2532 occupant.c.Uses++
2533 }
2534
2535
2536
2537 if _, ok := e.s.copies[occupant.c]; ok {
2538
2539 e.s.copies[occupant.c] = true
2540 }
2541 return true
2542 }
2543
2544
2545 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
2546
2547
2548 return false
2549 }
2550
2551
2552 v := e.s.orig[vid]
2553 var c *Value
2554 var src Location
2555 if e.s.f.pass.debug > regDebug {
2556 fmt.Printf("moving v%d to %s\n", vid, loc)
2557 fmt.Printf("sources of v%d:", vid)
2558 }
2559 if opcodeTable[v.Op].fixedReg {
2560 c = v
2561 src = e.s.f.getHome(v.ID)
2562 } else {
2563 for _, w := range e.cache[vid] {
2564 h := e.s.f.getHome(w.ID)
2565 if e.s.f.pass.debug > regDebug {
2566 fmt.Printf(" %s:%s", h, w)
2567 }
2568 _, isreg := h.(*Register)
2569 if src == nil || isreg {
2570 c = w
2571 src = h
2572 }
2573 }
2574 }
2575 if e.s.f.pass.debug > regDebug {
2576 if src != nil {
2577 fmt.Printf(" [use %s]\n", src)
2578 } else {
2579 fmt.Printf(" [no source]\n")
2580 }
2581 }
2582 _, dstReg := loc.(*Register)
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594 e.erase(loc)
2595 var x *Value
2596 if c == nil || e.s.values[vid].rematerializeable {
2597 if !e.s.values[vid].rematerializeable {
2598 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
2599 }
2600 if dstReg {
2601
2602
2603
2604
2605 if e.s.regspec(v).outputs[0].regs®Mask(1<<register(loc.(*Register).num)) == 0 {
2606 _, srcReg := src.(*Register)
2607 if srcReg {
2608
2609
2610 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2611 } else {
2612
2613 x = v.copyInto(e.p)
2614 r := e.findRegFor(x.Type)
2615 e.erase(r)
2616
2617 e.set(r, vid, x, false, pos)
2618
2619 x = e.p.NewValue1(pos, OpCopy, x.Type, x)
2620 }
2621 } else {
2622 x = v.copyInto(e.p)
2623 }
2624 } else {
2625
2626
2627 r := e.findRegFor(v.Type)
2628 e.erase(r)
2629 x = v.copyIntoWithXPos(e.p, pos)
2630 e.set(r, vid, x, false, pos)
2631
2632
2633
2634 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
2635 }
2636 } else {
2637
2638 _, srcReg := src.(*Register)
2639 if srcReg {
2640 if dstReg {
2641 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2642 } else {
2643 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
2644 }
2645 } else {
2646 if dstReg {
2647 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2648 } else {
2649
2650 r := e.findRegFor(c.Type)
2651 e.erase(r)
2652 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2653 e.set(r, vid, t, false, pos)
2654 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
2655 }
2656 }
2657 }
2658 e.set(loc, vid, x, true, pos)
2659 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
2660 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
2661 }
2662 if splice != nil {
2663 (*splice).Uses--
2664 *splice = x
2665 x.Uses++
2666 }
2667 return true
2668 }
2669
2670
// set records that location loc now holds cached copy c of value vid,
// updating the cache, the contents map, and the register bookkeeping
// masks. final marks loc as the value's final destination on this edge.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First copy of this value: track its ID for deterministic iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			// This register holds the only copy of vid.
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			// A second copy now exists, so the first copy's register
			// (if it is one) is no longer unique.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
2705
2706
// erase removes the current occupant (if any) of location loc from the
// edge state, queueing a re-move if the occupant was already final.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// loc was a satisfied destination; evicting it means the value
		// must be moved back later. Queue that as an extra destination
		// (appended to the tail so other moves make progress first).
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove the copy at loc from the value's cache list
	// (swap-with-last delete; order is not significant).
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register bookkeeping masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		// Exactly one copy remains; if it's in a register,
		// that register now holds a unique copy.
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2748
2749
// findRegFor finds a register that can be used as a temporary for a
// value of type typ, spilling an occupant to a fresh stack slot if
// every compatible register is in use. It dies (Fatalf) if no register
// can be freed.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Registers compatible with typ.
	m := e.s.compatRegs(typ)

	// Pick a register, cheapest eviction first:
	// 1) an unused register;
	// 2) a non-unique, non-final register (another copy exists elsewhere
	//    and it isn't a satisfied destination);
	// 3) a non-unique register;
	// 4) a register holding a rematerializeable value (recomputable).
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register can be taken for free: spill some cached copy that
	// occupies a compatible register to a new stack slot, then hand
	// that register to the caller.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a fresh local slot to hold the spilled copy.
					// TODO: could these slots be reused across calls?
					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, c.Type), Type: c.Type}

					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r will be overwritten by the caller; the saved copy (or
				// the value's rematerialization) restores it later if needed.
				return r
			}
		}
	}

	// Should be unreachable; dump state for debugging before dying.
	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
2810
2811
2812
2813 func (v *Value) rematerializeable() bool {
2814 if !opcodeTable[v.Op].rematerializeable {
2815 return false
2816 }
2817 for _, a := range v.Args {
2818
2819
2820 if !opcodeTable[a.Op].fixedReg {
2821 return false
2822 }
2823 }
2824 return true
2825 }
2826
// liveInfo records one value live at the end of a block, together with
// an estimate of the distance to its next use.
type liveInfo struct {
	ID   ID       // ID of the live value
	dist int32    // # of instructions (weighted) before its next use
	pos  src.XPos // source position of the next use
}
2832
2833
2834
2835
// computeLive computes, for each block, the set of values live at the
// end of the block together with a weighted distance to each value's
// next use (branch likelihood and intervening calls add weight). The
// result is stored in s.live. It finishes by computing desired-register
// information via computeDesired.
//
// The main walk is a backward dataflow over the postorder; for reducible
// loop nests a faster path propagates loop-header live-in sets instead
// of iterating to a full fixed point.
func (s *regAllocState) computeLive() {
	f := s.f
	// A single-block function has no cross-block liveness to compute.
	if len(f.Blocks) == 1 {
		return
	}
	po := f.postorder()
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	s.loopnest = f.loopnest()

	// Scratch list of rematerializeable value IDs dropped across calls.
	rematIDs := make([]ID, 0, 64)

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	s.loopnest.computeUnavoidableCalls()

	// For reducible loop nests we take a shortcut: do one backward pass,
	// record each loop header's live-in set, then propagate those sets
	// to all loop members instead of iterating the dataflow to a fixed
	// point. loopLiveIn/numCalls are only allocated in that case.
	var loopLiveIn map[*loop][]liveInfo
	var numCalls []int32
	if len(s.loopnest.loops) > 0 && !s.loopnest.hasIrreducible {
		loopLiveIn = make(map[*loop][]liveInfo)
		numCalls = f.Cache.allocInt32Slice(f.NumBlocks())
		defer f.Cache.freeInt32Slice(numCalls)
	}

	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of b.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist, e.pos)
			}
			update := false

			// Phi args in successors are live-out of b along that edge,
			// at a distance weighted by branch likelihood.
			for _, e := range b.Succs {
				succ := e.b
				delta := branchDistance(b, succ)
				for _, v := range succ.Values {
					if v.Op != OpPhi {
						break
					}
					arg := v.Args[e.i]
					if s.values[arg.ID].needReg && (!live.contains(arg.ID) || delta < live.get(arg.ID)) {
						live.set(arg.ID, delta, v.Pos)
						update = true
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}

			// Walk backward through the block: first shift all distances
			// by the block length...
			c := live.contents()
			for i := range c {
				c[i].val += int32(len(b.Values))
			}

			// ...then mark control values as used at the end of the block.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Phi args are handled via the successor edges above.
					continue
				}
				if opcodeTable[v.Op].call {
					if numCalls != nil {
						numCalls[b.ID]++
					}
					// Crossing a call makes every live value much more
					// expensive to keep in a register; also drop
					// rematerializeable values, which are recomputed
					// rather than preserved across the call.
					rematIDs = rematIDs[:0]
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
						vid := c[i].key
						if s.values[vid].rematerializeable {
							rematIDs = append(rematIDs, vid)
						}
					}
					for _, r := range rematIDs {
						live.remove(r)
					}
				}
				// v's args are live just before v.
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}

			// Record this loop header's live-in set for the shortcut path.
			if loopLiveIn != nil {
				loop := s.loopnest.b2l[b.ID]
				if loop != nil && loop.header.ID == b.ID {
					loopLiveIn[loop] = updateLive(live, nil)
				}
			}

			// Propagate b's live-in set to each predecessor's live-out set.
			for _, e := range b.Preds {
				p := e.b
				delta := branchDistance(p, b)

				// Load p's current live-out set into scratch map t.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Merge, keeping the minimum distance per value.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}

				if !update {
					continue
				}
				s.live[p.ID] = updateLive(t, s.live[p.ID])
				changed = true
			}
		}

		// Fixed point reached.
		if !changed {
			break
		}

		// Shortcut path: one pass suffices; loop propagation happens below.
		if loopLiveIn != nil {
			break
		}

		// No loops means one backward pass over the DAG is enough.
		if len(s.loopnest.loops) == 0 {
			break
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after dfs walk", f, s.live, s.desired)
	}

	// Without the loop shortcut we already iterated to a fixed point;
	// finish with desired-register computation.
	if loopLiveIn == nil {
		s.computeDesired()
		return
	}

	// Propagate each outer loop's header live-in set into its inner
	// loops, processing shallower loops first so the sets accumulate
	// down the nest.
	loops := slices.Clone(s.loopnest.loops)
	slices.SortFunc(loops, func(a, b *loop) int {
		return cmp.Compare(a.depth, b.depth)
	})

	loopset := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(loopset)
	for _, loop := range loops {
		if loop.outer == nil {
			continue
		}
		livein := loopLiveIn[loop]
		loopset.clear()
		for _, l := range livein {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range loopLiveIn[loop.outer] {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, l.dist, l.pos)
				update = true
			}
		}
		if update {
			loopLiveIn[loop] = updateLive(loopset, livein)
		}
	}

	// Sentinel distance for values known live but whose distance has
	// not been computed yet; resolved by the relaxation loop below.
	const unknownDistance = -1

	// Make every block in a loop carry (at least) its loop header's
	// live-in values, with unknown distance for the newly added ones.
	for _, b := range po {
		loop := s.loopnest.b2l[b.ID]
		if loop == nil {
			continue
		}
		headerLive := loopLiveIn[loop]
		loopset.clear()
		for _, l := range s.live[b.ID] {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range headerLive {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, unknownDistance, src.NoXPos)
				update = true
			}
		}
		if update {
			s.live[b.ID] = updateLive(loopset, s.live[b.ID])
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after live loop prop", f, s.live, s.desired)
	}

	// Relax unknown distances: repeatedly pull distances from successors
	// until every live entry has a concrete (weighted) distance.
	unfinishedBlocks := f.Cache.allocBlockSlice(len(po))
	defer f.Cache.freeBlockSlice(unfinishedBlocks)
	copy(unfinishedBlocks, po)

	for len(unfinishedBlocks) > 0 {
		n := 0
		for _, b := range unfinishedBlocks {
			live.clear()
			unfinishedValues := 0
			for _, l := range s.live[b.ID] {
				if l.dist == unknownDistance {
					unfinishedValues++
				}
				live.set(l.ID, l.dist, l.pos)
			}
			update := false
			for _, e := range b.Succs {
				succ := e.b
				for _, l := range s.live[succ.ID] {
					// Only consider values live here whose successor
					// distance is already known.
					if !live.contains(l.ID) || l.dist == unknownDistance {
						continue
					}
					// Distance through succ: succ's length plus the
					// successor's distance, branch weight, and a call
					// penalty for each call in succ.
					dist := int32(len(succ.Values)) + l.dist + branchDistance(b, succ)
					dist += numCalls[succ.ID] * unlikelyDistance
					val := live.get(l.ID)
					switch {
					case val == unknownDistance:
						// First concrete distance for this value.
						unfinishedValues--
						fallthrough
					case dist < val:
						update = true
						live.set(l.ID, dist, l.pos)
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}
			if unfinishedValues > 0 {
				// Keep this block for another round.
				unfinishedBlocks[n] = b
				n++
			}
		}
		unfinishedBlocks = unfinishedBlocks[:n]
	}

	s.computeDesired()

	if f.pass.debug > regDebug {
		s.debugPrintLive("final", f, s.live, s.desired)
	}
}
3138
3139
3140
3141
// computeDesired computes, for each block, which registers each live
// value would prefer to end up in (because a later instruction needs it
// in a specific register, or feeds a two-operand op). Results are
// accumulated into s.desired, propagated backward across block edges
// until a fixed point (needed only when the CFG has loops).
func (s *regAllocState) computeDesired() {
	var desired desiredState
	f := s.f
	po := f.postorder()
	for {
		changed := false
		for _, b := range po {
			// Start from what is already known at the end of b, then
			// walk the block backward.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// Desired registers are not propagated back through
					// phis here; phi inputs are handled by the shuffle pass.
					continue
				}
				regspec := s.regspec(v)
				// Registers clobbered by v are not desirable before v.
				desired.clobber(regspec.clobbers)
				// An input pinned to a single register makes that
				// register desired for the corresponding argument.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// For two-operand-style ops the result's desired registers
				// flow back to the input(s) that could become the result.
				// NOTE(review): OpAMD64ADDQconst/ADDLconst/OpSelect0 are
				// treated like resultInArg0 here although the opcode table
				// does not mark them so — presumably they often lower to
				// two-operand forms; confirm against the lowering rules.
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}
			// Merge b's live-in desires into each predecessor's live-out state.
			for _, e := range b.Preds {
				p := e.b
				changed = s.desired[p.ID].merge(&desired) || changed
			}
		}
		// Loop-free (and irreducible-free) CFGs converge in one pass.
		if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
			break
		}
	}
}
3198
3199
3200 func updateLive(t *sparseMapPos, live []liveInfo) []liveInfo {
3201 live = live[:0]
3202 if cap(live) < t.size() {
3203 live = make([]liveInfo, 0, t.size())
3204 }
3205 for _, e := range t.contents() {
3206 live = append(live, liveInfo{e.key, e.val, e.pos})
3207 }
3208 return live
3209 }
3210
3211
3212
3213
3214 func branchDistance(b *Block, s *Block) int32 {
3215 if len(b.Succs) == 2 {
3216 if b.Succs[0].b == s && b.Likely == BranchLikely ||
3217 b.Succs[1].b == s && b.Likely == BranchUnlikely {
3218 return likelyDistance
3219 }
3220 if b.Succs[0].b == s && b.Likely == BranchUnlikely ||
3221 b.Succs[1].b == s && b.Likely == BranchLikely {
3222 return unlikelyDistance
3223 }
3224 }
3225
3226
3227 return normalDistance
3228 }
3229
3230 func (s *regAllocState) debugPrintLive(stage string, f *Func, live [][]liveInfo, desired []desiredState) {
3231 fmt.Printf("%s: live values at end of each block: %s\n", stage, f.Name)
3232 for _, b := range f.Blocks {
3233 s.debugPrintLiveBlock(b, live[b.ID], &desired[b.ID])
3234 }
3235 }
3236
3237 func (s *regAllocState) debugPrintLiveBlock(b *Block, live []liveInfo, desired *desiredState) {
3238 fmt.Printf(" %s:", b)
3239 slices.SortFunc(live, func(a, b liveInfo) int {
3240 return cmp.Compare(a.ID, b.ID)
3241 })
3242 for _, x := range live {
3243 fmt.Printf(" v%d(%d)", x.ID, x.dist)
3244 for _, e := range desired.entries {
3245 if e.ID != x.ID {
3246 continue
3247 }
3248 fmt.Printf("[")
3249 first := true
3250 for _, r := range e.regs {
3251 if r == noRegister {
3252 continue
3253 }
3254 if !first {
3255 fmt.Printf(",")
3256 }
3257 fmt.Print(&s.registers[r])
3258 first = false
3259 }
3260 fmt.Printf("]")
3261 }
3262 }
3263 if avoid := desired.avoid; avoid != 0 {
3264 fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
3265 }
3266 fmt.Println()
3267 }
3268
3269
// A desiredState records which registers the currently-live values would
// like to be assigned to.
type desiredState struct {
	// entries lists desired registers per value ID. The set is expected
	// to be small, so a linearly-scanned slice is used instead of a map
	// (see get/add/remove).
	entries []desiredStateEntry
	// avoid is the set of registers desired by some value. Bits are set
	// by add and cleared by clobber, but not by remove, so avoid may
	// keep bits for values whose entries are gone; it is always a
	// superset of the union of the entries' regs.
	avoid regMask
}
// A desiredStateEntry associates one value with the registers it desires.
type desiredStateEntry struct {
	// ID of the value.
	ID ID
	// regs holds up to four desired registers in decreasing priority
	// (most recently/strongly desired first — see add, which promotes a
	// register to the front). Unused trailing slots hold noRegister.
	regs [4]register
}
3290
3291
3292 func (d *desiredState) get(vid ID) [4]register {
3293 for _, e := range d.entries {
3294 if e.ID == vid {
3295 return e.regs
3296 }
3297 }
3298 return [4]register{noRegister, noRegister, noRegister, noRegister}
3299 }
3300
3301
3302 func (d *desiredState) add(vid ID, r register) {
3303 d.avoid |= regMask(1) << r
3304 for i := range d.entries {
3305 e := &d.entries[i]
3306 if e.ID != vid {
3307 continue
3308 }
3309 if e.regs[0] == r {
3310
3311 return
3312 }
3313 for j := 1; j < len(e.regs); j++ {
3314 if e.regs[j] == r {
3315
3316 copy(e.regs[1:], e.regs[:j])
3317 e.regs[0] = r
3318 return
3319 }
3320 }
3321 copy(e.regs[1:], e.regs[:])
3322 e.regs[0] = r
3323 return
3324 }
3325 d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
3326 }
3327
3328 func (d *desiredState) addList(vid ID, regs [4]register) {
3329
3330 for i := len(regs) - 1; i >= 0; i-- {
3331 r := regs[i]
3332 if r != noRegister {
3333 d.add(vid, r)
3334 }
3335 }
3336 }
3337
3338
3339 func (d *desiredState) clobber(m regMask) {
3340 for i := 0; i < len(d.entries); {
3341 e := &d.entries[i]
3342 j := 0
3343 for _, r := range e.regs {
3344 if r != noRegister && m>>r&1 == 0 {
3345 e.regs[j] = r
3346 j++
3347 }
3348 }
3349 if j == 0 {
3350
3351 d.entries[i] = d.entries[len(d.entries)-1]
3352 d.entries = d.entries[:len(d.entries)-1]
3353 continue
3354 }
3355 for ; j < len(e.regs); j++ {
3356 e.regs[j] = noRegister
3357 }
3358 i++
3359 }
3360 d.avoid &^= m
3361 }
3362
3363
3364 func (d *desiredState) copy(x *desiredState) {
3365 d.entries = append(d.entries[:0], x.entries...)
3366 d.avoid = x.avoid
3367 }
3368
3369
3370 func (d *desiredState) remove(vid ID) [4]register {
3371 for i := range d.entries {
3372 if d.entries[i].ID == vid {
3373 regs := d.entries[i].regs
3374 d.entries[i] = d.entries[len(d.entries)-1]
3375 d.entries = d.entries[:len(d.entries)-1]
3376 return regs
3377 }
3378 }
3379 return [4]register{noRegister, noRegister, noRegister, noRegister}
3380 }
3381
3382
3383
3384 func (d *desiredState) merge(x *desiredState) bool {
3385 oldAvoid := d.avoid
3386 d.avoid |= x.avoid
3387
3388
3389 for _, e := range x.entries {
3390 d.addList(e.ID, e.regs)
3391 }
3392 return oldAvoid != d.avoid
3393 }
3394
3395
// computeUnavoidableCalls sets l.containsUnavoidableCall for every loop
// l in the loop nest that cannot complete an iteration (header back to
// header) without passing through a block containing a call.
func (loopnest *loopnest) computeUnavoidableCalls() {
	f := loopnest.f

	// hasCall[b.ID] reports whether block b contains a call (or is a
	// defer block, which containsCall treats as a call).
	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
	defer f.Cache.freeBoolSlice(hasCall)
	for _, b := range f.Blocks {
		if b.containsCall() {
			hasCall[b.ID] = true
		}
	}
	found := f.Cache.allocSparseSet(f.NumBlocks())
	defer f.Cache.freeSparseSet(found)

	// For each loop, do a depth-first search from the header, walking
	// only through call-free blocks that belong to this loop or to a
	// loop nested inside it. If the search reaches the header again,
	// there is a call-free path around the loop and its calls are
	// avoidable; if the search exhausts without returning to the
	// header, every trip around the loop executes a call.
loopLoop:
	for _, l := range loopnest.loops {
		found.clear()
		tovisit := make([]*Block, 0, 8)
		tovisit = append(tovisit, l.header)
		for len(tovisit) > 0 {
			// Pop the next block off the DFS stack.
			cur := tovisit[len(tovisit)-1]
			tovisit = tovisit[:len(tovisit)-1]
			if hasCall[cur.ID] {
				// Paths through a call don't count; don't expand cur.
				continue
			}
			for _, s := range cur.Succs {
				nb := s.Block()
				if nb == l.header {
					// Made it around the loop without hitting a call:
					// the loop's calls are avoidable. Skip the marking
					// below and move on to the next loop.
					continue loopLoop
				}
				if found.contains(nb.ID) {
					// Already visited or queued.
					continue
				}
				nl := loopnest.b2l[nb.ID]
				if nl == nil || (nl.depth <= l.depth && nl != l) {
					// nb is outside this loop (not in l and not in a
					// loop nested deeper than l); don't follow it.
					continue
				}
				tovisit = append(tovisit, nb)
				found.add(nb.ID)
			}
		}
		// DFS never got back to the header: every cycle through this
		// loop passes through a call.
		l.containsUnavoidableCall = true
	}
}
3451
3452 func (b *Block) containsCall() bool {
3453 if b.Kind == BlockDefer {
3454 return true
3455 }
3456 for _, v := range b.Values {
3457 if opcodeTable[v.Op].call {
3458 return true
3459 }
3460 }
3461 return false
3462 }
3463