114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "fmt"
123 "internal/buildcfg"
124 "math"
125 "math/bits"
126 "unsafe"
127 )
128
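// Debug output thresholds, compared against f.pass.debug
// (for example, s.f.pass.debug > regDebug enables per-register tracing).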
129 const (
130 moveSpills = iota
131 logSpills
132 regDebug
133 stackDebug
134 )
135
136
137
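// Distance constants: a coarse measure, in instructions, of how far in the
// future a value is next used. Liveness propagation adds likelyDistance across
// likely branch edges, normalDistance across ordinary edges, and
// unlikelyDistance across unlikely edges (and after calls).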
138 const (
139 likelyDistance = 1
140 normalDistance = 10
141 unlikelyDistance = 100
142 )
143
144
145
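// regalloc performs register allocation on f. It assigns a home (register or
// stack slot) to every value that needs one and records the result in f.RegAlloc.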
146 func regalloc(f *Func) {
147 var s regAllocState
148 s.init(f)
149 s.regalloc(f)
150 s.close()
151 }
152
153 type register uint8
154
155 const noRegister register = 255
156
157
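// noRegisters is an all-noRegister array, used for bulk-initializing
// per-output register lists.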
158 var noRegisters [32]register = [32]register{
159 noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
160 noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
161 noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
162 noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
163 }
164
165
166
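// A regMask encodes a set of machine registers, one bit per register number.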
167 type regMask uint64
168
169 func (m regMask) String() string {
170 s := ""
171 for r := register(0); m != 0; r++ {
172 if m>>r&1 == 0 {
173 continue
174 }
175 m &^= regMask(1) << r
176 if s != "" {
177 s += " "
178 }
179 s += fmt.Sprintf("r%d", r)
180 }
181 return s
182 }
183
184 func (m regMask) contains(r register) bool {
185 return m>>r&1 != 0
186 }
187
188 func (s *regAllocState) RegMaskString(m regMask) string {
189 str := ""
190 for r := register(0); m != 0; r++ {
191 if m>>r&1 == 0 {
192 continue
193 }
194 m &^= regMask(1) << r
195 if str != "" {
196 str += " "
197 }
198 str += s.registers[r].String()
199 }
200 return str
201 }
202
203
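// countRegs returns the number of registers in the register mask.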
204 func countRegs(r regMask) int {
205 return bits.OnesCount64(uint64(r))
206 }
207
208
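// pickReg picks the lowest-numbered register in the (non-empty) register mask.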
209 func pickReg(r regMask) register {
210 if r == 0 {
211 panic("can't pick a register from an empty set")
212 }
213
214 return register(bits.TrailingZeros64(uint64(r)))
215 }
216
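// A use records a single use of a value: the distance, in instructions, from
// the start of the block to the use, and the source position of the use.
// The uses of a value form a linked list ordered by nondecreasing dist.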
217 type use struct {
218
219
220
221
222
223 dist int32
224 pos src.XPos
225 next *use
226 }
227
228
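// A valState records the allocation state of a single pre-regalloc value:
// the registers that currently hold it, its pending uses, its spill (if any),
// and the dominator-tree range over which a restore of it may be needed.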
229 type valState struct {
230 regs regMask
231 uses *use
232 spill *Value
233 restoreMin int32
234 restoreMax int32
235 needReg bool
236 rematerializeable bool
237 }
238
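// A regState records what one register currently holds: v is the original
// (pre-regalloc) value and c is the cached copy of v that actually lives in
// the register. Both are nil when the register is free.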
239 type regState struct {
240 v *Value
241 c *Value
242
243 }
244
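// regAllocState holds the working state of the allocator for one function:
// per-register and per-value state, liveness and desired-register information,
// and the start/end register assignments recorded for each block.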
245 type regAllocState struct {
246 f *Func
247
248 sdom SparseTree
249 registers []Register
250 numRegs register
251 SPReg register
252 SBReg register
253 GReg register
254 ZeroIntReg register
255 allocatable regMask
256
257
258
259
260 live [][]liveInfo
261
262
263
264
265 desired []desiredState
266
267
268 values []valState
269
270
271 sp, sb ID
272
273
274
275 orig []*Value
276
277
278
279 regs []regState
280
281
282 nospill regMask
283
284
285 used regMask
286
287
288 usedSinceBlockStart regMask
289
290
291 tmpused regMask
292
293
294 curBlock *Block
295
296
297 freeUseRecords *use
298
299
300
301 endRegs [][]endReg
302
303
304
305 startRegs [][]startReg
306
307
308
309
310 startRegsMask regMask
311
312
313 spillLive [][]ID
314
315
316
317 copies map[*Value]bool
318
319 loopnest *loopnest
320
321
322 visitOrder []*Block
323
324
325 blockOrder []int32
326
327
328 doClobber bool
329
330
331
332
333
334
335 nextCall []int32
336
337
338
339 curIdx int
340 }
341
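// An endReg records the contents of register r at the end of a block:
// the original value v and its in-register copy c.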
342 type endReg struct {
343 r register
344 v *Value
345 c *Value
346 }
347
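// A startReg records that register r is expected to hold (a copy c of) value v
// at the start of a block, along with the position of the use that wants it there.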
348 type startReg struct {
349 r register
350 v *Value
351 c *Value
352 pos src.XPos
353 }
354
355
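// freeReg frees up register r. The value currently occupying it loses this
// register (but keeps any other registers and its spill, if it has them).
// Freeing an already-free allocatable register is a fatal error.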
356 func (s *regAllocState) freeReg(r register) {
357 if !s.allocatable.contains(r) && !s.isGReg(r) {
358 return
359 }
360 v := s.regs[r].v
361 if v == nil {
362 s.f.Fatalf("tried to free an already free register %d\n", r)
363 }
364
365
366 if s.f.pass.debug > regDebug {
367 fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
368 }
369 s.regs[r] = regState{}
370 s.values[v.ID].regs &^= regMask(1) << r
371 s.used &^= regMask(1) << r
372 }
373
374
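// freeRegs frees every in-use register in the mask m.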
375 func (s *regAllocState) freeRegs(m regMask) {
376 for m&s.used != 0 {
377 s.freeReg(pickReg(m & s.used))
378 }
379 }
380
381
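// clobberRegs emits OpClobberReg values for the allocatable general-purpose
// registers in m, so that dead registers hold recognizably clobbered contents
// (used with the clobberdeadreg debugging flag).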
382 func (s *regAllocState) clobberRegs(m regMask) {
383 m &= s.allocatable & s.f.Config.gpRegMask
384 for m != 0 {
385 r := pickReg(m)
386 m &^= 1 << r
387 x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
388 s.f.setHome(x, &s.registers[r])
389 }
390 }
391
392
393
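// setOrig records that c is a copy of v, so that c maps back to the same
// original (pre-regalloc) value as v does.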
394 func (s *regAllocState) setOrig(c *Value, v *Value) {
395 if int(c.ID) >= cap(s.orig) {
396 x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
397 copy(x, s.orig)
398 s.f.Cache.freeValueSlice(s.orig)
399 s.orig = x
400 }
401 for int(c.ID) >= len(s.orig) {
402 s.orig = append(s.orig, nil)
403 }
404 if s.orig[c.ID] != nil {
405 s.f.Fatalf("orig value set twice %s %s", c, v)
406 }
407 s.orig[c.ID] = s.orig[v.ID]
408 }
409
410
411
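// assignReg assigns register r to hold c, a cached copy of v.
// r must not already be in use.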
412 func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
413 if s.f.pass.debug > regDebug {
414 fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
415 }
416
417 s.values[v.ID].regs |= regMask(1) << r
418 s.f.setHome(c, &s.registers[r])
419
420
421 if !s.allocatable.contains(r) && !s.isGReg(r) {
422 return
423 }
424 if s.regs[r].v != nil {
425 s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
426 }
427 s.regs[r] = regState{v, c}
428 s.used |= regMask(1) << r
429 }
430
431
432
433
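// allocReg chooses a register from mask for v. If every register in mask is in
// use, the occupant whose next use is farthest in the future is evicted (after
// first being copied to another free compatible register, when that is
// possible and worthwhile).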
434 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
435 if v.OnWasmStack {
436 return noRegister
437 }
438
439 mask &= s.allocatable
440 mask &^= s.nospill
441 if mask == 0 {
442 s.f.Fatalf("no register available for %s", v.LongString())
443 }
444
445 // Use an unused register in the mask if one is available.
446 if mask&^s.used != 0 {
447 r := pickReg(mask &^ s.used)
448 s.usedSinceBlockStart |= regMask(1) << r
449 return r
450 }
451
452 // No register in the mask is free. Pick a value to evict: among the
453 // registers in the mask, choose the one whose occupant's next use is
454 // farthest in the future. That occupant is the cheapest one to give
455 // up a register now (it can later be rematerialized or reloaded from
456 // its spill).
462 var r register
463 maxuse := int32(-1)
464 for t := register(0); t < s.numRegs; t++ {
465 if mask>>t&1 == 0 {
466 continue
467 }
468 v := s.regs[t].v
469 if n := s.values[v.ID].uses.dist; n > maxuse {
470
471
472 r = t
473 maxuse = n
474 }
475 }
476 if maxuse == -1 {
477 s.f.Fatalf("couldn't find register to spill")
478 }
479
480 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
481
482
483
484 s.freeReg(r)
485 return r
486 }
487
488 // Before evicting, try to move the occupant to another compatible free
489 // register if this is its only register and it is not rematerializeable.
490 v2 := s.regs[r].v
491 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
492 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
493 s.usedSinceBlockStart |= regMask(1) << r
494 r2 := pickReg(m)
495 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
496 s.copies[c] = false
497 if s.f.pass.debug > regDebug {
498 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
499 }
500 s.setOrig(c, v2)
501 s.assignReg(r2, v2, c)
502 }
503
504 // If the evicted register has not been used since the start of the
505 // block, there is no reason to request it in startRegs: drop it so
506 // the predecessors do not need to provide it.
507 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
508 if s.startRegsMask&(regMask(1)<<r) != 0 {
509 if s.f.pass.debug > regDebug {
510 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
511 }
512 s.startRegsMask &^= regMask(1) << r
513 }
514 }
515
516 s.freeReg(r)
517 s.usedSinceBlockStart |= regMask(1) << r
518 return r
519 }
520
521
522
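// makeSpill returns the spill value for v, creating an (as yet unplaced)
// OpStoreReg on first request, and extends the dominator-tree range within
// which a restore of v might be needed to include block b.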
523 func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
524 vi := &s.values[v.ID]
525 if vi.spill != nil {
526
527 vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
528 vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
529 return vi.spill
530 }
531
532
533 spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
534
535
536 s.setOrig(spill, v)
537 vi.spill = spill
538 vi.restoreMin = s.sdom[b.ID].entry
539 vi.restoreMax = s.sdom[b.ID].exit
540 return spill
541 }
542
543
544
545
546
547
548
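// allocValToReg returns a copy of v that lives in a register drawn from mask,
// materializing one if necessary by register-to-register copy, by
// rematerializing v, or by loading from v's spill. If nospill is set, the
// chosen register is protected from being spilled until the current
// instruction has been fully processed.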
549 func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
550 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
551 c := v.copyIntoWithXPos(s.curBlock, pos)
552 c.OnWasmStack = true
553 s.setOrig(c, v)
554 return c
555 }
556 if v.OnWasmStack {
557 return v
558 }
559
560 vi := &s.values[v.ID]
561 pos = pos.WithNotStmt()
562
563 if mask&vi.regs != 0 {
564 r := pickReg(mask & vi.regs)
565 if !s.allocatable.contains(r) {
566 return v
567 }
568 if s.regs[r].v != v || s.regs[r].c == nil {
569 panic("bad register state")
570 }
571 if nospill {
572 s.nospill |= regMask(1) << r
573 }
574 s.usedSinceBlockStart |= regMask(1) << r
575 return s.regs[r].c
576 }
577
578 var r register
579
580 onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
581 if !onWasmStack {
582
583 r = s.allocReg(mask, v)
584 }
585
586
587 var c *Value
588 if vi.regs != 0 {
589
590 r2 := pickReg(vi.regs)
591 var current *Value
592 if !s.allocatable.contains(r2) {
593 current = v
594 } else {
595 if s.regs[r2].v != v {
596 panic("bad register state")
597 }
598 current = s.regs[r2].c
599 }
600 s.usedSinceBlockStart |= regMask(1) << r2
601 c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
602 } else if v.rematerializeable() {
603
604 c = v.copyIntoWithXPos(s.curBlock, pos)
605 } else {
606
607 spill := s.makeSpill(v, s.curBlock)
608 if s.f.pass.debug > logSpills {
609 s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
610 }
611 c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
612 }
613
614 s.setOrig(c, v)
615
616 if onWasmStack {
617 c.OnWasmStack = true
618 return c
619 }
620
621 s.assignReg(r, v, c)
622 if c.Op == OpLoadReg && s.isGReg(r) {
623 s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
624 }
625 if nospill {
626 s.nospill |= regMask(1) << r
627 }
628 return c
629 }
630
631
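// isLeaf reports whether f contains no calls other than tail calls.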
632 func isLeaf(f *Func) bool {
633 for _, b := range f.Blocks {
634 for _, v := range b.Values {
635 if v.Op.IsCall() && !v.Op.IsTailCall() {
636
637 return false
638 }
639 }
640 }
641 return true
642 }
643
644
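// needRegister reports whether v needs a register: memory, void, flags,
// and tuple values do not.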
645 func (v *Value) needRegister() bool {
646 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
647 }
648
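// init prepares the allocator for f: it determines the allocatable register
// set, computes the block visit order, allocates per-value state, and
// computes liveness.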
649 func (s *regAllocState) init(f *Func) {
650 s.f = f
651 s.f.RegAlloc = s.f.Cache.locs[:0]
652 s.registers = f.Config.registers
653 if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
654 s.f.Fatalf("bad number of registers: %d", nr)
655 } else {
656 s.numRegs = register(nr)
657 }
658
659 s.SPReg = noRegister
660 s.SBReg = noRegister
661 s.GReg = noRegister
662 s.ZeroIntReg = noRegister
663 for r := register(0); r < s.numRegs; r++ {
664 switch s.registers[r].String() {
665 case "SP":
666 s.SPReg = r
667 case "SB":
668 s.SBReg = r
669 case "g":
670 s.GReg = r
671 case "ZERO":
672 s.ZeroIntReg = r
673 }
674 }
675
676 switch noRegister {
677 case s.SPReg:
678 s.f.Fatalf("no SP register found")
679 case s.SBReg:
680 s.f.Fatalf("no SB register found")
681 case s.GReg:
682 if f.Config.hasGReg {
683 s.f.Fatalf("no g register found")
684 }
685 }
686
687
688 s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
689 s.allocatable &^= 1 << s.SPReg
690 s.allocatable &^= 1 << s.SBReg
691 if s.f.Config.hasGReg {
692 s.allocatable &^= 1 << s.GReg
693 }
694 if s.ZeroIntReg != noRegister {
695 s.allocatable &^= 1 << s.ZeroIntReg
696 }
697 if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
698 s.allocatable &^= 1 << uint(s.f.Config.FPReg)
699 }
700 if s.f.Config.LinkReg != -1 {
701 if isLeaf(f) {
702
703 s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
704 }
705 }
706 if s.f.Config.ctxt.Flag_dynlink {
707 switch s.f.Config.arch {
708 case "386":
709
710
711
712
713
714 case "amd64":
715 s.allocatable &^= 1 << 15
716 case "arm":
717 s.allocatable &^= 1 << 9
718 case "arm64":
719
720 case "loong64":
721
722 case "ppc64le":
723
724 case "riscv64":
725
726 case "s390x":
727 s.allocatable &^= 1 << 11
728 default:
729 s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
730 }
731 }
732
733
734
735
736 s.visitOrder = layoutRegallocOrder(f)
737
738
739
740 s.blockOrder = make([]int32, f.NumBlocks())
741 for i, b := range s.visitOrder {
742 s.blockOrder[b.ID] = int32(i)
743 }
744
745 s.regs = make([]regState, s.numRegs)
746 nv := f.NumValues()
747 if cap(s.f.Cache.regallocValues) >= nv {
748 s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
749 } else {
750 s.f.Cache.regallocValues = make([]valState, nv)
751 }
752 s.values = s.f.Cache.regallocValues
753 s.orig = s.f.Cache.allocValueSlice(nv)
754 s.copies = make(map[*Value]bool)
755 for _, b := range s.visitOrder {
756 for _, v := range b.Values {
757 if v.needRegister() {
758 s.values[v.ID].needReg = true
759 s.values[v.ID].rematerializeable = v.rematerializeable()
760 s.orig[v.ID] = v
761 }
762
763
764 }
765 }
766 s.computeLive()
767
768 s.endRegs = make([][]endReg, f.NumBlocks())
769 s.startRegs = make([][]startReg, f.NumBlocks())
770 s.spillLive = make([][]ID, f.NumBlocks())
771 s.sdom = f.Sdom()
772
773
774 if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
775 canLiveOnStack := f.newSparseSet(f.NumValues())
776 defer f.retSparseSet(canLiveOnStack)
777 for _, b := range f.Blocks {
778
779 canLiveOnStack.clear()
780 for _, c := range b.ControlValues() {
781 if c.Uses == 1 && !opcodeTable[c.Op].generic {
782 canLiveOnStack.add(c.ID)
783 }
784 }
785
786 for i := len(b.Values) - 1; i >= 0; i-- {
787 v := b.Values[i]
788 if canLiveOnStack.contains(v.ID) {
789 v.OnWasmStack = true
790 } else {
791
792 canLiveOnStack.clear()
793 }
794 for _, arg := range v.Args {
795
796
797
798
799
800 if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
801 canLiveOnStack.add(arg.ID)
802 }
803 }
804 }
805 }
806 }
807
808
809
810
811 if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
812
813 s.doClobber = true
814 }
815 }
816
817 func (s *regAllocState) close() {
818 s.f.Cache.freeValueSlice(s.orig)
819 }
820
821
822
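// addUse adds a use record for value id at distance dist from the start of the
// current block. Records are pushed onto the front of the list, so callers
// must add uses from farthest to nearest (the allocator walks each block
// backwards).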
823 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
824 r := s.freeUseRecords
825 if r != nil {
826 s.freeUseRecords = r.next
827 } else {
828 r = &use{}
829 }
830 r.dist = dist
831 r.pos = pos
832 r.next = s.values[id].uses
833 s.values[id].uses = r
834 if r.next != nil && dist > r.next.dist {
835 s.f.Fatalf("uses added in wrong order")
836 }
837 }
838
839
840
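// advanceUses pops the current use of each of v's arguments. An argument is
// released from its registers if it has no remaining uses, or (for
// non-fixed-register values) if its next use comes after the next call.
// Finally, v itself is dropped if it turns out to be unused.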
841 func (s *regAllocState) advanceUses(v *Value) {
842 for _, a := range v.Args {
843 if !s.values[a.ID].needReg {
844 continue
845 }
846 ai := &s.values[a.ID]
847 r := ai.uses
848 ai.uses = r.next
849 if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
850
851 s.freeRegs(ai.regs)
852 }
853 r.next = s.freeUseRecords
854 s.freeUseRecords = r
855 }
856 s.dropIfUnused(v)
857 }
858
859
860
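// dropIfUnused frees v's registers if v has no further uses, or (for
// non-fixed-register values) if its next use comes after the next call.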
861 func (s *regAllocState) dropIfUnused(v *Value) {
862 if !s.values[v.ID].needReg {
863 return
864 }
865 vi := &s.values[v.ID]
866 r := vi.uses
867 if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
868 s.freeRegs(vi.regs)
869 }
870 }
871
872
873
874
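// liveAfterCurrentInstruction reports whether v has a use strictly after the
// current instruction; uses at the same distance belong to the current
// instruction itself.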
875 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
876 u := s.values[v.ID].uses
877 if u == nil {
878 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
879 }
880 d := u.dist
881 for u != nil && u.dist == d {
882 u = u.next
883 }
884 return u != nil && u.dist > d
885 }
886
887
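// setState resets the register contents to exactly the assignments recorded in regs.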
888 func (s *regAllocState) setState(regs []endReg) {
889 s.freeRegs(s.used)
890 for _, x := range regs {
891 s.assignReg(x.r, x.v, x.c)
892 }
893 }
894
895
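// compatRegs returns the set of allocatable registers that can hold a value of type t.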
896 func (s *regAllocState) compatRegs(t *types.Type) regMask {
897 var m regMask
898 if t.IsTuple() || t.IsFlags() {
899 return 0
900 }
901 if t.IsFloat() || t == types.TypeInt128 {
902 if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
903 m = s.f.Config.fp32RegMask
904 } else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
905 m = s.f.Config.fp64RegMask
906 } else {
907 m = s.f.Config.fpRegMask
908 }
909 } else {
910 m = s.f.Config.gpRegMask
911 }
912 return m & s.allocatable
913 }
914
915
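// regspec returns the register constraints (regInfo) for v, handling the ops
// whose constraints depend on auxiliary information (OpConvert, the
// register-argument ops, calls carrying register ABI information, and
// OpMakeResult).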
916 func (s *regAllocState) regspec(v *Value) regInfo {
917 op := v.Op
918 if op == OpConvert {
919
920
921
922 m := s.allocatable & s.f.Config.gpRegMask
923 return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
924 }
925 if op == OpArgIntReg {
926 reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
927 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
928 }
929 if op == OpArgFloatReg {
930 reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
931 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
932 }
933 if op.IsCall() {
934 if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
935 return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
936 }
937 }
938 if op == OpMakeResult && s.f.OwnAux.reg != nil {
939 return *s.f.OwnAux.ResultReg(s.f.Config)
940 }
941 return opcodeTable[op].reg
942 }
943
944 func (s *regAllocState) isGReg(r register) bool {
945 return s.f.Config.hasGReg && s.GReg == r
946 }
947
948
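// tmpVal is a dummy value passed to allocReg when an instruction needs a
// scratch integer register (needIntTemp).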
949 var tmpVal Value
950
951 func (s *regAllocState) regalloc(f *Func) {
952 regValLiveSet := f.newSparseSet(f.NumValues())
953 defer f.retSparseSet(regValLiveSet)
954 var oldSched []*Value
955 var phis []*Value
956 var phiRegs []register
957 var args []*Value
958
959
960 var desired desiredState
961 desiredSecondReg := map[ID][4]register{}
962
963
964 type dentry struct {
965 out [4]register
966 in [3][4]register
967 }
968 var dinfo []dentry
969
970 if f.Entry != f.Blocks[0] {
971 f.Fatalf("entry block must be first")
972 }
973
974 for _, b := range s.visitOrder {
975 if s.f.pass.debug > regDebug {
976 fmt.Printf("Begin processing block %v\n", b)
977 }
978 s.curBlock = b
979 s.startRegsMask = 0
980 s.usedSinceBlockStart = 0
981 clear(desiredSecondReg)
982
983
984
985 regValLiveSet.clear()
986 for _, e := range s.live[b.ID] {
987 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
988 regValLiveSet.add(e.ID)
989 }
990 for _, v := range b.ControlValues() {
991 if s.values[v.ID].needReg {
992 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
993 regValLiveSet.add(v.ID)
994 }
995 }
996 if len(s.nextCall) < len(b.Values) {
997 s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
998 }
999 var nextCall int32 = math.MaxInt32
1000 for i := len(b.Values) - 1; i >= 0; i-- {
1001 v := b.Values[i]
1002 regValLiveSet.remove(v.ID)
1003 if v.Op == OpPhi {
1004
1005
1006
1007 s.nextCall[i] = nextCall
1008 continue
1009 }
1010 if opcodeTable[v.Op].call {
1011
1012 regValLiveSet.clear()
1013 if s.sp != 0 && s.values[s.sp].uses != nil {
1014 regValLiveSet.add(s.sp)
1015 }
1016 if s.sb != 0 && s.values[s.sb].uses != nil {
1017 regValLiveSet.add(s.sb)
1018 }
1019 nextCall = int32(i)
1020 }
1021 for _, a := range v.Args {
1022 if !s.values[a.ID].needReg {
1023 continue
1024 }
1025 s.addUse(a.ID, int32(i), v.Pos)
1026 regValLiveSet.add(a.ID)
1027 }
1028 s.nextCall[i] = nextCall
1029 }
1030 if s.f.pass.debug > regDebug {
1031 fmt.Printf("use distances for %s\n", b)
1032 for i := range s.values {
1033 vi := &s.values[i]
1034 u := vi.uses
1035 if u == nil {
1036 continue
1037 }
1038 fmt.Printf(" v%d:", i)
1039 for u != nil {
1040 fmt.Printf(" %d", u.dist)
1041 u = u.next
1042 }
1043 fmt.Println()
1044 }
1045 }
1046
1047
1048
1049 nphi := 0
1050 for _, v := range b.Values {
1051 if v.Op != OpPhi {
1052 break
1053 }
1054 nphi++
1055 }
1056 phis = append(phis[:0], b.Values[:nphi]...)
1057 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1058 b.Values = b.Values[:0]
1059
1060
1061 if b == f.Entry {
1062
1063 if nphi > 0 {
1064 f.Fatalf("phis in entry block")
1065 }
1066 } else if len(b.Preds) == 1 {
1067
1068 s.setState(s.endRegs[b.Preds[0].b.ID])
1069 if nphi > 0 {
1070 f.Fatalf("phis in single-predecessor block")
1071 }
1072
1073
1074
1075 for r := register(0); r < s.numRegs; r++ {
1076 v := s.regs[r].v
1077 if v != nil && !regValLiveSet.contains(v.ID) {
1078 s.freeReg(r)
1079 }
1080 }
1081 } else {
1082 // This is the complicated case. The block has more than one
1083 // predecessor, which means it may have phi ops.
1084 //
1085 // We start by adopting the end-of-block register state of one
1086 // predecessor; the other incoming edges are fixed up later by
1087 // shuffle, which inserts the necessary moves on those edges.
1088 //
1089 // Pick the predecessor to copy from: it must already have been
1090 // visited, and among those we prefer the one with the fewest live
1091 // spills, breaking ties by branch likeliness and block order.
1095 idx := -1
1096 for i, p := range b.Preds {
1097
1098
1099 pb := p.b
1100 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1101 continue
1102 }
1103 if idx == -1 {
1104 idx = i
1105 continue
1106 }
1107 pSel := b.Preds[idx].b
1108 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1109 idx = i
1110 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1122 idx = i
1123 }
1124 }
1125 }
1126 if idx < 0 {
1127 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1128 }
1129 p := b.Preds[idx].b
1130 s.setState(s.endRegs[p.ID])
1131
1132 if s.f.pass.debug > regDebug {
1133 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1134 for _, x := range s.endRegs[p.ID] {
1135 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1136 }
1137 }
1138
1139
1140
1141
1142
1143 phiRegs = phiRegs[:0]
1144 var phiUsed regMask
1145
1146 for _, v := range phis {
1147 if !s.values[v.ID].needReg {
1148 phiRegs = append(phiRegs, noRegister)
1149 continue
1150 }
1151 a := v.Args[idx]
1152
1153
1154 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1155 if m != 0 {
1156 r := pickReg(m)
1157 phiUsed |= regMask(1) << r
1158 phiRegs = append(phiRegs, r)
1159 } else {
1160 phiRegs = append(phiRegs, noRegister)
1161 }
1162 }
1163
1164
1165 for i, v := range phis {
1166 if !s.values[v.ID].needReg {
1167 continue
1168 }
1169 a := v.Args[idx]
1170 r := phiRegs[i]
1171 if r == noRegister {
1172 continue
1173 }
1174 if regValLiveSet.contains(a.ID) {
1175
1176
1177
1178
1179
1180
1181
1182
1183 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1184 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1185 r2 := pickReg(m)
1186 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1187 s.copies[c] = false
1188 if s.f.pass.debug > regDebug {
1189 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1190 }
1191 s.setOrig(c, a)
1192 s.assignReg(r2, a, c)
1193 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1194 }
1195 }
1196 s.freeReg(r)
1197 }
1198
1199
1200 b.Values = append(b.Values, phis...)
1201
1202
1203
1204 for i, v := range phis {
1205 if !s.values[v.ID].needReg {
1206 continue
1207 }
1208 if phiRegs[i] != noRegister {
1209 continue
1210 }
1211 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1212
1213
1214 for i, pe := range b.Preds {
1215 if i == idx {
1216 continue
1217 }
1218 ri := noRegister
1219 for _, er := range s.endRegs[pe.b.ID] {
1220 if er.v == s.orig[v.Args[i].ID] {
1221 ri = er.r
1222 break
1223 }
1224 }
1225 if ri != noRegister && m>>ri&1 != 0 {
1226 m = regMask(1) << ri
1227 break
1228 }
1229 }
1230 if m != 0 {
1231 r := pickReg(m)
1232 phiRegs[i] = r
1233 phiUsed |= regMask(1) << r
1234 }
1235 }
1236
1237
1238 for i, v := range phis {
1239 if !s.values[v.ID].needReg {
1240 continue
1241 }
1242 r := phiRegs[i]
1243 if r == noRegister {
1244
1245
1246 s.values[v.ID].spill = v
1247 continue
1248 }
1249
1250 s.assignReg(r, v, v)
1251 }
1252
1253
1254 for r := register(0); r < s.numRegs; r++ {
1255 if phiUsed>>r&1 != 0 {
1256 continue
1257 }
1258 v := s.regs[r].v
1259 if v != nil && !regValLiveSet.contains(v.ID) {
1260 s.freeReg(r)
1261 }
1262 }
1263
1264
1265
1266
1267
1268 regList := make([]startReg, 0, 32)
1269 for r := register(0); r < s.numRegs; r++ {
1270 v := s.regs[r].v
1271 if v == nil {
1272 continue
1273 }
1274 if phiUsed>>r&1 != 0 {
1275
1276
1277 continue
1278 }
1279 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1280 s.startRegsMask |= regMask(1) << r
1281 }
1282 s.startRegs[b.ID] = make([]startReg, len(regList))
1283 copy(s.startRegs[b.ID], regList)
1284
1285 if s.f.pass.debug > regDebug {
1286 fmt.Printf("after phis\n")
1287 for _, x := range s.startRegs[b.ID] {
1288 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1289 }
1290 }
1291 }
1292
1293
1294 for i, v := range phis {
1295 s.curIdx = i
1296 s.dropIfUnused(v)
1297 }
1298
1299
1300 if l := len(oldSched); cap(dinfo) < l {
1301 dinfo = make([]dentry, l)
1302 } else {
1303 dinfo = dinfo[:l]
1304 clear(dinfo)
1305 }
1306
1307
1308 desired.copy(&s.desired[b.ID])
1309
1310
1311
1312
1313
1314
1315 for _, e := range b.Succs {
1316 succ := e.b
1317
1318 for _, x := range s.startRegs[succ.ID] {
1319 desired.add(x.v.ID, x.r)
1320 }
1321
1322 pidx := e.i
1323 for _, v := range succ.Values {
1324 if v.Op != OpPhi {
1325 break
1326 }
1327 if !s.values[v.ID].needReg {
1328 continue
1329 }
1330 rp, ok := s.f.getHome(v.ID).(*Register)
1331 if !ok {
1332
1333
1334
1335
1336 for _, a := range v.Args {
1337 rp, ok = s.f.getHome(a.ID).(*Register)
1338 if ok {
1339 break
1340 }
1341 }
1342 if !ok {
1343 continue
1344 }
1345 }
1346 desired.add(v.Args[pidx].ID, register(rp.num))
1347 }
1348 }
1349
1350
1351 for i := len(oldSched) - 1; i >= 0; i-- {
1352 v := oldSched[i]
1353 prefs := desired.remove(v.ID)
1354 regspec := s.regspec(v)
1355 desired.clobber(regspec.clobbers)
1356 for _, j := range regspec.inputs {
1357 if countRegs(j.regs) != 1 {
1358 continue
1359 }
1360 desired.clobber(j.regs)
1361 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1362 }
1363 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1364 if opcodeTable[v.Op].commutative {
1365 desired.addList(v.Args[1].ID, prefs)
1366 }
1367 desired.addList(v.Args[0].ID, prefs)
1368 }
1369
1370 dinfo[i].out = prefs
1371 for j, a := range v.Args {
1372 if j >= len(dinfo[i].in) {
1373 break
1374 }
1375 dinfo[i].in[j] = desired.get(a.ID)
1376 }
1377 if v.Op == OpSelect1 && prefs[0] != noRegister {
1378
1379
1380 desiredSecondReg[v.Args[0].ID] = prefs
1381 }
1382 }
1383
1384
1385 for idx, v := range oldSched {
1386 s.curIdx = nphi + idx
1387 tmpReg := noRegister
1388 if s.f.pass.debug > regDebug {
1389 fmt.Printf(" processing %s\n", v.LongString())
1390 }
1391 regspec := s.regspec(v)
1392 if v.Op == OpPhi {
1393 f.Fatalf("phi %s not at start of block", v)
1394 }
1395 if opcodeTable[v.Op].fixedReg {
1396 switch v.Op {
1397 case OpSP:
1398 s.assignReg(s.SPReg, v, v)
1399 s.sp = v.ID
1400 case OpSB:
1401 s.assignReg(s.SBReg, v, v)
1402 s.sb = v.ID
1403 case OpARM64ZERO:
1404 s.assignReg(s.ZeroIntReg, v, v)
1405 default:
1406 f.Fatalf("unknown fixed-register op %s", v)
1407 }
1408 b.Values = append(b.Values, v)
1409 s.advanceUses(v)
1410 continue
1411 }
1412 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1413 if s.values[v.ID].needReg {
1414 if v.Op == OpSelectN {
1415 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1416 } else {
1417 i := 0
1418 if v.Op == OpSelect1 {
1419 i = 1
1420 }
1421 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1422 }
1423 }
1424 b.Values = append(b.Values, v)
1425 s.advanceUses(v)
1426 continue
1427 }
1428 if v.Op == OpGetG && s.f.Config.hasGReg {
1429
1430 if s.regs[s.GReg].v != nil {
1431 s.freeReg(s.GReg)
1432 }
1433 s.assignReg(s.GReg, v, v)
1434 b.Values = append(b.Values, v)
1435 s.advanceUses(v)
1436 continue
1437 }
1438 if v.Op == OpArg {
1439
1440
1441
1442 s.values[v.ID].spill = v
1443 b.Values = append(b.Values, v)
1444 s.advanceUses(v)
1445 continue
1446 }
1447 if v.Op == OpKeepAlive {
1448
1449 s.advanceUses(v)
1450 a := v.Args[0]
1451 vi := &s.values[a.ID]
1452 if vi.regs == 0 && !vi.rematerializeable {
1453
1454
1455
1456 v.SetArg(0, s.makeSpill(a, b))
1457 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1458
1459
1460
1461 v.Op = OpVarLive
1462 v.SetArgs1(v.Args[1])
1463 v.Aux = a.Aux
1464 } else {
1465
1466
1467
1468 v.Op = OpCopy
1469 v.SetArgs1(v.Args[1])
1470 }
1471 b.Values = append(b.Values, v)
1472 continue
1473 }
1474 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1475
1476 if s.doClobber && v.Op.IsCall() {
1477 s.clobberRegs(regspec.clobbers)
1478 }
1479 s.freeRegs(regspec.clobbers)
1480 b.Values = append(b.Values, v)
1481 s.advanceUses(v)
1482 continue
1483 }
1484
1485 if s.values[v.ID].rematerializeable {
1486
1487
1488
1489 for _, a := range v.Args {
1490 a.Uses--
1491 }
1492 s.advanceUses(v)
1493 continue
1494 }
1495
1496 if s.f.pass.debug > regDebug {
1497 fmt.Printf("value %s\n", v.LongString())
1498 fmt.Printf(" out:")
1499 for _, r := range dinfo[idx].out {
1500 if r != noRegister {
1501 fmt.Printf(" %s", &s.registers[r])
1502 }
1503 }
1504 fmt.Println()
1505 for i := 0; i < len(v.Args) && i < 3; i++ {
1506 fmt.Printf(" in%d:", i)
1507 for _, r := range dinfo[idx].in[i] {
1508 if r != noRegister {
1509 fmt.Printf(" %s", &s.registers[r])
1510 }
1511 }
1512 fmt.Println()
1513 }
1514 }
1515
1516
1517
1518
1519 args = append(args[:0], make([]*Value, len(v.Args))...)
1520 for i, a := range v.Args {
1521 if !s.values[a.ID].needReg {
1522 args[i] = a
1523 }
1524 }
1525 for _, i := range regspec.inputs {
1526 mask := i.regs
1527 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1528 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1529 }
1530 }
1531
1532
1533
1534
1535
1536
1537 for {
1538 freed := false
1539 for _, i := range regspec.inputs {
1540 if args[i.idx] != nil {
1541 continue
1542 }
1543 mask := i.regs
1544 if countRegs(mask) == 1 && mask&^s.used != 0 {
1545 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1546
1547
1548
1549 oldregs := s.values[v.Args[i.idx].ID].regs
1550 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1551 s.freeRegs(oldregs &^ mask &^ s.nospill)
1552 freed = true
1553 }
1554 }
1555 }
1556 if !freed {
1557 break
1558 }
1559 }
1560
1561
1562 for _, i := range regspec.inputs {
1563 if args[i.idx] != nil {
1564 continue
1565 }
1566 mask := i.regs
1567 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1568
1569 mask &= s.allocatable
1570 mask &^= s.nospill
1571
1572 if i.idx < 3 {
1573 for _, r := range dinfo[idx].in[i.idx] {
1574 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1575
1576 mask = regMask(1) << r
1577 break
1578 }
1579 }
1580 }
1581
1582 if mask&^desired.avoid != 0 {
1583 mask &^= desired.avoid
1584 }
1585 }
1586 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1587 }
1588
1589
1590
1591
1592 if opcodeTable[v.Op].resultInArg0 {
1593 var m regMask
1594 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1595
1596 goto ok
1597 }
1598 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1599 args[0], args[1] = args[1], args[0]
1600 goto ok
1601 }
1602 if s.values[v.Args[0].ID].rematerializeable {
1603
1604 goto ok
1605 }
1606 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1607 args[0], args[1] = args[1], args[0]
1608 goto ok
1609 }
1610 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1611
1612 goto ok
1613 }
1614 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1615 args[0], args[1] = args[1], args[0]
1616 goto ok
1617 }
1618
1619
1620
1621
1622
1623 m = s.compatRegs(v.Args[0].Type) &^ s.used
1624 if m == 0 {
1625
1626
1627
1628
1629 goto ok
1630 }
1631
1632
1633 for _, r := range dinfo[idx].out {
1634 if r != noRegister && (m&regspec.outputs[0].regs)>>r&1 != 0 {
1635 m = regMask(1) << r
1636 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1637
1638
1639 goto ok
1640 }
1641 }
1642
1643
1644 for _, r := range dinfo[idx].in[0] {
1645 if r != noRegister && m>>r&1 != 0 {
1646 m = regMask(1) << r
1647 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1648 s.copies[c] = false
1649
1650
1651 goto ok
1652 }
1653 }
1654 if opcodeTable[v.Op].commutative {
1655 for _, r := range dinfo[idx].in[1] {
1656 if r != noRegister && m>>r&1 != 0 {
1657 m = regMask(1) << r
1658 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1659 s.copies[c] = false
1660 args[0], args[1] = args[1], args[0]
1661 goto ok
1662 }
1663 }
1664 }
1665
1666
1667 if m&^desired.avoid != 0 {
1668 m &^= desired.avoid
1669 }
1670
1671 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1672 s.copies[c] = false
1673
1674
1675
1676
1677 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1678 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1679 r := register(rp.num)
1680 for _, r2 := range dinfo[idx].in[0] {
1681 if r == r2 {
1682 args[0] = c
1683 break
1684 }
1685 }
1686 }
1687 }
1688 }
1689
1690 ok:
1691
1692
1693
1694
1695
1696
1697 if opcodeTable[v.Op].needIntTemp {
1698 m := s.allocatable & s.f.Config.gpRegMask
1699 for _, out := range regspec.outputs {
1700 if countRegs(out.regs) == 1 {
1701 m &^= out.regs
1702 }
1703 }
1704 if m&^desired.avoid&^s.nospill != 0 {
1705 m &^= desired.avoid
1706 }
1707 tmpReg = s.allocReg(m, &tmpVal)
1708 s.nospill |= regMask(1) << tmpReg
1709 s.tmpused |= regMask(1) << tmpReg
1710 }
1711
1712
1713
1714
1715
1716 if !opcodeTable[v.Op].resultNotInArgs {
1717 s.tmpused = s.nospill
1718 s.nospill = 0
1719 s.advanceUses(v)
1720 }
1721
1722
1723 if s.doClobber && v.Op.IsCall() {
1724
1725
1726 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1727 }
1728 s.freeRegs(regspec.clobbers)
1729 s.tmpused |= regspec.clobbers
1730
1731
1732 {
1733 outRegs := noRegisters
1734 maxOutIdx := -1
1735 var used regMask
1736 if tmpReg != noRegister {
1737
1738
1739 used |= regMask(1) << tmpReg
1740 }
1741 for _, out := range regspec.outputs {
1742 if out.regs == 0 {
1743 continue
1744 }
1745 mask := out.regs & s.allocatable &^ used
1746 if mask == 0 {
1747 s.f.Fatalf("can't find any output register %s", v.LongString())
1748 }
1749 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1750 if !opcodeTable[v.Op].commutative {
1751
1752 r := register(s.f.getHome(args[0].ID).(*Register).num)
1753 if mask>>r&1 == 0 {
1754 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1755 }
1756 mask = regMask(1) << r
1757 } else {
1758
1759 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1760 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1761
1762 found := false
1763 for _, r := range dinfo[idx].out {
1764 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1765 mask = regMask(1) << r
1766 found = true
1767 if r == r1 {
1768 args[0], args[1] = args[1], args[0]
1769 }
1770 break
1771 }
1772 }
1773 if !found {
1774
1775 mask = regMask(1) << r0
1776 }
1777 }
1778 }
1779 if out.idx == 0 {
1780 for _, r := range dinfo[idx].out {
1781 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1782
1783 mask = regMask(1) << r
1784 break
1785 }
1786 }
1787 }
1788 if out.idx == 1 {
1789 if prefs, ok := desiredSecondReg[v.ID]; ok {
1790 for _, r := range prefs {
1791 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1792
1793 mask = regMask(1) << r
1794 break
1795 }
1796 }
1797 }
1798 }
1799
1800 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1801 mask &^= desired.avoid
1802 }
1803 r := s.allocReg(mask, v)
1804 if out.idx > maxOutIdx {
1805 maxOutIdx = out.idx
1806 }
1807 outRegs[out.idx] = r
1808 used |= regMask(1) << r
1809 s.tmpused |= regMask(1) << r
1810 }
1811
1812 if v.Type.IsTuple() {
1813 var outLocs LocPair
1814 if r := outRegs[0]; r != noRegister {
1815 outLocs[0] = &s.registers[r]
1816 }
1817 if r := outRegs[1]; r != noRegister {
1818 outLocs[1] = &s.registers[r]
1819 }
1820 s.f.setHome(v, outLocs)
1821
1822 } else if v.Type.IsResults() {
1823
1824 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1825 for i := 0; i <= maxOutIdx; i++ {
1826 if r := outRegs[i]; r != noRegister {
1827 outLocs[i] = &s.registers[r]
1828 }
1829 }
1830 s.f.setHome(v, outLocs)
1831 } else {
1832 if r := outRegs[0]; r != noRegister {
1833 s.assignReg(r, v, v)
1834 }
1835 }
1836 if tmpReg != noRegister {
1837
1838 if s.f.tempRegs == nil {
1839 s.f.tempRegs = map[ID]*Register{}
1840 }
1841 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1842 }
1843 }
1844
1845
1846 if opcodeTable[v.Op].resultNotInArgs {
1847 s.nospill = 0
1848 s.advanceUses(v)
1849 }
1850 s.tmpused = 0
1851
1852
1853 for i, a := range args {
1854 v.SetArg(i, a)
1855 }
1856 b.Values = append(b.Values, v)
1857 s.dropIfUnused(v)
1858 }
1859
1860
1861
1862 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1863
1864
1865 for i, v := range b.ControlValues() {
1866 if !s.values[v.ID].needReg {
1867 continue
1868 }
1869 if s.f.pass.debug > regDebug {
1870 fmt.Printf(" processing control %s\n", v.LongString())
1871 }
1872
1873
1874
1875 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1876 }
1877
1878
1879
1880 for _, v := range controls {
1881 vi := &s.values[v.ID]
1882 if !vi.needReg {
1883 continue
1884 }
1885
1886 u := vi.uses
1887 vi.uses = u.next
1888 if u.next == nil {
1889 s.freeRegs(vi.regs)
1890 }
1891 u.next = s.freeUseRecords
1892 s.freeUseRecords = u
1893 }
1894
1895
1896
1897
1898 if len(b.Succs) == 1 {
1899 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1900 s.freeReg(s.GReg)
1901 }
1902
1903 top := b.Succs[0].b
1904 loop := s.loopnest.b2l[top.ID]
1905 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1906 goto badloop
1907 }
1908
1909
1910 for _, live := range s.live[b.ID] {
1911 if live.dist >= unlikelyDistance {
1912
1913 continue
1914 }
1915 vid := live.ID
1916 vi := &s.values[vid]
1917 if vi.regs != 0 {
1918 continue
1919 }
1920 if vi.rematerializeable {
1921 continue
1922 }
1923 v := s.orig[vid]
1924 m := s.compatRegs(v.Type) &^ s.used
1925
1926 outerloop:
1927 for _, e := range desired.entries {
1928 if e.ID != v.ID {
1929 continue
1930 }
1931 for _, r := range e.regs {
1932 if r != noRegister && m>>r&1 != 0 {
1933 m = regMask(1) << r
1934 break outerloop
1935 }
1936 }
1937 }
1938 if m&^desired.avoid != 0 {
1939 m &^= desired.avoid
1940 }
1941 if m != 0 {
1942 s.allocValToReg(v, m, false, b.Pos)
1943 }
1944 }
1945 }
1946 badloop:
1947 ;
1948
1949
1950
1951 k := 0
1952 for r := register(0); r < s.numRegs; r++ {
1953 v := s.regs[r].v
1954 if v == nil {
1955 continue
1956 }
1957 k++
1958 }
1959 regList := make([]endReg, 0, k)
1960 for r := register(0); r < s.numRegs; r++ {
1961 v := s.regs[r].v
1962 if v == nil {
1963 continue
1964 }
1965 regList = append(regList, endReg{r, v, s.regs[r].c})
1966 }
1967 s.endRegs[b.ID] = regList
1968
1969 if checkEnabled {
1970 regValLiveSet.clear()
1971 for _, x := range s.live[b.ID] {
1972 regValLiveSet.add(x.ID)
1973 }
1974 for r := register(0); r < s.numRegs; r++ {
1975 v := s.regs[r].v
1976 if v == nil {
1977 continue
1978 }
1979 if !regValLiveSet.contains(v.ID) {
1980 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
1981 }
1982 }
1983 }
1984
1985
1986
1987
1988
1989 for _, e := range s.live[b.ID] {
1990 vi := &s.values[e.ID]
1991 if vi.regs != 0 {
1992
1993 continue
1994 }
1995 if vi.rematerializeable {
1996
1997 continue
1998 }
1999 if s.f.pass.debug > regDebug {
2000 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
2001 }
2002 spill := s.makeSpill(s.orig[e.ID], b)
2003 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
2004 }
2005
2006
2007
2008
2009 for _, e := range s.live[b.ID] {
2010 u := s.values[e.ID].uses
2011 if u == nil {
2012 f.Fatalf("live at end, no uses v%d", e.ID)
2013 }
2014 if u.next != nil {
2015 f.Fatalf("live at end, too many uses v%d", e.ID)
2016 }
2017 s.values[e.ID].uses = nil
2018 u.next = s.freeUseRecords
2019 s.freeUseRecords = u
2020 }
2021
2022
2023
2024
2025
2026
2027
2028 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
2029 regs := make([]startReg, 0, c)
2030 for _, sr := range s.startRegs[b.ID] {
2031 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
2032 continue
2033 }
2034 regs = append(regs, sr)
2035 }
2036 s.startRegs[b.ID] = regs
2037 }
2038 }
2039
2040
2041 s.placeSpills()
2042
2043
2044
2045 stacklive := stackalloc(s.f, s.spillLive)
2046
2047
2048 s.shuffle(stacklive)
2049
2050
2051
2052
2053 for {
2054 progress := false
2055 for c, used := range s.copies {
2056 if !used && c.Uses == 0 {
2057 if s.f.pass.debug > regDebug {
2058 fmt.Printf("delete copied value %s\n", c.LongString())
2059 }
2060 c.resetArgs()
2061 f.freeValue(c)
2062 delete(s.copies, c)
2063 progress = true
2064 }
2065 }
2066 if !progress {
2067 break
2068 }
2069 }
2070
2071 for _, b := range s.visitOrder {
2072 i := 0
2073 for _, v := range b.Values {
2074 if v.Op == OpInvalid {
2075 continue
2076 }
2077 b.Values[i] = v
2078 i++
2079 }
2080 b.Values = b.Values[:i]
2081 }
2082 }
2083
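// placeSpills chooses a block and position for every spill created during
// allocation: as far down the dominator tree as possible while still
// dominating every place a restore might be needed, preferring spots at a
// shallower loop depth where the spilled value is still in a register.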
2084 func (s *regAllocState) placeSpills() {
2085 mustBeFirst := func(op Op) bool {
2086 return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
2087 }
2088
2089 // start records, for each block ID, the spills to place at the
2090 // start of that block.
2091 start := map[ID][]*Value{}
2092 // after records, for each value ID, the spills to place immediately
2093 // after that value.
2094 after := map[ID][]*Value{}
2095
2096 for i := range s.values {
2097 vi := s.values[i]
2098 spill := vi.spill
2099 if spill == nil {
2100 continue
2101 }
2102 if spill.Block != nil {
2103
2104
2105 continue
2106 }
2107 v := s.orig[i]
2108
2109
2110
2111
2112
2113 if v == nil {
2114 panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
2115 }
2116 best := v.Block
2117 bestArg := v
2118 var bestDepth int16
2119 if l := s.loopnest.b2l[best.ID]; l != nil {
2120 bestDepth = l.depth
2121 }
2122 b := best
2123 const maxSpillSearch = 100
2124 for i := 0; i < maxSpillSearch; i++ {
2125
2126
2127 p := b
2128 b = nil
2129 for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
2130 if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
2131
2132 b = c
2133 break
2134 }
2135 }
2136 if b == nil {
2137
2138 break
2139 }
2140
2141 var depth int16
2142 if l := s.loopnest.b2l[b.ID]; l != nil {
2143 depth = l.depth
2144 }
2145 if depth > bestDepth {
2146
2147 continue
2148 }
2149
2150
2151
2152 if len(b.Preds) == 1 {
2153 for _, e := range s.endRegs[b.Preds[0].b.ID] {
2154 if e.v == v {
2155
2156 best = b
2157 bestArg = e.c
2158 bestDepth = depth
2159 break
2160 }
2161 }
2162 } else {
2163 for _, e := range s.startRegs[b.ID] {
2164 if e.v == v {
2165
2166 best = b
2167 bestArg = e.c
2168 bestDepth = depth
2169 break
2170 }
2171 }
2172 }
2173 }
2174
2175
2176 spill.Block = best
2177 spill.AddArg(bestArg)
2178 if best == v.Block && !mustBeFirst(v.Op) {
2179
2180 after[v.ID] = append(after[v.ID], spill)
2181 } else {
2182
2183 start[best.ID] = append(start[best.ID], spill)
2184 }
2185 }
2186
2187
2188 var oldSched []*Value
2189 for _, b := range s.visitOrder {
2190 nfirst := 0
2191 for _, v := range b.Values {
2192 if !mustBeFirst(v.Op) {
2193 break
2194 }
2195 nfirst++
2196 }
2197 oldSched = append(oldSched[:0], b.Values[nfirst:]...)
2198 b.Values = b.Values[:nfirst]
2199 b.Values = append(b.Values, start[b.ID]...)
2200 for _, v := range oldSched {
2201 b.Values = append(b.Values, v)
2202 b.Values = append(b.Values, after[v.ID]...)
2203 }
2204 }
2205 }
2206
2207
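// shuffle fixes up every merge edge (an edge into a block with more than one
// predecessor), inserting the copies, loads, and stores needed to turn each
// predecessor's end-of-block register/stack state into the successor's
// expected start state.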
2208 func (s *regAllocState) shuffle(stacklive [][]ID) {
2209 var e edgeState
2210 e.s = s
2211 e.cache = map[ID][]*Value{}
2212 e.contents = map[Location]contentRecord{}
2213 if s.f.pass.debug > regDebug {
2214 fmt.Printf("shuffle %s\n", s.f.Name)
2215 fmt.Println(s.f.String())
2216 }
2217
2218 for _, b := range s.visitOrder {
2219 if len(b.Preds) <= 1 {
2220 continue
2221 }
2222 e.b = b
2223 for i, edge := range b.Preds {
2224 p := edge.b
2225 e.p = p
2226 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2227 e.process()
2228 }
2229 }
2230
2231 if s.f.pass.debug > regDebug {
2232 fmt.Printf("post shuffle %s\n", s.f.Name)
2233 fmt.Println(s.f.String())
2234 }
2235 }
2236
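// edgeState holds the state used while shuffling values along one merge edge:
// where each value currently lives (cache/contents) and the list of
// destinations still to be satisfied.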
2237 type edgeState struct {
2238 s *regAllocState
2239 p, b *Block
2240
2241
2242 cache map[ID][]*Value
2243 cachedVals []ID
2244
2245
2246 contents map[Location]contentRecord
2247
2248
2249 destinations []dstRecord
2250 extra []dstRecord
2251
2252 usedRegs regMask
2253 uniqueRegs regMask
2254 finalRegs regMask
2255 rematerializeableRegs regMask
2256 }
2257
2258 type contentRecord struct {
2259 vid ID
2260 c *Value
2261 final bool
2262 pos src.XPos
2263 }
2264
2265 type dstRecord struct {
2266 loc Location
2267 vid ID
2268 splice **Value
2269 pos src.XPos
2270 }
2271
2272
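// setup initializes the edge state for shuffling values along predecessor edge
// idx: srcReg and stacklive describe what is available at the end of the
// predecessor, and dstReg describes what the successor expects at its start.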
2273 func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
2274 if e.s.f.pass.debug > regDebug {
2275 fmt.Printf("edge %s->%s\n", e.p, e.b)
2276 }
2277
2278
2279 clear(e.cache)
2280 e.cachedVals = e.cachedVals[:0]
2281 clear(e.contents)
2282 e.usedRegs = 0
2283 e.uniqueRegs = 0
2284 e.finalRegs = 0
2285 e.rematerializeableRegs = 0
2286
2287
2288 for _, x := range srcReg {
2289 e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
2290 }
2291
2292 for _, spillID := range stacklive {
2293 v := e.s.orig[spillID]
2294 spill := e.s.values[v.ID].spill
2295 if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
2296
2297
2298
2299
2300
2301
2302
2303
2304 continue
2305 }
2306 e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
2307 }
2308
2309
2310 dsts := e.destinations[:0]
2311 for _, x := range dstReg {
2312 dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
2313 }
2314
2315 for _, v := range e.b.Values {
2316 if v.Op != OpPhi {
2317 break
2318 }
2319 loc := e.s.f.getHome(v.ID)
2320 if loc == nil {
2321 continue
2322 }
2323 dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
2324 }
2325 e.destinations = dsts
2326
2327 if e.s.f.pass.debug > regDebug {
2328 for _, vid := range e.cachedVals {
2329 a := e.cache[vid]
2330 for _, c := range a {
2331 fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
2332 }
2333 }
2334 for _, d := range e.destinations {
2335 fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
2336 }
2337 }
2338 }
2339
2340
2341 func (e *edgeState) process() {
2342 dsts := e.destinations
2343
2344
2345 for len(dsts) > 0 {
2346 i := 0
2347 for _, d := range dsts {
2348 if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
2349
2350 dsts[i] = d
2351 i++
2352 }
2353 }
2354 if i < len(dsts) {
2355
2356 dsts = dsts[:i]
2357
2358
2359 dsts = append(dsts, e.extra...)
2360 e.extra = e.extra[:0]
2361 continue
2362 }
2363
2364 // We made no progress, so the remaining destinations form one or
2365 // more cycles: every location we still need to write currently
2366 // holds a value that some other pending destination needs (for
2367 // example, the contents of two registers must be swapped).
2368 //
2369 // Break a cycle at its first destination: copy the value currently
2370 // occupying that location into a temporary register (loading it if
2371 // it lives on the stack), which frees the location so the remaining
2372 // moves can make progress on the next pass.
2386 d := dsts[0]
2387 loc := d.loc
2388 vid := e.contents[loc].vid
2389 c := e.contents[loc].c
2390 r := e.findRegFor(c.Type)
2391 if e.s.f.pass.debug > regDebug {
2392 fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
2393 }
2394 e.erase(r)
2395 pos := d.pos.WithNotStmt()
2396 if _, isReg := loc.(*Register); isReg {
2397 c = e.p.NewValue1(pos, OpCopy, c.Type, c)
2398 } else {
2399 c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2400 }
2401 e.set(r, vid, c, false, pos)
2402 if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
2403 e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
2404 }
2405 }
2406 }
2407
2408
2409
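// processDest generates code on the current edge to put value vid into
// location loc. It reports whether it made progress; false means the move must
// wait, because it would overwrite the only remaining copy of a value that
// some other destination still needs.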
2410 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
2411 pos = pos.WithNotStmt()
2412 occupant := e.contents[loc]
2413 if occupant.vid == vid {
2414
2415 e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
2416 if splice != nil {
2417 (*splice).Uses--
2418 *splice = occupant.c
2419 occupant.c.Uses++
2420 }
2421
2422
2423
2424 if _, ok := e.s.copies[occupant.c]; ok {
2425
2426 e.s.copies[occupant.c] = true
2427 }
2428 return true
2429 }
2430
2431
2432 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
2433
2434
2435 return false
2436 }
2437
2438
2439 v := e.s.orig[vid]
2440 var c *Value
2441 var src Location
2442 if e.s.f.pass.debug > regDebug {
2443 fmt.Printf("moving v%d to %s\n", vid, loc)
2444 fmt.Printf("sources of v%d:", vid)
2445 }
2446 if opcodeTable[v.Op].fixedReg {
2447 c = v
2448 src = e.s.f.getHome(v.ID)
2449 } else {
2450 for _, w := range e.cache[vid] {
2451 h := e.s.f.getHome(w.ID)
2452 if e.s.f.pass.debug > regDebug {
2453 fmt.Printf(" %s:%s", h, w)
2454 }
2455 _, isreg := h.(*Register)
2456 if src == nil || isreg {
2457 c = w
2458 src = h
2459 }
2460 }
2461 }
2462 if e.s.f.pass.debug > regDebug {
2463 if src != nil {
2464 fmt.Printf(" [use %s]\n", src)
2465 } else {
2466 fmt.Printf(" [no source]\n")
2467 }
2468 }
2469 _, dstReg := loc.(*Register)
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481 e.erase(loc)
2482 var x *Value
2483 if c == nil || e.s.values[vid].rematerializeable {
2484 if !e.s.values[vid].rematerializeable {
2485 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
2486 }
2487 if dstReg {
2488 x = v.copyInto(e.p)
2489 } else {
2490
2491
2492 r := e.findRegFor(v.Type)
2493 e.erase(r)
2494 x = v.copyIntoWithXPos(e.p, pos)
2495 e.set(r, vid, x, false, pos)
2496
2497
2498
2499 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
2500 }
2501 } else {
2502
2503 _, srcReg := src.(*Register)
2504 if srcReg {
2505 if dstReg {
2506 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2507 } else {
2508 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
2509 }
2510 } else {
2511 if dstReg {
2512 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2513 } else {
2514
2515 r := e.findRegFor(c.Type)
2516 e.erase(r)
2517 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2518 e.set(r, vid, t, false, pos)
2519 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
2520 }
2521 }
2522 }
2523 e.set(loc, vid, x, true, pos)
2524 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
2525 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
2526 }
2527 if splice != nil {
2528 (*splice).Uses--
2529 *splice = x
2530 x.Uses++
2531 }
2532 return true
2533 }
2534
2535
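// set records that location loc now holds c, a copy of value vid, updating the
// cache and the register-usage masks.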
2536 func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
2537 e.s.f.setHome(c, loc)
2538 e.contents[loc] = contentRecord{vid, c, final, pos}
2539 a := e.cache[vid]
2540 if len(a) == 0 {
2541 e.cachedVals = append(e.cachedVals, vid)
2542 }
2543 a = append(a, c)
2544 e.cache[vid] = a
2545 if r, ok := loc.(*Register); ok {
2546 if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
2547 e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
2548 }
2549 e.usedRegs |= regMask(1) << uint(r.num)
2550 if final {
2551 e.finalRegs |= regMask(1) << uint(r.num)
2552 }
2553 if len(a) == 1 {
2554 e.uniqueRegs |= regMask(1) << uint(r.num)
2555 }
2556 if len(a) == 2 {
2557 if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
2558 e.uniqueRegs &^= regMask(1) << uint(t.num)
2559 }
2560 }
2561 if e.s.values[vid].rematerializeable {
2562 e.rematerializeableRegs |= regMask(1) << uint(r.num)
2563 }
2564 }
2565 if e.s.f.pass.debug > regDebug {
2566 fmt.Printf("%s\n", c.LongString())
2567 fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
2568 }
2569 }
2570
2571
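// erase forgets whatever value currently occupies loc; if that occupant still
// needs to end up in loc, it is re-queued as an extra destination.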
2572 func (e *edgeState) erase(loc Location) {
2573 cr := e.contents[loc]
2574 if cr.c == nil {
2575 return
2576 }
2577 vid := cr.vid
2578
2579 if cr.final {
2580
2581
2582
2583 e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
2584 }
2585
2586
2587 a := e.cache[vid]
2588 for i, c := range a {
2589 if e.s.f.getHome(c.ID) == loc {
2590 if e.s.f.pass.debug > regDebug {
2591 fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
2592 }
2593 a[i], a = a[len(a)-1], a[:len(a)-1]
2594 break
2595 }
2596 }
2597 e.cache[vid] = a
2598
2599
2600 if r, ok := loc.(*Register); ok {
2601 e.usedRegs &^= regMask(1) << uint(r.num)
2602 if cr.final {
2603 e.finalRegs &^= regMask(1) << uint(r.num)
2604 }
2605 e.rematerializeableRegs &^= regMask(1) << uint(r.num)
2606 }
2607 if len(a) == 1 {
2608 if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
2609 e.uniqueRegs |= regMask(1) << uint(r.num)
2610 }
2611 }
2612 }
2613
2614
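// findRegFor finds a register that can hold a temporary copy of a value of
// type typ, spilling some register's current contents to a fresh stack slot if
// no better candidate is free.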
2615 func (e *edgeState) findRegFor(typ *types.Type) Location {
2616
2617 types := &e.s.f.Config.Types
2618 m := e.s.compatRegs(typ)
2619
2620
2621
2622
2623
2624
2625 x := m &^ e.usedRegs
2626 if x != 0 {
2627 return &e.s.registers[pickReg(x)]
2628 }
2629 x = m &^ e.uniqueRegs &^ e.finalRegs
2630 if x != 0 {
2631 return &e.s.registers[pickReg(x)]
2632 }
2633 x = m &^ e.uniqueRegs
2634 if x != 0 {
2635 return &e.s.registers[pickReg(x)]
2636 }
2637 x = m & e.rematerializeableRegs
2638 if x != 0 {
2639 return &e.s.registers[pickReg(x)]
2640 }
2641
2642
2643
2644 for _, vid := range e.cachedVals {
2645 a := e.cache[vid]
2646 for _, c := range a {
2647 if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
2648 if !c.rematerializeable() {
2649 x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
2650
2651
2652
2653 t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
2654
2655 e.set(t, vid, x, false, c.Pos)
2656 if e.s.f.pass.debug > regDebug {
2657 fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
2658 }
2659 }
2660
2661
2662
2663 return r
2664 }
2665 }
2666 }
2667
2668 fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
2669 for _, vid := range e.cachedVals {
2670 a := e.cache[vid]
2671 for _, c := range a {
2672 fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
2673 }
2674 }
2675 e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
2676 return nil
2677 }
2678
2679
2680
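// rematerializeable reports whether v can simply be recomputed wherever it is
// needed instead of being spilled and reloaded; this requires the op to be
// marked rematerializeable and all of its arguments to live in fixed registers
// (such as SP or SB).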
2681 func (v *Value) rematerializeable() bool {
2682 if !opcodeTable[v.Op].rematerializeable {
2683 return false
2684 }
2685 for _, a := range v.Args {
2686
2687
2688 if !opcodeTable[a.Op].fixedReg {
2689 return false
2690 }
2691 }
2692 return true
2693 }
2694
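// A liveInfo records that a value is live at the end of a block, how many
// instructions away its next use is, and the position of that use.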
2695 type liveInfo struct {
2696 ID ID
2697 dist int32
2698 pos src.XPos
2699 }
2700
2701
2702
2703
2704
2705
2706
2707
2708
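// computeLive fills s.live with, for each block, the values live at the end of
// that block together with the distance (in instructions) to their next use,
// and fills s.desired with the desired-register state at each block end.
// It iterates over the postorder until the liveness information reaches a
// fixed point.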
2709 func (s *regAllocState) computeLive() {
2710 f := s.f
2711 s.live = make([][]liveInfo, f.NumBlocks())
2712 s.desired = make([]desiredState, f.NumBlocks())
2713 var phis []*Value
2714
2715 live := f.newSparseMapPos(f.NumValues())
2716 defer f.retSparseMapPos(live)
2717 t := f.newSparseMapPos(f.NumValues())
2718 defer f.retSparseMapPos(t)
2719
2720
2721 var desired desiredState
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732 po := f.postorder()
2733 s.loopnest = f.loopnest()
2734 s.loopnest.calculateDepths()
2735 for {
2736 changed := false
2737
2738 for _, b := range po {
2739
2740
2741
2742 live.clear()
2743 for _, e := range s.live[b.ID] {
2744 live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
2745 }
2746
2747
2748 for _, c := range b.ControlValues() {
2749 if s.values[c.ID].needReg {
2750 live.set(c.ID, int32(len(b.Values)), b.Pos)
2751 }
2752 }
2753
2754
2755
2756 phis = phis[:0]
2757 for i := len(b.Values) - 1; i >= 0; i-- {
2758 v := b.Values[i]
2759 live.remove(v.ID)
2760 if v.Op == OpPhi {
2761
2762 phis = append(phis, v)
2763 continue
2764 }
2765 if opcodeTable[v.Op].call {
2766 c := live.contents()
2767 for i := range c {
2768 c[i].val += unlikelyDistance
2769 }
2770 }
2771 for _, a := range v.Args {
2772 if s.values[a.ID].needReg {
2773 live.set(a.ID, int32(i), v.Pos)
2774 }
2775 }
2776 }
2777
2778 desired.copy(&s.desired[b.ID])
2779 for i := len(b.Values) - 1; i >= 0; i-- {
2780 v := b.Values[i]
2781 prefs := desired.remove(v.ID)
2782 if v.Op == OpPhi {
2783
2784
2785
2786 continue
2787 }
2788 regspec := s.regspec(v)
2789
2790 desired.clobber(regspec.clobbers)
2791
2792 for _, j := range regspec.inputs {
2793 if countRegs(j.regs) != 1 {
2794 continue
2795 }
2796 desired.clobber(j.regs)
2797 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
2798 }
2799
2800 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
2801
2802
2803
2804
2805
2806 if opcodeTable[v.Op].commutative {
2807 desired.addList(v.Args[1].ID, prefs)
2808 }
2809 desired.addList(v.Args[0].ID, prefs)
2810 }
2811 }
2812
2813
2814
2815 for i, e := range b.Preds {
2816 p := e.b
2817
2818
2819
2820 delta := int32(normalDistance)
2821 if len(p.Succs) == 2 {
2822 if p.Succs[0].b == b && p.Likely == BranchLikely ||
2823 p.Succs[1].b == b && p.Likely == BranchUnlikely {
2824 delta = likelyDistance
2825 }
2826 if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
2827 p.Succs[1].b == b && p.Likely == BranchLikely {
2828 delta = unlikelyDistance
2829 }
2830 }
2831
2832
2833 s.desired[p.ID].merge(&desired)
2834
2835
2836 t.clear()
2837 for _, e := range s.live[p.ID] {
2838 t.set(e.ID, e.dist, e.pos)
2839 }
2840 update := false
2841
2842
2843 for _, e := range live.contents() {
2844 d := e.val + delta
2845 if !t.contains(e.key) || d < t.get(e.key) {
2846 update = true
2847 t.set(e.key, d, e.pos)
2848 }
2849 }
2850
2851
2852
2853 for _, v := range phis {
2854 id := v.Args[i].ID
2855 if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
2856 update = true
2857 t.set(id, delta, v.Pos)
2858 }
2859 }
2860
2861 if !update {
2862 continue
2863 }
2864
2865 l := s.live[p.ID][:0]
2866 if cap(l) < t.size() {
2867 l = make([]liveInfo, 0, t.size())
2868 }
2869 for _, e := range t.contents() {
2870 l = append(l, liveInfo{e.key, e.val, e.pos})
2871 }
2872 s.live[p.ID] = l
2873 changed = true
2874 }
2875 }
2876
2877 if !changed {
2878 break
2879 }
2880 }
2881 if f.pass.debug > regDebug {
2882 fmt.Println("live values at end of each block")
2883 for _, b := range f.Blocks {
2884 fmt.Printf(" %s:", b)
2885 for _, x := range s.live[b.ID] {
2886 fmt.Printf(" v%d(%d)", x.ID, x.dist)
2887 for _, e := range s.desired[b.ID].entries {
2888 if e.ID != x.ID {
2889 continue
2890 }
2891 fmt.Printf("[")
2892 first := true
2893 for _, r := range e.regs {
2894 if r == noRegister {
2895 continue
2896 }
2897 if !first {
2898 fmt.Printf(",")
2899 }
2900 fmt.Print(&s.registers[r])
2901 first = false
2902 }
2903 fmt.Printf("]")
2904 }
2905 }
2906 if avoid := s.desired[b.ID].avoid; avoid != 0 {
2907 fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
2908 }
2909 fmt.Println()
2910 }
2911 }
2912 }
2913
2914
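// A desiredState tracks, for selected values, the registers we would like them
// to end up in, together with a mask of registers that other values should avoid.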
2915 type desiredState struct {
2916
2917
2918 entries []desiredStateEntry
2919
2920 // avoid is the union of registers desired by the tracked values.
2921 // When allocating other values, the allocator steers away from
2922 // these registers when it conveniently can.
2923 avoid regMask
2924 }
2925 type desiredStateEntry struct {
2926
2927 ID ID
2928 // Registers this value would like to be in, in priority order.
2929 // Unused slots are filled with noRegister.
2933 regs [4]register
2934 }
2935
2936 func (d *desiredState) clear() {
2937 d.entries = d.entries[:0]
2938 d.avoid = 0
2939 }
2940
2941
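// get returns the desired registers for value vid, in priority order;
// unused slots hold noRegister.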
2942 func (d *desiredState) get(vid ID) [4]register {
2943 for _, e := range d.entries {
2944 if e.ID == vid {
2945 return e.regs
2946 }
2947 }
2948 return [4]register{noRegister, noRegister, noRegister, noRegister}
2949 }
2950
2951
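// add records that we would like value vid to be in register r, moving r to
// the front of vid's preference list.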
2952 func (d *desiredState) add(vid ID, r register) {
2953 d.avoid |= regMask(1) << r
2954 for i := range d.entries {
2955 e := &d.entries[i]
2956 if e.ID != vid {
2957 continue
2958 }
2959 if e.regs[0] == r {
2960
2961 return
2962 }
2963 for j := 1; j < len(e.regs); j++ {
2964 if e.regs[j] == r {
2965
2966 copy(e.regs[1:], e.regs[:j])
2967 e.regs[0] = r
2968 return
2969 }
2970 }
2971 copy(e.regs[1:], e.regs[:])
2972 e.regs[0] = r
2973 return
2974 }
2975 d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
2976 }
2977
2978 func (d *desiredState) addList(vid ID, regs [4]register) {
2979
2980 for i := len(regs) - 1; i >= 0; i-- {
2981 r := regs[i]
2982 if r != noRegister {
2983 d.add(vid, r)
2984 }
2985 }
2986 }
2987
2988
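// clobber removes all registers in the mask m from every entry's preference
// list and from the avoid set, dropping entries left with no registers.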
2989 func (d *desiredState) clobber(m regMask) {
2990 for i := 0; i < len(d.entries); {
2991 e := &d.entries[i]
2992 j := 0
2993 for _, r := range e.regs {
2994 if r != noRegister && m>>r&1 == 0 {
2995 e.regs[j] = r
2996 j++
2997 }
2998 }
2999 if j == 0 {
3000
3001 d.entries[i] = d.entries[len(d.entries)-1]
3002 d.entries = d.entries[:len(d.entries)-1]
3003 continue
3004 }
3005 for ; j < len(e.regs); j++ {
3006 e.regs[j] = noRegister
3007 }
3008 i++
3009 }
3010 d.avoid &^= m
3011 }
3012
3013
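// copy replaces d's state with a copy of x.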
3014 func (d *desiredState) copy(x *desiredState) {
3015 d.entries = append(d.entries[:0], x.entries...)
3016 d.avoid = x.avoid
3017 }
3018
3019
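// remove deletes the entry for vid and returns the registers it desired.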
3020 func (d *desiredState) remove(vid ID) [4]register {
3021 for i := range d.entries {
3022 if d.entries[i].ID == vid {
3023 regs := d.entries[i].regs
3024 d.entries[i] = d.entries[len(d.entries)-1]
3025 d.entries = d.entries[:len(d.entries)-1]
3026 return regs
3027 }
3028 }
3029 return [4]register{noRegister, noRegister, noRegister, noRegister}
3030 }
3031
3032
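// merge merges the desired state x into d.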
3033 func (d *desiredState) merge(x *desiredState) {
3034 d.avoid |= x.avoid
3035
3036
3037 for _, e := range x.entries {
3038 d.addList(e.ID, e.regs)
3039 }
3040 }
3041