1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "fmt"
123 "internal/buildcfg"
124 "math"
125 "math/bits"
126 "unsafe"
127 )
128
// Debug-output thresholds, compared against s.f.pass.debug: a category
// of diagnostics is printed when the pass debug level exceeds the
// corresponding constant (e.g. `s.f.pass.debug > regDebug`).
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
135
136
137
// Relative costs assigned to control-flow edges by branch likelihood,
// presumably used as per-edge weights in the use-distance/liveness
// computation (computeLive is not visible in this chunk — confirm there).
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
143
144
145
// regalloc allocates registers for f by driving the three phases of a
// regAllocState: initialization, the main allocation pass, and cleanup.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
	s.close()
}
152
// register is the index of a machine register, used as an index into
// regAllocState.registers.
type register uint8

// noRegister is a sentinel register index meaning "no register assigned".
const noRegister register = 255

// noRegisters is a ready-made array of 32 noRegister entries, used to
// initialize per-instruction output-register scratch state.
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}

// regMask is a bitmask of registers; bit i corresponds to register i.
type regMask uint64

// String renders the set bits of m as space-separated generic register
// names, e.g. "r0 r3". An empty mask yields "".
func (m regMask) String() string {
	out := ""
	for r := register(0); m != 0; r++ {
		bit := regMask(1) << r
		if m&bit == 0 {
			continue
		}
		m &^= bit
		if out == "" {
			out = fmt.Sprintf("r%d", r)
		} else {
			out += fmt.Sprintf(" r%d", r)
		}
	}
	return out
}
183
184 func (s *regAllocState) RegMaskString(m regMask) string {
185 str := ""
186 for r := register(0); m != 0; r++ {
187 if m>>r&1 == 0 {
188 continue
189 }
190 m &^= regMask(1) << r
191 if str != "" {
192 str += " "
193 }
194 str += s.registers[r].String()
195 }
196 return str
197 }
198
199
// countRegs returns the number of registers in the mask r.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
203
204
// pickReg returns the lowest-numbered register in the mask r.
// It panics if r is empty.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	// The index of the lowest set bit is the chosen register.
	return register(bits.TrailingZeros64(uint64(r)))
}
212
// use records a single upcoming use of a value. Uses of a value form a
// linked list ordered by nondecreasing dist; addUse prepends records and
// enforces the ordering.
type use struct {
	dist int32    // distance (in instruction slots) from the start of the block to this use
	pos  src.XPos // source position of the use
	next *use     // next use of the same value; next.dist >= dist
}
223
224
// valState records the register-allocation state of a single value.
type valState struct {
	regs              regMask // set of registers currently holding a cached copy of this value
	uses              *use    // list of upcoming uses, ordered by nondecreasing dist
	spill             *Value  // spill of this value (OpStoreReg), or the value itself when it lives on the stack (phis, OpArg)
	restoreMin        int32   // sdom entry bound of blocks that requested the spill; see makeSpill
	restoreMax        int32   // sdom exit bound of blocks that requested the spill; see makeSpill
	needReg           bool    // value needs a register (see Value.needRegister)
	rematerializeable bool    // cached result of v.rematerializeable()
}
234
// regState records what a single register currently holds.
// A nil v means the register is free (see freeReg/assignReg).
type regState struct {
	v *Value // original (pre-regalloc) value held in this register
	c *Value // cached copy of v that actually lives in this register

}
240
// regAllocState holds all state for one register-allocation run over a
// single function.
type regAllocState struct {
	f *Func

	sdom        SparseTree // dominator tree, used for spill-placement intervals
	registers   []Register // machine registers, indexed by register number
	numRegs     register   // len(registers)
	SPReg       register   // stack pointer register number
	SBReg       register   // static base register number
	GReg        register   // g register number (meaningful only when f.Config.hasGReg)
	allocatable regMask    // registers the allocator is allowed to hand out

	// live[blockID] lists values live at the end of that block, with
	// use distances. Filled in by computeLive (not in this chunk).
	live [][]liveInfo

	// desired[blockID] is the desired register assignment state at the
	// end of that block.
	desired []desiredState

	// values[valueID] is the allocation state of each value.
	values []valState

	// IDs of the OpSP and OpSB values, recorded when they are processed.
	sp, sb ID

	// orig[valueID] maps any allocator-introduced copy/spill/restore
	// back to the original pre-regalloc value (see setOrig).
	orig []*Value

	// regs[registerNumber] is the current content of each register.
	regs []regState

	// nospill: registers holding values that must not be spilled right
	// now (typically inputs of the instruction being allocated).
	nospill regMask

	// used: registers currently holding some value.
	used regMask

	// usedSinceBlockStart: registers touched since the start of the
	// current block; used to trim startRegs entries (see allocReg).
	usedSinceBlockStart regMask

	// tmpused: registers reserved for the current instruction (temps,
	// clobbers).
	tmpused regMask

	// curBlock is the block currently being processed.
	curBlock *Block

	// freeUseRecords is a free list of use records, recycled by
	// addUse/advanceUses.
	freeUseRecords *use

	// endRegs[blockID] records the register contents at the end of each
	// processed block.
	endRegs [][]endReg

	// startRegs[blockID] records the values each block expects to find
	// in registers on entry.
	startRegs [][]startReg

	// startRegsMask is the bitmask form of startRegs for the block
	// currently being processed.
	startRegsMask regMask

	// spillLive[blockID] lists IDs of spill values live at the end of
	// each block.
	spillLive [][]ID

	// copies tracks copies introduced by the allocator; entries are
	// inserted with value false here and presumably marked/cleaned up
	// later (the consuming code is not in this chunk).
	copies map[*Value]bool

	loopnest *loopnest

	// visitOrder is the block processing order (layoutRegallocOrder).
	visitOrder []*Block

	// blockOrder[blockID] is the index of that block in visitOrder.
	blockOrder []int32

	// doClobber: clobber dead registers at call sites (debugging aid,
	// enabled by base.Flag.ClobberDeadReg).
	doClobber bool

	// nextCall[i] is the index of the next call instruction at or after
	// instruction i in the current block (MaxInt32 if none); used to
	// decide when register contents are pointless to keep.
	nextCall []int32

	// curIdx is the index of the instruction currently being processed
	// within the current block.
	curIdx int
}
335
// endReg records that register r held value v (as cached copy c) at the
// end of a block.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register
	c *Value // cached copy of the value actually in the register
}

// startReg records that a block expects value v in register r on entry.
type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached copy of the value
	pos src.XPos // source position of the first use of v in the block
}
348
349
// freeReg frees register r, removing whatever it currently holds from
// the register/value bookkeeping. Fatal if r is already free.
func (s *regAllocState) freeReg(r register) {
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused: clear the register slot, drop r from the
	// value's register set, and clear the used bit.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}
364
365
366 func (s *regAllocState) freeRegs(m regMask) {
367 for m&s.used != 0 {
368 s.freeReg(pickReg(m & s.used))
369 }
370 }
371
372
373 func (s *regAllocState) clobberRegs(m regMask) {
374 m &= s.allocatable & s.f.Config.gpRegMask
375 for m != 0 {
376 r := pickReg(m)
377 m &^= 1 << r
378 x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
379 s.f.setHome(x, &s.registers[r])
380 }
381 }
382
383
384
// setOrig records that c is a new copy/spill/restore derived from v,
// growing s.orig as needed. Note the chained lookup on the last line:
// the original of c is the original of v, so copies of copies still map
// back to the root pre-regalloc value.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	if int(c.ID) >= cap(s.orig) {
		// Grow the backing array via the function cache.
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	s.orig[c.ID] = s.orig[v.ID]
}
400
401
402
// assignReg assigns register r to hold value v, cached as c, and records
// r as c's home location. The register must currently be free.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}

	// Update state.
	s.regs[r] = regState{v, c}
	s.values[v.ID].regs |= regMask(1) << r
	s.used |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])
}
417
418
419
420
421 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
422 if v.OnWasmStack {
423 return noRegister
424 }
425
426 mask &= s.allocatable
427 mask &^= s.nospill
428 if mask == 0 {
429 s.f.Fatalf("no register available for %s", v.LongString())
430 }
431
432
433 if mask&^s.used != 0 {
434 r := pickReg(mask &^ s.used)
435 s.usedSinceBlockStart |= regMask(1) << r
436 return r
437 }
438
439
440
441
442
443
444
445
446
447
448
449 var r register
450 maxuse := int32(-1)
451 for t := register(0); t < s.numRegs; t++ {
452 if mask>>t&1 == 0 {
453 continue
454 }
455 v := s.regs[t].v
456 if n := s.values[v.ID].uses.dist; n > maxuse {
457
458
459 r = t
460 maxuse = n
461 }
462 }
463 if maxuse == -1 {
464 s.f.Fatalf("couldn't find register to spill")
465 }
466
467 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
468
469
470
471 s.freeReg(r)
472 return r
473 }
474
475
476
477 v2 := s.regs[r].v
478 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
479 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
480 s.usedSinceBlockStart |= regMask(1) << r
481 r2 := pickReg(m)
482 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
483 s.copies[c] = false
484 if s.f.pass.debug > regDebug {
485 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
486 }
487 s.setOrig(c, v2)
488 s.assignReg(r2, v2, c)
489 }
490
491
492
493
494 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
495 if s.startRegsMask&(regMask(1)<<r) == 1 {
496 if s.f.pass.debug > regDebug {
497 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
498 }
499 s.startRegsMask &^= regMask(1) << r
500 }
501 }
502
503 s.freeReg(r)
504 s.usedSinceBlockStart |= regMask(1) << r
505 return r
506 }
507
508
509
// makeSpill returns a spill for v, to be used in block b. On first
// request it creates an unplaced OpStoreReg; the spill's final block is
// chosen later (outside this chunk) using the [restoreMin, restoreMax]
// dominator-tree interval, which this function widens on every request
// so the interval covers all blocks that need the spill.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Spill already exists; just widen its placement interval to
		// include b.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}

	// Make a spill for v. We don't know where we want to put it yet, so
	// leave it blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// Its argument is filled in later (also outside this chunk).
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
529
530
531
532
533
534
535
// allocValToReg makes v available in a register satisfying mask and
// returns the cached copy that lives there. If nospill is set, the
// chosen register is protected from spilling for the duration of the
// current instruction. pos is the position used for any copy/load
// values this creates.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		// Wasm: rematerialize the value directly onto the wasm operand
		// stack instead of occupying a register.
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register
	// Wasm + nospill (i.e. an input to the current instruction): the
	// value is kept on the wasm operand stack, no register is allocated.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register.
		r = s.allocReg(mask, v)
	}

	// Produce a cached copy of v in three ways, in order of preference:
	var c *Value
	if vi.regs != 0 {
		// Copy from another register that already holds v.
		r2 := pickReg(vi.regs)
		if s.regs[r2].v != v {
			panic("bad register state")
		}
		s.usedSinceBlockStart |= regMask(1) << r2
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
608
609
610 func isLeaf(f *Func) bool {
611 for _, b := range f.Blocks {
612 for _, v := range b.Values {
613 if v.Op.IsCall() && !v.Op.IsTailCall() {
614
615 return false
616 }
617 }
618 }
619 return true
620 }
621
622
623 func (v *Value) needRegister() bool {
624 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
625 }
626
// init prepares s to allocate registers for f: it discovers the special
// registers (SP, SB, g), computes the allocatable register mask, picks
// the block visit order, sizes the per-value and per-register state,
// and runs the liveness computation.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// Register numbers must fit in a register (uint8, noRegister
	// reserved) and in a regMask (64 bits).
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, and g registers by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		}
	}
	// Make sure the special registers were found. (Switching on
	// noRegister fires the case whose register is still unset.)
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		// Reserve the frame pointer register.
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register,
			// so it is still live on entry/exit; don't allocate it.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// When dynamically linking, some architectures reserve a
		// register for accessing global data.
		switch s.f.Config.arch {
		case "386":
			// nothing to reserve here
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing to do
		case "loong64":
			// nothing to do
		case "ppc64le":
			// nothing to do
		case "riscv64":
			// nothing to do
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Decide the order in which blocks are visited, and the inverse
	// mapping from block ID to visit index.
	s.visitOrder = layoutRegallocOrder(f)
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	// Size per-register and per-value state, reusing cached storage
	// where possible.
	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: values without a register (memory, flags, etc.)
			// keep needReg == false and a nil orig entry.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// Wasm: mark values that can live on the wasm operand stack instead
	// of registers — single-use, non-generic values consumed in the same
	// block, scanning backwards so chains of such values are found.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block: seed with single-use, non-generic control values.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walk the block backwards from the uses toward the defs.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Value must be in a register; any pending stack
					// candidates are invalidated.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// An argument can live on the stack if this is its
					// only use, it is defined in the same block, it is
					// not memory, and its op is a real machine op.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// Enable dead-register clobbering when requested; very large
	// functions are skipped — presumably to bound compile time (TODO
	// confirm the rationale for the 10000 threshold).
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		// TODO: perhaps this should be controlled by a hash-debug knob.
		s.doClobber = true
	}
}
788
// close releases cached storage acquired by init.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
792
793
794
795 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
796 r := s.freeUseRecords
797 if r != nil {
798 s.freeUseRecords = r.next
799 } else {
800 r = &use{}
801 }
802 r.dist = dist
803 r.pos = pos
804 r.next = s.values[id].uses
805 s.values[id].uses = r
806 if r.next != nil && dist > r.next.dist {
807 s.f.Fatalf("uses added in wrong order")
808 }
809 }
810
811
812
// advanceUses pops the current use record off each of v's arguments'
// use lists (recycling the records), freeing the registers of any
// argument that is now dead or not used again until after the next
// call. v itself is handled by dropIfUnused.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil || (a.Op != OpSP && a.Op != OpSB && r.next.dist > s.nextCall[s.curIdx]) {
			// Value is dead, or its next use is after a call (a call
			// would force it out of registers anyway). SP/SB are exempt.
			s.freeRegs(ai.regs)
		}
		// Recycle the use record.
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
830
831
832
// dropIfUnused frees all registers holding v if v has no remaining
// uses, or if its next use comes after the next call. OpSP/OpSB values
// are never dropped this way.
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	if r == nil || (v.Op != OpSP && v.Op != OpSB && r.dist > s.nextCall[s.curIdx]) {
		s.freeRegs(vi.regs)
	}
}
843
844
845
846
847 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
848 u := s.values[v.ID].uses
849 if u == nil {
850 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
851 }
852 d := u.dist
853 for u != nil && u.dist == d {
854 u = u.next
855 }
856 return u != nil && u.dist > d
857 }
858
859
860 func (s *regAllocState) setState(regs []endReg) {
861 s.freeRegs(s.used)
862 for _, x := range regs {
863 s.assignReg(x.r, x.v, x.c)
864 }
865 }
866
867
868 func (s *regAllocState) compatRegs(t *types.Type) regMask {
869 var m regMask
870 if t.IsTuple() || t.IsFlags() {
871 return 0
872 }
873 if t.IsFloat() || t == types.TypeInt128 {
874 if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
875 m = s.f.Config.fp32RegMask
876 } else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
877 m = s.f.Config.fp64RegMask
878 } else {
879 m = s.f.Config.fpRegMask
880 }
881 } else {
882 m = s.f.Config.gpRegMask
883 }
884 return m & s.allocatable
885 }
886
887
// regspec returns the register constraints (inputs/outputs/clobbers)
// for v. Most ops use the static opcode table; the special cases are
// OpConvert, register-passed arguments, calls carrying an AuxCall with
// register info, and OpMakeResult with register results.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert is a generic op with no static register set; it can
		// use any allocatable general-purpose register for both its
		// input and its output.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		// Output is pinned to the integer parameter register selected
		// by AuxInt.
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		// Output is pinned to the float parameter register selected by
		// AuxInt.
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		// Calls with ABI register info derive their constraints from
		// the AuxCall.
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		// Result registers come from this function's own ABI info.
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
915
916 func (s *regAllocState) isGReg(r register) bool {
917 return s.f.Config.hasGReg && s.GReg == r
918 }
919
920
// tmpVal is a placeholder Value passed to allocReg when reserving a
// temporary register (see the needIntTemp handling in regalloc); it
// stands in for "no real value".
var tmpVal Value
922
923 func (s *regAllocState) regalloc(f *Func) {
924 regValLiveSet := f.newSparseSet(f.NumValues())
925 defer f.retSparseSet(regValLiveSet)
926 var oldSched []*Value
927 var phis []*Value
928 var phiRegs []register
929 var args []*Value
930
931
932 var desired desiredState
933
934
935 type dentry struct {
936 out [4]register
937 in [3][4]register
938 }
939 var dinfo []dentry
940
941 if f.Entry != f.Blocks[0] {
942 f.Fatalf("entry block must be first")
943 }
944
945 for _, b := range s.visitOrder {
946 if s.f.pass.debug > regDebug {
947 fmt.Printf("Begin processing block %v\n", b)
948 }
949 s.curBlock = b
950 s.startRegsMask = 0
951 s.usedSinceBlockStart = 0
952
953
954
955 regValLiveSet.clear()
956 for _, e := range s.live[b.ID] {
957 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
958 regValLiveSet.add(e.ID)
959 }
960 for _, v := range b.ControlValues() {
961 if s.values[v.ID].needReg {
962 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
963 regValLiveSet.add(v.ID)
964 }
965 }
966 if len(s.nextCall) < len(b.Values) {
967 s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
968 }
969 var nextCall int32 = math.MaxInt32
970 for i := len(b.Values) - 1; i >= 0; i-- {
971 v := b.Values[i]
972 regValLiveSet.remove(v.ID)
973 if v.Op == OpPhi {
974
975
976
977 s.nextCall[i] = nextCall
978 continue
979 }
980 if opcodeTable[v.Op].call {
981
982 regValLiveSet.clear()
983 if s.sp != 0 && s.values[s.sp].uses != nil {
984 regValLiveSet.add(s.sp)
985 }
986 if s.sb != 0 && s.values[s.sb].uses != nil {
987 regValLiveSet.add(s.sb)
988 }
989 nextCall = int32(i)
990 }
991 for _, a := range v.Args {
992 if !s.values[a.ID].needReg {
993 continue
994 }
995 s.addUse(a.ID, int32(i), v.Pos)
996 regValLiveSet.add(a.ID)
997 }
998 s.nextCall[i] = nextCall
999 }
1000 if s.f.pass.debug > regDebug {
1001 fmt.Printf("use distances for %s\n", b)
1002 for i := range s.values {
1003 vi := &s.values[i]
1004 u := vi.uses
1005 if u == nil {
1006 continue
1007 }
1008 fmt.Printf(" v%d:", i)
1009 for u != nil {
1010 fmt.Printf(" %d", u.dist)
1011 u = u.next
1012 }
1013 fmt.Println()
1014 }
1015 }
1016
1017
1018
1019 nphi := 0
1020 for _, v := range b.Values {
1021 if v.Op != OpPhi {
1022 break
1023 }
1024 nphi++
1025 }
1026 phis = append(phis[:0], b.Values[:nphi]...)
1027 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1028 b.Values = b.Values[:0]
1029
1030
1031 if b == f.Entry {
1032
1033 if nphi > 0 {
1034 f.Fatalf("phis in entry block")
1035 }
1036 } else if len(b.Preds) == 1 {
1037
1038 s.setState(s.endRegs[b.Preds[0].b.ID])
1039 if nphi > 0 {
1040 f.Fatalf("phis in single-predecessor block")
1041 }
1042
1043
1044
1045 for r := register(0); r < s.numRegs; r++ {
1046 v := s.regs[r].v
1047 if v != nil && !regValLiveSet.contains(v.ID) {
1048 s.freeReg(r)
1049 }
1050 }
1051 } else {
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065 idx := -1
1066 for i, p := range b.Preds {
1067
1068
1069 pb := p.b
1070 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1071 continue
1072 }
1073 if idx == -1 {
1074 idx = i
1075 continue
1076 }
1077 pSel := b.Preds[idx].b
1078 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1079 idx = i
1080 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1092 idx = i
1093 }
1094 }
1095 }
1096 if idx < 0 {
1097 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1098 }
1099 p := b.Preds[idx].b
1100 s.setState(s.endRegs[p.ID])
1101
1102 if s.f.pass.debug > regDebug {
1103 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1104 for _, x := range s.endRegs[p.ID] {
1105 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1106 }
1107 }
1108
1109
1110
1111
1112
1113 phiRegs = phiRegs[:0]
1114 var phiUsed regMask
1115
1116 for _, v := range phis {
1117 if !s.values[v.ID].needReg {
1118 phiRegs = append(phiRegs, noRegister)
1119 continue
1120 }
1121 a := v.Args[idx]
1122
1123
1124 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1125 if m != 0 {
1126 r := pickReg(m)
1127 phiUsed |= regMask(1) << r
1128 phiRegs = append(phiRegs, r)
1129 } else {
1130 phiRegs = append(phiRegs, noRegister)
1131 }
1132 }
1133
1134
1135 for i, v := range phis {
1136 if !s.values[v.ID].needReg {
1137 continue
1138 }
1139 a := v.Args[idx]
1140 r := phiRegs[i]
1141 if r == noRegister {
1142 continue
1143 }
1144 if regValLiveSet.contains(a.ID) {
1145
1146
1147
1148
1149
1150
1151
1152
1153 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1154 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1155 r2 := pickReg(m)
1156 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1157 s.copies[c] = false
1158 if s.f.pass.debug > regDebug {
1159 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1160 }
1161 s.setOrig(c, a)
1162 s.assignReg(r2, a, c)
1163 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1164 }
1165 }
1166 s.freeReg(r)
1167 }
1168
1169
1170 b.Values = append(b.Values, phis...)
1171
1172
1173
1174 for i, v := range phis {
1175 if !s.values[v.ID].needReg {
1176 continue
1177 }
1178 if phiRegs[i] != noRegister {
1179 continue
1180 }
1181 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1182
1183
1184 for i, pe := range b.Preds {
1185 if i == idx {
1186 continue
1187 }
1188 ri := noRegister
1189 for _, er := range s.endRegs[pe.b.ID] {
1190 if er.v == s.orig[v.Args[i].ID] {
1191 ri = er.r
1192 break
1193 }
1194 }
1195 if ri != noRegister && m>>ri&1 != 0 {
1196 m = regMask(1) << ri
1197 break
1198 }
1199 }
1200 if m != 0 {
1201 r := pickReg(m)
1202 phiRegs[i] = r
1203 phiUsed |= regMask(1) << r
1204 }
1205 }
1206
1207
1208 for i, v := range phis {
1209 if !s.values[v.ID].needReg {
1210 continue
1211 }
1212 r := phiRegs[i]
1213 if r == noRegister {
1214
1215
1216 s.values[v.ID].spill = v
1217 continue
1218 }
1219
1220 s.assignReg(r, v, v)
1221 }
1222
1223
1224 for r := register(0); r < s.numRegs; r++ {
1225 if phiUsed>>r&1 != 0 {
1226 continue
1227 }
1228 v := s.regs[r].v
1229 if v != nil && !regValLiveSet.contains(v.ID) {
1230 s.freeReg(r)
1231 }
1232 }
1233
1234
1235
1236
1237
1238 regList := make([]startReg, 0, 32)
1239 for r := register(0); r < s.numRegs; r++ {
1240 v := s.regs[r].v
1241 if v == nil {
1242 continue
1243 }
1244 if phiUsed>>r&1 != 0 {
1245
1246
1247 continue
1248 }
1249 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1250 s.startRegsMask |= regMask(1) << r
1251 }
1252 s.startRegs[b.ID] = make([]startReg, len(regList))
1253 copy(s.startRegs[b.ID], regList)
1254
1255 if s.f.pass.debug > regDebug {
1256 fmt.Printf("after phis\n")
1257 for _, x := range s.startRegs[b.ID] {
1258 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1259 }
1260 }
1261 }
1262
1263
1264 for i, v := range phis {
1265 s.curIdx = i
1266 s.dropIfUnused(v)
1267 }
1268
1269
1270 if l := len(oldSched); cap(dinfo) < l {
1271 dinfo = make([]dentry, l)
1272 } else {
1273 dinfo = dinfo[:l]
1274 for i := range dinfo {
1275 dinfo[i] = dentry{}
1276 }
1277 }
1278
1279
1280 desired.copy(&s.desired[b.ID])
1281
1282
1283
1284
1285
1286
1287 for _, e := range b.Succs {
1288 succ := e.b
1289
1290 for _, x := range s.startRegs[succ.ID] {
1291 desired.add(x.v.ID, x.r)
1292 }
1293
1294 pidx := e.i
1295 for _, v := range succ.Values {
1296 if v.Op != OpPhi {
1297 break
1298 }
1299 if !s.values[v.ID].needReg {
1300 continue
1301 }
1302 rp, ok := s.f.getHome(v.ID).(*Register)
1303 if !ok {
1304
1305
1306
1307
1308 for _, a := range v.Args {
1309 rp, ok = s.f.getHome(a.ID).(*Register)
1310 if ok {
1311 break
1312 }
1313 }
1314 if !ok {
1315 continue
1316 }
1317 }
1318 desired.add(v.Args[pidx].ID, register(rp.num))
1319 }
1320 }
1321
1322
1323 for i := len(oldSched) - 1; i >= 0; i-- {
1324 v := oldSched[i]
1325 prefs := desired.remove(v.ID)
1326 regspec := s.regspec(v)
1327 desired.clobber(regspec.clobbers)
1328 for _, j := range regspec.inputs {
1329 if countRegs(j.regs) != 1 {
1330 continue
1331 }
1332 desired.clobber(j.regs)
1333 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1334 }
1335 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1336 if opcodeTable[v.Op].commutative {
1337 desired.addList(v.Args[1].ID, prefs)
1338 }
1339 desired.addList(v.Args[0].ID, prefs)
1340 }
1341
1342 dinfo[i].out = prefs
1343 for j, a := range v.Args {
1344 if j >= len(dinfo[i].in) {
1345 break
1346 }
1347 dinfo[i].in[j] = desired.get(a.ID)
1348 }
1349 }
1350
1351
1352 for idx, v := range oldSched {
1353 s.curIdx = nphi + idx
1354 tmpReg := noRegister
1355 if s.f.pass.debug > regDebug {
1356 fmt.Printf(" processing %s\n", v.LongString())
1357 }
1358 regspec := s.regspec(v)
1359 if v.Op == OpPhi {
1360 f.Fatalf("phi %s not at start of block", v)
1361 }
1362 if v.Op == OpSP {
1363 s.assignReg(s.SPReg, v, v)
1364 b.Values = append(b.Values, v)
1365 s.advanceUses(v)
1366 s.sp = v.ID
1367 continue
1368 }
1369 if v.Op == OpSB {
1370 s.assignReg(s.SBReg, v, v)
1371 b.Values = append(b.Values, v)
1372 s.advanceUses(v)
1373 s.sb = v.ID
1374 continue
1375 }
1376 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1377 if s.values[v.ID].needReg {
1378 if v.Op == OpSelectN {
1379 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1380 } else {
1381 var i = 0
1382 if v.Op == OpSelect1 {
1383 i = 1
1384 }
1385 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1386 }
1387 }
1388 b.Values = append(b.Values, v)
1389 s.advanceUses(v)
1390 continue
1391 }
1392 if v.Op == OpGetG && s.f.Config.hasGReg {
1393
1394 if s.regs[s.GReg].v != nil {
1395 s.freeReg(s.GReg)
1396 }
1397 s.assignReg(s.GReg, v, v)
1398 b.Values = append(b.Values, v)
1399 s.advanceUses(v)
1400 continue
1401 }
1402 if v.Op == OpArg {
1403
1404
1405
1406 s.values[v.ID].spill = v
1407 b.Values = append(b.Values, v)
1408 s.advanceUses(v)
1409 continue
1410 }
1411 if v.Op == OpKeepAlive {
1412
1413 s.advanceUses(v)
1414 a := v.Args[0]
1415 vi := &s.values[a.ID]
1416 if vi.regs == 0 && !vi.rematerializeable {
1417
1418
1419
1420 v.SetArg(0, s.makeSpill(a, b))
1421 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1422
1423
1424
1425 v.Op = OpVarLive
1426 v.SetArgs1(v.Args[1])
1427 v.Aux = a.Aux
1428 } else {
1429
1430
1431
1432 v.Op = OpCopy
1433 v.SetArgs1(v.Args[1])
1434 }
1435 b.Values = append(b.Values, v)
1436 continue
1437 }
1438 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1439
1440 if s.doClobber && v.Op.IsCall() {
1441 s.clobberRegs(regspec.clobbers)
1442 }
1443 s.freeRegs(regspec.clobbers)
1444 b.Values = append(b.Values, v)
1445 s.advanceUses(v)
1446 continue
1447 }
1448
1449 if s.values[v.ID].rematerializeable {
1450
1451
1452
1453 for _, a := range v.Args {
1454 a.Uses--
1455 }
1456 s.advanceUses(v)
1457 continue
1458 }
1459
1460 if s.f.pass.debug > regDebug {
1461 fmt.Printf("value %s\n", v.LongString())
1462 fmt.Printf(" out:")
1463 for _, r := range dinfo[idx].out {
1464 if r != noRegister {
1465 fmt.Printf(" %s", &s.registers[r])
1466 }
1467 }
1468 fmt.Println()
1469 for i := 0; i < len(v.Args) && i < 3; i++ {
1470 fmt.Printf(" in%d:", i)
1471 for _, r := range dinfo[idx].in[i] {
1472 if r != noRegister {
1473 fmt.Printf(" %s", &s.registers[r])
1474 }
1475 }
1476 fmt.Println()
1477 }
1478 }
1479
1480
1481
1482
1483 args = append(args[:0], make([]*Value, len(v.Args))...)
1484 for i, a := range v.Args {
1485 if !s.values[a.ID].needReg {
1486 args[i] = a
1487 }
1488 }
1489 for _, i := range regspec.inputs {
1490 mask := i.regs
1491 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1492 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1493 }
1494 }
1495
1496
1497
1498
1499
1500
1501 for {
1502 freed := false
1503 for _, i := range regspec.inputs {
1504 if args[i.idx] != nil {
1505 continue
1506 }
1507 mask := i.regs
1508 if countRegs(mask) == 1 && mask&^s.used != 0 {
1509 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1510
1511
1512
1513 oldregs := s.values[v.Args[i.idx].ID].regs
1514 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1515 s.freeRegs(oldregs &^ mask &^ s.nospill)
1516 freed = true
1517 }
1518 }
1519 }
1520 if !freed {
1521 break
1522 }
1523 }
1524
1525
1526 for _, i := range regspec.inputs {
1527 if args[i.idx] != nil {
1528 continue
1529 }
1530 mask := i.regs
1531 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1532
1533 mask &= s.allocatable
1534 mask &^= s.nospill
1535
1536 if i.idx < 3 {
1537 for _, r := range dinfo[idx].in[i.idx] {
1538 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1539
1540 mask = regMask(1) << r
1541 break
1542 }
1543 }
1544 }
1545
1546 if mask&^desired.avoid != 0 {
1547 mask &^= desired.avoid
1548 }
1549 }
1550 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1551 }
1552
1553
1554
1555
1556 if opcodeTable[v.Op].resultInArg0 {
1557 var m regMask
1558 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1559
1560 goto ok
1561 }
1562 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1563 args[0], args[1] = args[1], args[0]
1564 goto ok
1565 }
1566 if s.values[v.Args[0].ID].rematerializeable {
1567
1568 goto ok
1569 }
1570 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1571 args[0], args[1] = args[1], args[0]
1572 goto ok
1573 }
1574 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1575
1576 goto ok
1577 }
1578 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1579 args[0], args[1] = args[1], args[0]
1580 goto ok
1581 }
1582
1583
1584
1585
1586
1587 m = s.compatRegs(v.Args[0].Type) &^ s.used
1588 if m == 0 {
1589
1590
1591
1592
1593 goto ok
1594 }
1595
1596
1597 for _, r := range dinfo[idx].out {
1598 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1599 m = regMask(1) << r
1600 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1601
1602
1603 goto ok
1604 }
1605 }
1606
1607
1608 for _, r := range dinfo[idx].in[0] {
1609 if r != noRegister && m>>r&1 != 0 {
1610 m = regMask(1) << r
1611 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1612 s.copies[c] = false
1613
1614
1615 goto ok
1616 }
1617 }
1618 if opcodeTable[v.Op].commutative {
1619 for _, r := range dinfo[idx].in[1] {
1620 if r != noRegister && m>>r&1 != 0 {
1621 m = regMask(1) << r
1622 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1623 s.copies[c] = false
1624 args[0], args[1] = args[1], args[0]
1625 goto ok
1626 }
1627 }
1628 }
1629
1630
1631 if m&^desired.avoid != 0 {
1632 m &^= desired.avoid
1633 }
1634
1635 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1636 s.copies[c] = false
1637
1638
1639
1640
1641 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1642 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1643 r := register(rp.num)
1644 for _, r2 := range dinfo[idx].in[0] {
1645 if r == r2 {
1646 args[0] = c
1647 break
1648 }
1649 }
1650 }
1651 }
1652 }
1653
1654 ok:
1655
1656
1657
1658
1659
1660
1661 if opcodeTable[v.Op].needIntTemp {
1662 m := s.allocatable & s.f.Config.gpRegMask
1663 for _, out := range regspec.outputs {
1664 if countRegs(out.regs) == 1 {
1665 m &^= out.regs
1666 }
1667 }
1668 if m&^desired.avoid&^s.nospill != 0 {
1669 m &^= desired.avoid
1670 }
1671 tmpReg = s.allocReg(m, &tmpVal)
1672 s.nospill |= regMask(1) << tmpReg
1673 s.tmpused |= regMask(1) << tmpReg
1674 }
1675
1676
1677
1678
1679
1680 if !opcodeTable[v.Op].resultNotInArgs {
1681 s.tmpused = s.nospill
1682 s.nospill = 0
1683 s.advanceUses(v)
1684 }
1685
1686
1687 if s.doClobber && v.Op.IsCall() {
1688
1689
1690 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1691 }
1692 s.freeRegs(regspec.clobbers)
1693 s.tmpused |= regspec.clobbers
1694
1695
1696 {
1697 outRegs := noRegisters
1698 maxOutIdx := -1
1699 var used regMask
1700 if tmpReg != noRegister {
1701
1702
1703 used |= regMask(1) << tmpReg
1704 }
1705 for _, out := range regspec.outputs {
1706 if out.regs == 0 {
1707 continue
1708 }
1709 mask := out.regs & s.allocatable &^ used
1710 if mask == 0 {
1711 s.f.Fatalf("can't find any output register %s", v.LongString())
1712 }
1713 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1714 if !opcodeTable[v.Op].commutative {
1715
1716 r := register(s.f.getHome(args[0].ID).(*Register).num)
1717 if mask>>r&1 == 0 {
1718 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1719 }
1720 mask = regMask(1) << r
1721 } else {
1722
1723 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1724 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1725
1726 found := false
1727 for _, r := range dinfo[idx].out {
1728 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1729 mask = regMask(1) << r
1730 found = true
1731 if r == r1 {
1732 args[0], args[1] = args[1], args[0]
1733 }
1734 break
1735 }
1736 }
1737 if !found {
1738
1739 mask = regMask(1) << r0
1740 }
1741 }
1742 }
1743 if out.idx == 0 {
1744 for _, r := range dinfo[idx].out {
1745 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1746
1747 mask = regMask(1) << r
1748 break
1749 }
1750 }
1751 }
1752
1753 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1754 mask &^= desired.avoid
1755 }
1756 r := s.allocReg(mask, v)
1757 if out.idx > maxOutIdx {
1758 maxOutIdx = out.idx
1759 }
1760 outRegs[out.idx] = r
1761 used |= regMask(1) << r
1762 s.tmpused |= regMask(1) << r
1763 }
1764
1765 if v.Type.IsTuple() {
1766 var outLocs LocPair
1767 if r := outRegs[0]; r != noRegister {
1768 outLocs[0] = &s.registers[r]
1769 }
1770 if r := outRegs[1]; r != noRegister {
1771 outLocs[1] = &s.registers[r]
1772 }
1773 s.f.setHome(v, outLocs)
1774
1775 } else if v.Type.IsResults() {
1776
1777 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1778 for i := 0; i <= maxOutIdx; i++ {
1779 if r := outRegs[i]; r != noRegister {
1780 outLocs[i] = &s.registers[r]
1781 }
1782 }
1783 s.f.setHome(v, outLocs)
1784 } else {
1785 if r := outRegs[0]; r != noRegister {
1786 s.assignReg(r, v, v)
1787 }
1788 }
1789 if tmpReg != noRegister {
1790
1791 if s.f.tempRegs == nil {
1792 s.f.tempRegs = map[ID]*Register{}
1793 }
1794 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1795 }
1796 }
1797
1798
1799 if opcodeTable[v.Op].resultNotInArgs {
1800 s.nospill = 0
1801 s.advanceUses(v)
1802 }
1803 s.tmpused = 0
1804
1805
1806 for i, a := range args {
1807 v.SetArg(i, a)
1808 }
1809 b.Values = append(b.Values, v)
1810 s.dropIfUnused(v)
1811 }
1812
1813
1814
1815 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1816
1817
1818 for i, v := range b.ControlValues() {
1819 if !s.values[v.ID].needReg {
1820 continue
1821 }
1822 if s.f.pass.debug > regDebug {
1823 fmt.Printf(" processing control %s\n", v.LongString())
1824 }
1825
1826
1827
1828 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1829 }
1830
1831
1832
1833 for _, v := range controls {
1834 vi := &s.values[v.ID]
1835 if !vi.needReg {
1836 continue
1837 }
1838
1839 u := vi.uses
1840 vi.uses = u.next
1841 if u.next == nil {
1842 s.freeRegs(vi.regs)
1843 }
1844 u.next = s.freeUseRecords
1845 s.freeUseRecords = u
1846 }
1847
1848
1849
1850
1851 if len(b.Succs) == 1 {
1852 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1853 s.freeReg(s.GReg)
1854 }
1855
1856 top := b.Succs[0].b
1857 loop := s.loopnest.b2l[top.ID]
1858 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1859 goto badloop
1860 }
1861
1862
1863 for _, live := range s.live[b.ID] {
1864 if live.dist >= unlikelyDistance {
1865
1866 continue
1867 }
1868 vid := live.ID
1869 vi := &s.values[vid]
1870 if vi.regs != 0 {
1871 continue
1872 }
1873 if vi.rematerializeable {
1874 continue
1875 }
1876 v := s.orig[vid]
1877 m := s.compatRegs(v.Type) &^ s.used
1878
1879 outerloop:
1880 for _, e := range desired.entries {
1881 if e.ID != v.ID {
1882 continue
1883 }
1884 for _, r := range e.regs {
1885 if r != noRegister && m>>r&1 != 0 {
1886 m = regMask(1) << r
1887 break outerloop
1888 }
1889 }
1890 }
1891 if m&^desired.avoid != 0 {
1892 m &^= desired.avoid
1893 }
1894 if m != 0 {
1895 s.allocValToReg(v, m, false, b.Pos)
1896 }
1897 }
1898 }
1899 badloop:
1900 ;
1901
1902
1903
1904 k := 0
1905 for r := register(0); r < s.numRegs; r++ {
1906 v := s.regs[r].v
1907 if v == nil {
1908 continue
1909 }
1910 k++
1911 }
1912 regList := make([]endReg, 0, k)
1913 for r := register(0); r < s.numRegs; r++ {
1914 v := s.regs[r].v
1915 if v == nil {
1916 continue
1917 }
1918 regList = append(regList, endReg{r, v, s.regs[r].c})
1919 }
1920 s.endRegs[b.ID] = regList
1921
1922 if checkEnabled {
1923 regValLiveSet.clear()
1924 for _, x := range s.live[b.ID] {
1925 regValLiveSet.add(x.ID)
1926 }
1927 for r := register(0); r < s.numRegs; r++ {
1928 v := s.regs[r].v
1929 if v == nil {
1930 continue
1931 }
1932 if !regValLiveSet.contains(v.ID) {
1933 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
1934 }
1935 }
1936 }
1937
1938
1939
1940
1941
1942 for _, e := range s.live[b.ID] {
1943 vi := &s.values[e.ID]
1944 if vi.regs != 0 {
1945
1946 continue
1947 }
1948 if vi.rematerializeable {
1949
1950 continue
1951 }
1952 if s.f.pass.debug > regDebug {
1953 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
1954 }
1955 spill := s.makeSpill(s.orig[e.ID], b)
1956 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
1957 }
1958
1959
1960
1961
1962 for _, e := range s.live[b.ID] {
1963 u := s.values[e.ID].uses
1964 if u == nil {
1965 f.Fatalf("live at end, no uses v%d", e.ID)
1966 }
1967 if u.next != nil {
1968 f.Fatalf("live at end, too many uses v%d", e.ID)
1969 }
1970 s.values[e.ID].uses = nil
1971 u.next = s.freeUseRecords
1972 s.freeUseRecords = u
1973 }
1974
1975
1976
1977
1978
1979
1980
1981 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
1982 regs := make([]startReg, 0, c)
1983 for _, sr := range s.startRegs[b.ID] {
1984 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
1985 continue
1986 }
1987 regs = append(regs, sr)
1988 }
1989 s.startRegs[b.ID] = regs
1990 }
1991 }
1992
1993
1994 s.placeSpills()
1995
1996
1997
1998 stacklive := stackalloc(s.f, s.spillLive)
1999
2000
2001 s.shuffle(stacklive)
2002
2003
2004
2005
2006 for {
2007 progress := false
2008 for c, used := range s.copies {
2009 if !used && c.Uses == 0 {
2010 if s.f.pass.debug > regDebug {
2011 fmt.Printf("delete copied value %s\n", c.LongString())
2012 }
2013 c.resetArgs()
2014 f.freeValue(c)
2015 delete(s.copies, c)
2016 progress = true
2017 }
2018 }
2019 if !progress {
2020 break
2021 }
2022 }
2023
2024 for _, b := range s.visitOrder {
2025 i := 0
2026 for _, v := range b.Values {
2027 if v.Op == OpInvalid {
2028 continue
2029 }
2030 b.Values[i] = v
2031 i++
2032 }
2033 b.Values = b.Values[:i]
2034 }
2035 }
2036
// placeSpills picks a block for each generated spill (StoreReg) and
// inserts the spills into their blocks' value lists. On entry, each
// pending spill has a nil Block; on exit every spill has been assigned
// a block, given its argument, and scheduled either at the start of a
// block or immediately after the value it spills.
func (s *regAllocState) placeSpills() {
	// mustBeFirst reports whether op must appear in the leading,
	// unmovable prefix of a block (closure pointer, phis, register args).
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// Spills to place at the start of a block, keyed by block ID.
	start := map[ID][]*Value{}

	// Spills to place immediately after another value, keyed by that value's ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up,
			// like OpArgs and stack-based phis.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v: as far down as possible (dominating all
		// restores of v) while staying out of deeper loops.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block     // best block found so far for the spill
		bestArg := v        // value the spill will store from that block
		var bestDepth int16 // loop depth of best
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		const maxSpillSearch = 100 // bound on dominator-tree nodes examined
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores (i.e. covers [restoreMin, restoreMax]).
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after the phis).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value // scratch buffer, reused across blocks
	for _, b := range s.visitOrder {
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2159
2160
2161 func (s *regAllocState) shuffle(stacklive [][]ID) {
2162 var e edgeState
2163 e.s = s
2164 e.cache = map[ID][]*Value{}
2165 e.contents = map[Location]contentRecord{}
2166 if s.f.pass.debug > regDebug {
2167 fmt.Printf("shuffle %s\n", s.f.Name)
2168 fmt.Println(s.f.String())
2169 }
2170
2171 for _, b := range s.visitOrder {
2172 if len(b.Preds) <= 1 {
2173 continue
2174 }
2175 e.b = b
2176 for i, edge := range b.Preds {
2177 p := edge.b
2178 e.p = p
2179 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2180 e.process()
2181 }
2182 }
2183
2184 if s.f.pass.debug > regDebug {
2185 fmt.Printf("post shuffle %s\n", s.f.Name)
2186 fmt.Println(s.f.String())
2187 }
2188 }
2189
// edgeState holds the working state while fixing up a single
// control-flow edge p->b during the shuffle pass.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p to b

	// for each pre-regalloc value, a list of equivalent cached values
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of the above map, kept for deterministic iteration

	// map from location (register or stack slot) to the value it currently holds
	contents map[Location]contentRecord

	// desired destination locations
	destinations []dstRecord
	extra        []dstRecord // additional destinations discovered while erasing final values

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only copy of a value
	finalRegs             regMask // registers holding final target values
	rematerializeableRegs regMask // registers that hold rematerializeable values
}
2210
// contentRecord records what a location holds during shuffling.
type contentRecord struct {
	vid   ID       // pre-regalloc value ID
	c     *Value   // cached value currently stored at the location
	final bool     // this location is a satisfied destination (must be preserved)
	pos   src.XPos // source position of use of the value
}
2217
// dstRecord describes a target location that must end up holding a
// particular value by the end of the edge shuffle.
type dstRecord struct {
	loc    Location // register or stack slot to fill
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // if non-nil, a use (e.g. a phi arg) to redirect at the produced copy
	pos    src.XPos // source position of use of this location
}
2224
2225
// setup initializes the edge state for shuffling the idx'th incoming
// edge of e.b: srcReg/stacklive describe where values are at the end of
// the predecessor, dstReg describes where e.b expects them.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state left over from the previous edge.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers at the end of the predecessor can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
	}
	// So can live spill slots.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// The spill block does not dominate the predecessor, so the
			// spill has not actually happened on this path yet; it cannot
			// be used as a source. Skip it.
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
	}

	// Figure out all the destinations we need: start registers of e.b ...
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// ... plus the homes of e.b's phis (using the arg for this edge).
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2292
2293
// process generates code to move all the values into their final
// destinations for the current edge. It repeatedly tries each pending
// destination and, when no destination can make progress (a cyclic
// dependency such as a register swap), it breaks the cycle by moving
// one occupant to a temporary location.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extras destinations we generated
			// (from erasing final values that still need a home).
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress, so every remaining destination is blocked
		// by a cycle (e.g. a swap: A wants B's register and vice versa).
		// Break the cycle by moving the occupant of the first destination
		// to a free location. Registers are copied to another register;
		// stack slots are loaded into a register (a later iteration will
		// store it wherever it ultimately belongs).
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2360
2361
2362
// processDest generates code to put value vid into location loc.
// It returns true if the destination is now satisfied (either it already
// held vid, or a copy/load/store/rematerialization was emitted), and
// false if the move must be deferred because it would destroy the only
// copy of the location's current occupant.
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	pos = pos.WithNotStmt()
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// The destination already holds the right value. Mark it final.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// Note: if the copy is now used, record that so the
		// copy-elimination pass at the end of regalloc keeps it.
		if _, ok := e.s.copies[occupant.c]; ok {
			// The copy at occupant.c was used to avoid spill.
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location.
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
		// We can't overwrite the last copy of a value that needs
		// to survive; defer this destination until the occupant moves.
		return false
	}

	// Pick a source: prefer a register copy of vid if one exists.
	v := e.s.orig[vid]
	var c *Value
	var src Location // NOTE: shadows the cmd/internal/src import within this function
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc)
		fmt.Printf("sources of v%d:", vid)
	}
	for _, w := range e.cache[vid] {
		h := e.s.f.getHome(w.ID)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf(" %s:%s", h, w)
		}
		_, isreg := h.(*Register)
		if src == nil || isreg {
			c = w
			src = h
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src)
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)

	// Emit the move. Four cases (src reg/stack x dst reg/stack), plus
	// rematerialization when there is no cached source or recomputing
	// is cheaper than moving.
	e.erase(loc) // see pre-clobber comment below
	var x *Value
	if c == nil || e.s.values[vid].rematerializeable {
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyInto(e.p)
		} else {
			// Rematerialize into a register first, then store to the
			// stack slot (there is no direct materialize-to-stack op).
			r := e.findRegFor(v.Type)
			e.erase(r)
			x = v.copyIntoWithXPos(e.p, pos)
			e.set(r, vid, x, false, pos)
			// Make sure we spill with the size of the slot, not the
			// size of x (which might be wider due to our dropping
			// of narrowing conversions).
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit move from src to dst.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem. Use a temporary register to do the transfer.
				r := e.findRegFor(c.Type)
				e.erase(r)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
	}
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}
2482
2483
// set changes the contents of location loc to hold the given value (vid
// is the pre-regalloc ID, c the cached copy) and updates all the
// register-mask bookkeeping (used/final/unique/rematerializeable).
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First cached copy of vid; remember it for deterministic iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			// This register holds the only copy of vid.
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			// vid now has a second copy, so its first home (if a
			// register) is no longer unique.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
2518
2519
// erase removes any user of loc and updates the register-mask
// bookkeeping accordingly. If the erased content was a final
// destination value, it is re-queued (via e.extra) so it gets
// restored later.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Make sure it gets added to the tail of the destination queue
		// so we make progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove c from the list of cached values.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		// The remaining copy of vid is now the only one; if it lives
		// in a register, that register becomes unique.
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2561
2562
// findRegFor finds a register we can use to make a temp copy of type typ.
// Preference order: free register > non-unique non-final > non-unique >
// rematerializeable occupant > spill an occupant to the stack to free one.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// NOTE: this local shadows the cmd/compile/internal/types import
	// for the rest of the function.
	types := &e.s.f.Config.Types
	m := e.s.compatRegs(typ)

	// 1) a completely free compatible register.
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	// 2) occupied, but the value has another copy and isn't a final dest.
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	// 3) occupied by a non-unique value (even if final).
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	// 4) occupied by a rematerializeable value (cheap to regenerate).
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is available.
	// Pick a register to spill: its occupant is stored to a fresh stack
	// slot (unless rematerializeable), freeing the register for the caller.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a temp location to spill a register to.
					// The type of the slot is immaterial - it will not be live across
					// any safepoint. Just use a type big enough to hold any register.
					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
					// TODO: reuse these slots. They'll need to be erased first.
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r will now be overwritten by the caller. At some point
				// later, the newly saved value will be moved back to its
				// final destination in processDest.
				return r
			}
		}
	}

	// Can't happen unless the register mask/state is corrupted; dump state.
	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
2626
2627
2628
2629 func (v *Value) rematerializeable() bool {
2630 if !opcodeTable[v.Op].rematerializeable {
2631 return false
2632 }
2633 for _, a := range v.Args {
2634
2635 if a.Op != OpSP && a.Op != OpSB {
2636 return false
2637 }
2638 }
2639 return true
2640 }
2641
// liveInfo records that a value is live at the end of some block,
// along with how far away (in instructions) its next use is.
type liveInfo struct {
	ID   ID       // ID of value
	dist int32    // # of instructions before next use
	pos  src.XPos // source position of next use
}
2647
2648
2649
2650
2651
2652
2653
2654
2655
// computeLive computes a map from block ID to a list of value IDs live
// at the end of that block, annotated with the distance (in instruction
// count) to their next use. It simultaneously computes, per block, the
// desired register assignments (s.desired) for values live at block
// ends. Both are computed by iterating a backward dataflow pass over
// the postorder until it reaches a fixed point.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	// Scratch desired-register state, rebuilt per block.
	var desired desiredState

	// Instead of iterating over f.Blocks, iterate over their postordering.
	// Liveness information flows backward, so starting at the end
	// increases the probability that we will stabilize quickly.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.calculateDepths()
	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			// Add len(b.Values) to adjust from end-of-block distance
			// to beginning-of-block distance.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Mark control values as live.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Propagate backwards to the start of the block.
			// Assumes Values have been scheduled.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phi ops for later; their args are live
					// in the predecessors, not here.
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					// Values live across a call are penalized: spilling
					// them is mandatory anyway, so treat their next use
					// as far away.
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}

			// Propagate desired registers backwards through the block.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// TODO: if v is a phi, save desired register for phi inputs.
					// For now, we just drop it and don't propagate
					// desired registers back though phi nodes.
					continue
				}
				regspec := s.regspec(v)
				// Cancel desired registers if they get clobbered.
				desired.clobber(regspec.clobbers)
				// Update desired registers if there are any fixed register inputs.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Set desired register of input 0 if this is a 2-operand instruction.
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its list of live-at-end values.
			// invariant: live contains the values live at the start of b
			// (excluding phi inputs)
			for i, e := range b.Preds {
				p := e.b
				// Compute additional distance for the edge.
				// Note: delta must be at least 1 to distinguish the control
				// value use from the first user in a successor block.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Update any desired registers at the end of p.
				s.desired[p.ID].merge(&desired)

				// Start t off with the previously known live values at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values from scanning this block.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}
				// Also add the correct arg from the saved phi values.
				// All phis are at distance delta (we consider them
				// simultaneously happening at the start of the block).
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// The live set has changed, update it.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.pos})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf(" %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d(%d)", x.ID, x.dist)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(&s.registers[r])
						first = false
					}
					fmt.Printf("]")
				}
			}
			if avoid := s.desired[b.ID].avoid; avoid != 0 {
				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
			}
			fmt.Println()
		}
	}
}
2860
2861
// A desiredState represents desired register assignments.
type desiredState struct {
	// Desired assignments will be small, so we just use a list
	// of valueID+registers entries.
	entries []desiredStateEntry
	// Registers that other values want to be in. This value will
	// contain at least the union of the regs fields of entries, but
	// may contain additional entries for values that were previously
	// desired in a register but were dropped.
	avoid regMask
}
// desiredStateEntry records the registers a single (pre-regalloc)
// value would like to be placed in.
type desiredStateEntry struct {
	// (pre-regalloc) value
	ID ID
	// Registers it would like to be in, in priority order
	// (most recently desired first). Unused slots are filled
	// with noRegister.
	regs [4]register
}
2881
2882 func (d *desiredState) clear() {
2883 d.entries = d.entries[:0]
2884 d.avoid = 0
2885 }
2886
2887
2888 func (d *desiredState) get(vid ID) [4]register {
2889 for _, e := range d.entries {
2890 if e.ID == vid {
2891 return e.regs
2892 }
2893 }
2894 return [4]register{noRegister, noRegister, noRegister, noRegister}
2895 }
2896
2897
2898 func (d *desiredState) add(vid ID, r register) {
2899 d.avoid |= regMask(1) << r
2900 for i := range d.entries {
2901 e := &d.entries[i]
2902 if e.ID != vid {
2903 continue
2904 }
2905 if e.regs[0] == r {
2906
2907 return
2908 }
2909 for j := 1; j < len(e.regs); j++ {
2910 if e.regs[j] == r {
2911
2912 copy(e.regs[1:], e.regs[:j])
2913 e.regs[0] = r
2914 return
2915 }
2916 }
2917 copy(e.regs[1:], e.regs[:])
2918 e.regs[0] = r
2919 return
2920 }
2921 d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
2922 }
2923
2924 func (d *desiredState) addList(vid ID, regs [4]register) {
2925
2926 for i := len(regs) - 1; i >= 0; i-- {
2927 r := regs[i]
2928 if r != noRegister {
2929 d.add(vid, r)
2930 }
2931 }
2932 }
2933
2934
2935 func (d *desiredState) clobber(m regMask) {
2936 for i := 0; i < len(d.entries); {
2937 e := &d.entries[i]
2938 j := 0
2939 for _, r := range e.regs {
2940 if r != noRegister && m>>r&1 == 0 {
2941 e.regs[j] = r
2942 j++
2943 }
2944 }
2945 if j == 0 {
2946
2947 d.entries[i] = d.entries[len(d.entries)-1]
2948 d.entries = d.entries[:len(d.entries)-1]
2949 continue
2950 }
2951 for ; j < len(e.regs); j++ {
2952 e.regs[j] = noRegister
2953 }
2954 i++
2955 }
2956 d.avoid &^= m
2957 }
2958
2959
2960 func (d *desiredState) copy(x *desiredState) {
2961 d.entries = append(d.entries[:0], x.entries...)
2962 d.avoid = x.avoid
2963 }
2964
2965
2966 func (d *desiredState) remove(vid ID) [4]register {
2967 for i := range d.entries {
2968 if d.entries[i].ID == vid {
2969 regs := d.entries[i].regs
2970 d.entries[i] = d.entries[len(d.entries)-1]
2971 d.entries = d.entries[:len(d.entries)-1]
2972 return regs
2973 }
2974 }
2975 return [4]register{noRegister, noRegister, noRegister, noRegister}
2976 }
2977
2978
2979 func (d *desiredState) merge(x *desiredState) {
2980 d.avoid |= x.avoid
2981
2982
2983 for _, e := range x.entries {
2984 d.addList(e.ID, e.regs)
2985 }
2986 }
2987