Source file src/math/big/internal/asmgen/shift.go
1 // Copyright 2025 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package asmgen 6 7 // shiftVU generates lshVU and rshVU, which do 8 // z, c = x << s and z, c = x >> s, for 0 < s < _W. 9 func shiftVU(a *Asm, name string) { 10 // Because these routines can be called for z.Lsh(z, N) and z.Rsh(z, N), 11 // the input and output slices may be aliased at different offsets. 12 // For example (on 64-bit systems), during z.Lsh(z, 65), &z[0] == &x[1], 13 // and during z.Rsh(z, 65), &z[1] == &x[0]. 14 // For left shift, we must process the slices from len(z)-1 down to 0, 15 // so that we don't overwrite a word before we need to read it. 16 // For right shift, we must process the slices from 0 up to len(z)-1. 17 // The different traversals at least make the two cases more consistent, 18 // since we're always delaying the output by one word compared 19 // to the input. 20 21 f := a.Func("func " + name + "(z, x []Word, s uint) (c Word)") 22 23 // Check for no input early, since we need to start by reading 1 word. 24 n := f.Arg("z_len") 25 a.JmpZero(n, "ret0") 26 27 // Start loop by reading first input word. 28 s := f.ArgHint("s", HintShiftCount) 29 p := f.Pipe() 30 if name == "lshVU" { 31 p.SetBackward() 32 } 33 unroll := []int{1, 4} 34 if a.Arch == Arch386 { 35 unroll = []int{1} // too few registers for more 36 p.SetUseIndexCounter() 37 } 38 p.LoadPtrs(n) 39 a.Comment("shift first word into carry") 40 prev := p.LoadN(1)[0][0] 41 42 // Decide how to shift. On systems with a wide shift (x86), use that. 43 // Otherwise, we need shift by s and negative (reverse) shift by 64-s or 32-s. 44 shift := a.Lsh 45 shiftWide := a.LshWide 46 negShift := a.Rsh 47 negShiftReg := a.RshReg 48 if name == "rshVU" { 49 shift = a.Rsh 50 shiftWide = a.RshWide 51 negShift = a.Lsh 52 negShiftReg = a.LshReg 53 } 54 if a.Arch.HasShiftWide() { 55 // Use wide shift to avoid needing negative shifts. 56 // The invariant is that prev holds the previous word (not shifted at all), 57 // to be used as input into the wide shift. 58 // After the loop finishes, prev holds the final output word to be written. 59 c := a.Reg() 60 shiftWide(s, prev, a.Imm(0), c) 61 f.StoreArg(c, "c") 62 a.Free(c) 63 a.Comment("shift remaining words") 64 p.Start(n, unroll...) 65 p.Loop(func(in [][]Reg, out [][]Reg) { 66 // We reuse the input registers as output, delayed one cycle; prev is the first output. 67 // After writing the outputs to memory, we can copy the final x value into prev 68 // for the next iteration. 69 old := prev 70 for i, x := range in[0] { 71 shiftWide(s, x, old, old) 72 out[0][i] = old 73 old = x 74 } 75 p.StoreN(out) 76 a.Mov(old, prev) 77 }) 78 a.Comment("store final shifted bits") 79 shift(s, prev, prev) 80 } else { 81 // Construct values from x << s and x >> (64-s). 82 // After the first word has been processed, the invariant is that 83 // prev holds x << s, to be used as the high bits of the next output word, 84 // once we find the low bits after reading the next input word. 85 // After the loop finishes, prev holds the final output word to be written. 86 sNeg := a.Reg() 87 a.Mov(a.Imm(a.Arch.WordBits), sNeg) 88 a.Sub(s, sNeg, sNeg, SmashCarry) 89 c := a.Reg() 90 negShift(sNeg, prev, c) 91 shift(s, prev, prev) 92 f.StoreArg(c, "c") 93 a.Free(c) 94 a.Comment("shift remaining words") 95 p.Start(n, unroll...) 96 p.Loop(func(in, out [][]Reg) { 97 if a.HasRegShift() { 98 // ARM (32-bit) allows shifts in most arithmetic expressions, 99 // including OR, letting us combine the negShift and a.Or. 100 // The simplest way to manage the registers is to do StoreN for 101 // one output at a time, and since we don't use multi-register 102 // stores on ARM, that doesn't hurt us. 103 out[0] = out[0][:1] 104 for _, x := range in[0] { 105 a.Or(negShiftReg(sNeg, x), prev, prev) 106 out[0][0] = prev 107 p.StoreN(out) 108 shift(s, x, prev) 109 } 110 return 111 } 112 // We reuse the input registers as output, delayed one cycle; z0 is the first output. 113 z0 := a.Reg() 114 z := z0 115 for i, x := range in[0] { 116 negShift(sNeg, x, z) 117 a.Or(prev, z, z) 118 shift(s, x, prev) 119 out[0][i] = z 120 z = x 121 } 122 p.StoreN(out) 123 }) 124 a.Comment("store final shifted bits") 125 } 126 p.StoreN([][]Reg{{prev}}) 127 p.Done() 128 a.Free(s) 129 a.Ret() 130 131 // Return 0, used from above. 132 a.Label("ret0") 133 f.StoreArg(a.Imm(0), "c") 134 a.Ret() 135 } 136