arith.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file provides Go implementations of elementary multi-precision
     6  // arithmetic operations on word vectors. These have the suffix _g.
     7  // These are needed for platforms without assembly implementations of these routines.
     8  // This file also contains elementary operations that can be implemented
     9  // sufficiently efficiently in Go.
    10  
    11  package big
    12  
    13  import (
    14  	"math/bits"
    15  	_ "unsafe" // for go:linkname
    16  )
    17  
    18  // A Word represents a single digit of a multi-precision unsigned integer.
    19  type Word uint
    20  
    21  const (
    22  	_S = _W / 8 // word size in bytes
    23  
    24  	_W = bits.UintSize // word size in bits
    25  	_B = 1 << _W       // digit base
    26  	_M = _B - 1        // digit mask
    27  )
    28  
    29  // In these routines, it is the caller's responsibility to arrange for
    30  // x, y, and z to all have the same length. We check this and panic.
    31  // The assembly versions of these routines do not include that check.
    32  //
    33  // The check+panic also has the effect of teaching the compiler that
    34  // “i in range for z” implies “i in range for x and y”, eliminating all
    35  // bounds checks in loops from 0 to len(z) and vice versa.
    36  
    37  // ----------------------------------------------------------------------------
    38  // Elementary operations on words
    39  //
    40  // These operations are used by the vector operations below.
    41  
    42  // z1<<_W + z0 = x*y
    43  func mulWW(x, y Word) (z1, z0 Word) {
    44  	hi, lo := bits.Mul(uint(x), uint(y))
    45  	return Word(hi), Word(lo)
    46  }
    47  
    48  // z1<<_W + z0 = x*y + c
    49  func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
    50  	hi, lo := bits.Mul(uint(x), uint(y))
    51  	var cc uint
    52  	lo, cc = bits.Add(lo, uint(c), 0)
    53  	return Word(hi + cc), Word(lo)
    54  }
    55  
    56  // nlz returns the number of leading zeros in x.
    57  // Wraps bits.LeadingZeros call for convenience.
    58  func nlz(x Word) uint {
    59  	return uint(bits.LeadingZeros(uint(x)))
    60  }
    61  
    62  // The resulting carry c is either 0 or 1.
    63  func addVV_g(z, x, y []Word) (c Word) {
    64  	if len(x) != len(z) || len(y) != len(z) {
    65  		panic("addVV len")
    66  	}
    67  
    68  	for i := range z {
    69  		zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
    70  		z[i] = Word(zi)
    71  		c = Word(cc)
    72  	}
    73  	return
    74  }
    75  
    76  // The resulting carry c is either 0 or 1.
    77  func subVV_g(z, x, y []Word) (c Word) {
    78  	if len(x) != len(z) || len(y) != len(z) {
    79  		panic("subVV len")
    80  	}
    81  
    82  	for i := range z {
    83  		zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
    84  		z[i] = Word(zi)
    85  		c = Word(cc)
    86  	}
    87  	return
    88  }
    89  
    90  // addVW sets z = x + y, returning the final carry c.
    91  // The behavior is undefined if len(x) != len(z).
    92  // If len(z) == 0, c = y; otherwise, c is 0 or 1.
    93  //
    94  // addVW should be an internal detail,
    95  // but widely used packages access it using linkname.
    96  // Notable members of the hall of shame include:
    97  //   - github.com/remyoudompheng/bigfft
    98  //
    99  // Do not remove or change the type signature.
   100  // See go.dev/issue/67401.
   101  //
   102  //go:linkname addVW
   103  func addVW(z, x []Word, y Word) (c Word) {
   104  	if len(x) != len(z) {
   105  		panic("addVW len")
   106  	}
   107  
   108  	if len(z) == 0 {
   109  		return y
   110  	}
   111  	zi, cc := bits.Add(uint(x[0]), uint(y), 0)
   112  	z[0] = Word(zi)
   113  	if cc == 0 {
   114  		if &z[0] != &x[0] {
   115  			copy(z[1:], x[1:])
   116  		}
   117  		return 0
   118  	}
   119  	for i := 1; i < len(z); i++ {
   120  		xi := x[i]
   121  		if xi != ^Word(0) {
   122  			z[i] = xi + 1
   123  			if &z[0] != &x[0] {
   124  				copy(z[i+1:], x[i+1:])
   125  			}
   126  			return 0
   127  		}
   128  		z[i] = 0
   129  	}
   130  	return 1
   131  }
   132  
   133  // addVW_ref is the reference implementation for addVW, used only for testing.
   134  func addVW_ref(z, x []Word, y Word) (c Word) {
   135  	c = y
   136  	for i := range z {
   137  		zi, cc := bits.Add(uint(x[i]), uint(c), 0)
   138  		z[i] = Word(zi)
   139  		c = Word(cc)
   140  	}
   141  	return
   142  }
   143  
   144  // subVW sets z = x - y, returning the final carry c.
   145  // The behavior is undefined if len(x) != len(z).
   146  // If len(z) == 0, c = y; otherwise, c is 0 or 1.
   147  //
   148  // subVW should be an internal detail,
   149  // but widely used packages access it using linkname.
   150  // Notable members of the hall of shame include:
   151  //   - github.com/remyoudompheng/bigfft
   152  //
   153  // Do not remove or change the type signature.
   154  // See go.dev/issue/67401.
   155  //
   156  //go:linkname subVW
   157  func subVW(z, x []Word, y Word) (c Word) {
   158  	if len(x) != len(z) {
   159  		panic("subVW len")
   160  	}
   161  
   162  	if len(z) == 0 {
   163  		return y
   164  	}
   165  	zi, cc := bits.Sub(uint(x[0]), uint(y), 0)
   166  	z[0] = Word(zi)
   167  	if cc == 0 {
   168  		if &z[0] != &x[0] {
   169  			copy(z[1:], x[1:])
   170  		}
   171  		return 0
   172  	}
   173  	for i := 1; i < len(z); i++ {
   174  		xi := x[i]
   175  		if xi != 0 {
   176  			z[i] = xi - 1
   177  			if &z[0] != &x[0] {
   178  				copy(z[i+1:], x[i+1:])
   179  			}
   180  			return 0
   181  		}
   182  		z[i] = ^Word(0)
   183  	}
   184  	return 1
   185  }
   186  
   187  // subVW_ref is the reference implementation for subVW, used only for testing.
   188  func subVW_ref(z, x []Word, y Word) (c Word) {
   189  	c = y
   190  	for i := range z {
   191  		zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
   192  		z[i] = Word(zi)
   193  		c = Word(cc)
   194  	}
   195  	return c
   196  }
   197  
   198  func lshVU_g(z, x []Word, s uint) (c Word) {
   199  	if len(x) != len(z) {
   200  		panic("lshVU len")
   201  	}
   202  
   203  	if s == 0 {
   204  		copy(z, x)
   205  		return
   206  	}
   207  	if len(z) == 0 {
   208  		return
   209  	}
   210  	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
   211  	ŝ := _W - s
   212  	ŝ &= _W - 1 // ditto
   213  	c = x[len(z)-1] >> ŝ
   214  	for i := len(z) - 1; i > 0; i-- {
   215  		z[i] = x[i]<<s | x[i-1]>>ŝ
   216  	}
   217  	z[0] = x[0] << s
   218  	return
   219  }
   220  
   221  func rshVU_g(z, x []Word, s uint) (c Word) {
   222  	if len(x) != len(z) {
   223  		panic("rshVU len")
   224  	}
   225  
   226  	if s == 0 {
   227  		copy(z, x)
   228  		return
   229  	}
   230  	if len(z) == 0 {
   231  		return
   232  	}
   233  	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
   234  	ŝ := _W - s
   235  	ŝ &= _W - 1 // ditto
   236  	c = x[0] << ŝ
   237  	for i := 1; i < len(z); i++ {
   238  		z[i-1] = x[i-1]>>s | x[i]<<ŝ
   239  	}
   240  	z[len(z)-1] = x[len(z)-1] >> s
   241  	return
   242  }
   243  
   244  func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
   245  	if len(x) != len(z) {
   246  		panic("mulAddVWW len")
   247  	}
   248  	c = r
   249  	for i := range z {
   250  		c, z[i] = mulAddWWW_g(x[i], y, c)
   251  	}
   252  	return
   253  }
   254  
   255  func addMulVVWW_g(z, x, y []Word, m, a Word) (c Word) {
   256  	if len(x) != len(z) || len(y) != len(z) {
   257  		panic("rshVU len")
   258  	}
   259  
   260  	c = a
   261  	for i := range z {
   262  		z1, z0 := mulAddWWW_g(y[i], m, x[i])
   263  		lo, cc := bits.Add(uint(z0), uint(c), 0)
   264  		c, z[i] = Word(cc), Word(lo)
   265  		c += z1
   266  	}
   267  	return
   268  }
   269  
   270  // q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y.
   271  // An approximate reciprocal with a reference to "Improved Division by Invariant Integers
   272  // (IEEE Transactions on Computers, 11 Jun. 2010)"
   273  func divWW(x1, x0, y, m Word) (q, r Word) {
   274  	s := nlz(y)
   275  	if s != 0 {
   276  		x1 = x1<<s | x0>>(_W-s)
   277  		x0 <<= s
   278  		y <<= s
   279  	}
   280  	d := uint(y)
   281  	// We know that
   282  	//   m = ⎣(B^2-1)/d⎦-B
   283  	//   ⎣(B^2-1)/d⎦ = m+B
   284  	//   (B^2-1)/d = m+B+delta1    0 <= delta1 <= (d-1)/d
   285  	//   B^2/d = m+B+delta2        0 <= delta2 <= 1
   286  	// The quotient we're trying to compute is
   287  	//   quotient = ⎣(x1*B+x0)/d⎦
   288  	//            = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦
   289  	//            = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦
   290  	//            = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦
   291  	// The latter two terms of this three-term sum are between 0 and 1.
   292  	// So we can compute just the first term, and we will be low by at most 2.
   293  	t1, t0 := bits.Mul(uint(m), uint(x1))
   294  	_, c := bits.Add(t0, uint(x0), 0)
   295  	t1, _ = bits.Add(t1, uint(x1), c)
   296  	// The quotient is either t1, t1+1, or t1+2.
   297  	// We'll try t1 and adjust if needed.
   298  	qq := t1
   299  	// compute remainder r=x-d*q.
   300  	dq1, dq0 := bits.Mul(d, qq)
   301  	r0, b := bits.Sub(uint(x0), dq0, 0)
   302  	r1, _ := bits.Sub(uint(x1), dq1, b)
   303  	// The remainder we just computed is bounded above by B+d:
   304  	// r = x1*B + x0 - d*q.
   305  	//   = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦
   306  	//   = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha)                                   0 <= alpha < 1
   307  	//   = x1*B + x0 - x1*d/B*m                         - x1*d - x0*d/B + d*alpha
   308  	//   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
   309  	//   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
   310  	//   = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta)        - x1*d - x0*d/B + d*alpha   0 <= beta < 1
   311  	//   = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha
   312  	//   =        x0        + x1/B        + x1*d/B*beta        - x0*d/B + d*alpha
   313  	//   = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha
   314  	//   <  B*(1-d/B) +  d*B/B          + d          because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1
   315  	//   =  B - d     +  d              + d
   316  	//   = B+d
   317  	// So r1 can only be 0 or 1. If r1 is 1, then we know q was too small.
   318  	// Add 1 to q and subtract d from r. That guarantees that r is <B, so
   319  	// we no longer need to keep track of r1.
   320  	if r1 != 0 {
   321  		qq++
   322  		r0 -= d
   323  	}
   324  	// If the remainder is still too large, increment q one more time.
   325  	if r0 >= d {
   326  		qq++
   327  		r0 -= d
   328  	}
   329  	return Word(qq), Word(r0 >> s)
   330  }
   331  
   332  // reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1).
   333  func reciprocalWord(d1 Word) Word {
   334  	u := uint(d1 << nlz(d1))
   335  	x1 := ^u
   336  	x0 := uint(_M)
   337  	rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U
   338  	return Word(rec)
   339  }
   340
View as plain text