atof.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  // decimal to binary floating point conversion.
     8  // Algorithm:
     9  //   1) Store input in multiprecision decimal.
    10  //   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
    11  //   3) Multiply by 2^precision and round to get mantissa.
    12  
    13  import "math"
    14  
        // optimize gates the fast conversion paths that atof32 and atof64 try
        // before falling back to the decimal-based slow path.
    15  var optimize = true // set to false to force slow-path conversions for testing
    16  
    17  // commonPrefixLenIgnoreCase returns the length of the common
    18  // prefix of s and prefix, with the character case of s ignored.
    19  // The prefix argument must be all lower-case.
    20  func commonPrefixLenIgnoreCase(s, prefix string) int {
    21  	n := min(len(prefix), len(s))
    22  	for i := 0; i < n; i++ {
    23  		c := s[i]
    24  		if 'A' <= c && c <= 'Z' {
    25  			c += 'a' - 'A'
    26  		}
    27  		if c != prefix[i] {
    28  			return i
    29  		}
    30  	}
    31  	return n
    32  }
    33  
    34  // special returns the floating-point value for the special,
    35  // possibly signed floating-point representations inf, infinity,
    36  // and NaN. The result is ok if a prefix of s contains one
    37  // of these representations and n is the length of that prefix.
    38  // The character case is ignored.
    39  func special(s string) (f float64, n int, ok bool) {
    40  	if len(s) == 0 {
    41  		return 0, 0, false
    42  	}
    43  	sign := 1
    44  	nsign := 0
    45  	switch s[0] {
    46  	case '+', '-':
    47  		if s[0] == '-' {
    48  			sign = -1
    49  		}
    50  		nsign = 1
    51  		s = s[1:]
    52  		fallthrough
    53  	case 'i', 'I':
    54  		n := commonPrefixLenIgnoreCase(s, "infinity")
    55  		// Anything longer than "inf" is ok, but if we
    56  		// don't have "infinity", only consume "inf".
    57  		if 3 < n && n < 8 {
    58  			n = 3
    59  		}
    60  		if n == 3 || n == 8 {
    61  			return math.Inf(sign), nsign + n, true
    62  		}
    63  	case 'n', 'N':
    64  		if commonPrefixLenIgnoreCase(s, "nan") == 3 {
    65  			return math.NaN(), 3, true
    66  		}
    67  	}
    68  	return 0, 0, false
    69  }
    70  
    71  func (b *decimal) set(s string) (ok bool) {
    72  	i := 0
    73  	b.neg = false
    74  	b.trunc = false
    75  
    76  	// optional sign
    77  	if i >= len(s) {
    78  		return
    79  	}
    80  	switch s[i] {
    81  	case '+':
    82  		i++
    83  	case '-':
    84  		i++
    85  		b.neg = true
    86  	}
    87  
    88  	// digits
    89  	sawdot := false
    90  	sawdigits := false
    91  	for ; i < len(s); i++ {
    92  		switch {
    93  		case s[i] == '_':
    94  			// readFloat already checked underscores
    95  			continue
    96  		case s[i] == '.':
    97  			if sawdot {
    98  				return
    99  			}
   100  			sawdot = true
   101  			b.dp = b.nd
   102  			continue
   103  
   104  		case '0' <= s[i] && s[i] <= '9':
   105  			sawdigits = true
   106  			if s[i] == '0' && b.nd == 0 { // ignore leading zeros
   107  				b.dp--
   108  				continue
   109  			}
   110  			if b.nd < len(b.d) {
   111  				b.d[b.nd] = s[i]
   112  				b.nd++
   113  			} else if s[i] != '0' {
   114  				b.trunc = true
   115  			}
   116  			continue
   117  		}
   118  		break
   119  	}
   120  	if !sawdigits {
   121  		return
   122  	}
   123  	if !sawdot {
   124  		b.dp = b.nd
   125  	}
   126  
   127  	// optional exponent moves decimal point.
   128  	// if we read a very large, very long number,
   129  	// just be sure to move the decimal point by
   130  	// a lot (say, 100000).  it doesn't matter if it's
   131  	// not the exact number.
   132  	if i < len(s) && lower(s[i]) == 'e' {
   133  		i++
   134  		if i >= len(s) {
   135  			return
   136  		}
   137  		esign := 1
   138  		switch s[i] {
   139  		case '+':
   140  			i++
   141  		case '-':
   142  			i++
   143  			esign = -1
   144  		}
   145  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   146  			return
   147  		}
   148  		e := 0
   149  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   150  			if s[i] == '_' {
   151  				// readFloat already checked underscores
   152  				continue
   153  			}
   154  			if e < 10000 {
   155  				e = e*10 + int(s[i]) - '0'
   156  			}
   157  		}
   158  		b.dp += e * esign
   159  	}
   160  
   161  	if i != len(s) {
   162  		return
   163  	}
   164  
   165  	ok = true
   166  	return
   167  }
   168  
   169  // readFloat reads a decimal or hexadecimal mantissa and exponent from a float
   170  // string representation in s; the number may be followed by other characters.
   171  // readFloat reports the number of bytes consumed (i), and whether the number
   172  // is valid (ok).
        //
        // The other results are:
        //   - mantissa: the leading digits accumulated as an integer (at most 19
        //     decimal or 16 hexadecimal digits; leading zeros are skipped).
        //   - exp: the exponent to apply to mantissa, a power of 10 for decimal
        //     input and a power of 2 for hexadecimal input; left 0 when mantissa is 0.
        //   - neg: whether the input began with '-'.
        //   - trunc: whether non-zero digits beyond the mantissa limit were dropped.
        //   - hex: whether the input used a 0x/0X prefix.
        //
        // All results are named and the failure paths use naked returns, so when
        // ok is false the other results hold whatever had been parsed so far.
   173  func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) {
        	// underscores records whether any '_' was seen; their placement is
        	// validated once at the end with underscoreOK.
   174  	underscores := false
   175  
   176  	// optional sign
   177  	if i >= len(s) {
   178  		return
   179  	}
   180  	switch s[i] {
   181  	case '+':
   182  		i++
   183  	case '-':
   184  		i++
   185  		neg = true
   186  	}
   187  
   188  	// digits
   189  	base := uint64(10)
   190  	maxMantDigits := 19 // 10^19 fits in uint64
   191  	expChar := byte('e')
        	// A hex prefix is only recognized when at least one more byte follows "0x".
   192  	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
   193  		base = 16
   194  		maxMantDigits = 16 // 16 hex digits fit in uint64
   195  		i += 2
   196  		expChar = 'p'
   197  		hex = true
   198  	}
   199  	sawdot := false
   200  	sawdigits := false
        	// nd counts digits seen after any leading zeros, ndMant counts the
        	// digits actually accumulated into mantissa, and dp is the position of
        	// the decimal point measured in digits.
   201  	nd := 0
   202  	ndMant := 0
   203  	dp := 0
   204  loop:
   205  	for ; i < len(s); i++ {
   206  		switch c := s[i]; true {
   207  		case c == '_':
   208  			underscores = true
   209  			continue
   210  
   211  		case c == '.':
   212  			if sawdot {
        				// A second '.' ends the number; it is left unconsumed.
   213  				break loop
   214  			}
   215  			sawdot = true
   216  			dp = nd
   217  			continue
   218  
   219  		case '0' <= c && c <= '9':
   220  			sawdigits = true
   221  			if c == '0' && nd == 0 { // ignore leading zeros
   222  				dp--
   223  				continue
   224  			}
   225  			nd++
   226  			if ndMant < maxMantDigits {
   227  				mantissa *= base
   228  				mantissa += uint64(c - '0')
   229  				ndMant++
   230  			} else if c != '0' {
        				// The mantissa is full; note that a non-zero digit was dropped.
   231  				trunc = true
   232  			}
   233  			continue
   234  
   235  		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
   236  			sawdigits = true
   237  			nd++
   238  			if ndMant < maxMantDigits {
   239  				mantissa *= 16
   240  				mantissa += uint64(lower(c) - 'a' + 10)
   241  				ndMant++
   242  			} else {
        				// A dropped a-f digit is always non-zero.
   243  				trunc = true
   244  			}
   245  			continue
   246  		}
        		// No case matched: this statement is outside the switch, so it
        		// ends the for loop at the first byte that is not part of the mantissa.
   247  		break
   248  	}
   249  	if !sawdigits {
   250  		return
   251  	}
   252  	if !sawdot {
   253  		dp = nd
   254  	}
   255  
        	// Each hex digit carries 4 bits, so convert the digit counts to bit
        	// counts; from here on dp and ndMant are in the same unit as the exponent.
   256  	if base == 16 {
   257  		dp *= 4
   258  		ndMant *= 4
   259  	}
   260  
   261  	// optional exponent moves decimal point.
   262  	// if we read a very large, very long number,
   263  	// just be sure to move the decimal point by
   264  	// a lot (say, 100000).  it doesn't matter if it's
   265  	// not the exact number.
   266  	if i < len(s) && lower(s[i]) == expChar {
   267  		i++
   268  		if i >= len(s) {
   269  			return
   270  		}
   271  		esign := 1
   272  		switch s[i] {
   273  		case '+':
   274  			i++
   275  		case '-':
   276  			i++
   277  			esign = -1
   278  		}
   279  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   280  			return
   281  		}
   282  		e := 0
   283  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   284  			if s[i] == '_' {
   285  				underscores = true
   286  				continue
   287  			}
        			// Stop accumulating once e reaches 10000; the remaining
        			// exponent digits are still consumed.
   288  			if e < 10000 {
   289  				e = e*10 + int(s[i]) - '0'
   290  			}
   291  		}
   292  		dp += e * esign
   293  	} else if base == 16 {
   294  		// Must have exponent.
   295  		return
   296  	}
   297  
        	// The value is mantissa scaled by dp-ndMant places; a zero mantissa
        	// keeps exp at 0.
   298  	if mantissa != 0 {
   299  		exp = dp - ndMant
   300  	}
   301  
        	// Underscore placement is checked only over the consumed prefix.
   302  	if underscores && !underscoreOK(s[:i]) {
   303  		return
   304  	}
   305  
   306  	ok = true
   307  	return
   308  }
   309  
   310  // decimal power of ten to binary power of two.
        // powtab[k] is the number of binary places floatBits shifts by when the
        // magnitude of d.dp is k; for k >= len(powtab) it shifts by 27 instead.
   311  var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
   312  
        // floatBits converts d to the bit pattern of a floating-point value in the
        // format described by flt and returns it in b. When the value is too large
        // for the format, b holds the bits of an infinity and overflow is true.
        // The sign bit is taken from d.neg. The conversion shifts d in place, so d
        // no longer holds its original value afterwards.
   313  func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
   314  	var exp int
   315  	var mant uint64
   316  
   317  	// Zero is always a special case.
   318  	if d.nd == 0 {
   319  		mant = 0
   320  		exp = flt.bias
   321  		goto out
   322  	}
   323  
   324  	// Obvious overflow/underflow.
   325  	// These bounds are for 64-bit floats.
   326  	// Will have to change if we want to support 80-bit floats in the future.
   327  	if d.dp > 310 {
   328  		goto overflow
   329  	}
   330  	if d.dp < -330 {
   331  		// zero
   332  		mant = 0
   333  		exp = flt.bias
   334  		goto out
   335  	}
   336  
   337  	// Scale by powers of two until in range [0.5, 1.0)
        	// exp accumulates the net binary shift applied to d.
   338  	exp = 0
   339  	for d.dp > 0 {
   340  		var n int
   341  		if d.dp >= len(powtab) {
   342  			n = 27
   343  		} else {
   344  			n = powtab[d.dp]
   345  		}
   346  		d.Shift(-n)
   347  		exp += n
   348  	}
   349  	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
   350  		var n int
   351  		if -d.dp >= len(powtab) {
   352  			n = 27
   353  		} else {
   354  			n = powtab[-d.dp]
   355  		}
   356  		d.Shift(n)
   357  		exp -= n
   358  	}
   359  
   360  	// Our range is [0.5,1) but floating point range is [1,2).
   361  	exp--
   362  
   363  	// Minimum representable exponent is flt.bias+1.
   364  	// If the exponent is smaller, move it up and
   365  	// adjust d accordingly.
   366  	if exp < flt.bias+1 {
   367  		n := flt.bias + 1 - exp
   368  		d.Shift(-n)
   369  		exp += n
   370  	}
   371  
        	// A biased exponent of all ones is not a finite value.
   372  	if exp-flt.bias >= 1<<flt.expbits-1 {
   373  		goto overflow
   374  	}
   375  
   376  	// Extract 1+flt.mantbits bits.
   377  	d.Shift(int(1 + flt.mantbits))
   378  	mant = d.RoundedInteger()
   379  
   380  	// Rounding might have added a bit; shift down.
   381  	if mant == 2<<flt.mantbits {
   382  		mant >>= 1
   383  		exp++
        		// The bumped exponent may itself have moved out of range.
   384  		if exp-flt.bias >= 1<<flt.expbits-1 {
   385  			goto overflow
   386  		}
   387  	}
   388  
   389  	// Denormalized?
        	// The leading mantissa bit is clear, so use the smallest exponent
        	// encoding (biased exponent field of zero).
   390  	if mant&(1<<flt.mantbits) == 0 {
   391  		exp = flt.bias
   392  	}
   393  	goto out
   394  
   395  overflow:
   396  	// ±Inf
   397  	mant = 0
   398  	exp = 1<<flt.expbits - 1 + flt.bias
   399  	overflow = true
   400  
   401  out:
   402  	// Assemble bits.
        	// Layout, low to high: flt.mantbits mantissa bits (the leading bit is
        	// masked off), flt.expbits biased-exponent bits, then the sign bit.
   403  	bits := mant & (uint64(1)<<flt.mantbits - 1)
   404  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   405  	if d.neg {
   406  		bits |= 1 << flt.mantbits << flt.expbits
   407  	}
   408  	return bits, overflow
   409  }
   410  
   411  // Exact powers of 10.
   412  var float64pow10 = []float64{
   413  	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
   414  	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
   415  	1e20, 1e21, 1e22,
   416  }
   417  var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
   418  
   419  // If possible to convert decimal representation to 64-bit float f exactly,
   420  // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
   421  // Three common cases:
   422  //
   423  //	value is exact integer
   424  //	value is exact integer * exact power of ten
   425  //	value is exact integer / exact power of ten
   426  //
   427  // These all produce potentially inexact but correctly rounded answers.
   428  func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
   429  	if mantissa>>float64info.mantbits != 0 {
   430  		return
   431  	}
   432  	f = float64(mantissa)
   433  	if neg {
   434  		f = -f
   435  	}
   436  	switch {
   437  	case exp == 0:
   438  		// an integer.
   439  		return f, true
   440  	// Exact integers are <= 10^15.
   441  	// Exact powers of ten are <= 10^22.
   442  	case exp > 0 && exp <= 15+22: // int * 10^k
   443  		// If exponent is big but number of digits is not,
   444  		// can move a few zeros into the integer part.
   445  		if exp > 22 {
   446  			f *= float64pow10[exp-22]
   447  			exp = 22
   448  		}
   449  		if f > 1e15 || f < -1e15 {
   450  			// the exponent was really too large.
   451  			return
   452  		}
   453  		return f * float64pow10[exp], true
   454  	case exp < 0 && exp >= -22: // int / 10^k
   455  		return f / float64pow10[-exp], true
   456  	}
   457  	return
   458  }
   459  
   460  // If possible to compute mantissa*10^exp to 32-bit float f exactly,
   461  // entirely in floating-point math, do so, avoiding the machinery above.
   462  func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
   463  	if mantissa>>float32info.mantbits != 0 {
   464  		return
   465  	}
   466  	f = float32(mantissa)
   467  	if neg {
   468  		f = -f
   469  	}
   470  	switch {
   471  	case exp == 0:
   472  		return f, true
   473  	// Exact integers are <= 10^7.
   474  	// Exact powers of ten are <= 10^10.
   475  	case exp > 0 && exp <= 7+10: // int * 10^k
   476  		// If exponent is big but number of digits is not,
   477  		// can move a few zeros into the integer part.
   478  		if exp > 10 {
   479  			f *= float32pow10[exp-10]
   480  			exp = 10
   481  		}
   482  		if f > 1e7 || f < -1e7 {
   483  			// the exponent was really too large.
   484  			return
   485  		}
   486  		return f * float32pow10[exp], true
   487  	case exp < 0 && exp >= -10: // int / 10^k
   488  		return f / float32pow10[-exp], true
   489  	}
   490  	return
   491  }
   492  
   493  // atofHex converts the hex floating-point string s
   494  // to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
   495  // and returns it as a float64.
   496  // The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
   497  // If trunc is true, trailing non-zero bits have been omitted from the mantissa.
        // s itself is only used to build the range error that is returned, together
        // with an infinity, when the value is too large for the format.
   498  func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
        	// maxExp is the largest exponent of a finite value (anything above it
        	// becomes infinity below); minExp is the smallest exponent used
        	// before the value has to be denormalized.
   499  	maxExp := 1<<flt.expbits + flt.bias - 2
   500  	minExp := flt.bias + 1
   501  	exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.
   502  
   503  	// Shift mantissa and exponent to bring representation into float range.
   504  	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
   505  	// For rounding, we need two more, where the bottom bit represents
   506  	// whether that bit or any later bit was non-zero.
   507  	// (If the mantissa has already lost non-zero bits, trunc is true,
   508  	// and we OR in a 1 below after shifting left appropriately.)
   509  	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
   510  		mantissa <<= 1
   511  		exp--
   512  	}
   513  	if trunc {
   514  		mantissa |= 1
   515  	}
        	// Shift right while the mantissa is too wide; OR-ing the dropped bit
        	// back into bit 0 keeps a record that a non-zero bit was lost.
   516  	for mantissa>>(1+flt.mantbits+2) != 0 {
   517  		mantissa = mantissa>>1 | mantissa&1
   518  		exp++
   519  	}
   520  
   521  	// If exponent is too negative,
   522  	// denormalize in hopes of making it representable.
   523  	// (The -2 is for the rounding bits.)
   524  	for mantissa > 1 && exp < minExp-2 {
   525  		mantissa = mantissa>>1 | mantissa&1
   526  		exp++
   527  	}
   528  
   529  	// Round using two bottom bits.
   530  	round := mantissa & 3
   531  	mantissa >>= 2
   532  	round |= mantissa & 1 // round to even (round up if mantissa is odd)
   533  	exp += 2
   534  	if round == 3 {
   535  		mantissa++
        		// Rounding up carried into a new top bit; renormalize.
   536  		if mantissa == 1<<(1+flt.mantbits) {
   537  			mantissa >>= 1
   538  			exp++
   539  		}
   540  	}
   541  
   542  	if mantissa>>flt.mantbits == 0 { // Denormal or zero.
   543  		exp = flt.bias
   544  	}
   545  	var err error
   546  	if exp > maxExp { // infinity and range error
        		// Only the leading bit is set; it is masked off when the bits are
        		// assembled, leaving a zero fraction with the maximum exponent field.
   547  		mantissa = 1 << flt.mantbits
   548  		exp = maxExp + 1
   549  		err = rangeError(fnParseFloat, s)
   550  	}
   551  
        	// Assemble, low to high: fraction bits (leading bit masked off),
        	// biased exponent, sign.
   552  	bits := mantissa & (1<<flt.mantbits - 1)
   553  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   554  	if neg {
   555  		bits |= 1 << flt.mantbits << flt.expbits
   556  	}
        	// For float32 the assembled bits are a 32-bit pattern; decode it as a
        	// float32 and widen the result to float64.
   557  	if flt == &float32info {
   558  		return float64(math.Float32frombits(uint32(bits))), err
   559  	}
   560  	return math.Float64frombits(bits), err
   561  }
   562  
        // fnParseFloat is the function name passed to syntaxError and rangeError
        // when this file constructs an error.
   563  const fnParseFloat = "ParseFloat"
   564  
   565  func atof32(s string) (f float32, n int, err error) {
   566  	if val, n, ok := special(s); ok {
   567  		return float32(val), n, nil
   568  	}
   569  
   570  	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
   571  	if !ok {
   572  		return 0, n, syntaxError(fnParseFloat, s)
   573  	}
   574  
   575  	if hex {
   576  		f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc)
   577  		return float32(f), n, err
   578  	}
   579  
   580  	if optimize {
   581  		// Try pure floating-point arithmetic conversion, and if that fails,
   582  		// the Eisel-Lemire algorithm.
   583  		if !trunc {
   584  			if f, ok := atof32exact(mantissa, exp, neg); ok {
   585  				return f, n, nil
   586  			}
   587  		}
   588  		f, ok := eiselLemire32(mantissa, exp, neg)
   589  		if ok {
   590  			if !trunc {
   591  				return f, n, nil
   592  			}
   593  			// Even if the mantissa was truncated, we may
   594  			// have found the correct result. Confirm by
   595  			// converting the upper mantissa bound.
   596  			fUp, ok := eiselLemire32(mantissa+1, exp, neg)
   597  			if ok && f == fUp {
   598  				return f, n, nil
   599  			}
   600  		}
   601  	}
   602  
   603  	// Slow fallback.
   604  	var d decimal
   605  	if !d.set(s[:n]) {
   606  		return 0, n, syntaxError(fnParseFloat, s)
   607  	}
   608  	b, ovf := d.floatBits(&float32info)
   609  	f = math.Float32frombits(uint32(b))
   610  	if ovf {
   611  		err = rangeError(fnParseFloat, s)
   612  	}
   613  	return f, n, err
   614  }
   615  
   616  func atof64(s string) (f float64, n int, err error) {
   617  	if val, n, ok := special(s); ok {
   618  		return val, n, nil
   619  	}
   620  
   621  	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
   622  	if !ok {
   623  		return 0, n, syntaxError(fnParseFloat, s)
   624  	}
   625  
   626  	if hex {
   627  		f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
   628  		return f, n, err
   629  	}
   630  
   631  	if optimize {
   632  		// Try pure floating-point arithmetic conversion, and if that fails,
   633  		// the Eisel-Lemire algorithm.
   634  		if !trunc {
   635  			if f, ok := atof64exact(mantissa, exp, neg); ok {
   636  				return f, n, nil
   637  			}
   638  		}
   639  		f, ok := eiselLemire64(mantissa, exp, neg)
   640  		if ok {
   641  			if !trunc {
   642  				return f, n, nil
   643  			}
   644  			// Even if the mantissa was truncated, we may
   645  			// have found the correct result. Confirm by
   646  			// converting the upper mantissa bound.
   647  			fUp, ok := eiselLemire64(mantissa+1, exp, neg)
   648  			if ok && f == fUp {
   649  				return f, n, nil
   650  			}
   651  		}
   652  	}
   653  
   654  	// Slow fallback.
   655  	var d decimal
   656  	if !d.set(s[:n]) {
   657  		return 0, n, syntaxError(fnParseFloat, s)
   658  	}
   659  	b, ovf := d.floatBits(&float64info)
   660  	f = math.Float64frombits(b)
   661  	if ovf {
   662  		err = rangeError(fnParseFloat, s)
   663  	}
   664  	return f, n, err
   665  }
   666  
   667  // ParseFloat converts the string s to a floating-point number
   668  // with the precision specified by bitSize: 32 for float32, or 64 for float64.
   669  // When bitSize=32, the result still has type float64, but it will be
   670  // convertible to float32 without changing its value.
   671  //
   672  // ParseFloat accepts decimal and hexadecimal floating-point numbers
   673  // as defined by the Go syntax for [floating-point literals].
   674  // If s is well-formed and near a valid floating-point number,
   675  // ParseFloat returns the nearest floating-point number rounded
   676  // using IEEE754 unbiased rounding.
   677  // (Parsing a hexadecimal floating-point value only rounds when
   678  // there are more bits in the hexadecimal representation than
   679  // will fit in the mantissa.)
   680  //
   681  // The errors that ParseFloat returns have concrete type *NumError
   682  // and include err.Num = s.
   683  //
   684  // If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
   685  //
   686  // If s is syntactically well-formed but is more than 1/2 ULP
   687  // away from the largest floating point number of the given size,
   688  // ParseFloat returns f = ±Inf, err.Err = ErrRange.
   689  //
   690  // ParseFloat recognizes the string "NaN", and the (possibly signed) strings "Inf" and "Infinity"
   691  // as their respective special floating point values. It ignores case when matching.
   692  //
   693  // [floating-point literals]: https://go.dev/ref/spec#Floating-point_literals
   694  func ParseFloat(s string, bitSize int) (float64, error) {
   695  	f, n, err := parseFloatPrefix(s, bitSize)
   696  	if n != len(s) && (err == nil || err.(*NumError).Err != ErrSyntax) {
   697  		return 0, syntaxError(fnParseFloat, s)
   698  	}
   699  	return f, err
   700  }
   701  
   702  func parseFloatPrefix(s string, bitSize int) (float64, int, error) {
   703  	if bitSize == 32 {
   704  		f, n, err := atof32(s)
   705  		return float64(f), n, err
   706  	}
   707  	return atof64(s)
   708  }
   709
View as plain text