Source file src/encoding/json/stream.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !goexperiment.jsonv2
     6  
     7  package json
     8  
     9  import (
    10  	"bytes"
    11  	"errors"
    12  	"io"
    13  )
    14  
// A Decoder reads and decodes JSON values from an input stream.
//
// Input is pulled from r into buf on demand; each call to Decode or Token
// consumes bytes from buf starting at scanp.
type Decoder struct {
	r       io.Reader   // source of input; read by refill
	buf     []byte      // data read from r; buf[scanp:] has not been consumed yet
	d       decodeState // unmarshaling state, re-initialized for every decoded value
	scanp   int   // start of unread data in buf
	scanned int64 // amount of data already scanned
	scan    scanner     // scanner used by readValue to find the end of a value
	err     error       // sticky error; once set, Decode returns it immediately

	tokenState int   // current state of the Token API (one of the token* constants)
	tokenStack []int // token states saved when Token enters an array or object
}
    28  
    29  // NewDecoder returns a new decoder that reads from r.
    30  //
    31  // The decoder introduces its own buffering and may
    32  // read data from r beyond the JSON values requested.
    33  func NewDecoder(r io.Reader) *Decoder {
    34  	return &Decoder{r: r}
    35  }
    36  
    37  // UseNumber causes the Decoder to unmarshal a number into an
    38  // interface value as a [Number] instead of as a float64.
    39  func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    40  
    41  // DisallowUnknownFields causes the Decoder to return an error when the destination
    42  // is a struct and the input contains object keys which do not match any
    43  // non-ignored, exported fields in the destination.
    44  func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
    45  
    46  // Decode reads the next JSON-encoded value from its
    47  // input and stores it in the value pointed to by v.
    48  //
    49  // See the documentation for [Unmarshal] for details about
    50  // the conversion of JSON into a Go value.
    51  func (dec *Decoder) Decode(v any) error {
    52  	if dec.err != nil {
    53  		return dec.err
    54  	}
    55  
    56  	if err := dec.tokenPrepareForDecode(); err != nil {
    57  		return err
    58  	}
    59  
    60  	if !dec.tokenValueAllowed() {
    61  		return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
    62  	}
    63  
    64  	// Read whole value into buffer.
    65  	n, err := dec.readValue()
    66  	if err != nil {
    67  		return err
    68  	}
    69  	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
    70  	dec.scanp += n
    71  
    72  	// Don't save err from unmarshal into dec.err:
    73  	// the connection is still usable since we read a complete JSON
    74  	// object from it before the error happened.
    75  	err = dec.d.unmarshal(v)
    76  
    77  	// fixup token streaming state
    78  	dec.tokenValueEnd()
    79  
    80  	return err
    81  }
    82  
    83  // Buffered returns a reader of the data remaining in the Decoder's
    84  // buffer. The reader is valid until the next call to [Decoder.Decode].
    85  func (dec *Decoder) Buffered() io.Reader {
    86  	return bytes.NewReader(dec.buf[dec.scanp:])
    87  }
    88  
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// Scanning starts at dec.scanp and does not advance it; the caller consumes
// the returned number of bytes. More input is pulled in with dec.refill
// whenever the buffered data runs out before the value is complete.
// A scanner error or a read error is stored in dec.err and returned.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	scanp := dec.scanp
	var err error // error from the most recent refill, reported only after the buffer is scanned
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					// Include the closing delimiter in the value.
					scanp++
					break Input
				}
			case scanError:
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feed a space to see whether the buffered data
				// already forms a complete value.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Input ended with non-space data still in the buffer:
				// report a truncated value rather than a plain EOF.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the unread data to the front of the buffer and
		// change dec.scanp, so keep the scan position relative to it.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
   147  
   148  func (dec *Decoder) refill() error {
   149  	// Make room to read more into the buffer.
   150  	// First slide down data already consumed.
   151  	if dec.scanp > 0 {
   152  		dec.scanned += int64(dec.scanp)
   153  		n := copy(dec.buf, dec.buf[dec.scanp:])
   154  		dec.buf = dec.buf[:n]
   155  		dec.scanp = 0
   156  	}
   157  
   158  	// Grow buffer if not large enough.
   159  	const minRead = 512
   160  	if cap(dec.buf)-len(dec.buf) < minRead {
   161  		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   162  		copy(newBuf, dec.buf)
   163  		dec.buf = newBuf
   164  	}
   165  
   166  	// Read. Delay error for next iteration (after scan).
   167  	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   168  	dec.buf = dec.buf[0 : len(dec.buf)+n]
   169  
   170  	return err
   171  }
   172  
   173  func nonSpace(b []byte) bool {
   174  	for _, c := range b {
   175  		if !isSpace(c) {
   176  			return true
   177  		}
   178  	}
   179  	return false
   180  }
   181  
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination of the encoded output
	err        error     // sticky error from a failed write; returned by later Encode calls
	escapeHTML bool      // passed to the marshaler as encOpts.escapeHTML; see SetEscapeHTML

	indentBuf    []byte // scratch buffer reused by Encode for indented output
	indentPrefix string // prefix given to SetIndent
	indentValue  string // indent given to SetIndent
}
   192  
   193  // NewEncoder returns a new encoder that writes to w.
   194  func NewEncoder(w io.Writer) *Encoder {
   195  	return &Encoder{w: w, escapeHTML: true}
   196  }
   197  
   198  // Encode writes the JSON encoding of v to the stream,
   199  // with insignificant space characters elided,
   200  // followed by a newline character.
   201  //
   202  // See the documentation for [Marshal] for details about the
   203  // conversion of Go values to JSON.
   204  func (enc *Encoder) Encode(v any) error {
   205  	if enc.err != nil {
   206  		return enc.err
   207  	}
   208  
   209  	e := newEncodeState()
   210  	defer encodeStatePool.Put(e)
   211  
   212  	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
   213  	if err != nil {
   214  		return err
   215  	}
   216  
   217  	// Terminate each value with a newline.
   218  	// This makes the output look a little nicer
   219  	// when debugging, and some kind of space
   220  	// is required if the encoded value was a number,
   221  	// so that the reader knows there aren't more
   222  	// digits coming.
   223  	e.WriteByte('\n')
   224  
   225  	b := e.Bytes()
   226  	if enc.indentPrefix != "" || enc.indentValue != "" {
   227  		enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue)
   228  		if err != nil {
   229  			return err
   230  		}
   231  		b = enc.indentBuf
   232  	}
   233  	if _, err = enc.w.Write(b); err != nil {
   234  		enc.err = err
   235  	}
   236  	return err
   237  }
   238  
   239  // SetIndent instructs the encoder to format each subsequent encoded
   240  // value as if indented by the package-level function Indent(dst, src, prefix, indent).
   241  // Calling SetIndent("", "") disables indentation.
   242  func (enc *Encoder) SetIndent(prefix, indent string) {
   243  	enc.indentPrefix = prefix
   244  	enc.indentValue = indent
   245  }
   246  
   247  // SetEscapeHTML specifies whether problematic HTML characters
   248  // should be escaped inside JSON quoted strings.
   249  // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
   250  // to avoid certain safety problems that can arise when embedding JSON in HTML.
   251  //
   252  // In non-HTML settings where the escaping interferes with the readability
   253  // of the output, SetEscapeHTML(false) disables this behavior.
   254  func (enc *Encoder) SetEscapeHTML(on bool) {
   255  	enc.escapeHTML = on
   256  }
   257  
   258  // RawMessage is a raw encoded JSON value.
   259  // It implements [Marshaler] and [Unmarshaler] and can
   260  // be used to delay JSON decoding or precompute a JSON encoding.
   261  type RawMessage []byte
   262  
   263  // MarshalJSON returns m as the JSON encoding of m.
   264  func (m RawMessage) MarshalJSON() ([]byte, error) {
   265  	if m == nil {
   266  		return []byte("null"), nil
   267  	}
   268  	return m, nil
   269  }
   270  
   271  // UnmarshalJSON sets *m to a copy of data.
   272  func (m *RawMessage) UnmarshalJSON(data []byte) error {
   273  	if m == nil {
   274  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   275  	}
   276  	*m = append((*m)[0:0], data...)
   277  	return nil
   278  }
   279  
   280  var _ Marshaler = (*RawMessage)(nil)
   281  var _ Unmarshaler = (*RawMessage)(nil)
   282  
// A Token holds a value of one of these types:
//
//   - [Delim], for the four JSON delimiters [ ] { }
//   - bool, for JSON booleans
//   - float64, for JSON numbers (the default)
//   - [Number], for JSON numbers (after [Decoder.UseNumber] has been called)
//   - string, for JSON string literals
//   - nil, for JSON null
type Token any
   292  
// States of the Token API, stored in Decoder.tokenState.
const (
	tokenTopValue = iota // at top level, outside any array or object
	tokenArrayStart      // just after '['
	tokenArrayValue      // in an array, just after ','
	tokenArrayComma      // in an array, just after a value
	tokenObjectStart     // just after '{'
	tokenObjectKey       // in an object, just after ','
	tokenObjectColon     // in an object, just after a key
	tokenObjectValue     // in an object, just after ':'
	tokenObjectComma     // in an object, just after a value
)
   304  
   305  // advance tokenstate from a separator state to a value state
   306  func (dec *Decoder) tokenPrepareForDecode() error {
   307  	// Note: Not calling peek before switch, to avoid
   308  	// putting peek into the standard Decode path.
   309  	// peek is only called when using the Token API.
   310  	switch dec.tokenState {
   311  	case tokenArrayComma:
   312  		c, err := dec.peek()
   313  		if err != nil {
   314  			return err
   315  		}
   316  		if c != ',' {
   317  			return &SyntaxError{"expected comma after array element", dec.InputOffset()}
   318  		}
   319  		dec.scanp++
   320  		dec.tokenState = tokenArrayValue
   321  	case tokenObjectColon:
   322  		c, err := dec.peek()
   323  		if err != nil {
   324  			return err
   325  		}
   326  		if c != ':' {
   327  			return &SyntaxError{"expected colon after object key", dec.InputOffset()}
   328  		}
   329  		dec.scanp++
   330  		dec.tokenState = tokenObjectValue
   331  	}
   332  	return nil
   333  }
   334  
   335  func (dec *Decoder) tokenValueAllowed() bool {
   336  	switch dec.tokenState {
   337  	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   338  		return true
   339  	}
   340  	return false
   341  }
   342  
   343  func (dec *Decoder) tokenValueEnd() {
   344  	switch dec.tokenState {
   345  	case tokenArrayStart, tokenArrayValue:
   346  		dec.tokenState = tokenArrayComma
   347  	case tokenObjectValue:
   348  		dec.tokenState = tokenObjectComma
   349  	}
   350  }
   351  
   352  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   353  type Delim rune
   354  
   355  func (d Delim) String() string {
   356  	return string(d)
   357  }
   358  
   359  // Token returns the next JSON token in the input stream.
   360  // At the end of the input stream, Token returns nil, [io.EOF].
   361  //
   362  // Token guarantees that the delimiters [ ] { } it returns are
   363  // properly nested and matched: if Token encounters an unexpected
   364  // delimiter in the input, it will return an error.
   365  //
   366  // The input stream consists of basic JSON values—bool, string,
   367  // number, and null—along with delimiters [ ] { } of type [Delim]
   368  // to mark the start and end of arrays and objects.
   369  // Commas and colons are elided.
   370  func (dec *Decoder) Token() (Token, error) {
   371  	for {
   372  		c, err := dec.peek()
   373  		if err != nil {
   374  			return nil, err
   375  		}
   376  		switch c {
   377  		case '[':
   378  			if !dec.tokenValueAllowed() {
   379  				return dec.tokenError(c)
   380  			}
   381  			dec.scanp++
   382  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   383  			dec.tokenState = tokenArrayStart
   384  			return Delim('['), nil
   385  
   386  		case ']':
   387  			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
   388  				return dec.tokenError(c)
   389  			}
   390  			dec.scanp++
   391  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   392  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   393  			dec.tokenValueEnd()
   394  			return Delim(']'), nil
   395  
   396  		case '{':
   397  			if !dec.tokenValueAllowed() {
   398  				return dec.tokenError(c)
   399  			}
   400  			dec.scanp++
   401  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   402  			dec.tokenState = tokenObjectStart
   403  			return Delim('{'), nil
   404  
   405  		case '}':
   406  			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
   407  				return dec.tokenError(c)
   408  			}
   409  			dec.scanp++
   410  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   411  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   412  			dec.tokenValueEnd()
   413  			return Delim('}'), nil
   414  
   415  		case ':':
   416  			if dec.tokenState != tokenObjectColon {
   417  				return dec.tokenError(c)
   418  			}
   419  			dec.scanp++
   420  			dec.tokenState = tokenObjectValue
   421  			continue
   422  
   423  		case ',':
   424  			if dec.tokenState == tokenArrayComma {
   425  				dec.scanp++
   426  				dec.tokenState = tokenArrayValue
   427  				continue
   428  			}
   429  			if dec.tokenState == tokenObjectComma {
   430  				dec.scanp++
   431  				dec.tokenState = tokenObjectKey
   432  				continue
   433  			}
   434  			return dec.tokenError(c)
   435  
   436  		case '"':
   437  			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
   438  				var x string
   439  				old := dec.tokenState
   440  				dec.tokenState = tokenTopValue
   441  				err := dec.Decode(&x)
   442  				dec.tokenState = old
   443  				if err != nil {
   444  					return nil, err
   445  				}
   446  				dec.tokenState = tokenObjectColon
   447  				return x, nil
   448  			}
   449  			fallthrough
   450  
   451  		default:
   452  			if !dec.tokenValueAllowed() {
   453  				return dec.tokenError(c)
   454  			}
   455  			var x any
   456  			if err := dec.Decode(&x); err != nil {
   457  				return nil, err
   458  			}
   459  			return x, nil
   460  		}
   461  	}
   462  }
   463  
   464  func (dec *Decoder) tokenError(c byte) (Token, error) {
   465  	var context string
   466  	switch dec.tokenState {
   467  	case tokenTopValue:
   468  		context = " looking for beginning of value"
   469  	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   470  		context = " looking for beginning of value"
   471  	case tokenArrayComma:
   472  		context = " after array element"
   473  	case tokenObjectKey:
   474  		context = " looking for beginning of object key string"
   475  	case tokenObjectColon:
   476  		context = " after object key"
   477  	case tokenObjectComma:
   478  		context = " after object key:value pair"
   479  	}
   480  	return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()}
   481  }
   482  
   483  // More reports whether there is another element in the
   484  // current array or object being parsed.
   485  func (dec *Decoder) More() bool {
   486  	c, err := dec.peek()
   487  	return err == nil && c != ']' && c != '}'
   488  }
   489  
   490  func (dec *Decoder) peek() (byte, error) {
   491  	var err error
   492  	for {
   493  		for i := dec.scanp; i < len(dec.buf); i++ {
   494  			c := dec.buf[i]
   495  			if isSpace(c) {
   496  				continue
   497  			}
   498  			dec.scanp = i
   499  			return c, nil
   500  		}
   501  		// buffer has been scanned, now report any error
   502  		if err != nil {
   503  			return 0, err
   504  		}
   505  		err = dec.refill()
   506  	}
   507  }
   508  
   509  // InputOffset returns the input stream byte offset of the current decoder position.
   510  // The offset gives the location of the end of the most recently returned token
   511  // and the beginning of the next token.
   512  func (dec *Decoder) InputOffset() int64 {
   513  	return dec.scanned + int64(dec.scanp)
   514  }
   515  

View as plain text