1// Copyright 2018 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package json 6 7import ( 8 "bytes" 9 "fmt" 10 "io" 11 "regexp" 12 "unicode/utf8" 13 14 "google.golang.org/protobuf/internal/errors" 15) 16 17// call specifies which Decoder method was invoked. 18type call uint8 19 20const ( 21 readCall call = iota 22 peekCall 23) 24 25const unexpectedFmt = "unexpected token %s" 26 27// ErrUnexpectedEOF means that EOF was encountered in the middle of the input. 28var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF) 29 30// Decoder is a token-based JSON decoder. 31type Decoder struct { 32 // lastCall is last method called, either readCall or peekCall. 33 // Initial value is readCall. 34 lastCall call 35 36 // lastToken contains the last read token. 37 lastToken Token 38 39 // lastErr contains the last read error. 40 lastErr error 41 42 // openStack is a stack containing ObjectOpen and ArrayOpen values. The 43 // top of stack represents the object or the array the current value is 44 // directly located in. 45 openStack []Kind 46 47 // orig is used in reporting line and column. 48 orig []byte 49 // in contains the unconsumed input. 50 in []byte 51} 52 53// NewDecoder returns a Decoder to read the given []byte. 54func NewDecoder(b []byte) *Decoder { 55 return &Decoder{orig: b, in: b} 56} 57 58// Peek looks ahead and returns the next token kind without advancing a read. 59func (d *Decoder) Peek() (Token, error) { 60 defer func() { d.lastCall = peekCall }() 61 if d.lastCall == readCall { 62 d.lastToken, d.lastErr = d.Read() 63 } 64 return d.lastToken, d.lastErr 65} 66 67// Read returns the next JSON token. 68// It will return an error if there is no valid token. 69func (d *Decoder) Read() (Token, error) { 70 const scalar = Null | Bool | Number | String 71 72 defer func() { d.lastCall = readCall }() 73 if d.lastCall == peekCall { 74 return d.lastToken, d.lastErr 75 } 76 77 tok, err := d.parseNext() 78 if err != nil { 79 return Token{}, err 80 } 81 82 switch tok.kind { 83 case EOF: 84 if len(d.openStack) != 0 || 85 d.lastToken.kind&scalar|ObjectClose|ArrayClose == 0 { 86 return Token{}, ErrUnexpectedEOF 87 } 88 89 case Null: 90 if !d.isValueNext() { 91 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) 92 } 93 94 case Bool, Number: 95 if !d.isValueNext() { 96 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) 97 } 98 99 case String: 100 if d.isValueNext() { 101 break 102 } 103 // This string token should only be for a field name. 104 if d.lastToken.kind&(ObjectOpen|comma) == 0 { 105 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) 106 } 107 if len(d.in) == 0 { 108 return Token{}, ErrUnexpectedEOF 109 } 110 if c := d.in[0]; c != ':' { 111 return Token{}, d.newSyntaxError(d.currPos(), `unexpected character %s, missing ":" after field name`, string(c)) 112 } 113 tok.kind = Name 114 d.consume(1) 115 116 case ObjectOpen, ArrayOpen: 117 if !d.isValueNext() { 118 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) 119 } 120 d.openStack = append(d.openStack, tok.kind) 121 122 case ObjectClose: 123 if len(d.openStack) == 0 || 124 d.lastToken.kind == comma || 125 d.openStack[len(d.openStack)-1] != ObjectOpen { 126 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) 127 } 128 d.openStack = d.openStack[:len(d.openStack)-1] 129 130 case ArrayClose: 131 if len(d.openStack) == 0 || 132 d.lastToken.kind == comma || 133 d.openStack[len(d.openStack)-1] != ArrayOpen { 134 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) 135 } 136 d.openStack = d.openStack[:len(d.openStack)-1] 137 138 case comma: 139 if len(d.openStack) == 0 || 140 d.lastToken.kind&(scalar|ObjectClose|ArrayClose) == 0 { 141 return Token{}, d.newSyntaxError(tok.pos, unexpectedFmt, tok.RawString()) 142 } 143 } 144 145 // Update d.lastToken only after validating token to be in the right sequence. 146 d.lastToken = tok 147 148 if d.lastToken.kind == comma { 149 return d.Read() 150 } 151 return tok, nil 152} 153 154// Any sequence that looks like a non-delimiter (for error reporting). 155var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`) 156 157// parseNext parses for the next JSON token. It returns a Token object for 158// different types, except for Name. It does not handle whether the next token 159// is in a valid sequence or not. 160func (d *Decoder) parseNext() (Token, error) { 161 // Trim leading spaces. 162 d.consume(0) 163 164 in := d.in 165 if len(in) == 0 { 166 return d.consumeToken(EOF, 0), nil 167 } 168 169 switch in[0] { 170 case 'n': 171 if n := matchWithDelim("null", in); n != 0 { 172 return d.consumeToken(Null, n), nil 173 } 174 175 case 't': 176 if n := matchWithDelim("true", in); n != 0 { 177 return d.consumeBoolToken(true, n), nil 178 } 179 180 case 'f': 181 if n := matchWithDelim("false", in); n != 0 { 182 return d.consumeBoolToken(false, n), nil 183 } 184 185 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 186 if n, ok := parseNumber(in); ok { 187 return d.consumeToken(Number, n), nil 188 } 189 190 case '"': 191 s, n, err := d.parseString(in) 192 if err != nil { 193 return Token{}, err 194 } 195 return d.consumeStringToken(s, n), nil 196 197 case '{': 198 return d.consumeToken(ObjectOpen, 1), nil 199 200 case '}': 201 return d.consumeToken(ObjectClose, 1), nil 202 203 case '[': 204 return d.consumeToken(ArrayOpen, 1), nil 205 206 case ']': 207 return d.consumeToken(ArrayClose, 1), nil 208 209 case ',': 210 return d.consumeToken(comma, 1), nil 211 } 212 return Token{}, d.newSyntaxError(d.currPos(), "invalid value %s", errRegexp.Find(in)) 213} 214 215// newSyntaxError returns an error with line and column information useful for 216// syntax errors. 217func (d *Decoder) newSyntaxError(pos int, f string, x ...interface{}) error { 218 e := errors.New(f, x...) 219 line, column := d.Position(pos) 220 return errors.New("syntax error (line %d:%d): %v", line, column, e) 221} 222 223// Position returns line and column number of given index of the original input. 224// It will panic if index is out of range. 225func (d *Decoder) Position(idx int) (line int, column int) { 226 b := d.orig[:idx] 227 line = bytes.Count(b, []byte("\n")) + 1 228 if i := bytes.LastIndexByte(b, '\n'); i >= 0 { 229 b = b[i+1:] 230 } 231 column = utf8.RuneCount(b) + 1 // ignore multi-rune characters 232 return line, column 233} 234 235// currPos returns the current index position of d.in from d.orig. 236func (d *Decoder) currPos() int { 237 return len(d.orig) - len(d.in) 238} 239 240// matchWithDelim matches s with the input b and verifies that the match 241// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]"). 242// As a special case, EOF is considered a delimiter. It returns the length of s 243// if there is a match, else 0. 244func matchWithDelim(s string, b []byte) int { 245 if !bytes.HasPrefix(b, []byte(s)) { 246 return 0 247 } 248 249 n := len(s) 250 if n < len(b) && isNotDelim(b[n]) { 251 return 0 252 } 253 return n 254} 255 256// isNotDelim returns true if given byte is a not delimiter character. 257func isNotDelim(c byte) bool { 258 return (c == '-' || c == '+' || c == '.' || c == '_' || 259 ('a' <= c && c <= 'z') || 260 ('A' <= c && c <= 'Z') || 261 ('0' <= c && c <= '9')) 262} 263 264// consume consumes n bytes of input and any subsequent whitespace. 265func (d *Decoder) consume(n int) { 266 d.in = d.in[n:] 267 for len(d.in) > 0 { 268 switch d.in[0] { 269 case ' ', '\n', '\r', '\t': 270 d.in = d.in[1:] 271 default: 272 return 273 } 274 } 275} 276 277// isValueNext returns true if next type should be a JSON value: Null, 278// Number, String or Bool. 279func (d *Decoder) isValueNext() bool { 280 if len(d.openStack) == 0 { 281 return d.lastToken.kind == 0 282 } 283 284 start := d.openStack[len(d.openStack)-1] 285 switch start { 286 case ObjectOpen: 287 return d.lastToken.kind&Name != 0 288 case ArrayOpen: 289 return d.lastToken.kind&(ArrayOpen|comma) != 0 290 } 291 panic(fmt.Sprintf( 292 "unreachable logic in Decoder.isValueNext, lastToken.kind: %v, openStack: %v", 293 d.lastToken.kind, start)) 294} 295 296// consumeToken constructs a Token for given Kind with raw value derived from 297// current d.in and given size, and consumes the given size-length of it. 298func (d *Decoder) consumeToken(kind Kind, size int) Token { 299 tok := Token{ 300 kind: kind, 301 raw: d.in[:size], 302 pos: len(d.orig) - len(d.in), 303 } 304 d.consume(size) 305 return tok 306} 307 308// consumeBoolToken constructs a Token for a Bool kind with raw value derived from 309// current d.in and given size. 310func (d *Decoder) consumeBoolToken(b bool, size int) Token { 311 tok := Token{ 312 kind: Bool, 313 raw: d.in[:size], 314 pos: len(d.orig) - len(d.in), 315 boo: b, 316 } 317 d.consume(size) 318 return tok 319} 320 321// consumeStringToken constructs a Token for a String kind with raw value derived 322// from current d.in and given size. 323func (d *Decoder) consumeStringToken(s string, size int) Token { 324 tok := Token{ 325 kind: String, 326 raw: d.in[:size], 327 pos: len(d.orig) - len(d.in), 328 str: s, 329 } 330 d.consume(size) 331 return tok 332} 333 334// Clone returns a copy of the Decoder for use in reading ahead the next JSON 335// object, array or other values without affecting current Decoder. 336func (d *Decoder) Clone() *Decoder { 337 ret := *d 338 ret.openStack = append([]Kind(nil), ret.openStack...) 339 return &ret 340} 341