• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_JSON_PARSER_H_
6 #define V8_JSON_PARSER_H_
7 
8 #include "src/v8.h"
9 
10 #include "src/char-predicates-inl.h"
11 #include "src/conversions.h"
12 #include "src/heap/spaces-inl.h"
13 #include "src/messages.h"
14 #include "src/token.h"
15 
16 namespace v8 {
17 namespace internal {
18 
19 // A simple json parser.
20 template <bool seq_one_byte>
21 class JsonParser BASE_EMBEDDED {
22  public:
Parse(Handle<String> source)23   MUST_USE_RESULT static MaybeHandle<Object> Parse(Handle<String> source) {
24     return JsonParser(source).ParseJson();
25   }
26 
27   static const int kEndOfString = -1;
28 
29  private:
JsonParser(Handle<String> source)30   explicit JsonParser(Handle<String> source)
31       : source_(source),
32         source_length_(source->length()),
33         isolate_(source->map()->GetHeap()->isolate()),
34         factory_(isolate_->factory()),
35         zone_(isolate_),
36         object_constructor_(isolate_->native_context()->object_function(),
37                             isolate_),
38         position_(-1) {
39     source_ = String::Flatten(source_);
40     pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED;
41 
42     // Optimized fast case where we only have Latin1 characters.
43     if (seq_one_byte) {
44       seq_source_ = Handle<SeqOneByteString>::cast(source_);
45     }
46   }
47 
48   // Parse a string containing a single JSON value.
49   MaybeHandle<Object> ParseJson();
50 
Advance()51   inline void Advance() {
52     position_++;
53     if (position_ >= source_length_) {
54       c0_ = kEndOfString;
55     } else if (seq_one_byte) {
56       c0_ = seq_source_->SeqOneByteStringGet(position_);
57     } else {
58       c0_ = source_->Get(position_);
59     }
60   }
61 
62   // The JSON lexical grammar is specified in the ECMAScript 5 standard,
63   // section 15.12.1.1. The only allowed whitespace characters between tokens
64   // are tab, carriage-return, newline and space.
65 
AdvanceSkipWhitespace()66   inline void AdvanceSkipWhitespace() {
67     do {
68       Advance();
69     } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r');
70   }
71 
SkipWhitespace()72   inline void SkipWhitespace() {
73     while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') {
74       Advance();
75     }
76   }
77 
AdvanceGetChar()78   inline uc32 AdvanceGetChar() {
79     Advance();
80     return c0_;
81   }
82 
83   // Checks that current charater is c.
84   // If so, then consume c and skip whitespace.
MatchSkipWhiteSpace(uc32 c)85   inline bool MatchSkipWhiteSpace(uc32 c) {
86     if (c0_ == c) {
87       AdvanceSkipWhitespace();
88       return true;
89     }
90     return false;
91   }
92 
93   // A JSON string (production JSONString) is subset of valid JavaScript string
94   // literals. The string must only be double-quoted (not single-quoted), and
95   // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
96   // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
ParseJsonString()97   Handle<String> ParseJsonString() {
98     return ScanJsonString<false>();
99   }
100 
ParseJsonString(Handle<String> expected)101   bool ParseJsonString(Handle<String> expected) {
102     int length = expected->length();
103     if (source_->length() - position_ - 1 > length) {
104       DisallowHeapAllocation no_gc;
105       String::FlatContent content = expected->GetFlatContent();
106       if (content.IsOneByte()) {
107         DCHECK_EQ('"', c0_);
108         const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1;
109         const uint8_t* expected_chars = content.ToOneByteVector().start();
110         for (int i = 0; i < length; i++) {
111           uint8_t c0 = input_chars[i];
112           if (c0 != expected_chars[i] ||
113               c0 == '"' || c0 < 0x20 || c0 == '\\') {
114             return false;
115           }
116         }
117         if (input_chars[length] == '"') {
118           position_ = position_ + length + 1;
119           AdvanceSkipWhitespace();
120           return true;
121         }
122       }
123     }
124     return false;
125   }
126 
ParseJsonInternalizedString()127   Handle<String> ParseJsonInternalizedString() {
128     return ScanJsonString<true>();
129   }
130 
131   template <bool is_internalized>
132   Handle<String> ScanJsonString();
133   // Creates a new string and copies prefix[start..end] into the beginning
134   // of it. Then scans the rest of the string, adding characters after the
135   // prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char.
136   template <typename StringType, typename SinkChar>
137   Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
138 
139   // A JSON number (production JSONNumber) is a subset of the valid JavaScript
140   // decimal number literals.
141   // It includes an optional minus sign, must have at least one
142   // digit before and after a decimal point, may not have prefixed zeros (unless
143   // the integer part is zero), and may include an exponent part (e.g., "e-10").
144   // Hexadecimal and octal numbers are not allowed.
145   Handle<Object> ParseJsonNumber();
146 
147   // Parse a single JSON value from input (grammar production JSONValue).
148   // A JSON value is either a (double-quoted) string literal, a number literal,
149   // one of "true", "false", or "null", or an object or array literal.
150   Handle<Object> ParseJsonValue();
151 
152   // Parse a JSON object literal (grammar production JSONObject).
153   // An object literal is a squiggly-braced and comma separated sequence
154   // (possibly empty) of key/value pairs, where the key is a JSON string
155   // literal, the value is a JSON value, and the two are separated by a colon.
156   // A JSON array doesn't allow numbers and identifiers as keys, like a
157   // JavaScript array.
158   Handle<Object> ParseJsonObject();
159 
160   // Parses a JSON array literal (grammar production JSONArray). An array
161   // literal is a square-bracketed and comma separated sequence (possibly empty)
162   // of JSON values.
163   // A JSON array doesn't allow leaving out values from the sequence, nor does
164   // it allow a terminal comma, like a JavaScript array does.
165   Handle<Object> ParseJsonArray();
166 
167 
168   // Mark that a parsing error has happened at the current token, and
169   // return a null handle. Primarily for readability.
ReportUnexpectedCharacter()170   inline Handle<Object> ReportUnexpectedCharacter() {
171     return Handle<Object>::null();
172   }
173 
isolate()174   inline Isolate* isolate() { return isolate_; }
factory()175   inline Factory* factory() { return factory_; }
object_constructor()176   inline Handle<JSFunction> object_constructor() { return object_constructor_; }
177 
178   static const int kInitialSpecialStringLength = 1024;
179   static const int kPretenureTreshold = 100 * 1024;
180 
181 
182  private:
zone()183   Zone* zone() { return &zone_; }
184 
185   void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map,
186                                ZoneList<Handle<Object> >* properties);
187 
188   Handle<String> source_;
189   int source_length_;
190   Handle<SeqOneByteString> seq_source_;
191 
192   PretenureFlag pretenure_;
193   Isolate* isolate_;
194   Factory* factory_;
195   Zone zone_;
196   Handle<JSFunction> object_constructor_;
197   uc32 c0_;
198   int position_;
199 };
200 
201 template <bool seq_one_byte>
ParseJson()202 MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() {
203   // Advance to the first character (possibly EOS)
204   AdvanceSkipWhitespace();
205   Handle<Object> result = ParseJsonValue();
206   if (result.is_null() || c0_ != kEndOfString) {
207     // Some exception (for example stack overflow) is already pending.
208     if (isolate_->has_pending_exception()) return Handle<Object>::null();
209 
210     // Parse failed. Current character is the unexpected token.
211     const char* message;
212     Factory* factory = this->factory();
213     Handle<JSArray> array;
214 
215     switch (c0_) {
216       case kEndOfString:
217         message = "unexpected_eos";
218         array = factory->NewJSArray(0);
219         break;
220       case '-':
221       case '0':
222       case '1':
223       case '2':
224       case '3':
225       case '4':
226       case '5':
227       case '6':
228       case '7':
229       case '8':
230       case '9':
231         message = "unexpected_token_number";
232         array = factory->NewJSArray(0);
233         break;
234       case '"':
235         message = "unexpected_token_string";
236         array = factory->NewJSArray(0);
237         break;
238       default:
239         message = "unexpected_token";
240         Handle<Object> name = factory->LookupSingleCharacterStringFromCode(c0_);
241         Handle<FixedArray> element = factory->NewFixedArray(1);
242         element->set(0, *name);
243         array = factory->NewJSArrayWithElements(element);
244         break;
245     }
246 
247     MessageLocation location(factory->NewScript(source_),
248                              position_,
249                              position_ + 1);
250     Handle<Object> error;
251     ASSIGN_RETURN_ON_EXCEPTION(isolate(), error,
252                                factory->NewSyntaxError(message, array), Object);
253     return isolate()->template Throw<Object>(error, &location);
254   }
255   return result;
256 }
257 
258 
259 // Parse any JSON value.
260 template <bool seq_one_byte>
ParseJsonValue()261 Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() {
262   StackLimitCheck stack_check(isolate_);
263   if (stack_check.HasOverflowed()) {
264     isolate_->StackOverflow();
265     return Handle<Object>::null();
266   }
267 
268   if (c0_ == '"') return ParseJsonString();
269   if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
270   if (c0_ == '{') return ParseJsonObject();
271   if (c0_ == '[') return ParseJsonArray();
272   if (c0_ == 'f') {
273     if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
274         AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
275       AdvanceSkipWhitespace();
276       return factory()->false_value();
277     }
278     return ReportUnexpectedCharacter();
279   }
280   if (c0_ == 't') {
281     if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
282         AdvanceGetChar() == 'e') {
283       AdvanceSkipWhitespace();
284       return factory()->true_value();
285     }
286     return ReportUnexpectedCharacter();
287   }
288   if (c0_ == 'n') {
289     if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
290         AdvanceGetChar() == 'l') {
291       AdvanceSkipWhitespace();
292       return factory()->null_value();
293     }
294     return ReportUnexpectedCharacter();
295   }
296   return ReportUnexpectedCharacter();
297 }
298 
299 
300 // Parse a JSON object. Position must be right at '{'.
301 template <bool seq_one_byte>
ParseJsonObject()302 Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() {
303   HandleScope scope(isolate());
304   Handle<JSObject> json_object =
305       factory()->NewJSObject(object_constructor(), pretenure_);
306   Handle<Map> map(json_object->map());
307   ZoneList<Handle<Object> > properties(8, zone());
308   DCHECK_EQ(c0_, '{');
309 
310   bool transitioning = true;
311 
312   AdvanceSkipWhitespace();
313   if (c0_ != '}') {
314     do {
315       if (c0_ != '"') return ReportUnexpectedCharacter();
316 
317       int start_position = position_;
318       Advance();
319 
320       uint32_t index = 0;
321       if (c0_ >= '0' && c0_ <= '9') {
322         // Maybe an array index, try to parse it.
323         if (c0_ == '0') {
324           // With a leading zero, the string has to be "0" only to be an index.
325           Advance();
326         } else {
327           do {
328             int d = c0_ - '0';
329             if (index > 429496729U - ((d > 5) ? 1 : 0)) break;
330             index = (index * 10) + d;
331             Advance();
332           } while (c0_ >= '0' && c0_ <= '9');
333         }
334 
335         if (c0_ == '"') {
336           // Successfully parsed index, parse and store element.
337           AdvanceSkipWhitespace();
338 
339           if (c0_ != ':') return ReportUnexpectedCharacter();
340           AdvanceSkipWhitespace();
341           Handle<Object> value = ParseJsonValue();
342           if (value.is_null()) return ReportUnexpectedCharacter();
343 
344           JSObject::SetOwnElement(json_object, index, value, SLOPPY).Assert();
345           continue;
346         }
347         // Not an index, fallback to the slow path.
348       }
349 
350       position_ = start_position;
351 #ifdef DEBUG
352       c0_ = '"';
353 #endif
354 
355       Handle<String> key;
356       Handle<Object> value;
357 
358       // Try to follow existing transitions as long as possible. Once we stop
359       // transitioning, no transition can be found anymore.
360       if (transitioning) {
361         // First check whether there is a single expected transition. If so, try
362         // to parse it first.
363         bool follow_expected = false;
364         Handle<Map> target;
365         if (seq_one_byte) {
366           key = Map::ExpectedTransitionKey(map);
367           follow_expected = !key.is_null() && ParseJsonString(key);
368         }
369         // If the expected transition hits, follow it.
370         if (follow_expected) {
371           target = Map::ExpectedTransitionTarget(map);
372         } else {
373           // If the expected transition failed, parse an internalized string and
374           // try to find a matching transition.
375           key = ParseJsonInternalizedString();
376           if (key.is_null()) return ReportUnexpectedCharacter();
377 
378           target = Map::FindTransitionToField(map, key);
379           // If a transition was found, follow it and continue.
380           transitioning = !target.is_null();
381         }
382         if (c0_ != ':') return ReportUnexpectedCharacter();
383 
384         AdvanceSkipWhitespace();
385         value = ParseJsonValue();
386         if (value.is_null()) return ReportUnexpectedCharacter();
387 
388         if (transitioning) {
389           int descriptor = map->NumberOfOwnDescriptors();
390           PropertyDetails details =
391               target->instance_descriptors()->GetDetails(descriptor);
392           Representation expected_representation = details.representation();
393 
394           if (value->FitsRepresentation(expected_representation)) {
395             if (expected_representation.IsDouble()) {
396               value = Object::NewStorageFor(isolate(), value,
397                                             expected_representation);
398             } else if (expected_representation.IsHeapObject() &&
399                        !target->instance_descriptors()->GetFieldType(
400                            descriptor)->NowContains(value)) {
401               Handle<HeapType> value_type(value->OptimalType(
402                       isolate(), expected_representation));
403               Map::GeneralizeFieldType(target, descriptor, value_type);
404             }
405             DCHECK(target->instance_descriptors()->GetFieldType(
406                     descriptor)->NowContains(value));
407             properties.Add(value, zone());
408             map = target;
409             continue;
410           } else {
411             transitioning = false;
412           }
413         }
414 
415         // Commit the intermediate state to the object and stop transitioning.
416         CommitStateToJsonObject(json_object, map, &properties);
417       } else {
418         key = ParseJsonInternalizedString();
419         if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
420 
421         AdvanceSkipWhitespace();
422         value = ParseJsonValue();
423         if (value.is_null()) return ReportUnexpectedCharacter();
424       }
425 
426       Runtime::DefineObjectProperty(json_object, key, value, NONE).Check();
427     } while (MatchSkipWhiteSpace(','));
428     if (c0_ != '}') {
429       return ReportUnexpectedCharacter();
430     }
431 
432     // If we transitioned until the very end, transition the map now.
433     if (transitioning) {
434       CommitStateToJsonObject(json_object, map, &properties);
435     }
436   }
437   AdvanceSkipWhitespace();
438   return scope.CloseAndEscape(json_object);
439 }
440 
441 
442 template <bool seq_one_byte>
CommitStateToJsonObject(Handle<JSObject> json_object,Handle<Map> map,ZoneList<Handle<Object>> * properties)443 void JsonParser<seq_one_byte>::CommitStateToJsonObject(
444     Handle<JSObject> json_object, Handle<Map> map,
445     ZoneList<Handle<Object> >* properties) {
446   JSObject::AllocateStorageForMap(json_object, map);
447   DCHECK(!json_object->map()->is_dictionary_map());
448 
449   DisallowHeapAllocation no_gc;
450   Factory* factory = isolate()->factory();
451   // If the |json_object|'s map is exactly the same as |map| then the
452   // |properties| values correspond to the |map| and nothing more has to be
453   // done. But if the |json_object|'s map is different then we have to
454   // iterate descriptors to ensure that properties still correspond to the
455   // map.
456   bool slow_case = json_object->map() != *map;
457   DescriptorArray* descriptors = NULL;
458 
459   int length = properties->length();
460   if (slow_case) {
461     descriptors = json_object->map()->instance_descriptors();
462     DCHECK(json_object->map()->NumberOfOwnDescriptors() == length);
463   }
464   for (int i = 0; i < length; i++) {
465     Handle<Object> value = (*properties)[i];
466     if (slow_case && value->IsMutableHeapNumber() &&
467         !descriptors->GetDetails(i).representation().IsDouble()) {
468       // Turn mutable heap numbers into immutable if the field representation
469       // is not double.
470       HeapNumber::cast(*value)->set_map(*factory->heap_number_map());
471     }
472     FieldIndex index = FieldIndex::ForPropertyIndex(*map, i);
473     json_object->FastPropertyAtPut(index, *value);
474   }
475 }
476 
477 
478 // Parse a JSON array. Position must be right at '['.
479 template <bool seq_one_byte>
ParseJsonArray()480 Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() {
481   HandleScope scope(isolate());
482   ZoneList<Handle<Object> > elements(4, zone());
483   DCHECK_EQ(c0_, '[');
484 
485   AdvanceSkipWhitespace();
486   if (c0_ != ']') {
487     do {
488       Handle<Object> element = ParseJsonValue();
489       if (element.is_null()) return ReportUnexpectedCharacter();
490       elements.Add(element, zone());
491     } while (MatchSkipWhiteSpace(','));
492     if (c0_ != ']') {
493       return ReportUnexpectedCharacter();
494     }
495   }
496   AdvanceSkipWhitespace();
497   // Allocate a fixed array with all the elements.
498   Handle<FixedArray> fast_elements =
499       factory()->NewFixedArray(elements.length(), pretenure_);
500   for (int i = 0, n = elements.length(); i < n; i++) {
501     fast_elements->set(i, *elements[i]);
502   }
503   Handle<Object> json_array = factory()->NewJSArrayWithElements(
504       fast_elements, FAST_ELEMENTS, pretenure_);
505   return scope.CloseAndEscape(json_array);
506 }
507 
508 
509 template <bool seq_one_byte>
ParseJsonNumber()510 Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() {
511   bool negative = false;
512   int beg_pos = position_;
513   if (c0_ == '-') {
514     Advance();
515     negative = true;
516   }
517   if (c0_ == '0') {
518     Advance();
519     // Prefix zero is only allowed if it's the only digit before
520     // a decimal point or exponent.
521     if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter();
522   } else {
523     int i = 0;
524     int digits = 0;
525     if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
526     do {
527       i = i * 10 + c0_ - '0';
528       digits++;
529       Advance();
530     } while (c0_ >= '0' && c0_ <= '9');
531     if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
532       SkipWhitespace();
533       return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
534     }
535   }
536   if (c0_ == '.') {
537     Advance();
538     if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
539     do {
540       Advance();
541     } while (c0_ >= '0' && c0_ <= '9');
542   }
543   if (AsciiAlphaToLower(c0_) == 'e') {
544     Advance();
545     if (c0_ == '-' || c0_ == '+') Advance();
546     if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
547     do {
548       Advance();
549     } while (c0_ >= '0' && c0_ <= '9');
550   }
551   int length = position_ - beg_pos;
552   double number;
553   if (seq_one_byte) {
554     Vector<const uint8_t> chars(seq_source_->GetChars() +  beg_pos, length);
555     number = StringToDouble(isolate()->unicode_cache(),
556                             chars,
557                             NO_FLAGS,  // Hex, octal or trailing junk.
558                             base::OS::nan_value());
559   } else {
560     Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
561     String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
562     Vector<const uint8_t> result =
563         Vector<const uint8_t>(buffer.start(), length);
564     number = StringToDouble(isolate()->unicode_cache(),
565                             result,
566                             NO_FLAGS,  // Hex, octal or trailing junk.
567                             0.0);
568     buffer.Dispose();
569   }
570   SkipWhitespace();
571   return factory()->NewNumber(number, pretenure_);
572 }
573 
574 
575 template <typename StringType>
576 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
577 
578 template <>
SeqStringSet(Handle<SeqTwoByteString> seq_str,int i,uc32 c)579 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
580   seq_str->SeqTwoByteStringSet(i, c);
581 }
582 
583 template <>
SeqStringSet(Handle<SeqOneByteString> seq_str,int i,uc32 c)584 inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) {
585   seq_str->SeqOneByteStringSet(i, c);
586 }
587 
588 template <typename StringType>
589 inline Handle<StringType> NewRawString(Factory* factory,
590                                        int length,
591                                        PretenureFlag pretenure);
592 
593 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)594 inline Handle<SeqTwoByteString> NewRawString(Factory* factory,
595                                              int length,
596                                              PretenureFlag pretenure) {
597   return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked();
598 }
599 
600 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)601 inline Handle<SeqOneByteString> NewRawString(Factory* factory,
602                                            int length,
603                                            PretenureFlag pretenure) {
604   return factory->NewRawOneByteString(length, pretenure).ToHandleChecked();
605 }
606 
607 
608 // Scans the rest of a JSON string starting from position_ and writes
609 // prefix[start..end] along with the scanned characters into a
610 // sequential string of type StringType.
611 template <bool seq_one_byte>
612 template <typename StringType, typename SinkChar>
SlowScanJsonString(Handle<String> prefix,int start,int end)613 Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString(
614     Handle<String> prefix, int start, int end) {
615   int count = end - start;
616   int max_length = count + source_length_ - position_;
617   int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
618   Handle<StringType> seq_string =
619       NewRawString<StringType>(factory(), length, pretenure_);
620   // Copy prefix into seq_str.
621   SinkChar* dest = seq_string->GetChars();
622   String::WriteToFlat(*prefix, dest, start, end);
623 
624   while (c0_ != '"') {
625     // Check for control character (0x00-0x1f) or unterminated string (<0).
626     if (c0_ < 0x20) return Handle<String>::null();
627     if (count >= length) {
628       // We need to create a longer sequential string for the result.
629       return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count);
630     }
631     if (c0_ != '\\') {
632       // If the sink can contain UC16 characters, or source_ contains only
633       // Latin1 characters, there's no need to test whether we can store the
634       // character. Otherwise check whether the UC16 source character can fit
635       // in the Latin1 sink.
636       if (sizeof(SinkChar) == kUC16Size || seq_one_byte ||
637           c0_ <= String::kMaxOneByteCharCode) {
638         SeqStringSet(seq_string, count++, c0_);
639         Advance();
640       } else {
641         // StringType is SeqOneByteString and we just read a non-Latin1 char.
642         return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count);
643       }
644     } else {
645       Advance();  // Advance past the \.
646       switch (c0_) {
647         case '"':
648         case '\\':
649         case '/':
650           SeqStringSet(seq_string, count++, c0_);
651           break;
652         case 'b':
653           SeqStringSet(seq_string, count++, '\x08');
654           break;
655         case 'f':
656           SeqStringSet(seq_string, count++, '\x0c');
657           break;
658         case 'n':
659           SeqStringSet(seq_string, count++, '\x0a');
660           break;
661         case 'r':
662           SeqStringSet(seq_string, count++, '\x0d');
663           break;
664         case 't':
665           SeqStringSet(seq_string, count++, '\x09');
666           break;
667         case 'u': {
668           uc32 value = 0;
669           for (int i = 0; i < 4; i++) {
670             Advance();
671             int digit = HexValue(c0_);
672             if (digit < 0) {
673               return Handle<String>::null();
674             }
675             value = value * 16 + digit;
676           }
677           if (sizeof(SinkChar) == kUC16Size ||
678               value <= String::kMaxOneByteCharCode) {
679             SeqStringSet(seq_string, count++, value);
680             break;
681           } else {
682             // StringType is SeqOneByteString and we just read a non-Latin1
683             // char.
684             position_ -= 6;  // Rewind position_ to \ in \uxxxx.
685             Advance();
686             return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string,
687                                                               0,
688                                                               count);
689           }
690         }
691         default:
692           return Handle<String>::null();
693       }
694       Advance();
695     }
696   }
697 
698   DCHECK_EQ('"', c0_);
699   // Advance past the last '"'.
700   AdvanceSkipWhitespace();
701 
702   // Shrink seq_string length to count and return.
703   return SeqString::Truncate(seq_string, count);
704 }
705 
706 
707 template <bool seq_one_byte>
708 template <bool is_internalized>
ScanJsonString()709 Handle<String> JsonParser<seq_one_byte>::ScanJsonString() {
710   DCHECK_EQ('"', c0_);
711   Advance();
712   if (c0_ == '"') {
713     AdvanceSkipWhitespace();
714     return factory()->empty_string();
715   }
716 
717   if (seq_one_byte && is_internalized) {
718     // Fast path for existing internalized strings.  If the the string being
719     // parsed is not a known internalized string, contains backslashes or
720     // unexpectedly reaches the end of string, return with an empty handle.
721     uint32_t running_hash = isolate()->heap()->HashSeed();
722     int position = position_;
723     uc32 c0 = c0_;
724     do {
725       if (c0 == '\\') {
726         c0_ = c0;
727         int beg_pos = position_;
728         position_ = position;
729         return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
730                                                              beg_pos,
731                                                              position_);
732       }
733       if (c0 < 0x20) return Handle<String>::null();
734       if (static_cast<uint32_t>(c0) >
735           unibrow::Utf16::kMaxNonSurrogateCharCode) {
736         running_hash =
737             StringHasher::AddCharacterCore(running_hash,
738                                            unibrow::Utf16::LeadSurrogate(c0));
739         running_hash =
740             StringHasher::AddCharacterCore(running_hash,
741                                            unibrow::Utf16::TrailSurrogate(c0));
742       } else {
743         running_hash = StringHasher::AddCharacterCore(running_hash, c0);
744       }
745       position++;
746       if (position >= source_length_) return Handle<String>::null();
747       c0 = seq_source_->SeqOneByteStringGet(position);
748     } while (c0 != '"');
749     int length = position - position_;
750     uint32_t hash = (length <= String::kMaxHashCalcLength)
751                         ? StringHasher::GetHashCore(running_hash)
752                         : static_cast<uint32_t>(length);
753     Vector<const uint8_t> string_vector(
754         seq_source_->GetChars() + position_, length);
755     StringTable* string_table = isolate()->heap()->string_table();
756     uint32_t capacity = string_table->Capacity();
757     uint32_t entry = StringTable::FirstProbe(hash, capacity);
758     uint32_t count = 1;
759     Handle<String> result;
760     while (true) {
761       Object* element = string_table->KeyAt(entry);
762       if (element == isolate()->heap()->undefined_value()) {
763         // Lookup failure.
764         result = factory()->InternalizeOneByteString(
765             seq_source_, position_, length);
766         break;
767       }
768       if (element != isolate()->heap()->the_hole_value() &&
769           String::cast(element)->IsOneByteEqualTo(string_vector)) {
770         result = Handle<String>(String::cast(element), isolate());
771 #ifdef DEBUG
772         uint32_t hash_field =
773             (hash << String::kHashShift) | String::kIsNotArrayIndexMask;
774         DCHECK_EQ(static_cast<int>(result->Hash()),
775                   static_cast<int>(hash_field >> String::kHashShift));
776 #endif
777         break;
778       }
779       entry = StringTable::NextProbe(entry, count++, capacity);
780     }
781     position_ = position;
782     // Advance past the last '"'.
783     AdvanceSkipWhitespace();
784     return result;
785   }
786 
787   int beg_pos = position_;
788   // Fast case for Latin1 only without escape characters.
789   do {
790     // Check for control character (0x00-0x1f) or unterminated string (<0).
791     if (c0_ < 0x20) return Handle<String>::null();
792     if (c0_ != '\\') {
793       if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) {
794         Advance();
795       } else {
796         return SlowScanJsonString<SeqTwoByteString, uc16>(source_,
797                                                           beg_pos,
798                                                           position_);
799       }
800     } else {
801       return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
802                                                            beg_pos,
803                                                            position_);
804     }
805   } while (c0_ != '"');
806   int length = position_ - beg_pos;
807   Handle<String> result =
808       factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked();
809   uint8_t* dest = SeqOneByteString::cast(*result)->GetChars();
810   String::WriteToFlat(*source_, dest, beg_pos, position_);
811 
812   DCHECK_EQ('"', c0_);
813   // Advance past the last '"'.
814   AdvanceSkipWhitespace();
815   return result;
816 }
817 
818 } }  // namespace v8::internal
819 
820 #endif  // V8_JSON_PARSER_H_
821