// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef V8_JSON_JSON_PARSER_H_
6 #define V8_JSON_JSON_PARSER_H_
7 
8 #include "include/v8-callbacks.h"
9 #include "src/base/small-vector.h"
10 #include "src/base/strings.h"
11 #include "src/common/high-allocation-throughput-scope.h"
12 #include "src/execution/isolate.h"
13 #include "src/heap/factory.h"
14 #include "src/objects/objects.h"
15 #include "src/zone/zone-containers.h"
16 
17 namespace v8 {
18 namespace internal {
19 
// Two-state result code: an element was either found or not found.
// Deliberately left as an unscoped enum so the enumerators stay directly
// visible in v8::internal, as existing users expect.
enum ParseElementResult { kElementFound, kElementNotFound };
21 
22 class JsonString final {
23  public:
JsonString()24   JsonString()
25       : start_(0),
26         length_(0),
27         needs_conversion_(false),
28         internalize_(false),
29         has_escape_(false),
30         is_index_(false) {}
31 
JsonString(uint32_t index)32   explicit JsonString(uint32_t index)
33       : index_(index),
34         length_(0),
35         needs_conversion_(false),
36         internalize_(false),
37         has_escape_(false),
38         is_index_(true) {}
39 
JsonString(int start,int length,bool needs_conversion,bool needs_internalization,bool has_escape)40   JsonString(int start, int length, bool needs_conversion,
41              bool needs_internalization, bool has_escape)
42       : start_(start),
43         length_(length),
44         needs_conversion_(needs_conversion),
45         internalize_(needs_internalization ||
46                      length_ <= kMaxInternalizedStringValueLength),
47         has_escape_(has_escape),
48         is_index_(false) {}
49 
internalize()50   bool internalize() const {
51     DCHECK(!is_index_);
52     return internalize_;
53   }
54 
needs_conversion()55   bool needs_conversion() const {
56     DCHECK(!is_index_);
57     return needs_conversion_;
58   }
59 
has_escape()60   bool has_escape() const {
61     DCHECK(!is_index_);
62     return has_escape_;
63   }
64 
start()65   int start() const {
66     DCHECK(!is_index_);
67     return start_;
68   }
69 
length()70   int length() const {
71     DCHECK(!is_index_);
72     return length_;
73   }
74 
index()75   uint32_t index() const {
76     DCHECK(is_index_);
77     return index_;
78   }
79 
is_index()80   bool is_index() const { return is_index_; }
81 
82  private:
83   static const int kMaxInternalizedStringValueLength = 10;
84 
85   union {
86     const int start_;
87     const uint32_t index_;
88   };
89   const int length_;
90   const bool needs_conversion_ : 1;
91   const bool internalize_ : 1;
92   const bool has_escape_ : 1;
93   const bool is_index_ : 1;
94 };
95 
96 struct JsonProperty {
JsonPropertyJsonProperty97   JsonProperty() { UNREACHABLE(); }
JsonPropertyJsonProperty98   explicit JsonProperty(const JsonString& string) : string(string) {}
99 
100   JsonString string;
101   Handle<Object> value;
102 };
103 
104 class JsonParseInternalizer {
105  public:
106   static MaybeHandle<Object> Internalize(Isolate* isolate,
107                                          Handle<Object> object,
108                                          Handle<Object> reviver);
109 
110  private:
JsonParseInternalizer(Isolate * isolate,Handle<JSReceiver> reviver)111   JsonParseInternalizer(Isolate* isolate, Handle<JSReceiver> reviver)
112       : isolate_(isolate), reviver_(reviver) {}
113 
114   MaybeHandle<Object> InternalizeJsonProperty(Handle<JSReceiver> holder,
115                                               Handle<String> key);
116 
117   bool RecurseAndApply(Handle<JSReceiver> holder, Handle<String> name);
118 
119   Isolate* isolate_;
120   Handle<JSReceiver> reviver_;
121 };
122 
// Token classes used by JsonParser. Stored in a single byte; the numeric
// order of the enumerators is part of the contract and must not change.
enum class JsonToken : uint8_t {
  // Value tokens.
  NUMBER,
  STRING,
  // Structural brackets (brace = object, brack = array, by their names).
  LBRACE,
  RBRACE,
  LBRACK,
  RBRACK,
  // Keyword literals.
  TRUE_LITERAL,
  FALSE_LITERAL,
  NULL_LITERAL,
  // Separators and filler.
  WHITESPACE,
  COLON,
  COMMA,
  // Anything that is not a valid token, and the end of the input.
  ILLEGAL,
  EOS
};
139 
140 // A simple json parser.
141 template <typename Char>
142 class JsonParser final {
143  public:
144   using SeqString = typename CharTraits<Char>::String;
145   using SeqExternalString = typename CharTraits<Char>::ExternalString;
146 
Parse(Isolate * isolate,Handle<String> source,Handle<Object> reviver)147   V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Parse(
148       Isolate* isolate, Handle<String> source, Handle<Object> reviver) {
149     HighAllocationThroughputScope high_throughput_scope(
150         V8::GetCurrentPlatform());
151     Handle<Object> result;
152     ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
153                                JsonParser(isolate, source).ParseJson(), Object);
154     if (reviver->IsCallable()) {
155       return JsonParseInternalizer::Internalize(isolate, result, reviver);
156     }
157     return result;
158   }
159 
160   static constexpr base::uc32 kEndOfString = static_cast<base::uc32>(-1);
161   static constexpr base::uc32 kInvalidUnicodeCharacter =
162       static_cast<base::uc32>(-1);
163 
164  private:
165   template <typename T>
166   using SmallVector = base::SmallVector<T, 16>;
167   struct JsonContinuation {
168     enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement };
JsonContinuationJsonContinuation169     JsonContinuation(Isolate* isolate, Type type, size_t index)
170         : scope(isolate),
171           type_(type),
172           index(static_cast<uint32_t>(index)),
173           max_index(0),
174           elements(0) {}
175 
typeJsonContinuation176     Type type() const { return static_cast<Type>(type_); }
set_typeJsonContinuation177     void set_type(Type type) { type_ = static_cast<uint8_t>(type); }
178 
179     HandleScope scope;
180     // Unfortunately GCC doesn't like packing Type in two bits.
181     uint32_t type_ : 2;
182     uint32_t index : 30;
183     uint32_t max_index;
184     uint32_t elements;
185   };
186 
187   JsonParser(Isolate* isolate, Handle<String> source);
188   ~JsonParser();
189 
190   // Parse a string containing a single JSON value.
191   MaybeHandle<Object> ParseJson();
192 
advance()193   void advance() { ++cursor_; }
194 
CurrentCharacter()195   base::uc32 CurrentCharacter() {
196     if (V8_UNLIKELY(is_at_end())) return kEndOfString;
197     return *cursor_;
198   }
199 
NextCharacter()200   base::uc32 NextCharacter() {
201     advance();
202     return CurrentCharacter();
203   }
204 
205   void AdvanceToNonDecimal();
206 
peek()207   V8_INLINE JsonToken peek() const { return next_; }
208 
Consume(JsonToken token)209   void Consume(JsonToken token) {
210     DCHECK_EQ(peek(), token);
211     advance();
212   }
213 
Expect(JsonToken token)214   void Expect(JsonToken token) {
215     if (V8_LIKELY(peek() == token)) {
216       advance();
217     } else {
218       ReportUnexpectedToken(peek());
219     }
220   }
221 
ExpectNext(JsonToken token)222   void ExpectNext(JsonToken token) {
223     SkipWhitespace();
224     Expect(token);
225   }
226 
Check(JsonToken token)227   bool Check(JsonToken token) {
228     SkipWhitespace();
229     if (next_ != token) return false;
230     advance();
231     return true;
232   }
233 
234   template <size_t N>
ScanLiteral(const char (& s)[N])235   void ScanLiteral(const char (&s)[N]) {
236     DCHECK(!is_at_end());
237     // There's at least 1 character, we always consume a character and compare
238     // the next character. The first character was compared before we jumped
239     // to ScanLiteral.
240     STATIC_ASSERT(N > 2);
241     size_t remaining = static_cast<size_t>(end_ - cursor_);
242     if (V8_LIKELY(remaining >= N - 1 &&
243                   CompareCharsEqual(s + 1, cursor_ + 1, N - 2))) {
244       cursor_ += N - 1;
245       return;
246     }
247 
248     cursor_++;
249     for (size_t i = 0; i < std::min(N - 2, remaining - 1); i++) {
250       if (*(s + 1 + i) != *cursor_) {
251         ReportUnexpectedCharacter(*cursor_);
252         return;
253       }
254       cursor_++;
255     }
256 
257     DCHECK(is_at_end());
258     ReportUnexpectedToken(JsonToken::EOS);
259   }
260 
261   // The JSON lexical grammar is specified in the ECMAScript 5 standard,
262   // section 15.12.1.1. The only allowed whitespace characters between tokens
263   // are tab, carriage-return, newline and space.
264   void SkipWhitespace();
265 
266   // A JSON string (production JSONString) is subset of valid JavaScript string
267   // literals. The string must only be double-quoted (not single-quoted), and
268   // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
269   // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
270   JsonString ScanJsonString(bool needs_internalization);
271   JsonString ScanJsonPropertyKey(JsonContinuation* cont);
272   base::uc32 ScanUnicodeCharacter();
273   Handle<String> MakeString(const JsonString& string,
274                             Handle<String> hint = Handle<String>());
275 
276   template <typename SinkChar>
277   void DecodeString(SinkChar* sink, int start, int length);
278 
279   template <typename SinkSeqString>
280   Handle<String> DecodeString(const JsonString& string,
281                               Handle<SinkSeqString> intermediate,
282                               Handle<String> hint);
283 
284   // A JSON number (production JSONNumber) is a subset of the valid JavaScript
285   // decimal number literals.
286   // It includes an optional minus sign, must have at least one
287   // digit before and after a decimal point, may not have prefixed zeros (unless
288   // the integer part is zero), and may include an exponent part (e.g., "e-10").
289   // Hexadecimal and octal numbers are not allowed.
290   Handle<Object> ParseJsonNumber();
291 
292   // Parse a single JSON value from input (grammar production JSONValue).
293   // A JSON value is either a (double-quoted) string literal, a number literal,
294   // one of "true", "false", or "null", or an object or array literal.
295   MaybeHandle<Object> ParseJsonValue();
296 
297   Handle<Object> BuildJsonObject(
298       const JsonContinuation& cont,
299       const SmallVector<JsonProperty>& property_stack, Handle<Map> feedback);
300   Handle<Object> BuildJsonArray(
301       const JsonContinuation& cont,
302       const SmallVector<Handle<Object>>& element_stack);
303 
304   // Mark that a parsing error has happened at the current character.
305   void ReportUnexpectedCharacter(base::uc32 c);
306   // Mark that a parsing error has happened at the current token.
307   void ReportUnexpectedToken(JsonToken token);
308 
isolate()309   inline Isolate* isolate() { return isolate_; }
factory()310   inline Factory* factory() { return isolate_->factory(); }
object_constructor()311   inline Handle<JSFunction> object_constructor() { return object_constructor_; }
312 
313   static const int kInitialSpecialStringLength = 32;
314 
UpdatePointersCallback(void * parser)315   static void UpdatePointersCallback(void* parser) {
316     reinterpret_cast<JsonParser<Char>*>(parser)->UpdatePointers();
317   }
318 
UpdatePointers()319   void UpdatePointers() {
320     DisallowGarbageCollection no_gc;
321     const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc);
322     if (chars_ != chars) {
323       size_t position = cursor_ - chars_;
324       size_t length = end_ - chars_;
325       chars_ = chars;
326       cursor_ = chars_ + position;
327       end_ = chars_ + length;
328     }
329   }
330 
331  private:
332   static const bool kIsOneByte = sizeof(Char) == 1;
333 
is_at_end()334   bool is_at_end() const {
335     DCHECK_LE(cursor_, end_);
336     return cursor_ == end_;
337   }
338 
position()339   int position() const { return static_cast<int>(cursor_ - chars_); }
340 
341   Isolate* isolate_;
342   const uint64_t hash_seed_;
343   JsonToken next_;
344   // Indicates whether the bytes underneath source_ can relocate during GC.
345   bool chars_may_relocate_;
346   Handle<JSFunction> object_constructor_;
347   const Handle<String> original_source_;
348   Handle<String> source_;
349 
350   // Cached pointer to the raw chars in source. In case source is on-heap, we
351   // register an UpdatePointers callback. For this reason, chars_, cursor_ and
352   // end_ should never be locally cached across a possible allocation. The scope
353   // in which we cache chars has to be guarded by a DisallowGarbageCollection
354   // scope.
355   const Char* cursor_;
356   const Char* end_;
357   const Char* chars_;
358 };
359 
360 // Explicit instantiation declarations.
361 extern template class JsonParser<uint8_t>;
362 extern template class JsonParser<uint16_t>;
363 
364 }  // namespace internal
365 }  // namespace v8
366 
367 #endif  // V8_JSON_JSON_PARSER_H_
368