• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_JSON_JSON_PARSER_H_
6 #define V8_JSON_JSON_PARSER_H_
7 
8 #include "src/execution/isolate.h"
9 #include "src/heap/factory.h"
10 #include "src/objects/objects.h"
11 #include "src/zone/zone-containers.h"
12 
13 namespace v8 {
14 namespace internal {
15 
// Two-state result for an element lookup: either an element was found or it
// was not.
enum ParseElementResult {
  kElementFound,
  kElementNotFound
};
17 
// Compact, immutable description of a string scanned by the JSON parser.
//
// A JsonString is in exactly one of two forms:
//  - a source slice (is_index() == false): a start offset and a length, plus
//    flags recorded while scanning (needs_conversion, internalize,
//    has_escape);
//  - an index (is_index() == true): only index() is meaningful.
//
// |start_| and |index_| share storage, so every accessor DCHECKs that it is
// used with the matching form.
class JsonString final {
 public:
  // Creates an empty source slice with all flags cleared.
  JsonString()
      : start_(0),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(false) {}

  // Creates the index form holding |index|.
  explicit JsonString(uint32_t index)
      : index_(index),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(true) {}

  // Creates a source slice of |length| starting at |start|.
  //
  // A slice whose length is at most kMaxInternalizedStringValueLength is
  // always marked for internalization, regardless of |needs_internalization|.
  JsonString(int start, int length, bool needs_conversion,
             bool needs_internalization, bool has_escape)
      : start_(start),
        length_(length),
        needs_conversion_(needs_conversion),
        // Use the |length| parameter rather than the |length_| member so this
        // initializer does not depend on member declaration order.
        internalize_(needs_internalization ||
                     length <= kMaxInternalizedStringValueLength),
        has_escape_(has_escape),
        is_index_(false) {}

  // Whether the string is marked for internalization. Slice form only.
  bool internalize() const {
    DCHECK(!is_index_);
    return internalize_;
  }

  // The needs_conversion flag passed at construction. Slice form only.
  bool needs_conversion() const {
    DCHECK(!is_index_);
    return needs_conversion_;
  }

  // The has_escape flag passed at construction. Slice form only.
  bool has_escape() const {
    DCHECK(!is_index_);
    return has_escape_;
  }

  // Start offset of the slice. Slice form only.
  int start() const {
    DCHECK(!is_index_);
    return start_;
  }

  // Length of the slice. Slice form only.
  int length() const {
    DCHECK(!is_index_);
    return length_;
  }

  // The stored index. Index form only.
  uint32_t index() const {
    DCHECK(is_index_);
    return index_;
  }

  // True for the index form, false for the slice form.
  bool is_index() const { return is_index_; }

 private:
  // Slices up to this length are always marked for internalization.
  static constexpr int kMaxInternalizedStringValueLength = 10;

  // Discriminated by |is_index_|.
  union {
    const int start_;
    const uint32_t index_;
  };
  const int length_;
  const bool needs_conversion_ : 1;
  const bool internalize_ : 1;
  const bool has_escape_ : 1;
  const bool is_index_ : 1;
};
91 
92 struct JsonProperty {
JsonPropertyJsonProperty93   JsonProperty() { UNREACHABLE(); }
JsonPropertyJsonProperty94   explicit JsonProperty(const JsonString& string) : string(string) {}
95 
96   JsonString string;
97   Handle<Object> value;
98 };
99 
100 class JsonParseInternalizer {
101  public:
102   static MaybeHandle<Object> Internalize(Isolate* isolate,
103                                          Handle<Object> object,
104                                          Handle<Object> reviver);
105 
106  private:
JsonParseInternalizer(Isolate * isolate,Handle<JSReceiver> reviver)107   JsonParseInternalizer(Isolate* isolate, Handle<JSReceiver> reviver)
108       : isolate_(isolate), reviver_(reviver) {}
109 
110   MaybeHandle<Object> InternalizeJsonProperty(Handle<JSReceiver> holder,
111                                               Handle<String> key);
112 
113   bool RecurseAndApply(Handle<JSReceiver> holder, Handle<String> name);
114 
115   Isolate* isolate_;
116   Handle<JSReceiver> reviver_;
117 };
118 
// Token kinds used by JsonParser, stored in a single byte. The names follow
// the JSON grammar; the mapping from source characters to tokens is not
// defined in this header. Enumerator order determines the underlying values,
// so do not reorder.
enum class JsonToken : uint8_t {
  // Scalar values.
  NUMBER,
  STRING,
  // Object / array delimiters.
  LBRACE,
  RBRACE,
  LBRACK,
  RBRACK,
  // Keyword literals.
  TRUE_LITERAL,
  FALSE_LITERAL,
  NULL_LITERAL,
  // Whitespace and punctuation.
  WHITESPACE,
  COLON,
  COMMA,
  // Anything not valid, and end of input.
  ILLEGAL,
  EOS
};
135 
136 // A simple json parser.
137 template <typename Char>
138 class JsonParser final {
139  public:
140   using SeqString = typename CharTraits<Char>::String;
141   using SeqExternalString = typename CharTraits<Char>::ExternalString;
142 
Parse(Isolate * isolate,Handle<String> source,Handle<Object> reviver)143   V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Parse(
144       Isolate* isolate, Handle<String> source, Handle<Object> reviver) {
145     Handle<Object> result;
146     ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
147                                JsonParser(isolate, source).ParseJson(), Object);
148     if (reviver->IsCallable()) {
149       return JsonParseInternalizer::Internalize(isolate, result, reviver);
150     }
151     return result;
152   }
153 
154   static constexpr uc32 kEndOfString = static_cast<uc32>(-1);
155   static constexpr uc32 kInvalidUnicodeCharacter = static_cast<uc32>(-1);
156 
157  private:
158   struct JsonContinuation {
159     enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement };
JsonContinuationJsonContinuation160     JsonContinuation(Isolate* isolate, Type type, size_t index)
161         : scope(isolate),
162           type_(type),
163           index(static_cast<uint32_t>(index)),
164           max_index(0),
165           elements(0) {}
166 
typeJsonContinuation167     Type type() const { return static_cast<Type>(type_); }
set_typeJsonContinuation168     void set_type(Type type) { type_ = static_cast<uint8_t>(type); }
169 
170     HandleScope scope;
171     // Unfortunately GCC doesn't like packing Type in two bits.
172     uint32_t type_ : 2;
173     uint32_t index : 30;
174     uint32_t max_index;
175     uint32_t elements;
176   };
177 
178   JsonParser(Isolate* isolate, Handle<String> source);
179   ~JsonParser();
180 
181   // Parse a string containing a single JSON value.
182   MaybeHandle<Object> ParseJson();
183 
advance()184   void advance() { ++cursor_; }
185 
CurrentCharacter()186   uc32 CurrentCharacter() {
187     if (V8_UNLIKELY(is_at_end())) return kEndOfString;
188     return *cursor_;
189   }
190 
NextCharacter()191   uc32 NextCharacter() {
192     advance();
193     return CurrentCharacter();
194   }
195 
196   void AdvanceToNonDecimal();
197 
peek()198   V8_INLINE JsonToken peek() const { return next_; }
199 
Consume(JsonToken token)200   void Consume(JsonToken token) {
201     DCHECK_EQ(peek(), token);
202     advance();
203   }
204 
Expect(JsonToken token)205   void Expect(JsonToken token) {
206     if (V8_LIKELY(peek() == token)) {
207       advance();
208     } else {
209       ReportUnexpectedToken(peek());
210     }
211   }
212 
ExpectNext(JsonToken token)213   void ExpectNext(JsonToken token) {
214     SkipWhitespace();
215     Expect(token);
216   }
217 
Check(JsonToken token)218   bool Check(JsonToken token) {
219     SkipWhitespace();
220     if (next_ != token) return false;
221     advance();
222     return true;
223   }
224 
225   template <size_t N>
ScanLiteral(const char (& s)[N])226   void ScanLiteral(const char (&s)[N]) {
227     DCHECK(!is_at_end());
228     // There's at least 1 character, we always consume a character and compare
229     // the next character. The first character was compared before we jumped
230     // to ScanLiteral.
231     STATIC_ASSERT(N > 2);
232     size_t remaining = static_cast<size_t>(end_ - cursor_);
233     if (V8_LIKELY(remaining >= N - 1 &&
234                   CompareChars(s + 1, cursor_ + 1, N - 2) == 0)) {
235       cursor_ += N - 1;
236       return;
237     }
238 
239     cursor_++;
240     for (size_t i = 0; i < Min(N - 2, remaining - 1); i++) {
241       if (*(s + 1 + i) != *cursor_) {
242         ReportUnexpectedCharacter(*cursor_);
243         return;
244       }
245       cursor_++;
246     }
247 
248     DCHECK(is_at_end());
249     ReportUnexpectedToken(JsonToken::EOS);
250   }
251 
252   // The JSON lexical grammar is specified in the ECMAScript 5 standard,
253   // section 15.12.1.1. The only allowed whitespace characters between tokens
254   // are tab, carriage-return, newline and space.
255   void SkipWhitespace();
256 
257   // A JSON string (production JSONString) is subset of valid JavaScript string
258   // literals. The string must only be double-quoted (not single-quoted), and
259   // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
260   // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
261   JsonString ScanJsonString(bool needs_internalization);
262   JsonString ScanJsonPropertyKey(JsonContinuation* cont);
263   uc32 ScanUnicodeCharacter();
264   Handle<String> MakeString(const JsonString& string,
265                             Handle<String> hint = Handle<String>());
266 
267   template <typename SinkChar>
268   void DecodeString(SinkChar* sink, int start, int length);
269 
270   template <typename SinkSeqString>
271   Handle<String> DecodeString(const JsonString& string,
272                               Handle<SinkSeqString> intermediate,
273                               Handle<String> hint);
274 
275   // A JSON number (production JSONNumber) is a subset of the valid JavaScript
276   // decimal number literals.
277   // It includes an optional minus sign, must have at least one
278   // digit before and after a decimal point, may not have prefixed zeros (unless
279   // the integer part is zero), and may include an exponent part (e.g., "e-10").
280   // Hexadecimal and octal numbers are not allowed.
281   Handle<Object> ParseJsonNumber();
282 
283   // Parse a single JSON value from input (grammar production JSONValue).
284   // A JSON value is either a (double-quoted) string literal, a number literal,
285   // one of "true", "false", or "null", or an object or array literal.
286   MaybeHandle<Object> ParseJsonValue();
287 
288   Handle<Object> BuildJsonObject(
289       const JsonContinuation& cont,
290       const std::vector<JsonProperty>& property_stack, Handle<Map> feedback);
291   Handle<Object> BuildJsonArray(
292       const JsonContinuation& cont,
293       const std::vector<Handle<Object>>& element_stack);
294 
295   // Mark that a parsing error has happened at the current character.
296   void ReportUnexpectedCharacter(uc32 c);
297   // Mark that a parsing error has happened at the current token.
298   void ReportUnexpectedToken(JsonToken token);
299 
isolate()300   inline Isolate* isolate() { return isolate_; }
factory()301   inline Factory* factory() { return isolate_->factory(); }
object_constructor()302   inline Handle<JSFunction> object_constructor() { return object_constructor_; }
303 
304   static const int kInitialSpecialStringLength = 32;
305 
UpdatePointersCallback(v8::Isolate * v8_isolate,v8::GCType type,v8::GCCallbackFlags flags,void * parser)306   static void UpdatePointersCallback(v8::Isolate* v8_isolate, v8::GCType type,
307                                      v8::GCCallbackFlags flags, void* parser) {
308     reinterpret_cast<JsonParser<Char>*>(parser)->UpdatePointers();
309   }
310 
UpdatePointers()311   void UpdatePointers() {
312     DisallowHeapAllocation no_gc;
313     const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc);
314     if (chars_ != chars) {
315       size_t position = cursor_ - chars_;
316       size_t length = end_ - chars_;
317       chars_ = chars;
318       cursor_ = chars_ + position;
319       end_ = chars_ + length;
320     }
321   }
322 
323  private:
324   static const bool kIsOneByte = sizeof(Char) == 1;
325 
is_at_end()326   bool is_at_end() const {
327     DCHECK_LE(cursor_, end_);
328     return cursor_ == end_;
329   }
330 
position()331   int position() const { return static_cast<int>(cursor_ - chars_); }
332 
333   Isolate* isolate_;
334   const uint64_t hash_seed_;
335   JsonToken next_;
336   // Indicates whether the bytes underneath source_ can relocate during GC.
337   bool chars_may_relocate_;
338   Handle<JSFunction> object_constructor_;
339   const Handle<String> original_source_;
340   Handle<String> source_;
341 
342   // Cached pointer to the raw chars in source. In case source is on-heap, we
343   // register an UpdatePointers callback. For this reason, chars_, cursor_ and
344   // end_ should never be locally cached across a possible allocation. The scope
345   // in which we cache chars has to be guarded by a DisallowHeapAllocation
346   // scope.
347   const Char* cursor_;
348   const Char* end_;
349   const Char* chars_;
350 };
351 
352 // Explicit instantiation declarations.
353 extern template class JsonParser<uint8_t>;
354 extern template class JsonParser<uint16_t>;
355 
356 }  // namespace internal
357 }  // namespace v8
358 
359 #endif  // V8_JSON_JSON_PARSER_H_
360