• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/utils/SkJSON.h"
9 
10 #include "include/core/SkStream.h"
11 #include "include/core/SkString.h"
12 #include "include/private/SkMalloc.h"
13 #include "include/utils/SkParse.h"
14 #include "src/utils/SkUTF.h"
15 
16 #include <cmath>
17 #include <tuple>
18 #include <vector>
19 
20 namespace skjson {
21 
22 // #define SK_JSON_REPORT_ERRORS
23 
24 static_assert( sizeof(Value) == 8, "");
25 static_assert(alignof(Value) == 8, "");
26 
27 static constexpr size_t kRecAlign = alignof(Value);
28 
init_tagged(Tag t)29 void Value::init_tagged(Tag t) {
30     memset(fData8, 0, sizeof(fData8));
31     fData8[0] = SkTo<uint8_t>(t);
32     SkASSERT(this->getTag() == t);
33 }
34 
35 // Pointer values store a type (in the lower kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)36 void Value::init_tagged_pointer(Tag t, void* p) {
37     if (sizeof(Value) == sizeof(uintptr_t)) {
38         *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
39         // For 64-bit, we rely on the pointer lower bits being zero.
40         SkASSERT(!(fData8[0] & kTagMask));
41         fData8[0] |= SkTo<uint8_t>(t);
42     } else {
43         // For 32-bit, we store the pointer in the upper word
44         SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
45         this->init_tagged(t);
46         *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
47     }
48 
49     SkASSERT(this->getTag()    == t);
50     SkASSERT(this->ptr<void>() == p);
51 }
52 
NullValue()53 NullValue::NullValue() {
54     this->init_tagged(Tag::kNull);
55     SkASSERT(this->getTag() == Tag::kNull);
56 }
57 
BoolValue(bool b)58 BoolValue::BoolValue(bool b) {
59     this->init_tagged(Tag::kBool);
60     *this->cast<bool>() = b;
61     SkASSERT(this->getTag() == Tag::kBool);
62 }
63 
NumberValue(int32_t i)64 NumberValue::NumberValue(int32_t i) {
65     this->init_tagged(Tag::kInt);
66     *this->cast<int32_t>() = i;
67     SkASSERT(this->getTag() == Tag::kInt);
68 }
69 
NumberValue(float f)70 NumberValue::NumberValue(float f) {
71     this->init_tagged(Tag::kFloat);
72     *this->cast<float>() = f;
73     SkASSERT(this->getTag() == Tag::kFloat);
74 }
75 
76 // Vector recs point to externally allocated slabs with the following layout:
77 //
78 //   [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
79 //
80 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
81 //
82 template <typename T, size_t extra_alloc_size = 0>
MakeVector(const void * src,size_t size,SkArenaAlloc & alloc)83 static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) {
84     // The Ts are already in memory, so their size should be safe.
85     const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size;
86     auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
87 
88     *size_ptr = size;
89     sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T));
90 
91     return size_ptr;
92 }
93 
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)94 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
95     this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc));
96     SkASSERT(this->getTag() == Tag::kArray);
97 }
98 
// Strings have two flavors:
//
// -- short strings (len <= 6) -> these are stored inline, in the record
//    (first byte reserved for the tag, one byte reserved for the null terminator):
//
//        [tag] [str] [\0 padding]
//
//    The bytes following the string are always zero, so they double-up as the
//    null terminator.  Since kShortString == 0, a zeroed first byte is also
//    already the correct tag.
//
// -- long strings (len > 6) -> these are externally allocated vectors (VectorRec<char>).
//
// The string data plus a null-char terminator are copied over.
//
113 namespace {
114 
115 // An internal string builder with a fast 8 byte short string load path
116 // (for the common case where the string is not at the end of the stream).
117 class FastString final : public Value {
118 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)119     FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
120         SkASSERT(src <= eos);
121 
122         if (size > kMaxInlineStringSize) {
123             this->initLongString(src, size, alloc);
124             SkASSERT(this->getTag() == Tag::kString);
125             return;
126         }
127 
128         // initFastShortString is faster (doh), but requires access to 6 chars past src.
129         if (src && src + 6 <= eos) {
130             this->initFastShortString(src, size);
131         } else {
132             this->initShortString(src, size);
133         }
134 
135         SkASSERT(this->getTag() == Tag::kShortString);
136     }
137 
138 private:
139     // first byte reserved for tagging, \0 terminator => 6 usable chars
140     inline static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2;
141 
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)142     void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
143         SkASSERT(size > kMaxInlineStringSize);
144 
145         this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc));
146 
147         auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
148         const_cast<char*>(data)[size] = '\0';
149     }
150 
initShortString(const char * src,size_t size)151     void initShortString(const char* src, size_t size) {
152         SkASSERT(size <= kMaxInlineStringSize);
153 
154         this->init_tagged(Tag::kShortString);
155         sk_careful_memcpy(this->cast<char>(), src, size);
156         // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
157     }
158 
initFastShortString(const char * src,size_t size)159     void initFastShortString(const char* src, size_t size) {
160         SkASSERT(size <= kMaxInlineStringSize);
161 
162         uint64_t* s64 = this->cast<uint64_t>();
163 
164         // Load 8 chars and mask out the tag and \0 terminator.
165         // Note: we picked kShortString == 0 to avoid setting explicitly below.
166         static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this");
167 
168         // Since the first byte is occupied by the tag, we want the string chars [0..5] to land
169         // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the
170         // string requires a " prefix at the very least).
171         memcpy(s64, src - 1, 8);
172 
173 #if defined(SK_CPU_LENDIAN)
174         // The mask for a max-length string (6), with a leading tag and trailing \0 is
175         // 0x00ffffffffffff00.  Accounting for the final left-shift, this becomes
176         // 0x0000ffffffffffff.
177         *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s
178                     << 8;                                                      // tag byte
179 #else
180         static_assert(false, "Big-endian builds are not supported at this time.");
181 #endif
182     }
183 };
184 
185 } // namespace
186 
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)187 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
188     new (this) FastString(src, size, src, alloc);
189 }
190 
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)191 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
192     this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc));
193     SkASSERT(this->getTag() == Tag::kObject);
194 }
195 
196 
197 // Boring public Value glue.
198 
// Equality-only comparison of two \0-terminated strings.
//
// Returns 0 when |a| and |b| are identical and 1 otherwise (unlike strcmp, the
// result carries no ordering information).
static int inline_strcmp(const char a[], const char b[]) {
    while (*a != 0) {
        // This also catches |b| ending early: its terminator can't match a non-zero char.
        if (*a != *b) {
            return 1;
        }
        ++a;
        ++b;
    }

    // |a| is exhausted; the strings are equal only if |b| ends here as well.
    return *b != 0;
}
211 
operator [](const char * key) const212 const Value& ObjectValue::operator[](const char* key) const {
213     // Reverse search for duplicates resolution (policy: return last).
214     const auto* begin  = this->begin();
215     const auto* member = this->end();
216 
217     while (member > begin) {
218         --member;
219         if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
220             return member->fValue;
221         }
222     }
223 
224     static const Value g_null = NullValue();
225     return g_null;
226 }
227 
228 namespace {
229 
230 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
231 //
232 // [1] https://github.com/Tencent/rapidjson/
233 // [2] https://github.com/chadaustin/sajson
234 // [3] https://pastebin.com/hnhSTL3h
235 
236 
237 // bit 0 (0x01) - plain ASCII string character
238 // bit 1 (0x02) - whitespace
239 // bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
240 // bit 3 (0x08) - 0-9
241 // bit 4 (0x10) - 0-9 e E .
242 // bit 5 (0x20) - scope terminator (} ])
// Per-byte classification flags, indexed by the unsigned char value.
// Bytes 128-255 carry no flags; they are omitted from the initializer and are
// therefore zero-initialized.
static constexpr uint8_t g_token_flags[256] = {
 //    0     1     2     3     4     5     6     7       8     9     A     B     C     D     E     F
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,   0x04, 0x06, 0x06, 0x04, 0x04, 0x06, 0x04, 0x04, // 0
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,   0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, // 1
    0x03, 0x01, 0x04, 0x01, 0x01, 0x01, 0x01, 0x01,   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, // 2
    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,   0x19, 0x19, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 3
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01,   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 4
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,   0x01, 0x01, 0x01, 0x01, 0x04, 0x25, 0x01, 0x01, // 5
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01,   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 6
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,   0x01, 0x01, 0x01, 0x01, 0x01, 0x25, 0x01, 0x01, // 7
};
260 
is_ws(char c)261 static inline bool is_ws(char c)       { return g_token_flags[static_cast<uint8_t>(c)] & 0x02; }
is_eostring(char c)262 static inline bool is_eostring(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x04; }
is_digit(char c)263 static inline bool is_digit(char c)    { return g_token_flags[static_cast<uint8_t>(c)] & 0x08; }
is_numeric(char c)264 static inline bool is_numeric(char c)  { return g_token_flags[static_cast<uint8_t>(c)] & 0x10; }
is_eoscope(char c)265 static inline bool is_eoscope(char c)  { return g_token_flags[static_cast<uint8_t>(c)] & 0x20; }
266 
skip_ws(const char * p)267 static inline const char* skip_ws(const char* p) {
268     while (is_ws(*p)) ++p;
269     return p;
270 }
271 
pow10(int32_t exp)272 static inline float pow10(int32_t exp) {
273     static constexpr float g_pow10_table[63] =
274     {
275        1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
276        1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
277        1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
278        1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
279        1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
280        1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
281        1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
282        1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
283     };
284 
285     static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2;
286 
287     // We only support negative exponents for now.
288     SkASSERT(exp <= 0);
289 
290     return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
291                                   : std::pow(10.0f, static_cast<float>(exp));
292 }
293 
294 class DOMParser {
295 public:
DOMParser(SkArenaAlloc & alloc)296     explicit DOMParser(SkArenaAlloc& alloc)
297         : fAlloc(alloc) {
298         fValueStack.reserve(kValueStackReserve);
299         fUnescapeBuffer.reserve(kUnescapeBufferReserve);
300     }
301 
parse(const char * p,size_t size)302     Value parse(const char* p, size_t size) {
303         if (!size) {
304             return this->error(NullValue(), p, "invalid empty input");
305         }
306 
307         const char* p_stop = p + size - 1;
308 
309         // We're only checking for end-of-stream on object/array close('}',']'),
310         // so we must trim any whitespace from the buffer tail.
311         while (p_stop > p && is_ws(*p_stop)) --p_stop;
312 
313         SkASSERT(p_stop >= p && p_stop < p + size);
314         if (!is_eoscope(*p_stop)) {
315             return this->error(NullValue(), p_stop, "invalid top-level value");
316         }
317 
318         p = skip_ws(p);
319 
320         switch (*p) {
321         case '{':
322             goto match_object;
323         case '[':
324             goto match_array;
325         default:
326             return this->error(NullValue(), p, "invalid top-level value");
327         }
328 
329     match_object:
330         SkASSERT(*p == '{');
331         p = skip_ws(p + 1);
332 
333         this->pushObjectScope();
334 
335         if (*p == '}') goto pop_object;
336 
337         // goto match_object_key;
338     match_object_key:
339         p = skip_ws(p);
340         if (*p != '"') return this->error(NullValue(), p, "expected object key");
341 
342         p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
343             this->pushObjectKey(key, size, eos);
344         });
345         if (!p) return NullValue();
346 
347         p = skip_ws(p);
348         if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");
349 
350         ++p;
351 
352         // goto match_value;
353     match_value:
354         p = skip_ws(p);
355 
356         switch (*p) {
357         case '\0':
358             return this->error(NullValue(), p, "unexpected input end");
359         case '"':
360             p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
361                 this->pushString(str, size, eos);
362             });
363             break;
364         case '[':
365             goto match_array;
366         case 'f':
367             p = this->matchFalse(p);
368             break;
369         case 'n':
370             p = this->matchNull(p);
371             break;
372         case 't':
373             p = this->matchTrue(p);
374             break;
375         case '{':
376             goto match_object;
377         default:
378             p = this->matchNumber(p);
379             break;
380         }
381 
382         if (!p) return NullValue();
383 
384         // goto match_post_value;
385     match_post_value:
386         SkASSERT(!this->inTopLevelScope());
387 
388         p = skip_ws(p);
389         switch (*p) {
390         case ',':
391             ++p;
392             if (this->inObjectScope()) {
393                 goto match_object_key;
394             } else {
395                 SkASSERT(this->inArrayScope());
396                 goto match_value;
397             }
398         case ']':
399             goto pop_array;
400         case '}':
401             goto pop_object;
402         default:
403             return this->error(NullValue(), p - 1, "unexpected value-trailing token");
404         }
405 
406         // unreachable
407         SkASSERT(false);
408 
409     pop_object:
410         SkASSERT(*p == '}');
411 
412         if (this->inArrayScope()) {
413             return this->error(NullValue(), p, "unexpected object terminator");
414         }
415 
416         this->popObjectScope();
417 
418         // goto pop_common
419     pop_common:
420         SkASSERT(is_eoscope(*p));
421 
422         if (this->inTopLevelScope()) {
423             SkASSERT(fValueStack.size() == 1);
424 
425             // Success condition: parsed the top level element and reached the stop token.
426             return p == p_stop
427                 ? fValueStack.front()
428                 : this->error(NullValue(), p + 1, "trailing root garbage");
429         }
430 
431         if (p == p_stop) {
432             return this->error(NullValue(), p, "unexpected end-of-input");
433         }
434 
435         ++p;
436 
437         goto match_post_value;
438 
439     match_array:
440         SkASSERT(*p == '[');
441         p = skip_ws(p + 1);
442 
443         this->pushArrayScope();
444 
445         if (*p != ']') goto match_value;
446 
447         // goto pop_array;
448     pop_array:
449         SkASSERT(*p == ']');
450 
451         if (this->inObjectScope()) {
452             return this->error(NullValue(), p, "unexpected array terminator");
453         }
454 
455         this->popArrayScope();
456 
457         goto pop_common;
458 
459         SkASSERT(false);
460         return NullValue();
461     }
462 
getError() const463     std::tuple<const char*, const SkString> getError() const {
464         return std::make_tuple(fErrorToken, fErrorMessage);
465     }
466 
467 private:
468     SkArenaAlloc&         fAlloc;
469 
470     // Pending values stack.
471     inline static constexpr size_t kValueStackReserve = 256;
472     std::vector<Value>    fValueStack;
473 
474     // String unescape buffer.
475     inline static constexpr size_t kUnescapeBufferReserve = 512;
476     std::vector<char>     fUnescapeBuffer;
477 
478     // Tracks the current object/array scope, as an index into fStack:
479     //
480     //   - for objects: fScopeIndex =  (index of first value in scope)
481     //   - for arrays : fScopeIndex = -(index of first value in scope)
482     //
483     // fScopeIndex == 0 IFF we are at the top level (no current/active scope).
484     intptr_t              fScopeIndex = 0;
485 
486     // Error reporting.
487     const char*           fErrorToken = nullptr;
488     SkString              fErrorMessage;
489 
inTopLevelScope() const490     bool inTopLevelScope() const { return fScopeIndex == 0; }
inObjectScope() const491     bool inObjectScope()   const { return fScopeIndex >  0; }
inArrayScope() const492     bool inArrayScope()    const { return fScopeIndex <  0; }
493 
494     // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
495     template <typename T>
496     class RawValue final : public Value {
497     public:
RawValue(T v)498         explicit RawValue(T v) {
499             static_assert(sizeof(T) <= sizeof(Value), "");
500             *this->cast<T>() = v;
501         }
502 
operator *() const503         T operator *() const { return *this->cast<T>(); }
504     };
505 
506     template <typename VectorT>
popScopeAsVec(size_t scope_start)507     void popScopeAsVec(size_t scope_start) {
508         SkASSERT(scope_start > 0);
509         SkASSERT(scope_start <= fValueStack.size());
510 
511         using T = typename VectorT::ValueT;
512         static_assert( sizeof(T) >=  sizeof(Value), "");
513         static_assert( sizeof(T)  %  sizeof(Value) == 0, "");
514         static_assert(alignof(T) == alignof(Value), "");
515 
516         const auto scope_count = fValueStack.size() - scope_start,
517                          count = scope_count / (sizeof(T) / sizeof(Value));
518         SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
519 
520         const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
521 
522         // Restore the previous scope index from saved placeholder value,
523         // and instantiate as a vector of values in scope.
524         auto& placeholder = fValueStack[scope_start - 1];
525         fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
526         placeholder = VectorT(begin, count, fAlloc);
527 
528         // Drop the (consumed) values in scope.
529         fValueStack.resize(scope_start);
530     }
531 
pushObjectScope()532     void pushObjectScope() {
533         // Save a scope index now, and then later we'll overwrite this value as the Object itself.
534         fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
535 
536         // New object scope.
537         fScopeIndex = SkTo<intptr_t>(fValueStack.size());
538     }
539 
popObjectScope()540     void popObjectScope() {
541         SkASSERT(this->inObjectScope());
542         this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
543 
544         SkDEBUGCODE(
545             const auto& obj = fValueStack.back().as<ObjectValue>();
546             SkASSERT(obj.is<ObjectValue>());
547             for (const auto& member : obj) {
548                 SkASSERT(member.fKey.is<StringValue>());
549             }
550         )
551     }
552 
pushArrayScope()553     void pushArrayScope() {
554         // Save a scope index now, and then later we'll overwrite this value as the Array itself.
555         fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
556 
557         // New array scope.
558         fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
559     }
560 
popArrayScope()561     void popArrayScope() {
562         SkASSERT(this->inArrayScope());
563         this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
564 
565         SkDEBUGCODE(
566             const auto& arr = fValueStack.back().as<ArrayValue>();
567             SkASSERT(arr.is<ArrayValue>());
568         )
569     }
570 
pushObjectKey(const char * key,size_t size,const char * eos)571     void pushObjectKey(const char* key, size_t size, const char* eos) {
572         SkASSERT(this->inObjectScope());
573         SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
574         SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
575         this->pushString(key, size, eos);
576     }
577 
pushTrue()578     void pushTrue() {
579         fValueStack.push_back(BoolValue(true));
580     }
581 
pushFalse()582     void pushFalse() {
583         fValueStack.push_back(BoolValue(false));
584     }
585 
pushNull()586     void pushNull() {
587         fValueStack.push_back(NullValue());
588     }
589 
pushString(const char * s,size_t size,const char * eos)590     void pushString(const char* s, size_t size, const char* eos) {
591         fValueStack.push_back(FastString(s, size, eos, fAlloc));
592     }
593 
pushInt32(int32_t i)594     void pushInt32(int32_t i) {
595         fValueStack.push_back(NumberValue(i));
596     }
597 
pushFloat(float f)598     void pushFloat(float f) {
599         fValueStack.push_back(NumberValue(f));
600     }
601 
602     template <typename T>
error(T && ret_val,const char * p,const char * msg)603     T error(T&& ret_val, const char* p, const char* msg) {
604 #if defined(SK_JSON_REPORT_ERRORS)
605         fErrorToken = p;
606         fErrorMessage.set(msg);
607 #endif
608         return ret_val;
609     }
610 
matchTrue(const char * p)611     const char* matchTrue(const char* p) {
612         SkASSERT(p[0] == 't');
613 
614         if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
615             this->pushTrue();
616             return p + 4;
617         }
618 
619         return this->error(nullptr, p, "invalid token");
620     }
621 
matchFalse(const char * p)622     const char* matchFalse(const char* p) {
623         SkASSERT(p[0] == 'f');
624 
625         if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
626             this->pushFalse();
627             return p + 5;
628         }
629 
630         return this->error(nullptr, p, "invalid token");
631     }
632 
matchNull(const char * p)633     const char* matchNull(const char* p) {
634         SkASSERT(p[0] == 'n');
635 
636         if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
637             this->pushNull();
638             return p + 4;
639         }
640 
641         return this->error(nullptr, p, "invalid token");
642     }
643 
unescapeString(const char * begin,const char * end)644     const std::vector<char>* unescapeString(const char* begin, const char* end) {
645         fUnescapeBuffer.clear();
646 
647         for (const auto* p = begin; p != end; ++p) {
648             if (*p != '\\') {
649                 fUnescapeBuffer.push_back(*p);
650                 continue;
651             }
652 
653             if (++p == end) {
654                 return nullptr;
655             }
656 
657             switch (*p) {
658             case  '"': fUnescapeBuffer.push_back( '"'); break;
659             case '\\': fUnescapeBuffer.push_back('\\'); break;
660             case  '/': fUnescapeBuffer.push_back( '/'); break;
661             case  'b': fUnescapeBuffer.push_back('\b'); break;
662             case  'f': fUnescapeBuffer.push_back('\f'); break;
663             case  'n': fUnescapeBuffer.push_back('\n'); break;
664             case  'r': fUnescapeBuffer.push_back('\r'); break;
665             case  't': fUnescapeBuffer.push_back('\t'); break;
666             case  'u': {
667                 if (p + 4 >= end) {
668                     return nullptr;
669                 }
670 
671                 uint32_t hexed;
672                 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
673                 const auto* eos = SkParse::FindHex(hex_str, &hexed);
674                 if (!eos || *eos) {
675                     return nullptr;
676                 }
677 
678                 char utf8[SkUTF::kMaxBytesInUTF8Sequence];
679                 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
680                 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
681                 p += 4;
682             } break;
683             default: return nullptr;
684             }
685         }
686 
687         return &fUnescapeBuffer;
688     }
689 
690     template <typename MatchFunc>
matchString(const char * p,const char * p_stop,MatchFunc && func)691     const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
692         SkASSERT(*p == '"');
693         const auto* s_begin = p + 1;
694         bool requires_unescape = false;
695 
696         do {
697             // Consume string chars.
698             // This is the fast path, and hopefully we only hit it once then quick-exit below.
699             for (p = p + 1; !is_eostring(*p); ++p);
700 
701             if (*p == '"') {
702                 // Valid string found.
703                 if (!requires_unescape) {
704                     func(s_begin, p - s_begin, p_stop);
705                 } else {
706                     // Slow unescape.  We could avoid this extra copy with some effort,
707                     // but in practice escaped strings should be rare.
708                     const auto* buf = this->unescapeString(s_begin, p);
709                     if (!buf) {
710                         break;
711                     }
712 
713                     SkASSERT(!buf->empty());
714                     func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
715                 }
716                 return p + 1;
717             }
718 
719             if (*p == '\\') {
720                 requires_unescape = true;
721                 ++p;
722                 continue;
723             }
724 
725             // End-of-scope chars are special: we use them to tag the end of the input.
726             // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
727             // end of the input.  To that effect, we treat them as string terminators above,
728             // then we catch them here.
729             if (is_eoscope(*p)) {
730                 continue;
731             }
732 
733             // Invalid/unexpected char.
734             break;
735         } while (p != p_stop);
736 
737         // Premature end-of-input, or illegal string char.
738         return this->error(nullptr, s_begin - 1, "invalid string");
739     }
740 
matchFastFloatDecimalPart(const char * p,int sign,float f,int exp)741     const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
742         SkASSERT(exp <= 0);
743 
744         for (;;) {
745             if (!is_digit(*p)) break;
746             f = f * 10.f + (*p++ - '0'); --exp;
747             if (!is_digit(*p)) break;
748             f = f * 10.f + (*p++ - '0'); --exp;
749         }
750 
751         const auto decimal_scale = pow10(exp);
752         if (is_numeric(*p) || !decimal_scale) {
753             SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
754             // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
755             return nullptr;
756         }
757 
758         this->pushFloat(sign * f * decimal_scale);
759 
760         return p;
761     }
762 
matchFastFloatPart(const char * p,int sign,float f)763     const char* matchFastFloatPart(const char* p, int sign, float f) {
764         for (;;) {
765             if (!is_digit(*p)) break;
766             f = f * 10.f + (*p++ - '0');
767             if (!is_digit(*p)) break;
768             f = f * 10.f + (*p++ - '0');
769         }
770 
771         if (!is_numeric(*p)) {
772             // Matched (integral) float.
773             this->pushFloat(sign * f);
774             return p;
775         }
776 
777         return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
778                            : nullptr;
779     }
780 
matchFast32OrFloat(const char * p)781     const char* matchFast32OrFloat(const char* p) {
782         int sign = 1;
783         if (*p == '-') {
784             sign = -1;
785             ++p;
786         }
787 
788         const auto* digits_start = p;
789 
790         int32_t n32 = 0;
791 
792         // This is the largest absolute int32 value we can handle before
793         // risking overflow *on the next digit* (214748363).
794         static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
795 
796         if (is_digit(*p)) {
797             n32 = (*p++ - '0');
798             for (;;) {
799                 if (!is_digit(*p) || n32 > kMaxInt32) break;
800                 n32 = n32 * 10 + (*p++ - '0');
801             }
802         }
803 
804         if (!is_numeric(*p)) {
805             // Did we actually match any digits?
806             if (p > digits_start) {
807                 this->pushInt32(sign * n32);
808                 return p;
809             }
810             return nullptr;
811         }
812 
813         if (*p == '.') {
814             const auto* decimals_start = ++p;
815 
816             int exp = 0;
817 
818             for (;;) {
819                 if (!is_digit(*p) || n32 > kMaxInt32) break;
820                 n32 = n32 * 10 + (*p++ - '0'); --exp;
821                 if (!is_digit(*p) || n32 > kMaxInt32) break;
822                 n32 = n32 * 10 + (*p++ - '0'); --exp;
823             }
824 
825             if (!is_numeric(*p)) {
826                 // Did we actually match any digits?
827                 if (p > decimals_start) {
828                     this->pushFloat(sign * n32 * pow10(exp));
829                     return p;
830                 }
831                 return nullptr;
832             }
833 
834             if (n32 > kMaxInt32) {
835                 // we ran out on n32 bits
836                 return this->matchFastFloatDecimalPart(p, sign, n32, exp);
837             }
838         }
839 
840         return this->matchFastFloatPart(p, sign, n32);
841     }
842 
matchNumber(const char * p)843     const char* matchNumber(const char* p) {
844         if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
845 
846         // slow fallback
847         char* matched;
848         float f = strtof(p, &matched);
849         if (matched > p) {
850             this->pushFloat(f);
851             return matched;
852         }
853         return this->error(nullptr, p, "invalid numeric token");
854     }
855 };
856 
Write(const Value & v,SkWStream * stream)857 void Write(const Value& v, SkWStream* stream) {
858     switch (v.getType()) {
859     case Value::Type::kNull:
860         stream->writeText("null");
861         break;
862     case Value::Type::kBool:
863         stream->writeText(*v.as<BoolValue>() ? "true" : "false");
864         break;
865     case Value::Type::kNumber:
866         stream->writeScalarAsText(*v.as<NumberValue>());
867         break;
868     case Value::Type::kString:
869         stream->writeText("\"");
870         stream->writeText(v.as<StringValue>().begin());
871         stream->writeText("\"");
872         break;
873     case Value::Type::kArray: {
874         const auto& array = v.as<ArrayValue>();
875         stream->writeText("[");
876         bool first_value = true;
877         for (const auto& entry : array) {
878             if (!first_value) stream->writeText(",");
879             Write(entry, stream);
880             first_value = false;
881         }
882         stream->writeText("]");
883         break;
884     }
885     case Value::Type::kObject:
886         const auto& object = v.as<ObjectValue>();
887         stream->writeText("{");
888         bool first_member = true;
889         for (const auto& member : object) {
890             SkASSERT(member.fKey.getType() == Value::Type::kString);
891             if (!first_member) stream->writeText(",");
892             Write(member.fKey, stream);
893             stream->writeText(":");
894             Write(member.fValue, stream);
895             first_member = false;
896         }
897         stream->writeText("}");
898         break;
899     }
900 }
901 
902 } // namespace
903 
toString() const904 SkString Value::toString() const {
905     SkDynamicMemoryWStream wstream;
906     Write(*this, &wstream);
907     const auto data = wstream.detachAsData();
908     // TODO: is there a better way to pass data around without copying?
909     return SkString(static_cast<const char*>(data->data()), data->size());
910 }
911 
912 static constexpr size_t kMinChunkSize = 4096;
913 
DOM(const char * data,size_t size)914 DOM::DOM(const char* data, size_t size)
915     : fAlloc(kMinChunkSize) {
916     DOMParser parser(fAlloc);
917 
918     fRoot = parser.parse(data, size);
919 }
920 
write(SkWStream * stream) const921 void DOM::write(SkWStream* stream) const {
922     Write(fRoot, stream);
923 }
924 
925 } // namespace skjson
926