• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/utils/SkJSON.h"
9 
10 #include "include/core/SkData.h"
11 #include "include/core/SkRefCnt.h"
12 #include "include/core/SkStream.h"
13 #include "include/core/SkString.h"
14 #include "include/private/base/SkMalloc.h"
15 #include "include/private/base/SkDebug.h"
16 #include "include/private/base/SkTo.h"
17 #include "include/utils/SkParse.h"
18 #include "src/base/SkUTF.h"
19 
20 #include <cmath>
21 #include <cstdint>
22 #include <cstdlib>
23 #include <limits>
24 #include <new>
25 #include <tuple>
26 #include <vector>
27 
28 namespace skjson {
29 
30 // #define SK_JSON_REPORT_ERRORS
31 
// A Value record is exactly one 8-byte, 8-byte-aligned word.  The tagging and
// inline-string code below (pointer tagging, 8-byte short string loads) is written
// against this layout.
static_assert( sizeof(Value) == 8, "");
static_assert(alignof(Value) == 8, "");

// Alignment requested for externally allocated vector slabs (see MakeVector).
static constexpr size_t kRecAlign = alignof(Value);
36 
init_tagged(Tag t)37 void Value::init_tagged(Tag t) {
38     memset(fData8, 0, sizeof(fData8));
39     fData8[0] = SkTo<uint8_t>(t);
40     SkASSERT(this->getTag() == t);
41 }
42 
43 // Pointer values store a type (in the lower kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)44 void Value::init_tagged_pointer(Tag t, void* p) {
45     if (sizeof(Value) == sizeof(uintptr_t)) {
46         *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
47         // For 64-bit, we rely on the pointer lower bits being zero.
48         SkASSERT(!(fData8[0] & kTagMask));
49         fData8[0] |= SkTo<uint8_t>(t);
50     } else {
51         // For 32-bit, we store the pointer in the upper word
52         SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
53         this->init_tagged(t);
54         *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
55     }
56 
57     SkASSERT(this->getTag()    == t);
58     SkASSERT(this->ptr<void>() == p);
59 }
60 
NullValue()61 NullValue::NullValue() {
62     this->init_tagged(Tag::kNull);
63     SkASSERT(this->getTag() == Tag::kNull);
64 }
65 
BoolValue(bool b)66 BoolValue::BoolValue(bool b) {
67     this->init_tagged(Tag::kBool);
68     *this->cast<bool>() = b;
69     SkASSERT(this->getTag() == Tag::kBool);
70 }
71 
NumberValue(int32_t i)72 NumberValue::NumberValue(int32_t i) {
73     this->init_tagged(Tag::kInt);
74     *this->cast<int32_t>() = i;
75     SkASSERT(this->getTag() == Tag::kInt);
76 }
77 
NumberValue(float f)78 NumberValue::NumberValue(float f) {
79     this->init_tagged(Tag::kFloat);
80     *this->cast<float>() = f;
81     SkASSERT(this->getTag() == Tag::kFloat);
82 }
83 
84 // Vector recs point to externally allocated slabs with the following layout:
85 //
86 //   [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
87 //
88 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
89 //
90 template <typename T, size_t extra_alloc_size = 0>
MakeVector(const void * src,size_t size,SkArenaAlloc & alloc)91 static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) {
92     // The Ts are already in memory, so their size should be safe.
93     const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size;
94     auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
95 
96     *size_ptr = size;
97     sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T));
98 
99     return size_ptr;
100 }
101 
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)102 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
103     this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc));
104     SkASSERT(this->getTag() == Tag::kArray);
105 }
106 
107 // Strings have two flavors:
108 //
// -- short strings (len <= 6) -> these are stored inline, in the record
//    (first byte reserved for the tag, at least one trailing byte for the null terminator):
//
//        [tag] [str] [\0 ...]
//
//    All bytes past the string chars are zero, so the inline string is always
//    null-terminated (the fast load path also relies on kShortString == 0).
//
// -- long strings (len > 6) -> these are externally allocated vectors (VectorRec<char>).
118 //
119 // The string data plus a null-char terminator are copied over.
120 //
121 namespace {
122 
123 // An internal string builder with a fast 8 byte short string load path
124 // (for the common case where the string is not at the end of the stream).
125 class FastString final : public Value {
126 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)127     FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
128         SkASSERT(src <= eos);
129 
130         if (size > kMaxInlineStringSize) {
131             this->initLongString(src, size, alloc);
132             SkASSERT(this->getTag() == Tag::kString);
133             return;
134         }
135 
136         // initFastShortString is faster (doh), but requires access to 6 chars past src.
137         if (src && src + 6 <= eos) {
138             this->initFastShortString(src, size);
139         } else {
140             this->initShortString(src, size);
141         }
142 
143         SkASSERT(this->getTag() == Tag::kShortString);
144     }
145 
146 private:
147     // first byte reserved for tagging, \0 terminator => 6 usable chars
148     inline static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2;
149 
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)150     void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
151         SkASSERT(size > kMaxInlineStringSize);
152 
153         this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc));
154 
155         auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
156         const_cast<char*>(data)[size] = '\0';
157     }
158 
initShortString(const char * src,size_t size)159     void initShortString(const char* src, size_t size) {
160         SkASSERT(size <= kMaxInlineStringSize);
161 
162         this->init_tagged(Tag::kShortString);
163         sk_careful_memcpy(this->cast<char>(), src, size);
164         // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
165     }
166 
initFastShortString(const char * src,size_t size)167     void initFastShortString(const char* src, size_t size) {
168         SkASSERT(size <= kMaxInlineStringSize);
169 
170         uint64_t* s64 = this->cast<uint64_t>();
171 
172         // Load 8 chars and mask out the tag and \0 terminator.
173         // Note: we picked kShortString == 0 to avoid setting explicitly below.
174         static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this");
175 
176         // Since the first byte is occupied by the tag, we want the string chars [0..5] to land
177         // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the
178         // string requires a " prefix at the very least).
179         memcpy(s64, src - 1, 8);
180 
181 #if defined(SK_CPU_LENDIAN)
182         // The mask for a max-length string (6), with a leading tag and trailing \0 is
183         // 0x00ffffffffffff00.  Accounting for the final left-shift, this becomes
184         // 0x0000ffffffffffff.
185         *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s
186                     << 8;                                                      // tag byte
187 #else
188         static_assert(false, "Big-endian builds are not supported at this time.");
189 #endif
190     }
191 };
192 
193 } // namespace
194 
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)195 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
196     new (this) FastString(src, size, src, alloc);
197 }
198 
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)199 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
200     this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc));
201     SkASSERT(this->getTag() == Tag::kObject);
202 }
203 
204 
205 // Boring public Value glue.
206 
// Equality-only string compare: returns 0 when the two null-terminated strings match,
// and 1 otherwise (no ordering information, unlike strcmp).
static int inline_strcmp(const char a[], const char b[]) {
    // Walk both strings in lockstep for as long as |a| has chars left.
    for (; *a != 0; ++a, ++b) {
        if (*a != *b) {
            return 1;
        }
    }

    // |a| is exhausted: the strings are equal only if |b| is exhausted too.
    return (*b == 0) ? 0 : 1;
}
219 
operator [](const char * key) const220 const Value& ObjectValue::operator[](const char* key) const {
221     // Reverse search for duplicates resolution (policy: return last).
222     const auto* begin  = this->begin();
223     const auto* member = this->end();
224 
225     while (member > begin) {
226         --member;
227         if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
228             return member->fValue;
229         }
230     }
231 
232     static const Value g_null = NullValue();
233     return g_null;
234 }
235 
236 namespace {
237 
238 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
239 //
240 // [1] https://github.com/Tencent/rapidjson/
241 // [2] https://github.com/chadaustin/sajson
242 // [3] https://pastebin.com/hnhSTL3h
243 
244 
245 // bit 0 (0x01) - plain ASCII string character
246 // bit 1 (0x02) - whitespace
247 // bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
248 // bit 3 (0x08) - 0-9
249 // bit 4 (0x10) - 0-9 e E .
250 // bit 5 (0x20) - scope terminator (} ])
// Per-byte character class flags, indexed by the (unsigned) char value.
static constexpr uint8_t g_token_flags[256] = {
 // 0    1    2    3    4    5    6    7      8    9    A    B    C    D    E    F
    4,   4,   4,   4,   4,   4,   4,   4,     4,   6,   6,   4,   4,   6,   4,   4, // 0
    4,   4,   4,   4,   4,   4,   4,   4,     4,   4,   4,   4,   4,   4,   4,   4, // 1
    3,   1,   4,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,   1,   0x11,1, // 2
 0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,  0x19,0x19,   1,   1,   1,   1,   1,   1, // 3
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 4
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   4,0x25,   1,   1, // 5
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 6
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,0x25,   1,   1, // 7

 // 128-255
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0
};

// True when the table entry for |c| has any bit of |flag| set.
static inline bool has_token_flag(char c, uint8_t flag) {
    const auto index = static_cast<uint8_t>(c);
    return (g_token_flags[index] & flag) != 0;
}

// Whitespace: space, \t, \n, \r.
static inline bool is_ws(char c)       { return has_token_flag(c, 0x02); }

// Chars that stop the string fast-scan: " \ control chars (including \0), and also } ].
static inline bool is_eostring(char c) { return has_token_flag(c, 0x04); }

// Decimal digits 0-9.
static inline bool is_digit(char c)    { return has_token_flag(c, 0x08); }

// Chars that may continue a numeric token: 0-9 . e E.
static inline bool is_numeric(char c)  { return has_token_flag(c, 0x10); }

// Scope terminators: } and ].
static inline bool is_eoscope(char c)  { return has_token_flag(c, 0x20); }
274 
skip_ws(const char * p)275 static inline const char* skip_ws(const char* p) {
276     while (is_ws(*p)) ++p;
277     return p;
278 }
279 
pow10(int32_t exp)280 static inline float pow10(int32_t exp) {
281     static constexpr float g_pow10_table[63] =
282     {
283        1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
284        1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
285        1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
286        1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
287        1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
288        1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
289        1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
290        1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
291     };
292 
293     static constexpr int32_t k_exp_offset = std::size(g_pow10_table) / 2;
294 
295     // We only support negative exponents for now.
296     SkASSERT(exp <= 0);
297 
298     return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
299                                   : std::pow(10.0f, static_cast<float>(exp));
300 }
301 
// Non-recursive JSON parser which builds a skjson DOM.
//
// Parsed values are accumulated on fValueStack.  Opening an object/array pushes a
// placeholder entry and starts a new scope; closing it copies the values collected for
// that scope into an ObjectValue/ArrayValue (allocated from fAlloc), which then replaces
// the placeholder.
class DOMParser {
public:
    // Pre-reserves the value stack and the unescape buffer.  |alloc| is passed to the
    // string/array/object constructors for their external storage.
    explicit DOMParser(SkArenaAlloc& alloc)
        : fAlloc(alloc) {
        fValueStack.reserve(kValueStackReserve);
        fUnescapeBuffer.reserve(kUnescapeBufferReserve);
    }

    // Parses |size| bytes at |p| and returns the root value.
    //
    // The top-level value must be an object or an array, and the last non-whitespace
    // char of the input must be '}' or ']'.  On any error, a NullValue is returned.
    //
    // The parser is a goto-driven state machine: match_object / match_array open a scope,
    // match_object_key / match_value consume tokens, match_post_value handles what follows
    // a value, and pop_object / pop_array / pop_common close a scope.
    Value parse(const char* p, size_t size) {
        if (!size) {
            return this->error(NullValue(), p, "invalid empty input");
        }

        // Points at the last input char (inclusive), not one-past-the-end.
        const char* p_stop = p + size - 1;

        // We're only checking for end-of-stream on object/array close('}',']'),
        // so we must trim any whitespace from the buffer tail.
        while (p_stop > p && is_ws(*p_stop)) --p_stop;

        SkASSERT(p_stop >= p && p_stop < p + size);
        // The stop char doubles as a sentinel for the unchecked scanners below.
        if (!is_eoscope(*p_stop)) {
            return this->error(NullValue(), p_stop, "invalid top-level value");
        }

        p = skip_ws(p);

        switch (*p) {
        case '{':
            goto match_object;
        case '[':
            goto match_array;
        default:
            return this->error(NullValue(), p, "invalid top-level value");
        }

    match_object:
        SkASSERT(*p == '{');
        p = skip_ws(p + 1);

        this->pushObjectScope();

        // Empty object.
        if (*p == '}') goto pop_object;

        // goto match_object_key;
    match_object_key:
        p = skip_ws(p);
        if (*p != '"') return this->error(NullValue(), p, "expected object key");

        p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
            this->pushObjectKey(key, size, eos);
        });
        // matchString already recorded the error.
        if (!p) return NullValue();

        p = skip_ws(p);
        if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");

        ++p;

        // goto match_value;
    match_value:
        p = skip_ws(p);

        // Dispatch on the first char of the value.
        switch (*p) {
        case '\0':
            return this->error(NullValue(), p, "unexpected input end");
        case '"':
            p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
                this->pushString(str, size, eos);
            });
            break;
        case '[':
            goto match_array;
        case 'f':
            p = this->matchFalse(p);
            break;
        case 'n':
            p = this->matchNull(p);
            break;
        case 't':
            p = this->matchTrue(p);
            break;
        case '{':
            goto match_object;
        default:
            // Anything else can only be a number.
            p = this->matchNumber(p);
            break;
        }

        // The matchers return nullptr (after recording the error) on failure.
        if (!p) return NullValue();

        // goto match_post_value;
    match_post_value:
        SkASSERT(!this->inTopLevelScope());

        p = skip_ws(p);
        switch (*p) {
        case ',':
            ++p;
            // What may follow a comma depends on the enclosing scope type.
            if (this->inObjectScope()) {
                goto match_object_key;
            } else {
                SkASSERT(this->inArrayScope());
                goto match_value;
            }
        case ']':
            goto pop_array;
        case '}':
            goto pop_object;
        default:
            return this->error(NullValue(), p - 1, "unexpected value-trailing token");
        }

        // unreachable
        SkASSERT(false);

    pop_object:
        SkASSERT(*p == '}');

        // Mismatched terminator: '}' while inside an array.
        if (this->inArrayScope()) {
            return this->error(NullValue(), p, "unexpected object terminator");
        }

        this->popObjectScope();

        // goto pop_common
    pop_common:
        SkASSERT(is_eoscope(*p));

        if (this->inTopLevelScope()) {
            SkASSERT(fValueStack.size() == 1);

            // Success condition: parsed the top level element and reached the stop token.
            return p == p_stop
                ? fValueStack.front()
                : this->error(NullValue(), p + 1, "trailing root garbage");
        }

        // Still inside an enclosing scope, but no input left to close it.
        if (p == p_stop) {
            return this->error(NullValue(), p, "unexpected end-of-input");
        }

        ++p;

        // The scope just closed is itself a value in its parent scope.
        goto match_post_value;

    match_array:
        SkASSERT(*p == '[');
        p = skip_ws(p + 1);

        this->pushArrayScope();

        // A non-empty array starts with a value; an empty one falls through to pop_array.
        if (*p != ']') goto match_value;

        // goto pop_array;
    pop_array:
        SkASSERT(*p == ']');

        // Mismatched terminator: ']' while inside an object.
        if (this->inObjectScope()) {
            return this->error(NullValue(), p, "unexpected array terminator");
        }

        this->popArrayScope();

        goto pop_common;

        SkASSERT(false);
        return NullValue();
    }

    // Returns the recorded (error token, error message) pair.
    // These are only set when SK_JSON_REPORT_ERRORS is defined (see error()).
    std::tuple<const char*, const SkString> getError() const {
        return std::make_tuple(fErrorToken, fErrorMessage);
    }

private:
    SkArenaAlloc&         fAlloc;

    // Pending values stack.
    inline static constexpr size_t kValueStackReserve = 256;
    std::vector<Value>    fValueStack;

    // String unescape buffer.
    inline static constexpr size_t kUnescapeBufferReserve = 512;
    std::vector<char>     fUnescapeBuffer;

    // Tracks the current object/array scope, as an index into fValueStack:
    //
    //   - for objects: fScopeIndex =  (index of first value in scope)
    //   - for arrays : fScopeIndex = -(index of first value in scope)
    //
    // fScopeIndex == 0 IFF we are at the top level (no current/active scope).
    intptr_t              fScopeIndex = 0;

    // Error reporting.
    const char*           fErrorToken = nullptr;
    SkString              fErrorMessage;

    // Scope type queries, derived from the sign of fScopeIndex.
    bool inTopLevelScope() const { return fScopeIndex == 0; }
    bool inObjectScope()   const { return fScopeIndex >  0; }
    bool inArrayScope()    const { return fScopeIndex <  0; }

    // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
    template <typename T>
    class RawValue final : public Value {
    public:
        explicit RawValue(T v) {
            static_assert(sizeof(T) <= sizeof(Value), "");
            *this->cast<T>() = v;
        }

        // Reads the raw payload back.
        T operator *() const { return *this->cast<T>(); }
    };

    // Closes the scope whose first value is at |scope_start|: the values in scope are
    // turned into a VectorT (ObjectValue or ArrayValue) which replaces the scope's
    // placeholder entry, and the enclosing scope index is restored.
    template <typename VectorT>
    void popScopeAsVec(size_t scope_start) {
        SkASSERT(scope_start > 0);
        SkASSERT(scope_start <= fValueStack.size());

        // T may span several Values (e.g. a key/value Member), but must be laid out as
        // a whole number of them for the reinterpret_cast below to be meaningful.
        using T = typename VectorT::ValueT;
        static_assert( sizeof(T) >=  sizeof(Value), "");
        static_assert( sizeof(T)  %  sizeof(Value) == 0, "");
        static_assert(alignof(T) == alignof(Value), "");

        // scope_count is in Values, count is in Ts.
        const auto scope_count = fValueStack.size() - scope_start,
                         count = scope_count / (sizeof(T) / sizeof(Value));
        SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);

        const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);

        // Restore the previous scope index from saved placeholder value,
        // and instantiate as a vector of values in scope.
        auto& placeholder = fValueStack[scope_start - 1];
        fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
        placeholder = VectorT(begin, count, fAlloc);

        // Drop the (consumed) values in scope.
        fValueStack.resize(scope_start);
    }

    // Opens an object scope (fScopeIndex becomes positive).
    void pushObjectScope() {
        // Save a scope index now, and then later we'll overwrite this value as the Object itself.
        fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));

        // New object scope.
        fScopeIndex = SkTo<intptr_t>(fValueStack.size());
    }

    // Closes the current object scope, leaving the ObjectValue on top of the stack.
    void popObjectScope() {
        SkASSERT(this->inObjectScope());
        this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));

        // Debug-only sanity check: every member key must be a string.
        SkDEBUGCODE(
            const auto& obj = fValueStack.back().as<ObjectValue>();
            SkASSERT(obj.is<ObjectValue>());
            for (const auto& member : obj) {
                SkASSERT(member.fKey.is<StringValue>());
            }
        )
    }

    // Opens an array scope (fScopeIndex becomes negative).
    void pushArrayScope() {
        // Save a scope index now, and then later we'll overwrite this value as the Array itself.
        fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));

        // New array scope.
        fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
    }

    // Closes the current array scope, leaving the ArrayValue on top of the stack.
    void popArrayScope() {
        SkASSERT(this->inArrayScope());
        this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));

        SkDEBUGCODE(
            const auto& arr = fValueStack.back().as<ArrayValue>();
            SkASSERT(arr.is<ArrayValue>());
        )
    }

    // Pushes an object member key.  Keys and values alternate on the stack, so a key
    // is only expected when an even number of entries is already in scope.
    void pushObjectKey(const char* key, size_t size, const char* eos) {
        SkASSERT(this->inObjectScope());
        SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
        SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
        this->pushString(key, size, eos);
    }

    // Leaf value pushers: each appends one Value to the pending values stack.
    void pushTrue() {
        fValueStack.push_back(BoolValue(true));
    }

    void pushFalse() {
        fValueStack.push_back(BoolValue(false));
    }

    void pushNull() {
        fValueStack.push_back(NullValue());
    }

    // |eos| is forwarded to FastString, which uses it to pick its short string load path.
    void pushString(const char* s, size_t size, const char* eos) {
        fValueStack.push_back(FastString(s, size, eos, fAlloc));
    }

    void pushInt32(int32_t i) {
        fValueStack.push_back(NumberValue(i));
    }

    void pushFloat(float f) {
        fValueStack.push_back(NumberValue(f));
    }

    // Records the error location |p| and message |msg| (only when SK_JSON_REPORT_ERRORS
    // is defined), then returns |ret_val| so that callers can report-and-return in one
    // statement.
    template <typename T>
    T error(T&& ret_val, const char* p, const char* msg) {
#if defined(SK_JSON_REPORT_ERRORS)
        fErrorToken = p;
        fErrorMessage.set(msg);
#endif
        return ret_val;
    }

    // Literal matchers: |p| points at the first char of the literal.  On success the
    // value is pushed and the position past the literal is returned; otherwise an
    // error is recorded and nullptr is returned.
    const char* matchTrue(const char* p) {
        SkASSERT(p[0] == 't');

        if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
            this->pushTrue();
            return p + 4;
        }

        return this->error(nullptr, p, "invalid token");
    }

    const char* matchFalse(const char* p) {
        SkASSERT(p[0] == 'f');

        if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
            this->pushFalse();
            return p + 5;
        }

        return this->error(nullptr, p, "invalid token");
    }

    const char* matchNull(const char* p) {
        SkASSERT(p[0] == 'n');

        if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
            this->pushNull();
            return p + 4;
        }

        return this->error(nullptr, p, "invalid token");
    }

    // Decodes the escape sequences in [begin, end) into fUnescapeBuffer.
    // Returns the buffer, or nullptr for a truncated or unsupported escape.
    const std::vector<char>* unescapeString(const char* begin, const char* end) {
        fUnescapeBuffer.clear();

        for (const auto* p = begin; p != end; ++p) {
            // Plain chars are copied through.
            if (*p != '\\') {
                fUnescapeBuffer.push_back(*p);
                continue;
            }

            // A backslash must be followed by at least one more char.
            if (++p == end) {
                return nullptr;
            }

            switch (*p) {
            case  '"': fUnescapeBuffer.push_back( '"'); break;
            case '\\': fUnescapeBuffer.push_back('\\'); break;
            case  '/': fUnescapeBuffer.push_back( '/'); break;
            case  'b': fUnescapeBuffer.push_back('\b'); break;
            case  'f': fUnescapeBuffer.push_back('\f'); break;
            case  'n': fUnescapeBuffer.push_back('\n'); break;
            case  'r': fUnescapeBuffer.push_back('\r'); break;
            case  't': fUnescapeBuffer.push_back('\t'); break;
            case  'u': {
                // \uXXXX: all four hex digits must lie within the string.
                if (p + 4 >= end) {
                    return nullptr;
                }

                uint32_t hexed;
                const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
                const auto* eos = SkParse::FindHex(hex_str, &hexed);
                // Reject unless the whole 4-char sequence was consumed.
                if (!eos || *eos) {
                    return nullptr;
                }

                // The code point is appended UTF-8 encoded.
                char utf8[SkUTF::kMaxBytesInUTF8Sequence];
                const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
                fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
                p += 4;
            } break;
            default: return nullptr;
            }
        }

        return &fUnescapeBuffer;
    }

    // Matches a string starting at the opening quote |p|.
    //
    // On success, |func| is invoked as func(chars, size, eos) with the (unescaped)
    // string contents, and the position past the closing quote is returned.
    // On failure an error is recorded and nullptr is returned.
    template <typename MatchFunc>
    const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
        SkASSERT(*p == '"');
        const auto* s_begin = p + 1;
        bool requires_unescape = false;

        do {
            // Consume string chars.
            // This is the fast path, and hopefully we only hit it once then quick-exit below.
            for (p = p + 1; !is_eostring(*p); ++p);

            if (*p == '"') {
                // Valid string found.
                if (!requires_unescape) {
                    // The chars are used in place, so the input end can be passed as eos.
                    func(s_begin, p - s_begin, p_stop);
                } else {
                    // Slow unescape.  We could avoid this extra copy with some effort,
                    // but in practice escaped strings should be rare.
                    const auto* buf = this->unescapeString(s_begin, p);
                    if (!buf) {
                        break;
                    }

                    SkASSERT(!buf->empty());
                    // Here eos is the last char of the unescape buffer.
                    func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
                }
                return p + 1;
            }

            if (*p == '\\') {
                requires_unescape = true;
                // Step onto the escaped char: the scan above resumes after it, so an
                // escaped quote is not mistaken for the closing one.
                ++p;
                continue;
            }

            // End-of-scope chars are special: we use them to tag the end of the input.
            // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
            // end of the input.  To that effect, we treat them as string terminators above,
            // then we catch them here.
            if (is_eoscope(*p)) {
                continue;
            }

            // Invalid/unexpected char.
            break;
        } while (p != p_stop);

        // Premature end-of-input, or illegal string char.
        return this->error(nullptr, s_begin - 1, "invalid string");
    }

    // Continues matching the fractional digits of a float.  |f| holds the digits
    // accumulated so far and |exp| the (non-positive) decimal exponent to apply to them.
    // Returns nullptr when the fast path cannot handle the token.
    const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
        SkASSERT(exp <= 0);

        // Manually unrolled x2.
        for (;;) {
            if (!is_digit(*p)) break;
            f = f * 10.f + (*p++ - '0'); --exp;
            if (!is_digit(*p)) break;
            f = f * 10.f + (*p++ - '0'); --exp;
        }

        const auto decimal_scale = pow10(exp);
        if (is_numeric(*p) || !decimal_scale) {
            SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
            // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
            return nullptr;
        }

        this->pushFloat(sign * f * decimal_scale);

        return p;
    }

    // Continues matching the integral digits of a number as a float, starting from the
    // accumulated value |f|.  Returns nullptr when the fast path cannot handle the token.
    const char* matchFastFloatPart(const char* p, int sign, float f) {
        // Manually unrolled x2.
        for (;;) {
            if (!is_digit(*p)) break;
            f = f * 10.f + (*p++ - '0');
            if (!is_digit(*p)) break;
            f = f * 10.f + (*p++ - '0');
        }

        if (!is_numeric(*p)) {
            // Matched (integral) float.
            this->pushFloat(sign * f);
            return p;
        }

        return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
                           : nullptr;
    }

    // Fast number matcher: accumulates digits in an int32 for as long as they fit, then
    // hands over to the float matchers.  Pushes an int32 or a float on success, and
    // returns nullptr (without recording an error) when the token needs the slow path.
    const char* matchFast32OrFloat(const char* p) {
        int sign = 1;
        if (*p == '-') {
            sign = -1;
            ++p;
        }

        const auto* digits_start = p;

        int32_t n32 = 0;

        // This is the largest absolute int32 value we can handle before
        // risking overflow *on the next digit* (214748363).
        static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;

        if (is_digit(*p)) {
            n32 = (*p++ - '0');
            for (;;) {
                if (!is_digit(*p) || n32 > kMaxInt32) break;
                n32 = n32 * 10 + (*p++ - '0');
            }
        }

        if (!is_numeric(*p)) {
            // Did we actually match any digits?
            if (p > digits_start) {
                this->pushInt32(sign * n32);
                return p;
            }
            return nullptr;
        }

        if (*p == '.') {
            const auto* decimals_start = ++p;

            int exp = 0;

            // Keep accumulating the fractional digits in n32, tracking the exponent.
            for (;;) {
                if (!is_digit(*p) || n32 > kMaxInt32) break;
                n32 = n32 * 10 + (*p++ - '0'); --exp;
                if (!is_digit(*p) || n32 > kMaxInt32) break;
                n32 = n32 * 10 + (*p++ - '0'); --exp;
            }

            if (!is_numeric(*p)) {
                // Did we actually match any digits?
                if (p > decimals_start) {
                    this->pushFloat(sign * n32 * pow10(exp));
                    return p;
                }
                return nullptr;
            }

            if (n32 > kMaxInt32) {
                // we ran out on n32 bits
                return this->matchFastFloatDecimalPart(p, sign, n32, exp);
            }
        }

        return this->matchFastFloatPart(p, sign, n32);
    }

    // Matches a number at |p|: fast path first, then strtof as a fallback.
    // Returns the position past the number, or nullptr (with an error recorded).
    const char* matchNumber(const char* p) {
        if (const auto* fast = this->matchFast32OrFloat(p)) return fast;

        // slow fallback
        char* matched;
        float f = strtof(p, &matched);
        if (matched > p) {
            this->pushFloat(f);
            return matched;
        }
        return this->error(nullptr, p, "invalid numeric token");
    }
};
864 
Write(const Value & v,SkWStream * stream)865 void Write(const Value& v, SkWStream* stream) {
866     switch (v.getType()) {
867     case Value::Type::kNull:
868         stream->writeText("null");
869         break;
870     case Value::Type::kBool:
871         stream->writeText(*v.as<BoolValue>() ? "true" : "false");
872         break;
873     case Value::Type::kNumber:
874         stream->writeScalarAsText(*v.as<NumberValue>());
875         break;
876     case Value::Type::kString:
877         stream->writeText("\"");
878         stream->writeText(v.as<StringValue>().begin());
879         stream->writeText("\"");
880         break;
881     case Value::Type::kArray: {
882         const auto& array = v.as<ArrayValue>();
883         stream->writeText("[");
884         bool first_value = true;
885         for (const auto& entry : array) {
886             if (!first_value) stream->writeText(",");
887             Write(entry, stream);
888             first_value = false;
889         }
890         stream->writeText("]");
891         break;
892     }
893     case Value::Type::kObject:
894         const auto& object = v.as<ObjectValue>();
895         stream->writeText("{");
896         bool first_member = true;
897         for (const auto& member : object) {
898             SkASSERT(member.fKey.getType() == Value::Type::kString);
899             if (!first_member) stream->writeText(",");
900             Write(member.fKey, stream);
901             stream->writeText(":");
902             Write(member.fValue, stream);
903             first_member = false;
904         }
905         stream->writeText("}");
906         break;
907     }
908 }
909 
910 } // namespace
911 
toString() const912 SkString Value::toString() const {
913     SkDynamicMemoryWStream wstream;
914     Write(*this, &wstream);
915     const auto data = wstream.detachAsData();
916     // TODO: is there a better way to pass data around without copying?
917     return SkString(static_cast<const char*>(data->data()), data->size());
918 }
919 
// Value passed to the DOM's arena allocator (fAlloc) at construction.
static constexpr size_t kMinChunkSize = 4096;
921 
DOM(const char * data,size_t size)922 DOM::DOM(const char* data, size_t size)
923     : fAlloc(kMinChunkSize) {
924     DOMParser parser(fAlloc);
925 
926     fRoot = parser.parse(data, size);
927 }
928 
write(SkWStream * stream) const929 void DOM::write(SkWStream* stream) const {
930     Write(fRoot, stream);
931 }
932 
933 } // namespace skjson
934