1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/utils/SkJSON.h"
9
10 #include "include/core/SkStream.h"
11 #include "include/core/SkString.h"
12 #include "include/private/SkMalloc.h"
13 #include "include/utils/SkParse.h"
14 #include "src/utils/SkUTF.h"
15
16 #include <cmath>
17 #include <tuple>
18 #include <vector>
19
20 namespace skjson {
21
22 // #define SK_JSON_REPORT_ERRORS
23
24 static_assert( sizeof(Value) == 8, "");
25 static_assert(alignof(Value) == 8, "");
26
27 static constexpr size_t kRecAlign = alignof(Value);
28
init_tagged(Tag t)29 void Value::init_tagged(Tag t) {
30 memset(fData8, 0, sizeof(fData8));
31 fData8[0] = SkTo<uint8_t>(t);
32 SkASSERT(this->getTag() == t);
33 }
34
35 // Pointer values store a type (in the lower kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)36 void Value::init_tagged_pointer(Tag t, void* p) {
37 if (sizeof(Value) == sizeof(uintptr_t)) {
38 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
39 // For 64-bit, we rely on the pointer lower bits being zero.
40 SkASSERT(!(fData8[0] & kTagMask));
41 fData8[0] |= SkTo<uint8_t>(t);
42 } else {
43 // For 32-bit, we store the pointer in the upper word
44 SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
45 this->init_tagged(t);
46 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
47 }
48
49 SkASSERT(this->getTag() == t);
50 SkASSERT(this->ptr<void>() == p);
51 }
52
NullValue()53 NullValue::NullValue() {
54 this->init_tagged(Tag::kNull);
55 SkASSERT(this->getTag() == Tag::kNull);
56 }
57
BoolValue(bool b)58 BoolValue::BoolValue(bool b) {
59 this->init_tagged(Tag::kBool);
60 *this->cast<bool>() = b;
61 SkASSERT(this->getTag() == Tag::kBool);
62 }
63
NumberValue(int32_t i)64 NumberValue::NumberValue(int32_t i) {
65 this->init_tagged(Tag::kInt);
66 *this->cast<int32_t>() = i;
67 SkASSERT(this->getTag() == Tag::kInt);
68 }
69
NumberValue(float f)70 NumberValue::NumberValue(float f) {
71 this->init_tagged(Tag::kFloat);
72 *this->cast<float>() = f;
73 SkASSERT(this->getTag() == Tag::kFloat);
74 }
75
76 // Vector recs point to externally allocated slabs with the following layout:
77 //
78 // [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
79 //
80 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
81 //
82 template <typename T, size_t extra_alloc_size = 0>
MakeVector(const void * src,size_t size,SkArenaAlloc & alloc)83 static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) {
84 // The Ts are already in memory, so their size should be safe.
85 const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size;
86 auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
87
88 *size_ptr = size;
89 sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T));
90
91 return size_ptr;
92 }
93
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)94 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
95 this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc));
96 SkASSERT(this->getTag() == Tag::kArray);
97 }
98
// Strings have two flavors:
//
// -- short strings (len <= 6) -> these are stored inline, in the record
//    (the first byte is reserved for the tag, and one byte for the null terminator):
//
//        [tag] [str] [\0 ...]
//
//    All record bytes past the string chars are zero, so they double-up as the
//    null terminator.  kShortString == 0 means the tag byte itself does not need
//    to be set explicitly on the fast path.
//
// -- long strings (len > 6) -> these are externally allocated vectors (VectorRec<char>).
//
//    The string data plus a null-char terminator are copied over.
//
113 namespace {
114
115 // An internal string builder with a fast 8 byte short string load path
116 // (for the common case where the string is not at the end of the stream).
117 class FastString final : public Value {
118 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)119 FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
120 SkASSERT(src <= eos);
121
122 if (size > kMaxInlineStringSize) {
123 this->initLongString(src, size, alloc);
124 SkASSERT(this->getTag() == Tag::kString);
125 return;
126 }
127
128 // initFastShortString is faster (doh), but requires access to 6 chars past src.
129 if (src && src + 6 <= eos) {
130 this->initFastShortString(src, size);
131 } else {
132 this->initShortString(src, size);
133 }
134
135 SkASSERT(this->getTag() == Tag::kShortString);
136 }
137
138 private:
139 // first byte reserved for tagging, \0 terminator => 6 usable chars
140 inline static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2;
141
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)142 void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
143 SkASSERT(size > kMaxInlineStringSize);
144
145 this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc));
146
147 auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
148 const_cast<char*>(data)[size] = '\0';
149 }
150
initShortString(const char * src,size_t size)151 void initShortString(const char* src, size_t size) {
152 SkASSERT(size <= kMaxInlineStringSize);
153
154 this->init_tagged(Tag::kShortString);
155 sk_careful_memcpy(this->cast<char>(), src, size);
156 // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
157 }
158
initFastShortString(const char * src,size_t size)159 void initFastShortString(const char* src, size_t size) {
160 SkASSERT(size <= kMaxInlineStringSize);
161
162 uint64_t* s64 = this->cast<uint64_t>();
163
164 // Load 8 chars and mask out the tag and \0 terminator.
165 // Note: we picked kShortString == 0 to avoid setting explicitly below.
166 static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this");
167
168 // Since the first byte is occupied by the tag, we want the string chars [0..5] to land
169 // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the
170 // string requires a " prefix at the very least).
171 memcpy(s64, src - 1, 8);
172
173 #if defined(SK_CPU_LENDIAN)
174 // The mask for a max-length string (6), with a leading tag and trailing \0 is
175 // 0x00ffffffffffff00. Accounting for the final left-shift, this becomes
176 // 0x0000ffffffffffff.
177 *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s
178 << 8; // tag byte
179 #else
180 static_assert(false, "Big-endian builds are not supported at this time.");
181 #endif
182 }
183 };
184
185 } // namespace
186
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)187 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
188 new (this) FastString(src, size, src, alloc);
189 }
190
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)191 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
192 this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc));
193 SkASSERT(this->getTag() == Tag::kObject);
194 }
195
196
197 // Boring public Value glue.
198
// Equality-only string comparison: returns 0 when a and b hold the same chars, and 1
// otherwise.  Unlike strcmp(), the result carries no ordering information.
static int inline_strcmp(const char a[], const char b[]) {
    for (; *a != 0; ++a, ++b) {
        // Also catches b ending early: its terminator cannot match a non-zero char.
        if (*a != *b) {
            return 1;
        }
    }

    // a is exhausted; the strings are equal only if b is exhausted too.
    return (*b == 0) ? 0 : 1;
}
211
operator [](const char * key) const212 const Value& ObjectValue::operator[](const char* key) const {
213 // Reverse search for duplicates resolution (policy: return last).
214 const auto* begin = this->begin();
215 const auto* member = this->end();
216
217 while (member > begin) {
218 --member;
219 if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
220 return member->fValue;
221 }
222 }
223
224 static const Value g_null = NullValue();
225 return g_null;
226 }
227
228 namespace {
229
230 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
231 //
232 // [1] https://github.com/Tencent/rapidjson/
233 // [2] https://github.com/chadaustin/sajson
234 // [3] https://pastebin.com/hnhSTL3h
235
236
// Per-character classification flags, indexed by (unsigned) char value:
//
//   bit 0 (0x01) - plain ASCII string character
//   bit 1 (0x02) - whitespace
//   bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
//   bit 3 (0x08) - 0-9
//   bit 4 (0x10) - 0-9 e E .
//   bit 5 (0x20) - scope terminator (} ])
//
// Only the ASCII half is spelled out: entries 128-255 carry no flags, and are left to
// aggregate zero-initialization.
static constexpr uint8_t g_token_flags[256] = {
    // x0    x1    x2    x3    x4    x5    x6    x7    x8    x9    xA    xB    xC    xD    xE    xF
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x04, 0x04, 0x06, 0x04, 0x04, // 0x
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, // 1x
    0x03, 0x01, 0x04, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, // 2x
    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 3x
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 4x
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x25, 0x01, 0x01, // 5x
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 6x
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x25, 0x01, 0x01, // 7x
};
260
is_ws(char c)261 static inline bool is_ws(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x02; }
is_eostring(char c)262 static inline bool is_eostring(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x04; }
is_digit(char c)263 static inline bool is_digit(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x08; }
is_numeric(char c)264 static inline bool is_numeric(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x10; }
is_eoscope(char c)265 static inline bool is_eoscope(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x20; }
266
skip_ws(const char * p)267 static inline const char* skip_ws(const char* p) {
268 while (is_ws(*p)) ++p;
269 return p;
270 }
271
pow10(int32_t exp)272 static inline float pow10(int32_t exp) {
273 static constexpr float g_pow10_table[63] =
274 {
275 1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
276 1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
277 1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
278 1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
279 1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
280 1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
281 1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
282 1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
283 };
284
285 static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2;
286
287 // We only support negative exponents for now.
288 SkASSERT(exp <= 0);
289
290 return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
291 : std::pow(10.0f, static_cast<float>(exp));
292 }
293
294 class DOMParser {
295 public:
DOMParser(SkArenaAlloc & alloc)296 explicit DOMParser(SkArenaAlloc& alloc)
297 : fAlloc(alloc) {
298 fValueStack.reserve(kValueStackReserve);
299 fUnescapeBuffer.reserve(kUnescapeBufferReserve);
300 }
301
parse(const char * p,size_t size)302 Value parse(const char* p, size_t size) {
303 if (!size) {
304 return this->error(NullValue(), p, "invalid empty input");
305 }
306
307 const char* p_stop = p + size - 1;
308
309 // We're only checking for end-of-stream on object/array close('}',']'),
310 // so we must trim any whitespace from the buffer tail.
311 while (p_stop > p && is_ws(*p_stop)) --p_stop;
312
313 SkASSERT(p_stop >= p && p_stop < p + size);
314 if (!is_eoscope(*p_stop)) {
315 return this->error(NullValue(), p_stop, "invalid top-level value");
316 }
317
318 p = skip_ws(p);
319
320 switch (*p) {
321 case '{':
322 goto match_object;
323 case '[':
324 goto match_array;
325 default:
326 return this->error(NullValue(), p, "invalid top-level value");
327 }
328
329 match_object:
330 SkASSERT(*p == '{');
331 p = skip_ws(p + 1);
332
333 this->pushObjectScope();
334
335 if (*p == '}') goto pop_object;
336
337 // goto match_object_key;
338 match_object_key:
339 p = skip_ws(p);
340 if (*p != '"') return this->error(NullValue(), p, "expected object key");
341
342 p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
343 this->pushObjectKey(key, size, eos);
344 });
345 if (!p) return NullValue();
346
347 p = skip_ws(p);
348 if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");
349
350 ++p;
351
352 // goto match_value;
353 match_value:
354 p = skip_ws(p);
355
356 switch (*p) {
357 case '\0':
358 return this->error(NullValue(), p, "unexpected input end");
359 case '"':
360 p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
361 this->pushString(str, size, eos);
362 });
363 break;
364 case '[':
365 goto match_array;
366 case 'f':
367 p = this->matchFalse(p);
368 break;
369 case 'n':
370 p = this->matchNull(p);
371 break;
372 case 't':
373 p = this->matchTrue(p);
374 break;
375 case '{':
376 goto match_object;
377 default:
378 p = this->matchNumber(p);
379 break;
380 }
381
382 if (!p) return NullValue();
383
384 // goto match_post_value;
385 match_post_value:
386 SkASSERT(!this->inTopLevelScope());
387
388 p = skip_ws(p);
389 switch (*p) {
390 case ',':
391 ++p;
392 if (this->inObjectScope()) {
393 goto match_object_key;
394 } else {
395 SkASSERT(this->inArrayScope());
396 goto match_value;
397 }
398 case ']':
399 goto pop_array;
400 case '}':
401 goto pop_object;
402 default:
403 return this->error(NullValue(), p - 1, "unexpected value-trailing token");
404 }
405
406 // unreachable
407 SkASSERT(false);
408
409 pop_object:
410 SkASSERT(*p == '}');
411
412 if (this->inArrayScope()) {
413 return this->error(NullValue(), p, "unexpected object terminator");
414 }
415
416 this->popObjectScope();
417
418 // goto pop_common
419 pop_common:
420 SkASSERT(is_eoscope(*p));
421
422 if (this->inTopLevelScope()) {
423 SkASSERT(fValueStack.size() == 1);
424
425 // Success condition: parsed the top level element and reached the stop token.
426 return p == p_stop
427 ? fValueStack.front()
428 : this->error(NullValue(), p + 1, "trailing root garbage");
429 }
430
431 if (p == p_stop) {
432 return this->error(NullValue(), p, "unexpected end-of-input");
433 }
434
435 ++p;
436
437 goto match_post_value;
438
439 match_array:
440 SkASSERT(*p == '[');
441 p = skip_ws(p + 1);
442
443 this->pushArrayScope();
444
445 if (*p != ']') goto match_value;
446
447 // goto pop_array;
448 pop_array:
449 SkASSERT(*p == ']');
450
451 if (this->inObjectScope()) {
452 return this->error(NullValue(), p, "unexpected array terminator");
453 }
454
455 this->popArrayScope();
456
457 goto pop_common;
458
459 SkASSERT(false);
460 return NullValue();
461 }
462
getError() const463 std::tuple<const char*, const SkString> getError() const {
464 return std::make_tuple(fErrorToken, fErrorMessage);
465 }
466
467 private:
468 SkArenaAlloc& fAlloc;
469
470 // Pending values stack.
471 inline static constexpr size_t kValueStackReserve = 256;
472 std::vector<Value> fValueStack;
473
474 // String unescape buffer.
475 inline static constexpr size_t kUnescapeBufferReserve = 512;
476 std::vector<char> fUnescapeBuffer;
477
478 // Tracks the current object/array scope, as an index into fStack:
479 //
480 // - for objects: fScopeIndex = (index of first value in scope)
481 // - for arrays : fScopeIndex = -(index of first value in scope)
482 //
483 // fScopeIndex == 0 IFF we are at the top level (no current/active scope).
484 intptr_t fScopeIndex = 0;
485
486 // Error reporting.
487 const char* fErrorToken = nullptr;
488 SkString fErrorMessage;
489
inTopLevelScope() const490 bool inTopLevelScope() const { return fScopeIndex == 0; }
inObjectScope() const491 bool inObjectScope() const { return fScopeIndex > 0; }
inArrayScope() const492 bool inArrayScope() const { return fScopeIndex < 0; }
493
494 // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
495 template <typename T>
496 class RawValue final : public Value {
497 public:
RawValue(T v)498 explicit RawValue(T v) {
499 static_assert(sizeof(T) <= sizeof(Value), "");
500 *this->cast<T>() = v;
501 }
502
operator *() const503 T operator *() const { return *this->cast<T>(); }
504 };
505
506 template <typename VectorT>
popScopeAsVec(size_t scope_start)507 void popScopeAsVec(size_t scope_start) {
508 SkASSERT(scope_start > 0);
509 SkASSERT(scope_start <= fValueStack.size());
510
511 using T = typename VectorT::ValueT;
512 static_assert( sizeof(T) >= sizeof(Value), "");
513 static_assert( sizeof(T) % sizeof(Value) == 0, "");
514 static_assert(alignof(T) == alignof(Value), "");
515
516 const auto scope_count = fValueStack.size() - scope_start,
517 count = scope_count / (sizeof(T) / sizeof(Value));
518 SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
519
520 const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
521
522 // Restore the previous scope index from saved placeholder value,
523 // and instantiate as a vector of values in scope.
524 auto& placeholder = fValueStack[scope_start - 1];
525 fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
526 placeholder = VectorT(begin, count, fAlloc);
527
528 // Drop the (consumed) values in scope.
529 fValueStack.resize(scope_start);
530 }
531
pushObjectScope()532 void pushObjectScope() {
533 // Save a scope index now, and then later we'll overwrite this value as the Object itself.
534 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
535
536 // New object scope.
537 fScopeIndex = SkTo<intptr_t>(fValueStack.size());
538 }
539
popObjectScope()540 void popObjectScope() {
541 SkASSERT(this->inObjectScope());
542 this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
543
544 SkDEBUGCODE(
545 const auto& obj = fValueStack.back().as<ObjectValue>();
546 SkASSERT(obj.is<ObjectValue>());
547 for (const auto& member : obj) {
548 SkASSERT(member.fKey.is<StringValue>());
549 }
550 )
551 }
552
pushArrayScope()553 void pushArrayScope() {
554 // Save a scope index now, and then later we'll overwrite this value as the Array itself.
555 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
556
557 // New array scope.
558 fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
559 }
560
popArrayScope()561 void popArrayScope() {
562 SkASSERT(this->inArrayScope());
563 this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
564
565 SkDEBUGCODE(
566 const auto& arr = fValueStack.back().as<ArrayValue>();
567 SkASSERT(arr.is<ArrayValue>());
568 )
569 }
570
pushObjectKey(const char * key,size_t size,const char * eos)571 void pushObjectKey(const char* key, size_t size, const char* eos) {
572 SkASSERT(this->inObjectScope());
573 SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
574 SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
575 this->pushString(key, size, eos);
576 }
577
pushTrue()578 void pushTrue() {
579 fValueStack.push_back(BoolValue(true));
580 }
581
pushFalse()582 void pushFalse() {
583 fValueStack.push_back(BoolValue(false));
584 }
585
pushNull()586 void pushNull() {
587 fValueStack.push_back(NullValue());
588 }
589
pushString(const char * s,size_t size,const char * eos)590 void pushString(const char* s, size_t size, const char* eos) {
591 fValueStack.push_back(FastString(s, size, eos, fAlloc));
592 }
593
pushInt32(int32_t i)594 void pushInt32(int32_t i) {
595 fValueStack.push_back(NumberValue(i));
596 }
597
pushFloat(float f)598 void pushFloat(float f) {
599 fValueStack.push_back(NumberValue(f));
600 }
601
602 template <typename T>
error(T && ret_val,const char * p,const char * msg)603 T error(T&& ret_val, const char* p, const char* msg) {
604 #if defined(SK_JSON_REPORT_ERRORS)
605 fErrorToken = p;
606 fErrorMessage.set(msg);
607 #endif
608 return ret_val;
609 }
610
matchTrue(const char * p)611 const char* matchTrue(const char* p) {
612 SkASSERT(p[0] == 't');
613
614 if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
615 this->pushTrue();
616 return p + 4;
617 }
618
619 return this->error(nullptr, p, "invalid token");
620 }
621
matchFalse(const char * p)622 const char* matchFalse(const char* p) {
623 SkASSERT(p[0] == 'f');
624
625 if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
626 this->pushFalse();
627 return p + 5;
628 }
629
630 return this->error(nullptr, p, "invalid token");
631 }
632
matchNull(const char * p)633 const char* matchNull(const char* p) {
634 SkASSERT(p[0] == 'n');
635
636 if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
637 this->pushNull();
638 return p + 4;
639 }
640
641 return this->error(nullptr, p, "invalid token");
642 }
643
unescapeString(const char * begin,const char * end)644 const std::vector<char>* unescapeString(const char* begin, const char* end) {
645 fUnescapeBuffer.clear();
646
647 for (const auto* p = begin; p != end; ++p) {
648 if (*p != '\\') {
649 fUnescapeBuffer.push_back(*p);
650 continue;
651 }
652
653 if (++p == end) {
654 return nullptr;
655 }
656
657 switch (*p) {
658 case '"': fUnescapeBuffer.push_back( '"'); break;
659 case '\\': fUnescapeBuffer.push_back('\\'); break;
660 case '/': fUnescapeBuffer.push_back( '/'); break;
661 case 'b': fUnescapeBuffer.push_back('\b'); break;
662 case 'f': fUnescapeBuffer.push_back('\f'); break;
663 case 'n': fUnescapeBuffer.push_back('\n'); break;
664 case 'r': fUnescapeBuffer.push_back('\r'); break;
665 case 't': fUnescapeBuffer.push_back('\t'); break;
666 case 'u': {
667 if (p + 4 >= end) {
668 return nullptr;
669 }
670
671 uint32_t hexed;
672 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
673 const auto* eos = SkParse::FindHex(hex_str, &hexed);
674 if (!eos || *eos) {
675 return nullptr;
676 }
677
678 char utf8[SkUTF::kMaxBytesInUTF8Sequence];
679 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
680 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
681 p += 4;
682 } break;
683 default: return nullptr;
684 }
685 }
686
687 return &fUnescapeBuffer;
688 }
689
690 template <typename MatchFunc>
matchString(const char * p,const char * p_stop,MatchFunc && func)691 const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
692 SkASSERT(*p == '"');
693 const auto* s_begin = p + 1;
694 bool requires_unescape = false;
695
696 do {
697 // Consume string chars.
698 // This is the fast path, and hopefully we only hit it once then quick-exit below.
699 for (p = p + 1; !is_eostring(*p); ++p);
700
701 if (*p == '"') {
702 // Valid string found.
703 if (!requires_unescape) {
704 func(s_begin, p - s_begin, p_stop);
705 } else {
706 // Slow unescape. We could avoid this extra copy with some effort,
707 // but in practice escaped strings should be rare.
708 const auto* buf = this->unescapeString(s_begin, p);
709 if (!buf) {
710 break;
711 }
712
713 SkASSERT(!buf->empty());
714 func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
715 }
716 return p + 1;
717 }
718
719 if (*p == '\\') {
720 requires_unescape = true;
721 ++p;
722 continue;
723 }
724
725 // End-of-scope chars are special: we use them to tag the end of the input.
726 // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
727 // end of the input. To that effect, we treat them as string terminators above,
728 // then we catch them here.
729 if (is_eoscope(*p)) {
730 continue;
731 }
732
733 // Invalid/unexpected char.
734 break;
735 } while (p != p_stop);
736
737 // Premature end-of-input, or illegal string char.
738 return this->error(nullptr, s_begin - 1, "invalid string");
739 }
740
matchFastFloatDecimalPart(const char * p,int sign,float f,int exp)741 const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
742 SkASSERT(exp <= 0);
743
744 for (;;) {
745 if (!is_digit(*p)) break;
746 f = f * 10.f + (*p++ - '0'); --exp;
747 if (!is_digit(*p)) break;
748 f = f * 10.f + (*p++ - '0'); --exp;
749 }
750
751 const auto decimal_scale = pow10(exp);
752 if (is_numeric(*p) || !decimal_scale) {
753 SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
754 // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
755 return nullptr;
756 }
757
758 this->pushFloat(sign * f * decimal_scale);
759
760 return p;
761 }
762
matchFastFloatPart(const char * p,int sign,float f)763 const char* matchFastFloatPart(const char* p, int sign, float f) {
764 for (;;) {
765 if (!is_digit(*p)) break;
766 f = f * 10.f + (*p++ - '0');
767 if (!is_digit(*p)) break;
768 f = f * 10.f + (*p++ - '0');
769 }
770
771 if (!is_numeric(*p)) {
772 // Matched (integral) float.
773 this->pushFloat(sign * f);
774 return p;
775 }
776
777 return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
778 : nullptr;
779 }
780
matchFast32OrFloat(const char * p)781 const char* matchFast32OrFloat(const char* p) {
782 int sign = 1;
783 if (*p == '-') {
784 sign = -1;
785 ++p;
786 }
787
788 const auto* digits_start = p;
789
790 int32_t n32 = 0;
791
792 // This is the largest absolute int32 value we can handle before
793 // risking overflow *on the next digit* (214748363).
794 static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
795
796 if (is_digit(*p)) {
797 n32 = (*p++ - '0');
798 for (;;) {
799 if (!is_digit(*p) || n32 > kMaxInt32) break;
800 n32 = n32 * 10 + (*p++ - '0');
801 }
802 }
803
804 if (!is_numeric(*p)) {
805 // Did we actually match any digits?
806 if (p > digits_start) {
807 this->pushInt32(sign * n32);
808 return p;
809 }
810 return nullptr;
811 }
812
813 if (*p == '.') {
814 const auto* decimals_start = ++p;
815
816 int exp = 0;
817
818 for (;;) {
819 if (!is_digit(*p) || n32 > kMaxInt32) break;
820 n32 = n32 * 10 + (*p++ - '0'); --exp;
821 if (!is_digit(*p) || n32 > kMaxInt32) break;
822 n32 = n32 * 10 + (*p++ - '0'); --exp;
823 }
824
825 if (!is_numeric(*p)) {
826 // Did we actually match any digits?
827 if (p > decimals_start) {
828 this->pushFloat(sign * n32 * pow10(exp));
829 return p;
830 }
831 return nullptr;
832 }
833
834 if (n32 > kMaxInt32) {
835 // we ran out on n32 bits
836 return this->matchFastFloatDecimalPart(p, sign, n32, exp);
837 }
838 }
839
840 return this->matchFastFloatPart(p, sign, n32);
841 }
842
matchNumber(const char * p)843 const char* matchNumber(const char* p) {
844 if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
845
846 // slow fallback
847 char* matched;
848 float f = strtof(p, &matched);
849 if (matched > p) {
850 this->pushFloat(f);
851 return matched;
852 }
853 return this->error(nullptr, p, "invalid numeric token");
854 }
855 };
856
Write(const Value & v,SkWStream * stream)857 void Write(const Value& v, SkWStream* stream) {
858 switch (v.getType()) {
859 case Value::Type::kNull:
860 stream->writeText("null");
861 break;
862 case Value::Type::kBool:
863 stream->writeText(*v.as<BoolValue>() ? "true" : "false");
864 break;
865 case Value::Type::kNumber:
866 stream->writeScalarAsText(*v.as<NumberValue>());
867 break;
868 case Value::Type::kString:
869 stream->writeText("\"");
870 stream->writeText(v.as<StringValue>().begin());
871 stream->writeText("\"");
872 break;
873 case Value::Type::kArray: {
874 const auto& array = v.as<ArrayValue>();
875 stream->writeText("[");
876 bool first_value = true;
877 for (const auto& entry : array) {
878 if (!first_value) stream->writeText(",");
879 Write(entry, stream);
880 first_value = false;
881 }
882 stream->writeText("]");
883 break;
884 }
885 case Value::Type::kObject:
886 const auto& object = v.as<ObjectValue>();
887 stream->writeText("{");
888 bool first_member = true;
889 for (const auto& member : object) {
890 SkASSERT(member.fKey.getType() == Value::Type::kString);
891 if (!first_member) stream->writeText(",");
892 Write(member.fKey, stream);
893 stream->writeText(":");
894 Write(member.fValue, stream);
895 first_member = false;
896 }
897 stream->writeText("}");
898 break;
899 }
900 }
901
902 } // namespace
903
toString() const904 SkString Value::toString() const {
905 SkDynamicMemoryWStream wstream;
906 Write(*this, &wstream);
907 const auto data = wstream.detachAsData();
908 // TODO: is there a better way to pass data around without copying?
909 return SkString(static_cast<const char*>(data->data()), data->size());
910 }
911
912 static constexpr size_t kMinChunkSize = 4096;
913
DOM(const char * data,size_t size)914 DOM::DOM(const char* data, size_t size)
915 : fAlloc(kMinChunkSize) {
916 DOMParser parser(fAlloc);
917
918 fRoot = parser.parse(data, size);
919 }
920
write(SkWStream * stream) const921 void DOM::write(SkWStream* stream) const {
922 Write(fRoot, stream);
923 }
924
925 } // namespace skjson
926