1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/utils/SkJSON.h"
9
10 #include "include/core/SkData.h"
11 #include "include/core/SkRefCnt.h"
12 #include "include/core/SkStream.h"
13 #include "include/core/SkString.h"
14 #include "include/private/base/SkMalloc.h"
15 #include "include/private/base/SkDebug.h"
16 #include "include/private/base/SkTo.h"
17 #include "include/utils/SkParse.h"
18 #include "src/base/SkUTF.h"
19
20 #include <cmath>
21 #include <cstdint>
22 #include <cstdlib>
23 #include <limits>
24 #include <new>
25 #include <tuple>
26 #include <vector>
27
28 namespace skjson {
29
30 // #define SK_JSON_REPORT_ERRORS
31
// The tagging and inline-string code in this file assumes a Value record is
// exactly 8 bytes wide and 8-byte aligned.
static_assert( sizeof(Value) == 8, "");
static_assert(alignof(Value) == 8, "");

// Alignment requested from the arena for externally allocated vector slabs.
static constexpr size_t kRecAlign = alignof(Value);
36
init_tagged(Tag t)37 void Value::init_tagged(Tag t) {
38 memset(fData8, 0, sizeof(fData8));
39 fData8[0] = SkTo<uint8_t>(t);
40 SkASSERT(this->getTag() == t);
41 }
42
43 // Pointer values store a type (in the lower kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)44 void Value::init_tagged_pointer(Tag t, void* p) {
45 if (sizeof(Value) == sizeof(uintptr_t)) {
46 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
47 // For 64-bit, we rely on the pointer lower bits being zero.
48 SkASSERT(!(fData8[0] & kTagMask));
49 fData8[0] |= SkTo<uint8_t>(t);
50 } else {
51 // For 32-bit, we store the pointer in the upper word
52 SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
53 this->init_tagged(t);
54 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
55 }
56
57 SkASSERT(this->getTag() == t);
58 SkASSERT(this->ptr<void>() == p);
59 }
60
NullValue()61 NullValue::NullValue() {
62 this->init_tagged(Tag::kNull);
63 SkASSERT(this->getTag() == Tag::kNull);
64 }
65
BoolValue(bool b)66 BoolValue::BoolValue(bool b) {
67 this->init_tagged(Tag::kBool);
68 *this->cast<bool>() = b;
69 SkASSERT(this->getTag() == Tag::kBool);
70 }
71
NumberValue(int32_t i)72 NumberValue::NumberValue(int32_t i) {
73 this->init_tagged(Tag::kInt);
74 *this->cast<int32_t>() = i;
75 SkASSERT(this->getTag() == Tag::kInt);
76 }
77
NumberValue(float f)78 NumberValue::NumberValue(float f) {
79 this->init_tagged(Tag::kFloat);
80 *this->cast<float>() = f;
81 SkASSERT(this->getTag() == Tag::kFloat);
82 }
83
84 // Vector recs point to externally allocated slabs with the following layout:
85 //
86 // [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
87 //
88 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
89 //
90 template <typename T, size_t extra_alloc_size = 0>
MakeVector(const void * src,size_t size,SkArenaAlloc & alloc)91 static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) {
92 // The Ts are already in memory, so their size should be safe.
93 const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size;
94 auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
95
96 *size_ptr = size;
97 sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T));
98
99 return size_ptr;
100 }
101
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)102 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
103 this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc));
104 SkASSERT(this->getTag() == Tag::kArray);
105 }
106
// Strings have two flavors:
//
// -- short strings (len <= 6) -> these are stored inline, in the record
//    (one byte reserved for the tag, one for the null terminator):
//
//        [tag] [str] [\0]
//
//    The tag byte is zero for short strings (kShortString == 0), and the bytes
//    following the string chars are zero as well.
//
// -- long strings (len > 6) -> these are externally allocated vectors (VectorRec<char>).
//
// The string data plus a null-char terminator are copied over.
//
121 namespace {
122
123 // An internal string builder with a fast 8 byte short string load path
124 // (for the common case where the string is not at the end of the stream).
125 class FastString final : public Value {
126 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)127 FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
128 SkASSERT(src <= eos);
129
130 if (size > kMaxInlineStringSize) {
131 this->initLongString(src, size, alloc);
132 SkASSERT(this->getTag() == Tag::kString);
133 return;
134 }
135
136 // initFastShortString is faster (doh), but requires access to 6 chars past src.
137 if (src && src + 6 <= eos) {
138 this->initFastShortString(src, size);
139 } else {
140 this->initShortString(src, size);
141 }
142
143 SkASSERT(this->getTag() == Tag::kShortString);
144 }
145
146 private:
147 // first byte reserved for tagging, \0 terminator => 6 usable chars
148 inline static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2;
149
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)150 void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
151 SkASSERT(size > kMaxInlineStringSize);
152
153 this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc));
154
155 auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
156 const_cast<char*>(data)[size] = '\0';
157 }
158
initShortString(const char * src,size_t size)159 void initShortString(const char* src, size_t size) {
160 SkASSERT(size <= kMaxInlineStringSize);
161
162 this->init_tagged(Tag::kShortString);
163 sk_careful_memcpy(this->cast<char>(), src, size);
164 // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
165 }
166
initFastShortString(const char * src,size_t size)167 void initFastShortString(const char* src, size_t size) {
168 SkASSERT(size <= kMaxInlineStringSize);
169
170 uint64_t* s64 = this->cast<uint64_t>();
171
172 // Load 8 chars and mask out the tag and \0 terminator.
173 // Note: we picked kShortString == 0 to avoid setting explicitly below.
174 static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this");
175
176 // Since the first byte is occupied by the tag, we want the string chars [0..5] to land
177 // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the
178 // string requires a " prefix at the very least).
179 memcpy(s64, src - 1, 8);
180
181 #if defined(SK_CPU_LENDIAN)
182 // The mask for a max-length string (6), with a leading tag and trailing \0 is
183 // 0x00ffffffffffff00. Accounting for the final left-shift, this becomes
184 // 0x0000ffffffffffff.
185 *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s
186 << 8; // tag byte
187 #else
188 static_assert(false, "Big-endian builds are not supported at this time.");
189 #endif
190 }
191 };
192
193 } // namespace
194
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)195 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
196 new (this) FastString(src, size, src, alloc);
197 }
198
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)199 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
200 this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc));
201 SkASSERT(this->getTag() == Tag::kObject);
202 }
203
204
205 // Boring public Value glue.
206
// Equality-only string compare: returns 0 when the two null-terminated strings
// are identical and 1 otherwise (no ordering information is provided).
static int inline_strcmp(const char a[], const char b[]) {
    // Walk |a| up to its terminator, bailing out on the first mismatch.
    for (; *a != '\0'; ++a, ++b) {
        if (*a != *b) {
            return 1;
        }
    }

    // |a| is exhausted; the strings match only if |b| ends here too.
    return (*b == '\0') ? 0 : 1;
}
219
operator [](const char * key) const220 const Value& ObjectValue::operator[](const char* key) const {
221 // Reverse search for duplicates resolution (policy: return last).
222 const auto* begin = this->begin();
223 const auto* member = this->end();
224
225 while (member > begin) {
226 --member;
227 if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
228 return member->fValue;
229 }
230 }
231
232 static const Value g_null = NullValue();
233 return g_null;
234 }
235
236 namespace {
237
238 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
239 //
240 // [1] https://github.com/Tencent/rapidjson/
241 // [2] https://github.com/chadaustin/sajson
242 // [3] https://pastebin.com/hnhSTL3h
243
244
// Per-byte character classification table, indexed by the unsigned char value.
// Each entry is a bitwise OR of the following flags:
//
// bit 0 (0x01) - plain ASCII string character
// bit 1 (0x02) - whitespace
// bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
// bit 3 (0x08) - 0-9
// bit 4 (0x10) - 0-9 e E .
// bit 5 (0x20) - scope terminator (} ])
//
// Rows cover 16 consecutive byte values; the trailing row comment is the high nibble.
// All bytes >= 128 have no flags set.
static constexpr uint8_t g_token_flags[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 4, 4, 6, 4, 4, // 0
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 1
3, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0x11,1, // 2
0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19, 0x19,0x19, 1, 1, 1, 1, 1, 1, // 3
1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,0x25, 1, 1, // 5
1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,0x25, 1, 1, // 7

// 128-255
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
};
268
is_ws(char c)269 static inline bool is_ws(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x02; }
is_eostring(char c)270 static inline bool is_eostring(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x04; }
is_digit(char c)271 static inline bool is_digit(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x08; }
is_numeric(char c)272 static inline bool is_numeric(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x10; }
is_eoscope(char c)273 static inline bool is_eoscope(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x20; }
274
skip_ws(const char * p)275 static inline const char* skip_ws(const char* p) {
276 while (is_ws(*p)) ++p;
277 return p;
278 }
279
pow10(int32_t exp)280 static inline float pow10(int32_t exp) {
281 static constexpr float g_pow10_table[63] =
282 {
283 1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
284 1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
285 1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
286 1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
287 1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
288 1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
289 1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
290 1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
291 };
292
293 static constexpr int32_t k_exp_offset = std::size(g_pow10_table) / 2;
294
295 // We only support negative exponents for now.
296 SkASSERT(exp <= 0);
297
298 return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
299 : std::pow(10.0f, static_cast<float>(exp));
300 }
301
302 class DOMParser {
303 public:
DOMParser(SkArenaAlloc & alloc)304 explicit DOMParser(SkArenaAlloc& alloc)
305 : fAlloc(alloc) {
306 fValueStack.reserve(kValueStackReserve);
307 fUnescapeBuffer.reserve(kUnescapeBufferReserve);
308 }
309
// Parses the JSON text in [p, p + size) and returns the root value.
//
// The root must be an object or an array.  On any error a NullValue is returned
// (and, when SK_JSON_REPORT_ERRORS is defined, the error token/message are recorded).
//
// The parser is a goto-driven state machine: nested scopes are tracked on fValueStack
// (see pushObjectScope/pushArrayScope) rather than through recursion.  The last
// non-whitespace char of the input must be '}' or ']'; its address (p_stop) is the only
// end-of-input marker the scanner checks against.
Value parse(const char* p, size_t size) {
    if (!size) {
        return this->error(NullValue(), p, "invalid empty input");
    }

    const char* p_stop = p + size - 1;

    // We're only checking for end-of-stream on object/array close('}',']'),
    // so we must trim any whitespace from the buffer tail.
    while (p_stop > p && is_ws(*p_stop)) --p_stop;

    SkASSERT(p_stop >= p && p_stop < p + size);
    if (!is_eoscope(*p_stop)) {
        return this->error(NullValue(), p_stop, "invalid top-level value");
    }

    p = skip_ws(p);

    // Only objects and arrays are accepted at the top level.
    switch (*p) {
    case '{':
        goto match_object;
    case '[':
        goto match_array;
    default:
        return this->error(NullValue(), p, "invalid top-level value");
    }

// State: at an opening '{'.
match_object:
    SkASSERT(*p == '{');
    p = skip_ws(p + 1);

    this->pushObjectScope();

    // Empty object.
    if (*p == '}') goto pop_object;

    // goto match_object_key;
// State: expecting a "key": prefix inside an object.
match_object_key:
    p = skip_ws(p);
    if (*p != '"') return this->error(NullValue(), p, "expected object key");

    p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
        this->pushObjectKey(key, size, eos);
    });
    // matchString has already recorded the error.
    if (!p) return NullValue();

    p = skip_ws(p);
    if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");

    ++p;

    // goto match_value;
// State: expecting any value (object member value or array element).
match_value:
    p = skip_ws(p);

    // Dispatch on the first char of the value; anything unrecognized is tried as a number.
    switch (*p) {
    case '\0':
        return this->error(NullValue(), p, "unexpected input end");
    case '"':
        p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
            this->pushString(str, size, eos);
        });
        break;
    case '[':
        goto match_array;
    case 'f':
        p = this->matchFalse(p);
        break;
    case 'n':
        p = this->matchNull(p);
        break;
    case 't':
        p = this->matchTrue(p);
        break;
    case '{':
        goto match_object;
    default:
        p = this->matchNumber(p);
        break;
    }

    // The match helpers return nullptr on failure.
    if (!p) return NullValue();

    // goto match_post_value;
// State: a value was just consumed; expecting ',' or the end of the current scope.
match_post_value:
    SkASSERT(!this->inTopLevelScope());

    p = skip_ws(p);
    switch (*p) {
    case ',':
        ++p;
        if (this->inObjectScope()) {
            goto match_object_key;
        } else {
            SkASSERT(this->inArrayScope());
            goto match_value;
        }
    case ']':
        goto pop_array;
    case '}':
        goto pop_object;
    default:
        return this->error(NullValue(), p - 1, "unexpected value-trailing token");
    }

    // unreachable
    SkASSERT(false);

// State: at a closing '}'.
pop_object:
    SkASSERT(*p == '}');

    if (this->inArrayScope()) {
        return this->error(NullValue(), p, "unexpected object terminator");
    }

    this->popObjectScope();

    // goto pop_common
// State: a scope was just closed (p still points at its terminator).
pop_common:
    SkASSERT(is_eoscope(*p));

    if (this->inTopLevelScope()) {
        SkASSERT(fValueStack.size() == 1);

        // Success condition: parsed the top level element and reached the stop token.
        return p == p_stop
            ? fValueStack.front()
            : this->error(NullValue(), p + 1, "trailing root garbage");
    }

    // Still inside an enclosing scope, but the input is exhausted.
    if (p == p_stop) {
        return this->error(NullValue(), p, "unexpected end-of-input");
    }

    ++p;

    // The closed object/array is itself a value of the enclosing scope.
    goto match_post_value;

// State: at an opening '['.
match_array:
    SkASSERT(*p == '[');
    p = skip_ws(p + 1);

    this->pushArrayScope();

    if (*p != ']') goto match_value;

    // goto pop_array;
// State: at a closing ']'.
pop_array:
    SkASSERT(*p == ']');

    if (this->inObjectScope()) {
        return this->error(NullValue(), p, "unexpected array terminator");
    }

    this->popArrayScope();

    goto pop_common;

    SkASSERT(false);
    return NullValue();
}
470
getError() const471 std::tuple<const char*, const SkString> getError() const {
472 return std::make_tuple(fErrorToken, fErrorMessage);
473 }
474
475 private:
// Allocator handed to the string/array/object constructors for their external storage.
SkArenaAlloc& fAlloc;

// Pending values stack.
// Holds the values parsed so far for every open scope, plus one placeholder slot per scope.
inline static constexpr size_t kValueStackReserve = 256;
std::vector<Value> fValueStack;

// String unescape buffer.
// Scratch storage reused by unescapeString() across calls.
inline static constexpr size_t kUnescapeBufferReserve = 512;
std::vector<char> fUnescapeBuffer;

// Tracks the current object/array scope, as an index into fValueStack:
//
// - for objects: fScopeIndex = (index of first value in scope)
// - for arrays : fScopeIndex = -(index of first value in scope)
//
// fScopeIndex == 0 IFF we are at the top level (no current/active scope).
intptr_t fScopeIndex = 0;

// Error reporting.
// Only written by error() when SK_JSON_REPORT_ERRORS is defined.
const char* fErrorToken = nullptr;
SkString fErrorMessage;

// Scope predicates, derived from the sign of fScopeIndex (see above).
bool inTopLevelScope() const { return fScopeIndex == 0; }
bool inObjectScope() const { return fScopeIndex > 0; }
bool inArrayScope() const { return fScopeIndex < 0; }
501
502 // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
503 template <typename T>
504 class RawValue final : public Value {
505 public:
RawValue(T v)506 explicit RawValue(T v) {
507 static_assert(sizeof(T) <= sizeof(Value), "");
508 *this->cast<T>() = v;
509 }
510
operator *() const511 T operator *() const { return *this->cast<T>(); }
512 };
513
514 template <typename VectorT>
popScopeAsVec(size_t scope_start)515 void popScopeAsVec(size_t scope_start) {
516 SkASSERT(scope_start > 0);
517 SkASSERT(scope_start <= fValueStack.size());
518
519 using T = typename VectorT::ValueT;
520 static_assert( sizeof(T) >= sizeof(Value), "");
521 static_assert( sizeof(T) % sizeof(Value) == 0, "");
522 static_assert(alignof(T) == alignof(Value), "");
523
524 const auto scope_count = fValueStack.size() - scope_start,
525 count = scope_count / (sizeof(T) / sizeof(Value));
526 SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
527
528 const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
529
530 // Restore the previous scope index from saved placeholder value,
531 // and instantiate as a vector of values in scope.
532 auto& placeholder = fValueStack[scope_start - 1];
533 fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
534 placeholder = VectorT(begin, count, fAlloc);
535
536 // Drop the (consumed) values in scope.
537 fValueStack.resize(scope_start);
538 }
539
pushObjectScope()540 void pushObjectScope() {
541 // Save a scope index now, and then later we'll overwrite this value as the Object itself.
542 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
543
544 // New object scope.
545 fScopeIndex = SkTo<intptr_t>(fValueStack.size());
546 }
547
popObjectScope()548 void popObjectScope() {
549 SkASSERT(this->inObjectScope());
550 this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
551
552 SkDEBUGCODE(
553 const auto& obj = fValueStack.back().as<ObjectValue>();
554 SkASSERT(obj.is<ObjectValue>());
555 for (const auto& member : obj) {
556 SkASSERT(member.fKey.is<StringValue>());
557 }
558 )
559 }
560
pushArrayScope()561 void pushArrayScope() {
562 // Save a scope index now, and then later we'll overwrite this value as the Array itself.
563 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
564
565 // New array scope.
566 fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
567 }
568
popArrayScope()569 void popArrayScope() {
570 SkASSERT(this->inArrayScope());
571 this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
572
573 SkDEBUGCODE(
574 const auto& arr = fValueStack.back().as<ArrayValue>();
575 SkASSERT(arr.is<ArrayValue>());
576 )
577 }
578
pushObjectKey(const char * key,size_t size,const char * eos)579 void pushObjectKey(const char* key, size_t size, const char* eos) {
580 SkASSERT(this->inObjectScope());
581 SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
582 SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
583 this->pushString(key, size, eos);
584 }
585
pushTrue()586 void pushTrue() {
587 fValueStack.push_back(BoolValue(true));
588 }
589
pushFalse()590 void pushFalse() {
591 fValueStack.push_back(BoolValue(false));
592 }
593
pushNull()594 void pushNull() {
595 fValueStack.push_back(NullValue());
596 }
597
pushString(const char * s,size_t size,const char * eos)598 void pushString(const char* s, size_t size, const char* eos) {
599 fValueStack.push_back(FastString(s, size, eos, fAlloc));
600 }
601
pushInt32(int32_t i)602 void pushInt32(int32_t i) {
603 fValueStack.push_back(NumberValue(i));
604 }
605
pushFloat(float f)606 void pushFloat(float f) {
607 fValueStack.push_back(NumberValue(f));
608 }
609
// Error funnel: hands |ret_val| back to the caller so failures can be written as
// `return this->error(...)`.  The offending position |p| and message |msg| are only
// recorded when SK_JSON_REPORT_ERRORS is defined.
template <typename T>
T error(T&& ret_val, const char* p, const char* msg) {
#ifdef SK_JSON_REPORT_ERRORS
    fErrorToken = p;
    fErrorMessage.set(msg);
#endif

    return ret_val;
}
618
matchTrue(const char * p)619 const char* matchTrue(const char* p) {
620 SkASSERT(p[0] == 't');
621
622 if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
623 this->pushTrue();
624 return p + 4;
625 }
626
627 return this->error(nullptr, p, "invalid token");
628 }
629
matchFalse(const char * p)630 const char* matchFalse(const char* p) {
631 SkASSERT(p[0] == 'f');
632
633 if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
634 this->pushFalse();
635 return p + 5;
636 }
637
638 return this->error(nullptr, p, "invalid token");
639 }
640
matchNull(const char * p)641 const char* matchNull(const char* p) {
642 SkASSERT(p[0] == 'n');
643
644 if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
645 this->pushNull();
646 return p + 4;
647 }
648
649 return this->error(nullptr, p, "invalid token");
650 }
651
unescapeString(const char * begin,const char * end)652 const std::vector<char>* unescapeString(const char* begin, const char* end) {
653 fUnescapeBuffer.clear();
654
655 for (const auto* p = begin; p != end; ++p) {
656 if (*p != '\\') {
657 fUnescapeBuffer.push_back(*p);
658 continue;
659 }
660
661 if (++p == end) {
662 return nullptr;
663 }
664
665 switch (*p) {
666 case '"': fUnescapeBuffer.push_back( '"'); break;
667 case '\\': fUnescapeBuffer.push_back('\\'); break;
668 case '/': fUnescapeBuffer.push_back( '/'); break;
669 case 'b': fUnescapeBuffer.push_back('\b'); break;
670 case 'f': fUnescapeBuffer.push_back('\f'); break;
671 case 'n': fUnescapeBuffer.push_back('\n'); break;
672 case 'r': fUnescapeBuffer.push_back('\r'); break;
673 case 't': fUnescapeBuffer.push_back('\t'); break;
674 case 'u': {
675 if (p + 4 >= end) {
676 return nullptr;
677 }
678
679 uint32_t hexed;
680 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
681 const auto* eos = SkParse::FindHex(hex_str, &hexed);
682 if (!eos || *eos) {
683 return nullptr;
684 }
685
686 char utf8[SkUTF::kMaxBytesInUTF8Sequence];
687 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
688 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
689 p += 4;
690 } break;
691 default: return nullptr;
692 }
693 }
694
695 return &fUnescapeBuffer;
696 }
697
// Matches a string token starting at the opening quote |p|.
//
// On success, calls func(chars, size, eos) with the string contents and returns the position
// right after the closing quote.  For strings without escapes, |chars| points into the input
// and eos is |p_stop|; for escaped strings, |chars| points into the unescape buffer and eos
// is the address of its last char.  Returns nullptr (after recording the error) when the
// string is unterminated, contains an illegal char, or holds an invalid escape sequence.
template <typename MatchFunc>
const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
    SkASSERT(*p == '"');
    const auto* s_begin = p + 1;
    bool requires_unescape = false;

    // Note: the `continue` statements below jump to the loop condition (p != p_stop).
    do {
        // Consume string chars.
        // This is the fast path, and hopefully we only hit it once then quick-exit below.
        for (p = p + 1; !is_eostring(*p); ++p);

        if (*p == '"') {
            // Valid string found.
            if (!requires_unescape) {
                func(s_begin, p - s_begin, p_stop);
            } else {
                // Slow unescape. We could avoid this extra copy with some effort,
                // but in practice escaped strings should be rare.
                const auto* buf = this->unescapeString(s_begin, p);
                if (!buf) {
                    // Invalid escape sequence: report through the common error exit.
                    break;
                }

                SkASSERT(!buf->empty());
                func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
            }
            return p + 1;
        }

        if (*p == '\\') {
            requires_unescape = true;
            // Step onto the escaped char; the scan above resumes right after it,
            // so an escaped quote does not terminate the string.
            ++p;
            continue;
        }

        // End-of-scope chars are special: we use them to tag the end of the input.
        // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
        // end of the input. To that effect, we treat them as string terminators above,
        // then we catch them here.
        if (is_eoscope(*p)) {
            continue;
        }

        // Invalid/unexpected char.
        break;
    } while (p != p_stop);

    // Premature end-of-input, or illegal string char.
    return this->error(nullptr, s_begin - 1, "invalid string");
}
748
matchFastFloatDecimalPart(const char * p,int sign,float f,int exp)749 const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
750 SkASSERT(exp <= 0);
751
752 for (;;) {
753 if (!is_digit(*p)) break;
754 f = f * 10.f + (*p++ - '0'); --exp;
755 if (!is_digit(*p)) break;
756 f = f * 10.f + (*p++ - '0'); --exp;
757 }
758
759 const auto decimal_scale = pow10(exp);
760 if (is_numeric(*p) || !decimal_scale) {
761 SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
762 // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
763 return nullptr;
764 }
765
766 this->pushFloat(sign * f * decimal_scale);
767
768 return p;
769 }
770
matchFastFloatPart(const char * p,int sign,float f)771 const char* matchFastFloatPart(const char* p, int sign, float f) {
772 for (;;) {
773 if (!is_digit(*p)) break;
774 f = f * 10.f + (*p++ - '0');
775 if (!is_digit(*p)) break;
776 f = f * 10.f + (*p++ - '0');
777 }
778
779 if (!is_numeric(*p)) {
780 // Matched (integral) float.
781 this->pushFloat(sign * f);
782 return p;
783 }
784
785 return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
786 : nullptr;
787 }
788
matchFast32OrFloat(const char * p)789 const char* matchFast32OrFloat(const char* p) {
790 int sign = 1;
791 if (*p == '-') {
792 sign = -1;
793 ++p;
794 }
795
796 const auto* digits_start = p;
797
798 int32_t n32 = 0;
799
800 // This is the largest absolute int32 value we can handle before
801 // risking overflow *on the next digit* (214748363).
802 static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
803
804 if (is_digit(*p)) {
805 n32 = (*p++ - '0');
806 for (;;) {
807 if (!is_digit(*p) || n32 > kMaxInt32) break;
808 n32 = n32 * 10 + (*p++ - '0');
809 }
810 }
811
812 if (!is_numeric(*p)) {
813 // Did we actually match any digits?
814 if (p > digits_start) {
815 this->pushInt32(sign * n32);
816 return p;
817 }
818 return nullptr;
819 }
820
821 if (*p == '.') {
822 const auto* decimals_start = ++p;
823
824 int exp = 0;
825
826 for (;;) {
827 if (!is_digit(*p) || n32 > kMaxInt32) break;
828 n32 = n32 * 10 + (*p++ - '0'); --exp;
829 if (!is_digit(*p) || n32 > kMaxInt32) break;
830 n32 = n32 * 10 + (*p++ - '0'); --exp;
831 }
832
833 if (!is_numeric(*p)) {
834 // Did we actually match any digits?
835 if (p > decimals_start) {
836 this->pushFloat(sign * n32 * pow10(exp));
837 return p;
838 }
839 return nullptr;
840 }
841
842 if (n32 > kMaxInt32) {
843 // we ran out on n32 bits
844 return this->matchFastFloatDecimalPart(p, sign, n32, exp);
845 }
846 }
847
848 return this->matchFastFloatPart(p, sign, n32);
849 }
850
matchNumber(const char * p)851 const char* matchNumber(const char* p) {
852 if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
853
854 // slow fallback
855 char* matched;
856 float f = strtof(p, &matched);
857 if (matched > p) {
858 this->pushFloat(f);
859 return matched;
860 }
861 return this->error(nullptr, p, "invalid numeric token");
862 }
863 };
864
Write(const Value & v,SkWStream * stream)865 void Write(const Value& v, SkWStream* stream) {
866 switch (v.getType()) {
867 case Value::Type::kNull:
868 stream->writeText("null");
869 break;
870 case Value::Type::kBool:
871 stream->writeText(*v.as<BoolValue>() ? "true" : "false");
872 break;
873 case Value::Type::kNumber:
874 stream->writeScalarAsText(*v.as<NumberValue>());
875 break;
876 case Value::Type::kString:
877 stream->writeText("\"");
878 stream->writeText(v.as<StringValue>().begin());
879 stream->writeText("\"");
880 break;
881 case Value::Type::kArray: {
882 const auto& array = v.as<ArrayValue>();
883 stream->writeText("[");
884 bool first_value = true;
885 for (const auto& entry : array) {
886 if (!first_value) stream->writeText(",");
887 Write(entry, stream);
888 first_value = false;
889 }
890 stream->writeText("]");
891 break;
892 }
893 case Value::Type::kObject:
894 const auto& object = v.as<ObjectValue>();
895 stream->writeText("{");
896 bool first_member = true;
897 for (const auto& member : object) {
898 SkASSERT(member.fKey.getType() == Value::Type::kString);
899 if (!first_member) stream->writeText(",");
900 Write(member.fKey, stream);
901 stream->writeText(":");
902 Write(member.fValue, stream);
903 first_member = false;
904 }
905 stream->writeText("}");
906 break;
907 }
908 }
909
910 } // namespace
911
toString() const912 SkString Value::toString() const {
913 SkDynamicMemoryWStream wstream;
914 Write(*this, &wstream);
915 const auto data = wstream.detachAsData();
916 // TODO: is there a better way to pass data around without copying?
917 return SkString(static_cast<const char*>(data->data()), data->size());
918 }
919
920 static constexpr size_t kMinChunkSize = 4096;
921
DOM(const char * data,size_t size)922 DOM::DOM(const char* data, size_t size)
923 : fAlloc(kMinChunkSize) {
924 DOMParser parser(fAlloc);
925
926 fRoot = parser.parse(data, size);
927 }
928
write(SkWStream * stream) const929 void DOM::write(SkWStream* stream) const {
930 Write(fRoot, stream);
931 }
932
933 } // namespace skjson
934