• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "encoding.h"
6 
7 #include <algorithm>
8 #include <cassert>
9 #include <cmath>
10 #include <cstring>
11 #include <limits>
12 #include <stack>
13 
14 namespace v8_inspector_protocol_encoding {
15 // =============================================================================
16 // Status and Error codes
17 // =============================================================================
18 
ToASCIIString() const19 std::string Status::ToASCIIString() const {
20   switch (error) {
21     case Error::OK:
22       return "OK";
23     case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS:
24       return ToASCIIString("JSON: unprocessed input remains");
25     case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED:
26       return ToASCIIString("JSON: stack limit exceeded");
27     case Error::JSON_PARSER_NO_INPUT:
28       return ToASCIIString("JSON: no input");
29     case Error::JSON_PARSER_INVALID_TOKEN:
30       return ToASCIIString("JSON: invalid token");
31     case Error::JSON_PARSER_INVALID_NUMBER:
32       return ToASCIIString("JSON: invalid number");
33     case Error::JSON_PARSER_INVALID_STRING:
34       return ToASCIIString("JSON: invalid string");
35     case Error::JSON_PARSER_UNEXPECTED_ARRAY_END:
36       return ToASCIIString("JSON: unexpected array end");
37     case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED:
38       return ToASCIIString("JSON: comma or array end expected");
39     case Error::JSON_PARSER_STRING_LITERAL_EXPECTED:
40       return ToASCIIString("JSON: string literal expected");
41     case Error::JSON_PARSER_COLON_EXPECTED:
42       return ToASCIIString("JSON: colon expected");
43     case Error::JSON_PARSER_UNEXPECTED_MAP_END:
44       return ToASCIIString("JSON: unexpected map end");
45     case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED:
46       return ToASCIIString("JSON: comma or map end expected");
47     case Error::JSON_PARSER_VALUE_EXPECTED:
48       return ToASCIIString("JSON: value expected");
49 
50     case Error::CBOR_INVALID_INT32:
51       return ToASCIIString("CBOR: invalid int32");
52     case Error::CBOR_INVALID_DOUBLE:
53       return ToASCIIString("CBOR: invalid double");
54     case Error::CBOR_INVALID_ENVELOPE:
55       return ToASCIIString("CBOR: invalid envelope");
56     case Error::CBOR_INVALID_STRING8:
57       return ToASCIIString("CBOR: invalid string8");
58     case Error::CBOR_INVALID_STRING16:
59       return ToASCIIString("CBOR: invalid string16");
60     case Error::CBOR_INVALID_BINARY:
61       return ToASCIIString("CBOR: invalid binary");
62     case Error::CBOR_UNSUPPORTED_VALUE:
63       return ToASCIIString("CBOR: unsupported value");
64     case Error::CBOR_NO_INPUT:
65       return ToASCIIString("CBOR: no input");
66     case Error::CBOR_INVALID_START_BYTE:
67       return ToASCIIString("CBOR: invalid start byte");
68     case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE:
69       return ToASCIIString("CBOR: unexpected eof expected value");
70     case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY:
71       return ToASCIIString("CBOR: unexpected eof in array");
72     case Error::CBOR_UNEXPECTED_EOF_IN_MAP:
73       return ToASCIIString("CBOR: unexpected eof in map");
74     case Error::CBOR_INVALID_MAP_KEY:
75       return ToASCIIString("CBOR: invalid map key");
76     case Error::CBOR_STACK_LIMIT_EXCEEDED:
77       return ToASCIIString("CBOR: stack limit exceeded");
78     case Error::CBOR_TRAILING_JUNK:
79       return ToASCIIString("CBOR: trailing junk");
80     case Error::CBOR_MAP_START_EXPECTED:
81       return ToASCIIString("CBOR: map start expected");
82     case Error::CBOR_MAP_STOP_EXPECTED:
83       return ToASCIIString("CBOR: map stop expected");
84     case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED:
85       return ToASCIIString("CBOR: envelope size limit exceeded");
86   }
87   // Some compilers can't figure out that we can't get here.
88   return "INVALID ERROR CODE";
89 }
90 
ToASCIIString(const char * msg) const91 std::string Status::ToASCIIString(const char* msg) const {
92   return std::string(msg) + " at position " + std::to_string(pos);
93 }
94 
95 namespace cbor {
96 namespace {
// Layout of the CBOR "initial byte": the high-order 3 bits carry the major
// type, the low-order 5 bits carry the additional information.

// Number of bits the "initial byte" needs to be shifted to the right after
// applying |kMajorTypeMask| to produce the major type in the lowermost bits.
static constexpr uint8_t kMajorTypeBitShift = 5u;
// Mask selecting the low-order 5 bits of the "initial byte", which is where
// the additional information is encoded.
static constexpr uint8_t kAdditionalInformationMask = 0x1f;
// Mask selecting the high-order 3 bits of the "initial byte", which indicate
// the major type of the encoded value.
static constexpr uint8_t kMajorTypeMask = 0xe0;

// Additional-information values announcing that the integer follows the
// initial byte in a fixed number of bytes.
// The integer is in the following byte.
static constexpr uint8_t kAdditionalInformation1Byte = 24u;
// The integer is in the next 2 bytes.
static constexpr uint8_t kAdditionalInformation2Bytes = 25u;
// The integer is in the next 4 bytes.
static constexpr uint8_t kAdditionalInformation4Bytes = 26u;
// The integer is in the next 8 bytes.
static constexpr uint8_t kAdditionalInformation8Bytes = 27u;
115 
116 // Encodes the initial byte, consisting of the |type| in the first 3 bits
117 // followed by 5 bits of |additional_info|.
EncodeInitialByte(MajorType type,uint8_t additional_info)118 constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) {
119   return (static_cast<uint8_t>(type) << kMajorTypeBitShift) |
120          (additional_info & kAdditionalInformationMask);
121 }
122 
123 // TAG 24 indicates that what follows is a byte string which is
124 // encoded in CBOR format. We use this as a wrapper for
125 // maps and arrays, allowing us to skip them, because the
126 // byte string carries its size (byte length).
127 // https://tools.ietf.org/html/rfc7049#section-2.4.4.1
128 static constexpr uint8_t kInitialByteForEnvelope =
129     EncodeInitialByte(MajorType::TAG, 24);
130 // The initial byte for a byte string with at most 2^32 bytes
131 // of payload. This is used for envelope encoding, even if
132 // the byte string is shorter.
133 static constexpr uint8_t kInitialByteFor32BitLengthByteString =
134     EncodeInitialByte(MajorType::BYTE_STRING, 26);
135 
136 // See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional
137 // info = 31.
138 static constexpr uint8_t kInitialByteIndefiniteLengthArray =
139     EncodeInitialByte(MajorType::ARRAY, 31);
140 static constexpr uint8_t kInitialByteIndefiniteLengthMap =
141     EncodeInitialByte(MajorType::MAP, 31);
142 // See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite
143 // length maps / arrays.
144 static constexpr uint8_t kStopByte =
145     EncodeInitialByte(MajorType::SIMPLE_VALUE, 31);
146 
147 // See RFC 7049 Section 2.3, Table 2.
148 static constexpr uint8_t kEncodedTrue =
149     EncodeInitialByte(MajorType::SIMPLE_VALUE, 21);
150 static constexpr uint8_t kEncodedFalse =
151     EncodeInitialByte(MajorType::SIMPLE_VALUE, 20);
152 static constexpr uint8_t kEncodedNull =
153     EncodeInitialByte(MajorType::SIMPLE_VALUE, 22);
154 static constexpr uint8_t kInitialByteForDouble =
155     EncodeInitialByte(MajorType::SIMPLE_VALUE, 27);
156 
157 // See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for
158 // arbitrary binary data encoded as BYTE_STRING.
159 static constexpr uint8_t kExpectedConversionToBase64Tag =
160     EncodeInitialByte(MajorType::TAG, 22);
161 
// Appends the sizeof(T) bytes of |v| to |out| via push_back, starting with
// the most significant byte. T is expected to be an unsigned integer type;
// C is any container with push_back (std::vector<uint8_t>, std::string).
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T, class C>
void WriteBytesMostSignificantByteFirst(T v, C* out) {
  // Count down with an unsigned counter; |remaining| - 1 is the index of the
  // byte to emit, where index 0 is the least significant byte.
  for (size_t remaining = sizeof(T); remaining > 0; --remaining) {
    const size_t shift_bits = (remaining - 1) * 8;
    out->push_back(static_cast<uint8_t>(0xff & (v >> shift_bits)));
  }
}
169 
170 // Extracts sizeof(T) bytes from |in| to extract a value of type T
171 // (e.g. uint64_t, uint32_t, ...), most significant byte first.
172 // See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
173 template <typename T>
ReadBytesMostSignificantByteFirst(span<uint8_t> in)174 T ReadBytesMostSignificantByteFirst(span<uint8_t> in) {
175   assert(in.size() >= sizeof(T));
176   T result = 0;
177   for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes)
178     result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8);
179   return result;
180 }
181 }  // namespace
182 
183 namespace internals {
184 // Reads the start of a token with definitive size from |bytes|.
185 // |type| is the major type as specified in RFC 7049 Section 2.1.
186 // |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size
187 // (e.g. for BYTE_STRING).
188 // If successful, returns the number of bytes read. Otherwise returns 0.
ReadTokenStart(span<uint8_t> bytes,MajorType * type,uint64_t * value)189 size_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) {
190   if (bytes.empty())
191     return 0;
192   uint8_t initial_byte = bytes[0];
193   *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift);
194 
195   uint8_t additional_information = initial_byte & kAdditionalInformationMask;
196   if (additional_information < 24) {
197     // Values 0-23 are encoded directly into the additional info of the
198     // initial byte.
199     *value = additional_information;
200     return 1;
201   }
202   if (additional_information == kAdditionalInformation1Byte) {
203     // Values 24-255 are encoded with one initial byte, followed by the value.
204     if (bytes.size() < 2)
205       return 0;
206     *value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes.subspan(1));
207     return 2;
208   }
209   if (additional_information == kAdditionalInformation2Bytes) {
210     // Values 256-65535: 1 initial byte + 2 bytes payload.
211     if (bytes.size() < 1 + sizeof(uint16_t))
212       return 0;
213     *value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes.subspan(1));
214     return 3;
215   }
216   if (additional_information == kAdditionalInformation4Bytes) {
217     // 32 bit uint: 1 initial byte + 4 bytes payload.
218     if (bytes.size() < 1 + sizeof(uint32_t))
219       return 0;
220     *value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes.subspan(1));
221     return 5;
222   }
223   if (additional_information == kAdditionalInformation8Bytes) {
224     // 64 bit uint: 1 initial byte + 8 bytes payload.
225     if (bytes.size() < 1 + sizeof(uint64_t))
226       return 0;
227     *value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes.subspan(1));
228     return 9;
229   }
230   return 0;
231 }
232 
233 // Writes the start of a token with |type|. The |value| may indicate the size,
234 // or it may be the payload if the value is an unsigned integer.
235 template <typename C>
WriteTokenStartTmpl(MajorType type,uint64_t value,C * encoded)236 void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) {
237   if (value < 24) {
238     // Values 0-23 are encoded directly into the additional info of the
239     // initial byte.
240     encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value));
241     return;
242   }
243   if (value <= std::numeric_limits<uint8_t>::max()) {
244     // Values 24-255 are encoded with one initial byte, followed by the value.
245     encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte));
246     encoded->push_back(value);
247     return;
248   }
249   if (value <= std::numeric_limits<uint16_t>::max()) {
250     // Values 256-65535: 1 initial byte + 2 bytes payload.
251     encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes));
252     WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded);
253     return;
254   }
255   if (value <= std::numeric_limits<uint32_t>::max()) {
256     // 32 bit uint: 1 initial byte + 4 bytes payload.
257     encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes));
258     WriteBytesMostSignificantByteFirst<uint32_t>(static_cast<uint32_t>(value),
259                                                  encoded);
260     return;
261   }
262   // 64 bit uint: 1 initial byte + 8 bytes payload.
263   encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes));
264   WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded);
265 }
WriteTokenStart(MajorType type,uint64_t value,std::vector<uint8_t> * encoded)266 void WriteTokenStart(MajorType type,
267                      uint64_t value,
268                      std::vector<uint8_t>* encoded) {
269   WriteTokenStartTmpl(type, value, encoded);
270 }
WriteTokenStart(MajorType type,uint64_t value,std::string * encoded)271 void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) {
272   WriteTokenStartTmpl(type, value, encoded);
273 }
274 }  // namespace internals
275 
276 // =============================================================================
277 // Detecting CBOR content
278 // =============================================================================
279 
InitialByteForEnvelope()280 uint8_t InitialByteForEnvelope() {
281   return kInitialByteForEnvelope;
282 }
InitialByteFor32BitLengthByteString()283 uint8_t InitialByteFor32BitLengthByteString() {
284   return kInitialByteFor32BitLengthByteString;
285 }
IsCBORMessage(span<uint8_t> msg)286 bool IsCBORMessage(span<uint8_t> msg) {
287   return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() &&
288          msg[1] == InitialByteFor32BitLengthByteString();
289 }
290 
291 // =============================================================================
292 // Encoding individual CBOR items
293 // =============================================================================
294 
EncodeTrue()295 uint8_t EncodeTrue() {
296   return kEncodedTrue;
297 }
EncodeFalse()298 uint8_t EncodeFalse() {
299   return kEncodedFalse;
300 }
EncodeNull()301 uint8_t EncodeNull() {
302   return kEncodedNull;
303 }
304 
EncodeIndefiniteLengthArrayStart()305 uint8_t EncodeIndefiniteLengthArrayStart() {
306   return kInitialByteIndefiniteLengthArray;
307 }
308 
EncodeIndefiniteLengthMapStart()309 uint8_t EncodeIndefiniteLengthMapStart() {
310   return kInitialByteIndefiniteLengthMap;
311 }
312 
EncodeStop()313 uint8_t EncodeStop() {
314   return kStopByte;
315 }
316 
317 template <typename C>
EncodeInt32Tmpl(int32_t value,C * out)318 void EncodeInt32Tmpl(int32_t value, C* out) {
319   if (value >= 0) {
320     internals::WriteTokenStart(MajorType::UNSIGNED, value, out);
321   } else {
322     uint64_t representation = static_cast<uint64_t>(-(value + 1));
323     internals::WriteTokenStart(MajorType::NEGATIVE, representation, out);
324   }
325 }
EncodeInt32(int32_t value,std::vector<uint8_t> * out)326 void EncodeInt32(int32_t value, std::vector<uint8_t>* out) {
327   EncodeInt32Tmpl(value, out);
328 }
EncodeInt32(int32_t value,std::string * out)329 void EncodeInt32(int32_t value, std::string* out) {
330   EncodeInt32Tmpl(value, out);
331 }
332 
333 template <typename C>
EncodeString16Tmpl(span<uint16_t> in,C * out)334 void EncodeString16Tmpl(span<uint16_t> in, C* out) {
335   uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
336   internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
337   // When emitting UTF16 characters, we always write the least significant byte
338   // first; this is because it's the native representation for X86.
339   // TODO(johannes): Implement a more efficient thing here later, e.g.
340   // casting *iff* the machine has this byte order.
341   // The wire format for UTF16 chars will probably remain the same
342   // (least significant byte first) since this way we can have
343   // golden files, unittests, etc. that port easily and universally.
344   // See also:
345   // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
346   for (const uint16_t two_bytes : in) {
347     out->push_back(two_bytes);
348     out->push_back(two_bytes >> 8);
349   }
350 }
EncodeString16(span<uint16_t> in,std::vector<uint8_t> * out)351 void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) {
352   EncodeString16Tmpl(in, out);
353 }
EncodeString16(span<uint16_t> in,std::string * out)354 void EncodeString16(span<uint16_t> in, std::string* out) {
355   EncodeString16Tmpl(in, out);
356 }
357 
358 template <typename C>
EncodeString8Tmpl(span<uint8_t> in,C * out)359 void EncodeString8Tmpl(span<uint8_t> in, C* out) {
360   internals::WriteTokenStart(MajorType::STRING,
361                              static_cast<uint64_t>(in.size_bytes()), out);
362   out->insert(out->end(), in.begin(), in.end());
363 }
EncodeString8(span<uint8_t> in,std::vector<uint8_t> * out)364 void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) {
365   EncodeString8Tmpl(in, out);
366 }
EncodeString8(span<uint8_t> in,std::string * out)367 void EncodeString8(span<uint8_t> in, std::string* out) {
368   EncodeString8Tmpl(in, out);
369 }
370 
371 template <typename C>
EncodeFromLatin1Tmpl(span<uint8_t> latin1,C * out)372 void EncodeFromLatin1Tmpl(span<uint8_t> latin1, C* out) {
373   for (size_t ii = 0; ii < latin1.size(); ++ii) {
374     if (latin1[ii] <= 127)
375       continue;
376     // If there's at least one non-ASCII char, convert to UTF8.
377     std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + ii);
378     for (; ii < latin1.size(); ++ii) {
379       if (latin1[ii] <= 127) {
380         utf8.push_back(latin1[ii]);
381       } else {
382         // 0xC0 means it's a UTF8 sequence with 2 bytes.
383         utf8.push_back((latin1[ii] >> 6) | 0xc0);
384         utf8.push_back((latin1[ii] | 0x80) & 0xbf);
385       }
386     }
387     EncodeString8(SpanFrom(utf8), out);
388     return;
389   }
390   EncodeString8(latin1, out);
391 }
EncodeFromLatin1(span<uint8_t> latin1,std::vector<uint8_t> * out)392 void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) {
393   EncodeFromLatin1Tmpl(latin1, out);
394 }
EncodeFromLatin1(span<uint8_t> latin1,std::string * out)395 void EncodeFromLatin1(span<uint8_t> latin1, std::string* out) {
396   EncodeFromLatin1Tmpl(latin1, out);
397 }
398 
399 template <typename C>
EncodeFromUTF16Tmpl(span<uint16_t> utf16,C * out)400 void EncodeFromUTF16Tmpl(span<uint16_t> utf16, C* out) {
401   // If there's at least one non-ASCII char, encode as STRING16 (UTF16).
402   for (uint16_t ch : utf16) {
403     if (ch <= 127)
404       continue;
405     EncodeString16(utf16, out);
406     return;
407   }
408   // It's all US-ASCII, strip out every second byte and encode as UTF8.
409   internals::WriteTokenStart(MajorType::STRING,
410                              static_cast<uint64_t>(utf16.size()), out);
411   out->insert(out->end(), utf16.begin(), utf16.end());
412 }
EncodeFromUTF16(span<uint16_t> utf16,std::vector<uint8_t> * out)413 void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) {
414   EncodeFromUTF16Tmpl(utf16, out);
415 }
EncodeFromUTF16(span<uint16_t> utf16,std::string * out)416 void EncodeFromUTF16(span<uint16_t> utf16, std::string* out) {
417   EncodeFromUTF16Tmpl(utf16, out);
418 }
419 
420 template <typename C>
EncodeBinaryTmpl(span<uint8_t> in,C * out)421 void EncodeBinaryTmpl(span<uint8_t> in, C* out) {
422   out->push_back(kExpectedConversionToBase64Tag);
423   uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
424   internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
425   out->insert(out->end(), in.begin(), in.end());
426 }
EncodeBinary(span<uint8_t> in,std::vector<uint8_t> * out)427 void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) {
428   EncodeBinaryTmpl(in, out);
429 }
EncodeBinary(span<uint8_t> in,std::string * out)430 void EncodeBinary(span<uint8_t> in, std::string* out) {
431   EncodeBinaryTmpl(in, out);
432 }
433 
// A double is encoded with a specific initial byte
// (kInitialByteForDouble) plus the 64 bits of payload for its value,
// 9 bytes in total.
constexpr size_t kEncodedDoubleSize = 1 + sizeof(uint64_t);

// An envelope is encoded with a specific initial byte
// (kInitialByteForEnvelope), plus the start byte for a BYTE_STRING with a 32
// bit wide length, plus a 32 bit length for that string, 6 bytes in total.
constexpr size_t kEncodedEnvelopeHeaderSize = 1 + 1 + sizeof(uint32_t);
442 
443 template <typename C>
EncodeDoubleTmpl(double value,C * out)444 void EncodeDoubleTmpl(double value, C* out) {
445   // The additional_info=27 indicates 64 bits for the double follow.
446   // See RFC 7049 Section 2.3, Table 1.
447   out->push_back(kInitialByteForDouble);
448   union {
449     double from_double;
450     uint64_t to_uint64;
451   } reinterpret;
452   reinterpret.from_double = value;
453   WriteBytesMostSignificantByteFirst<uint64_t>(reinterpret.to_uint64, out);
454 }
EncodeDouble(double value,std::vector<uint8_t> * out)455 void EncodeDouble(double value, std::vector<uint8_t>* out) {
456   EncodeDoubleTmpl(value, out);
457 }
EncodeDouble(double value,std::string * out)458 void EncodeDouble(double value, std::string* out) {
459   EncodeDoubleTmpl(value, out);
460 }
461 
462 // =============================================================================
463 // cbor::EnvelopeEncoder - for wrapping submessages
464 // =============================================================================
465 
466 template <typename C>
EncodeStartTmpl(C * out,size_t * byte_size_pos)467 void EncodeStartTmpl(C* out, size_t* byte_size_pos) {
468   assert(*byte_size_pos == 0);
469   out->push_back(kInitialByteForEnvelope);
470   out->push_back(kInitialByteFor32BitLengthByteString);
471   *byte_size_pos = out->size();
472   out->resize(out->size() + sizeof(uint32_t));
473 }
474 
EncodeStart(std::vector<uint8_t> * out)475 void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) {
476   EncodeStartTmpl<std::vector<uint8_t>>(out, &byte_size_pos_);
477 }
478 
EncodeStart(std::string * out)479 void EnvelopeEncoder::EncodeStart(std::string* out) {
480   EncodeStartTmpl<std::string>(out, &byte_size_pos_);
481 }
482 
483 template <typename C>
EncodeStopTmpl(C * out,size_t * byte_size_pos)484 bool EncodeStopTmpl(C* out, size_t* byte_size_pos) {
485   assert(*byte_size_pos != 0);
486   // The byte size is the size of the payload, that is, all the
487   // bytes that were written past the byte size position itself.
488   uint64_t byte_size = out->size() - (*byte_size_pos + sizeof(uint32_t));
489   // We store exactly 4 bytes, so at most INT32MAX, with most significant
490   // byte first.
491   if (byte_size > std::numeric_limits<uint32_t>::max())
492     return false;
493   for (int shift_bytes = sizeof(uint32_t) - 1; shift_bytes >= 0;
494        --shift_bytes) {
495     (*out)[(*byte_size_pos)++] = 0xff & (byte_size >> (shift_bytes * 8));
496   }
497   return true;
498 }
499 
EncodeStop(std::vector<uint8_t> * out)500 bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) {
501   return EncodeStopTmpl(out, &byte_size_pos_);
502 }
503 
EncodeStop(std::string * out)504 bool EnvelopeEncoder::EncodeStop(std::string* out) {
505   return EncodeStopTmpl(out, &byte_size_pos_);
506 }
507 
508 // =============================================================================
509 // cbor::NewCBOREncoder - for encoding from a streaming parser
510 // =============================================================================
511 
512 namespace {
513 template <typename C>
514 class CBOREncoder : public StreamingParserHandler {
515  public:
CBOREncoder(C * out,Status * status)516   CBOREncoder(C* out, Status* status) : out_(out), status_(status) {
517     *status_ = Status();
518   }
519 
HandleMapBegin()520   void HandleMapBegin() override {
521     if (!status_->ok())
522       return;
523     envelopes_.emplace_back();
524     envelopes_.back().EncodeStart(out_);
525     out_->push_back(kInitialByteIndefiniteLengthMap);
526   }
527 
HandleMapEnd()528   void HandleMapEnd() override {
529     if (!status_->ok())
530       return;
531     out_->push_back(kStopByte);
532     assert(!envelopes_.empty());
533     if (!envelopes_.back().EncodeStop(out_)) {
534       HandleError(
535           Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
536       return;
537     }
538     envelopes_.pop_back();
539   }
540 
HandleArrayBegin()541   void HandleArrayBegin() override {
542     if (!status_->ok())
543       return;
544     envelopes_.emplace_back();
545     envelopes_.back().EncodeStart(out_);
546     out_->push_back(kInitialByteIndefiniteLengthArray);
547   }
548 
HandleArrayEnd()549   void HandleArrayEnd() override {
550     if (!status_->ok())
551       return;
552     out_->push_back(kStopByte);
553     assert(!envelopes_.empty());
554     if (!envelopes_.back().EncodeStop(out_)) {
555       HandleError(
556           Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
557       return;
558     }
559     envelopes_.pop_back();
560   }
561 
HandleString8(span<uint8_t> chars)562   void HandleString8(span<uint8_t> chars) override {
563     if (!status_->ok())
564       return;
565     EncodeString8(chars, out_);
566   }
567 
HandleString16(span<uint16_t> chars)568   void HandleString16(span<uint16_t> chars) override {
569     if (!status_->ok())
570       return;
571     EncodeFromUTF16(chars, out_);
572   }
573 
HandleBinary(span<uint8_t> bytes)574   void HandleBinary(span<uint8_t> bytes) override {
575     if (!status_->ok())
576       return;
577     EncodeBinary(bytes, out_);
578   }
579 
HandleDouble(double value)580   void HandleDouble(double value) override {
581     if (!status_->ok())
582       return;
583     EncodeDouble(value, out_);
584   }
585 
HandleInt32(int32_t value)586   void HandleInt32(int32_t value) override {
587     if (!status_->ok())
588       return;
589     EncodeInt32(value, out_);
590   }
591 
HandleBool(bool value)592   void HandleBool(bool value) override {
593     if (!status_->ok())
594       return;
595     // See RFC 7049 Section 2.3, Table 2.
596     out_->push_back(value ? kEncodedTrue : kEncodedFalse);
597   }
598 
HandleNull()599   void HandleNull() override {
600     if (!status_->ok())
601       return;
602     // See RFC 7049 Section 2.3, Table 2.
603     out_->push_back(kEncodedNull);
604   }
605 
HandleError(Status error)606   void HandleError(Status error) override {
607     if (!status_->ok())
608       return;
609     *status_ = error;
610     out_->clear();
611   }
612 
613  private:
614   C* out_;
615   std::vector<EnvelopeEncoder> envelopes_;
616   Status* status_;
617 };
618 }  // namespace
619 
NewCBOREncoder(std::vector<uint8_t> * out,Status * status)620 std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
621     std::vector<uint8_t>* out,
622     Status* status) {
623   return std::unique_ptr<StreamingParserHandler>(
624       new CBOREncoder<std::vector<uint8_t>>(out, status));
625 }
NewCBOREncoder(std::string * out,Status * status)626 std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
627                                                        Status* status) {
628   return std::unique_ptr<StreamingParserHandler>(
629       new CBOREncoder<std::string>(out, status));
630 }
631 
632 // =============================================================================
633 // cbor::CBORTokenizer - for parsing individual CBOR items
634 // =============================================================================
635 
CBORTokenizer(span<uint8_t> bytes)636 CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : bytes_(bytes) {
637   ReadNextToken(/*enter_envelope=*/false);
638 }
~CBORTokenizer()639 CBORTokenizer::~CBORTokenizer() {}
640 
TokenTag() const641 CBORTokenTag CBORTokenizer::TokenTag() const {
642   return token_tag_;
643 }
644 
Next()645 void CBORTokenizer::Next() {
646   if (token_tag_ == CBORTokenTag::ERROR_VALUE ||
647       token_tag_ == CBORTokenTag::DONE)
648     return;
649   ReadNextToken(/*enter_envelope=*/false);
650 }
651 
EnterEnvelope()652 void CBORTokenizer::EnterEnvelope() {
653   assert(token_tag_ == CBORTokenTag::ENVELOPE);
654   ReadNextToken(/*enter_envelope=*/true);
655 }
656 
Status() const657 Status CBORTokenizer::Status() const {
658   return status_;
659 }
660 
661 // The following accessor functions ::GetInt32, ::GetDouble,
662 // ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents
663 // assume that a particular token was recognized in ::ReadNextToken.
664 // That's where all the error checking is done. By design,
665 // the accessors (assuming the token was recognized) never produce
666 // an error.
667 
GetInt32() const668 int32_t CBORTokenizer::GetInt32() const {
669   assert(token_tag_ == CBORTokenTag::INT32);
670   // The range checks happen in ::ReadNextToken().
671   return static_cast<int32_t>(
672       token_start_type_ == MajorType::UNSIGNED
673           ? token_start_internal_value_
674           : -static_cast<int64_t>(token_start_internal_value_) - 1);
675 }
676 
GetDouble() const677 double CBORTokenizer::GetDouble() const {
678   assert(token_tag_ == CBORTokenTag::DOUBLE);
679   union {
680     uint64_t from_uint64;
681     double to_double;
682   } reinterpret;
683   reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst<uint64_t>(
684       bytes_.subspan(status_.pos + 1));
685   return reinterpret.to_double;
686 }
687 
GetString8() const688 span<uint8_t> CBORTokenizer::GetString8() const {
689   assert(token_tag_ == CBORTokenTag::STRING8);
690   auto length = static_cast<size_t>(token_start_internal_value_);
691   return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
692 }
693 
GetString16WireRep() const694 span<uint8_t> CBORTokenizer::GetString16WireRep() const {
695   assert(token_tag_ == CBORTokenTag::STRING16);
696   auto length = static_cast<size_t>(token_start_internal_value_);
697   return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
698 }
699 
GetBinary() const700 span<uint8_t> CBORTokenizer::GetBinary() const {
701   assert(token_tag_ == CBORTokenTag::BINARY);
702   auto length = static_cast<size_t>(token_start_internal_value_);
703   return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
704 }
705 
GetEnvelopeContents() const706 span<uint8_t> CBORTokenizer::GetEnvelopeContents() const {
707   assert(token_tag_ == CBORTokenTag::ENVELOPE);
708   auto length = static_cast<size_t>(token_start_internal_value_);
709   return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length);
710 }
711 
712 // All error checking happens in ::ReadNextToken, so that the accessors
713 // can avoid having to carry an error return value.
714 //
715 // With respect to checking the encoded lengths of strings, arrays, etc:
716 // On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so
717 // we initially read them as uint64_t, usually into token_start_internal_value_.
718 //
719 // However, since these containers have a representation on the machine,
720 // we need to do corresponding size computations on the input byte array,
721 // output span (e.g. the payload for a string), etc., and size_t is
722 // machine specific (in practice either 32 bit or 64 bit).
723 //
724 // Further, we must avoid overflowing size_t. Therefore, we use this
725 // kMaxValidLength constant to:
726 // - Reject values that are larger than the architecture specific
727 //   max size_t (differs between 32 bit and 64 bit arch).
728 // - Reserve at least one bit so that we can check against overflows
729 //   when adding lengths (array / string length / etc.); we do this by
730 //   ensuring that the inputs to an addition are <= kMaxValidLength,
731 //   and then checking whether the sum went past it.
732 //
733 // See also
734 // https://chromium.googlesource.com/chromium/src/+/master/docs/security/integer-semantics.md
// Largest length value accepted from the wire: the smaller of a quarter of
// the uint64_t range and the largest value representable as size_t.
static const uint64_t kMaxValidLength = std::min<uint64_t>(
    std::numeric_limits<uint64_t>::max() / 4,
    std::numeric_limits<size_t>::max());
738 
ReadNextToken(bool enter_envelope)739 void CBORTokenizer::ReadNextToken(bool enter_envelope) {
740   if (enter_envelope) {
741     status_.pos += kEncodedEnvelopeHeaderSize;
742   } else {
743     status_.pos =
744         status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_;
745   }
746   status_.error = Error::OK;
747   if (status_.pos >= bytes_.size()) {
748     token_tag_ = CBORTokenTag::DONE;
749     return;
750   }
751   const size_t remaining_bytes = bytes_.size() - status_.pos;
752   switch (bytes_[status_.pos]) {
753     case kStopByte:
754       SetToken(CBORTokenTag::STOP, 1);
755       return;
756     case kInitialByteIndefiniteLengthMap:
757       SetToken(CBORTokenTag::MAP_START, 1);
758       return;
759     case kInitialByteIndefiniteLengthArray:
760       SetToken(CBORTokenTag::ARRAY_START, 1);
761       return;
762     case kEncodedTrue:
763       SetToken(CBORTokenTag::TRUE_VALUE, 1);
764       return;
765     case kEncodedFalse:
766       SetToken(CBORTokenTag::FALSE_VALUE, 1);
767       return;
768     case kEncodedNull:
769       SetToken(CBORTokenTag::NULL_VALUE, 1);
770       return;
771     case kExpectedConversionToBase64Tag: {  // BINARY
772       const size_t bytes_read = internals::ReadTokenStart(
773           bytes_.subspan(status_.pos + 1), &token_start_type_,
774           &token_start_internal_value_);
775       if (!bytes_read || token_start_type_ != MajorType::BYTE_STRING ||
776           token_start_internal_value_ > kMaxValidLength) {
777         SetError(Error::CBOR_INVALID_BINARY);
778         return;
779       }
780       const uint64_t token_byte_length = token_start_internal_value_ +
781                                          /* tag before token start: */ 1 +
782                                          /* token start: */ bytes_read;
783       if (token_byte_length > remaining_bytes) {
784         SetError(Error::CBOR_INVALID_BINARY);
785         return;
786       }
787       SetToken(CBORTokenTag::BINARY, static_cast<size_t>(token_byte_length));
788       return;
789     }
790     case kInitialByteForDouble: {  // DOUBLE
791       if (kEncodedDoubleSize > remaining_bytes) {
792         SetError(Error::CBOR_INVALID_DOUBLE);
793         return;
794       }
795       SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize);
796       return;
797     }
798     case kInitialByteForEnvelope: {  // ENVELOPE
799       if (kEncodedEnvelopeHeaderSize > remaining_bytes) {
800         SetError(Error::CBOR_INVALID_ENVELOPE);
801         return;
802       }
803       // The envelope must be a byte string with 32 bit length.
804       if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) {
805         SetError(Error::CBOR_INVALID_ENVELOPE);
806         return;
807       }
808       // Read the length of the byte string.
809       token_start_internal_value_ = ReadBytesMostSignificantByteFirst<uint32_t>(
810           bytes_.subspan(status_.pos + 2));
811       if (token_start_internal_value_ > kMaxValidLength) {
812         SetError(Error::CBOR_INVALID_ENVELOPE);
813         return;
814       }
815       uint64_t token_byte_length =
816           token_start_internal_value_ + kEncodedEnvelopeHeaderSize;
817       if (token_byte_length > remaining_bytes) {
818         SetError(Error::CBOR_INVALID_ENVELOPE);
819         return;
820       }
821       SetToken(CBORTokenTag::ENVELOPE, static_cast<size_t>(token_byte_length));
822       return;
823     }
824     default: {
825       const size_t bytes_read = internals::ReadTokenStart(
826           bytes_.subspan(status_.pos), &token_start_type_,
827           &token_start_internal_value_);
828       switch (token_start_type_) {
829         case MajorType::UNSIGNED:  // INT32.
830           // INT32 is a signed int32 (int32 makes sense for the
831           // inspector_protocol, it's not a CBOR limitation), so we check
832           // against the signed max, so that the allowable values are
833           // 0, 1, 2, ... 2^31 - 1.
834           if (!bytes_read ||
835                 static_cast<int64_t>(std::numeric_limits<int32_t>::max()) <
836                   static_cast<int64_t>(token_start_internal_value_)) {
837             SetError(Error::CBOR_INVALID_INT32);
838             return;
839           }
840           SetToken(CBORTokenTag::INT32, bytes_read);
841           return;
842         case MajorType::NEGATIVE: {  // INT32.
843           // INT32 is a signed int32 (int32 makes sense for the
844           // inspector_protocol, it's not a CBOR limitation); in CBOR, the
845           // negative values for INT32 are represented as NEGATIVE, that is, -1
846           // INT32 is represented as 1 << 5 | 0 (major type 1, additional info
847           // value 0).
848           // The represented allowed values range is -1 to -2^31.
849           // They are mapped into the encoded range of 0 to 2^31-1.
850           // We check the the payload in token_start_internal_value_ against
851           // that range (2^31-1 is also known as
852           // std::numeric_limits<int32_t>::max()).
853           if (!bytes_read ||
854 	      static_cast<int64_t>(token_start_internal_value_) >
855                 static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
856             SetError(Error::CBOR_INVALID_INT32);
857             return;
858           }
859           SetToken(CBORTokenTag::INT32, bytes_read);
860           return;
861         }
862         case MajorType::STRING: {  // STRING8.
863           if (!bytes_read || token_start_internal_value_ > kMaxValidLength) {
864             SetError(Error::CBOR_INVALID_STRING8);
865             return;
866           }
867           uint64_t token_byte_length = token_start_internal_value_ + bytes_read;
868           if (token_byte_length > remaining_bytes) {
869             SetError(Error::CBOR_INVALID_STRING8);
870             return;
871           }
872           SetToken(CBORTokenTag::STRING8,
873                    static_cast<size_t>(token_byte_length));
874           return;
875         }
876         case MajorType::BYTE_STRING: {  // STRING16.
877           // Length must be divisible by 2 since UTF16 is 2 bytes per
878           // character, hence the &1 check.
879           if (!bytes_read || token_start_internal_value_ > kMaxValidLength ||
880               token_start_internal_value_ & 1) {
881             SetError(Error::CBOR_INVALID_STRING16);
882             return;
883           }
884           uint64_t token_byte_length = token_start_internal_value_ + bytes_read;
885           if (token_byte_length > remaining_bytes) {
886             SetError(Error::CBOR_INVALID_STRING16);
887             return;
888           }
889           SetToken(CBORTokenTag::STRING16,
890                    static_cast<size_t>(token_byte_length));
891           return;
892         }
893         case MajorType::ARRAY:
894         case MajorType::MAP:
895         case MajorType::TAG:
896         case MajorType::SIMPLE_VALUE:
897           SetError(Error::CBOR_UNSUPPORTED_VALUE);
898           return;
899       }
900     }
901   }
902 }
903 
SetToken(CBORTokenTag token_tag,size_t token_byte_length)904 void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) {
905   token_tag_ = token_tag;
906   token_byte_length_ = token_byte_length;
907 }
908 
SetError(Error error)909 void CBORTokenizer::SetError(Error error) {
910   token_tag_ = CBORTokenTag::ERROR_VALUE;
911   status_.error = error;
912 }
913 
914 // =============================================================================
915 // cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
916 // =============================================================================
917 
918 namespace {
// Upper bound on the nesting depth of maps and arrays accepted while
// parsing CBOR; input nested more deeply is reported as an error.
constexpr int kStackLimit = 300;
922 
923 // Below are three parsing routines for CBOR, which cover enough
924 // to roundtrip JSON messages.
925 bool ParseMap(int32_t stack_depth,
926               CBORTokenizer* tokenizer,
927               StreamingParserHandler* out);
928 bool ParseArray(int32_t stack_depth,
929                 CBORTokenizer* tokenizer,
930                 StreamingParserHandler* out);
931 bool ParseValue(int32_t stack_depth,
932                 CBORTokenizer* tokenizer,
933                 StreamingParserHandler* out);
934 
ParseUTF16String(CBORTokenizer * tokenizer,StreamingParserHandler * out)935 void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
936   std::vector<uint16_t> value;
937   span<uint8_t> rep = tokenizer->GetString16WireRep();
938   for (size_t ii = 0; ii < rep.size(); ii += 2)
939     value.push_back((rep[ii + 1] << 8) | rep[ii]);
940   out->HandleString16(span<uint16_t>(value.data(), value.size()));
941   tokenizer->Next();
942 }
943 
ParseUTF8String(CBORTokenizer * tokenizer,StreamingParserHandler * out)944 bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
945   assert(tokenizer->TokenTag() == CBORTokenTag::STRING8);
946   out->HandleString8(tokenizer->GetString8());
947   tokenizer->Next();
948   return true;
949 }
950 
ParseValue(int32_t stack_depth,CBORTokenizer * tokenizer,StreamingParserHandler * out)951 bool ParseValue(int32_t stack_depth,
952                 CBORTokenizer* tokenizer,
953                 StreamingParserHandler* out) {
954   if (stack_depth > kStackLimit) {
955     out->HandleError(
956         Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos});
957     return false;
958   }
959   // Skip past the envelope to get to what's inside.
960   if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE)
961     tokenizer->EnterEnvelope();
962   switch (tokenizer->TokenTag()) {
963     case CBORTokenTag::ERROR_VALUE:
964       out->HandleError(tokenizer->Status());
965       return false;
966     case CBORTokenTag::DONE:
967       out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE,
968                               tokenizer->Status().pos});
969       return false;
970     case CBORTokenTag::TRUE_VALUE:
971       out->HandleBool(true);
972       tokenizer->Next();
973       return true;
974     case CBORTokenTag::FALSE_VALUE:
975       out->HandleBool(false);
976       tokenizer->Next();
977       return true;
978     case CBORTokenTag::NULL_VALUE:
979       out->HandleNull();
980       tokenizer->Next();
981       return true;
982     case CBORTokenTag::INT32:
983       out->HandleInt32(tokenizer->GetInt32());
984       tokenizer->Next();
985       return true;
986     case CBORTokenTag::DOUBLE:
987       out->HandleDouble(tokenizer->GetDouble());
988       tokenizer->Next();
989       return true;
990     case CBORTokenTag::STRING8:
991       return ParseUTF8String(tokenizer, out);
992     case CBORTokenTag::STRING16:
993       ParseUTF16String(tokenizer, out);
994       return true;
995     case CBORTokenTag::BINARY: {
996       out->HandleBinary(tokenizer->GetBinary());
997       tokenizer->Next();
998       return true;
999     }
1000     case CBORTokenTag::MAP_START:
1001       return ParseMap(stack_depth + 1, tokenizer, out);
1002     case CBORTokenTag::ARRAY_START:
1003       return ParseArray(stack_depth + 1, tokenizer, out);
1004     default:
1005       out->HandleError(
1006           Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos});
1007       return false;
1008   }
1009 }
1010 
1011 // |bytes| must start with the indefinite length array byte, so basically,
1012 // ParseArray may only be called after an indefinite length array has been
1013 // detected.
ParseArray(int32_t stack_depth,CBORTokenizer * tokenizer,StreamingParserHandler * out)1014 bool ParseArray(int32_t stack_depth,
1015                 CBORTokenizer* tokenizer,
1016                 StreamingParserHandler* out) {
1017   assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START);
1018   tokenizer->Next();
1019   out->HandleArrayBegin();
1020   while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1021     if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1022       out->HandleError(
1023           Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos});
1024       return false;
1025     }
1026     if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1027       out->HandleError(tokenizer->Status());
1028       return false;
1029     }
1030     // Parse value.
1031     if (!ParseValue(stack_depth, tokenizer, out))
1032       return false;
1033   }
1034   out->HandleArrayEnd();
1035   tokenizer->Next();
1036   return true;
1037 }
1038 
1039 // |bytes| must start with the indefinite length array byte, so basically,
1040 // ParseArray may only be called after an indefinite length array has been
1041 // detected.
ParseMap(int32_t stack_depth,CBORTokenizer * tokenizer,StreamingParserHandler * out)1042 bool ParseMap(int32_t stack_depth,
1043               CBORTokenizer* tokenizer,
1044               StreamingParserHandler* out) {
1045   assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START);
1046   out->HandleMapBegin();
1047   tokenizer->Next();
1048   while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1049     if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1050       out->HandleError(
1051           Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos});
1052       return false;
1053     }
1054     if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1055       out->HandleError(tokenizer->Status());
1056       return false;
1057     }
1058     // Parse key.
1059     if (tokenizer->TokenTag() == CBORTokenTag::STRING8) {
1060       if (!ParseUTF8String(tokenizer, out))
1061         return false;
1062     } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) {
1063       ParseUTF16String(tokenizer, out);
1064     } else {
1065       out->HandleError(
1066           Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos});
1067       return false;
1068     }
1069     // Parse value.
1070     if (!ParseValue(stack_depth, tokenizer, out))
1071       return false;
1072   }
1073   out->HandleMapEnd();
1074   tokenizer->Next();
1075   return true;
1076 }
1077 }  // namespace
1078 
ParseCBOR(span<uint8_t> bytes,StreamingParserHandler * out)1079 void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out) {
1080   if (bytes.empty()) {
1081     out->HandleError(Status{Error::CBOR_NO_INPUT, 0});
1082     return;
1083   }
1084   if (bytes[0] != kInitialByteForEnvelope) {
1085     out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0});
1086     return;
1087   }
1088   CBORTokenizer tokenizer(bytes);
1089   if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1090     out->HandleError(tokenizer.Status());
1091     return;
1092   }
1093   // We checked for the envelope start byte above, so the tokenizer
1094   // must agree here, since it's not an error.
1095   assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE);
1096   tokenizer.EnterEnvelope();
1097   if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) {
1098     out->HandleError(
1099         Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos});
1100     return;
1101   }
1102   if (!ParseMap(/*stack_depth=*/1, &tokenizer, out))
1103     return;
1104   if (tokenizer.TokenTag() == CBORTokenTag::DONE)
1105     return;
1106   if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1107     out->HandleError(tokenizer.Status());
1108     return;
1109   }
1110   out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos});
1111 }
1112 
1113 // =============================================================================
1114 // cbor::AppendString8EntryToMap - for limited in-place editing of messages
1115 // =============================================================================
1116 
1117 template <typename C>
AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key,span<uint8_t> string8_value,C * cbor)1118 Status AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key,
1119                                        span<uint8_t> string8_value,
1120                                        C* cbor) {
1121   // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since
1122   // it could be a char (signed!). Instead, use bytes.
1123   span<uint8_t> bytes(reinterpret_cast<const uint8_t*>(cbor->data()),
1124                       cbor->size());
1125   CBORTokenizer tokenizer(bytes);
1126   if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE)
1127     return tokenizer.Status();
1128   if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE)
1129     return Status(Error::CBOR_INVALID_ENVELOPE, 0);
1130   size_t envelope_size = tokenizer.GetEnvelopeContents().size();
1131   size_t old_size = cbor->size();
1132   if (old_size != envelope_size + kEncodedEnvelopeHeaderSize)
1133     return Status(Error::CBOR_INVALID_ENVELOPE, 0);
1134   if (envelope_size == 0 ||
1135       (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart()))
1136     return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize);
1137   if (bytes[bytes.size() - 1] != EncodeStop())
1138     return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1);
1139   cbor->pop_back();
1140   EncodeString8(string8_key, cbor);
1141   EncodeString8(string8_value, cbor);
1142   cbor->push_back(EncodeStop());
1143   size_t new_envelope_size = envelope_size + (cbor->size() - old_size);
1144   if (new_envelope_size > std::numeric_limits<uint32_t>::max())
1145     return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0);
1146   size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t);
1147   uint8_t* out = reinterpret_cast<uint8_t*>(&cbor->at(size_pos));
1148   *(out++) = (new_envelope_size >> 24) & 0xff;
1149   *(out++) = (new_envelope_size >> 16) & 0xff;
1150   *(out++) = (new_envelope_size >> 8) & 0xff;
1151   *(out) = new_envelope_size & 0xff;
1152   return Status();
1153 }
AppendString8EntryToCBORMap(span<uint8_t> string8_key,span<uint8_t> string8_value,std::vector<uint8_t> * cbor)1154 Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1155                                    span<uint8_t> string8_value,
1156                                    std::vector<uint8_t>* cbor) {
1157   return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1158 }
AppendString8EntryToCBORMap(span<uint8_t> string8_key,span<uint8_t> string8_value,std::string * cbor)1159 Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1160                                    span<uint8_t> string8_value,
1161                                    std::string* cbor) {
1162   return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1163 }
1164 }  // namespace cbor
1165 
1166 namespace json {
1167 
1168 // =============================================================================
1169 // json::NewJSONEncoder - for encoding streaming parser events as JSON
1170 // =============================================================================
1171 
1172 namespace {
// Appends |value| to |out| as exactly four lowercase hexadecimal digits,
// starting with the most significant one.
template <typename C>
void PrintHex(uint16_t value, C* out) {
  static constexpr char kHexDigits[] = "0123456789abcdef";
  for (int shift = 12; shift >= 0; shift -= 4)
    out->push_back(kHexDigits[(value >> shift) & 0xf]);
}
1181 
// The JSON writer below keeps a stack of State instances, one per open
// container; that is just enough bookkeeping to emit the appropriate
// delimiters and brackets. Container identifies what kind of container a
// State instance stands for.
enum class Container {
  NONE,  // The top level, i.e. the initial state outside of any container.
  MAP,   // Inside a JSON object.
  ARRAY  // Inside a JSON array.
};
1193 class State {
1194  public:
State(Container container)1195   explicit State(Container container) : container_(container) {}
StartElement(std::vector<uint8_t> * out)1196   void StartElement(std::vector<uint8_t>* out) { StartElementTmpl(out); }
StartElement(std::string * out)1197   void StartElement(std::string* out) { StartElementTmpl(out); }
container() const1198   Container container() const { return container_; }
1199 
1200  private:
1201   template <typename C>
StartElementTmpl(C * out)1202   void StartElementTmpl(C* out) {
1203     assert(container_ != Container::NONE || size_ == 0);
1204     if (size_ != 0) {
1205       char delim = (!(size_ & 1) || container_ == Container::ARRAY) ? ',' : ':';
1206       out->push_back(delim);
1207     }
1208     ++size_;
1209   }
1210 
1211   Container container_ = Container::NONE;
1212   int size_ = 0;
1213 };
1214 
// The 64 characters of the base64 alphabet, indexed by their 6 bit value.
constexpr char kBase64Table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1218 
1219 template <typename C>
Base64Encode(const span<uint8_t> & in,C * out)1220 void Base64Encode(const span<uint8_t>& in, C* out) {
1221   // The following three cases are based on the tables in the example
1222   // section in https://en.wikipedia.org/wiki/Base64. We process three
1223   // input bytes at a time, emitting 4 output bytes at a time.
1224   size_t ii = 0;
1225 
1226   // While possible, process three input bytes.
1227   for (; ii + 3 <= in.size(); ii += 3) {
1228     uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2];
1229     out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1230     out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1231     out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1232     out->push_back(kBase64Table[twentyfour_bits & 0x3f]);
1233   }
1234   if (ii + 2 <= in.size()) {  // Process two input bytes.
1235     uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8);
1236     out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1237     out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1238     out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1239     out->push_back('=');  // Emit padding.
1240     return;
1241   }
1242   if (ii + 1 <= in.size()) {  // Process a single input byte.
1243     uint32_t twentyfour_bits = (in[ii] << 16);
1244     out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1245     out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1246     out->push_back('=');  // Emit padding.
1247     out->push_back('=');  // Emit padding.
1248   }
1249 }
1250 
1251 // Implements a handler for JSON parser events to emit a JSON string.
1252 template <typename C>
1253 class JSONEncoder : public StreamingParserHandler {
1254  public:
JSONEncoder(const Platform * platform,C * out,Status * status)1255   JSONEncoder(const Platform* platform, C* out, Status* status)
1256       : platform_(platform), out_(out), status_(status) {
1257     *status_ = Status();
1258     state_.emplace(Container::NONE);
1259   }
1260 
HandleMapBegin()1261   void HandleMapBegin() override {
1262     if (!status_->ok())
1263       return;
1264     assert(!state_.empty());
1265     state_.top().StartElement(out_);
1266     state_.emplace(Container::MAP);
1267     Emit('{');
1268   }
1269 
HandleMapEnd()1270   void HandleMapEnd() override {
1271     if (!status_->ok())
1272       return;
1273     assert(state_.size() >= 2 && state_.top().container() == Container::MAP);
1274     state_.pop();
1275     Emit('}');
1276   }
1277 
HandleArrayBegin()1278   void HandleArrayBegin() override {
1279     if (!status_->ok())
1280       return;
1281     state_.top().StartElement(out_);
1282     state_.emplace(Container::ARRAY);
1283     Emit('[');
1284   }
1285 
HandleArrayEnd()1286   void HandleArrayEnd() override {
1287     if (!status_->ok())
1288       return;
1289     assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY);
1290     state_.pop();
1291     Emit(']');
1292   }
1293 
HandleString16(span<uint16_t> chars)1294   void HandleString16(span<uint16_t> chars) override {
1295     if (!status_->ok())
1296       return;
1297     state_.top().StartElement(out_);
1298     Emit('"');
1299     for (const uint16_t ch : chars) {
1300       if (ch == '"') {
1301         Emit("\\\"");
1302       } else if (ch == '\\') {
1303         Emit("\\\\");
1304       } else if (ch == '\b') {
1305         Emit("\\b");
1306       } else if (ch == '\f') {
1307         Emit("\\f");
1308       } else if (ch == '\n') {
1309         Emit("\\n");
1310       } else if (ch == '\r') {
1311         Emit("\\r");
1312       } else if (ch == '\t') {
1313         Emit("\\t");
1314       } else if (ch >= 32 && ch <= 126) {
1315         Emit(ch);
1316       } else {
1317         Emit("\\u");
1318         PrintHex(ch, out_);
1319       }
1320     }
1321     Emit('"');
1322   }
1323 
HandleString8(span<uint8_t> chars)1324   void HandleString8(span<uint8_t> chars) override {
1325     if (!status_->ok())
1326       return;
1327     state_.top().StartElement(out_);
1328     Emit('"');
1329     for (size_t ii = 0; ii < chars.size(); ++ii) {
1330       uint8_t c = chars[ii];
1331       if (c == '"') {
1332         Emit("\\\"");
1333       } else if (c == '\\') {
1334         Emit("\\\\");
1335       } else if (c == '\b') {
1336         Emit("\\b");
1337       } else if (c == '\f') {
1338         Emit("\\f");
1339       } else if (c == '\n') {
1340         Emit("\\n");
1341       } else if (c == '\r') {
1342         Emit("\\r");
1343       } else if (c == '\t') {
1344         Emit("\\t");
1345       } else if (c >= 32 && c <= 126) {
1346         Emit(c);
1347       } else if (c < 32) {
1348         Emit("\\u");
1349         PrintHex(static_cast<uint16_t>(c), out_);
1350       } else {
1351         // Inspect the leading byte to figure out how long the utf8
1352         // byte sequence is; while doing this initialize |codepoint|
1353         // with the first few bits.
1354         // See table in: https://en.wikipedia.org/wiki/UTF-8
1355         // byte one is 110x xxxx -> 2 byte utf8 sequence
1356         // byte one is 1110 xxxx -> 3 byte utf8 sequence
1357         // byte one is 1111 0xxx -> 4 byte utf8 sequence
1358         uint32_t codepoint;
1359         int num_bytes_left;
1360         if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
1361           num_bytes_left = 1;
1362           codepoint = c & 0x1f;
1363         } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
1364           num_bytes_left = 2;
1365           codepoint = c & 0x0f;
1366         } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
1367           codepoint = c & 0x07;
1368           num_bytes_left = 3;
1369         } else {
1370           continue;  // invalid leading byte
1371         }
1372 
1373         // If we have enough bytes in our input, decode the remaining ones
1374         // belonging to this Unicode character into |codepoint|.
1375         if (ii + num_bytes_left > chars.size())
1376           continue;
1377         while (num_bytes_left > 0) {
1378           c = chars[++ii];
1379           --num_bytes_left;
1380           // Check the next byte is a continuation byte, that is 10xx xxxx.
1381           if ((c & 0xc0) != 0x80)
1382             continue;
1383           codepoint = (codepoint << 6) | (c & 0x3f);
1384         }
1385 
1386         // Disallow overlong encodings for ascii characters, as these
1387         // would include " and other characters significant to JSON
1388         // string termination / control.
1389         if (codepoint < 0x7f)
1390           continue;
1391         // Invalid in UTF8, and can't be represented in UTF16 anyway.
1392         if (codepoint > 0x10ffff)
1393           continue;
1394 
1395         // So, now we transcode to UTF16,
1396         // using the math described at https://en.wikipedia.org/wiki/UTF-16,
1397         // for either one or two 16 bit characters.
1398         if (codepoint < 0xffff) {
1399           Emit("\\u");
1400           PrintHex(static_cast<uint16_t>(codepoint), out_);
1401           continue;
1402         }
1403         codepoint -= 0x10000;
1404         // high surrogate
1405         Emit("\\u");
1406         PrintHex(static_cast<uint16_t>((codepoint >> 10) + 0xd800), out_);
1407         // low surrogate
1408         Emit("\\u");
1409         PrintHex(static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00), out_);
1410       }
1411     }
1412     Emit('"');
1413   }
1414 
HandleBinary(span<uint8_t> bytes)1415   void HandleBinary(span<uint8_t> bytes) override {
1416     if (!status_->ok())
1417       return;
1418     state_.top().StartElement(out_);
1419     Emit('"');
1420     Base64Encode(bytes, out_);
1421     Emit('"');
1422   }
1423 
HandleDouble(double value)1424   void HandleDouble(double value) override {
1425     if (!status_->ok())
1426       return;
1427     state_.top().StartElement(out_);
1428     // JSON cannot represent NaN or Infinity. So, for compatibility,
1429     // we behave like the JSON object in web browsers: emit 'null'.
1430     if (!std::isfinite(value)) {
1431       Emit("null");
1432       return;
1433     }
1434     std::unique_ptr<char[]> str_value = platform_->DToStr(value);
1435 
1436     // DToStr may fail to emit a 0 before the decimal dot. E.g. this is
1437     // the case in base::NumberToString in Chromium (which is based on
1438     // dmg_fp). So, much like
1439     // https://cs.chromium.org/chromium/src/base/json/json_writer.cc
1440     // we probe for this and emit the leading 0 anyway if necessary.
1441     const char* chars = str_value.get();
1442     if (chars[0] == '.') {
1443       Emit('0');
1444     } else if (chars[0] == '-' && chars[1] == '.') {
1445       Emit("-0");
1446       ++chars;
1447     }
1448     Emit(chars);
1449   }
1450 
HandleInt32(int32_t value)1451   void HandleInt32(int32_t value) override {
1452     if (!status_->ok())
1453       return;
1454     state_.top().StartElement(out_);
1455     Emit(std::to_string(value));
1456   }
1457 
HandleBool(bool value)1458   void HandleBool(bool value) override {
1459     if (!status_->ok())
1460       return;
1461     state_.top().StartElement(out_);
1462     Emit(value ? "true" : "false");
1463   }
1464 
HandleNull()1465   void HandleNull() override {
1466     if (!status_->ok())
1467       return;
1468     state_.top().StartElement(out_);
1469     Emit("null");
1470   }
1471 
HandleError(Status error)1472   void HandleError(Status error) override {
1473     assert(!error.ok());
1474     *status_ = error;
1475     out_->clear();
1476   }
1477 
1478  private:
Emit(char c)1479   void Emit(char c) { out_->push_back(c); }
Emit(const char * str)1480   void Emit(const char* str) {
1481     out_->insert(out_->end(), str, str + strlen(str));
1482   }
Emit(const std::string & str)1483   void Emit(const std::string& str) {
1484     out_->insert(out_->end(), str.begin(), str.end());
1485   }
1486 
1487   const Platform* platform_;
1488   C* out_;
1489   Status* status_;
1490   std::stack<State> state_;
1491 };
1492 }  // namespace
1493 
NewJSONEncoder(const Platform * platform,std::vector<uint8_t> * out,Status * status)1494 std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
1495     const Platform* platform,
1496     std::vector<uint8_t>* out,
1497     Status* status) {
1498   return std::unique_ptr<StreamingParserHandler>(
1499       new JSONEncoder<std::vector<uint8_t>>(platform, out, status));
1500 }
NewJSONEncoder(const Platform * platform,std::string * out,Status * status)1501 std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
1502                                                        std::string* out,
1503                                                        Status* status) {
1504   return std::unique_ptr<StreamingParserHandler>(
1505       new JSONEncoder<std::string>(platform, out, status));
1506 }
1507 
1508 // =============================================================================
1509 // json::ParseJSON - for receiving streaming parser events for JSON.
1510 // =============================================================================
1511 
1512 namespace {
// Deepest nesting level JsonParser::ParseValue accepts; a larger depth is
// reported as Error::JSON_PARSER_STACK_LIMIT_EXCEEDED.
constexpr int kStackLimit = 300;
1514 
// Kinds of lexical tokens that JsonParser::ParseToken reports.
enum Token {
  ObjectBegin,          // '{'
  ObjectEnd,            // '}'
  ArrayBegin,           // '['
  ArrayEnd,             // ']'
  StringLiteral,        // Double-quoted string, see ParseStringToken.
  Number,               // Numeric literal, see ParseNumberToken.
  BoolTrue,             // The literal "true".
  BoolFalse,            // The literal "false".
  NullToken,            // The literal "null".
  ListSeparator,        // ','
  ObjectPairSeparator,  // ':'
  InvalidToken,         // Input that does not form any of the tokens above.
  NoInput,              // Nothing but whitespace / comments was left.
};
1530 
// Spellings of the JSON literals; JsonParser::ParseToken hands them to
// ParseConstToken for matching.
constexpr const char* kNullString = "null";
constexpr const char* kTrueString = "true";
constexpr const char* kFalseString = "false";
1534 
1535 template <typename Char>
1536 class JsonParser {
1537  public:
JsonParser(const Platform * platform,StreamingParserHandler * handler)1538   JsonParser(const Platform* platform, StreamingParserHandler* handler)
1539       : platform_(platform), handler_(handler) {}
1540 
Parse(const Char * start,size_t length)1541   void Parse(const Char* start, size_t length) {
1542     start_pos_ = start;
1543     const Char* end = start + length;
1544     const Char* tokenEnd = nullptr;
1545     ParseValue(start, end, &tokenEnd, 0);
1546     if (error_)
1547       return;
1548     if (tokenEnd != end) {
1549       HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd);
1550     }
1551   }
1552 
1553  private:
CharsToDouble(const uint16_t * chars,size_t length,double * result)1554   bool CharsToDouble(const uint16_t* chars, size_t length, double* result) {
1555     std::string buffer;
1556     buffer.reserve(length + 1);
1557     for (size_t ii = 0; ii < length; ++ii) {
1558       bool is_ascii = !(chars[ii] & ~0x7F);
1559       if (!is_ascii)
1560         return false;
1561       buffer.push_back(static_cast<char>(chars[ii]));
1562     }
1563     return platform_->StrToD(buffer.c_str(), result);
1564   }
1565 
CharsToDouble(const uint8_t * chars,size_t length,double * result)1566   bool CharsToDouble(const uint8_t* chars, size_t length, double* result) {
1567     std::string buffer(reinterpret_cast<const char*>(chars), length);
1568     return platform_->StrToD(buffer.c_str(), result);
1569   }
1570 
// Checks whether the input at |start| begins with the literal |token|.
//
// |token| is \0 terminated; it is one of the literal spellings defined at
// the top of this section. On a full match, |*token_end| is set to the
// position right after the literal and true is returned. On a mismatch, or
// if the input ends before the literal is complete, false is returned and
// |*token_end| is left untouched.
//
// Every character is compared before either cursor advances, so a mismatch
// on the final character of |token| (e.g. "nulx" against "null") is
// rejected rather than being mistaken for a complete match.
static bool ParseConstToken(const Char* start,
                            const Char* end,
                            const Char** token_end,
                            const char* token) {
  for (; *token != '\0'; ++token, ++start) {
    if (start >= end || *start != *token)
      return false;
  }
  *token_end = start;
  return true;
}
1583 
// Consumes a run of decimal digits beginning at |start|.
//
// Returns false if there is no digit at |start|, or if the run has more
// than one digit, starts with '0', and |allow_leading_zeros| is false.
// On success |*token_end| is set to the first position after the digits.
static bool ReadInt(const Char* start,
                    const Char* end,
                    const Char** token_end,
                    bool allow_leading_zeros) {
  if (start == end)
    return false;
  const bool starts_with_zero = (*start == '0');
  const Char* cursor = start;
  while (cursor < end && *cursor >= '0' && *cursor <= '9')
    ++cursor;
  if (cursor == start)
    return false;
  const bool multiple_digits = (cursor - start) > 1;
  if (starts_with_zero && multiple_digits && !allow_leading_zeros)
    return false;
  *token_end = cursor;
  return true;
}
1603 
ParseNumberToken(const Char * start,const Char * end,const Char ** token_end)1604   static bool ParseNumberToken(const Char* start,
1605                                const Char* end,
1606                                const Char** token_end) {
1607     // We just grab the number here. We validate the size in DecodeNumber.
1608     // According to RFC4627, a valid number is: [minus] int [frac] [exp]
1609     if (start == end)
1610       return false;
1611     Char c = *start;
1612     if ('-' == c)
1613       ++start;
1614 
1615     if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false))
1616       return false;
1617     if (start == end) {
1618       *token_end = start;
1619       return true;
1620     }
1621 
1622     // Optional fraction part
1623     c = *start;
1624     if ('.' == c) {
1625       ++start;
1626       if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1627         return false;
1628       if (start == end) {
1629         *token_end = start;
1630         return true;
1631       }
1632       c = *start;
1633     }
1634 
1635     // Optional exponent part
1636     if ('e' == c || 'E' == c) {
1637       ++start;
1638       if (start == end)
1639         return false;
1640       c = *start;
1641       if ('-' == c || '+' == c) {
1642         ++start;
1643         if (start == end)
1644           return false;
1645       }
1646       if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1647         return false;
1648     }
1649 
1650     *token_end = start;
1651     return true;
1652   }
1653 
// Requires exactly |digits| hexadecimal digits (either case) at |start|.
// On success |*token_end| is set to the position after the last digit;
// otherwise false is returned and |*token_end| is left untouched.
static bool ReadHexDigits(const Char* start,
                          const Char* end,
                          const Char** token_end,
                          int digits) {
  if (end - start < digits)
    return false;
  const Char* cursor = start;
  for (int remaining = digits; remaining > 0; --remaining, ++cursor) {
    const Char c = *cursor;
    const bool is_decimal = c >= '0' && c <= '9';
    const bool is_lower_hex = c >= 'a' && c <= 'f';
    const bool is_upper_hex = c >= 'A' && c <= 'F';
    if (!is_decimal && !is_lower_hex && !is_upper_hex)
      return false;
  }
  *token_end = cursor;
  return true;
}
1669 
ParseStringToken(const Char * start,const Char * end,const Char ** token_end)1670   static bool ParseStringToken(const Char* start,
1671                                const Char* end,
1672                                const Char** token_end) {
1673     while (start < end) {
1674       Char c = *start++;
1675       if ('\\' == c) {
1676         if (start == end)
1677           return false;
1678         c = *start++;
1679         // Make sure the escaped char is valid.
1680         switch (c) {
1681           case 'x':
1682             if (!ReadHexDigits(start, end, &start, 2))
1683               return false;
1684             break;
1685           case 'u':
1686             if (!ReadHexDigits(start, end, &start, 4))
1687               return false;
1688             break;
1689           case '\\':
1690           case '/':
1691           case 'b':
1692           case 'f':
1693           case 'n':
1694           case 'r':
1695           case 't':
1696           case 'v':
1697           case '"':
1698             break;
1699           default:
1700             return false;
1701         }
1702       } else if ('"' == c) {
1703         *token_end = start;
1704         return true;
1705       }
1706     }
1707     return false;
1708   }
1709 
// Skips one comment beginning at |start|, either "// ..." or "/* ... */".
//
// Returns true and sets |*comment_end| to the position after the comment.
// A line comment ends after the first '\n' or '\r', or at end-of-input.
// A block comment that is not closed before end-of-input yields false, as
// does input that does not start a comment at all.
static bool SkipComment(const Char* start,
                        const Char* end,
                        const Char** comment_end) {
  if (start == end || *start != '/' || start + 1 >= end)
    return false;

  const Char kind = start[1];
  const Char* cursor = start + 2;

  if (kind == '/') {
    // Single line comment, read to newline.
    while (cursor < end) {
      const Char c = *cursor++;
      if (c == '\n' || c == '\r') {
        *comment_end = cursor;
        return true;
      }
    }
    // Comment reaches end-of-input, which is fine.
    *comment_end = end;
    return true;
  }

  if (kind != '*')
    return false;

  // Block comment, read until end marker. |previous| starts out as '\0' so
  // that the '*' of the opening "/*" cannot double as part of "*/".
  Char previous = '\0';
  while (cursor < end) {
    const Char c = *cursor++;
    if (previous == '*' && c == '/') {
      *comment_end = cursor;
      return true;
    }
    previous = c;
  }
  // Block comment must close before end-of-input.
  return false;
}
1748 
// Returns true if |c| is one of the characters skipped as whitespace:
// space, newline, vertical tab (\v), form feed (\f), carriage return or
// horizontal tab.
static bool IsSpaceOrNewLine(Char c) {
  switch (c) {
    case ' ':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
    case '\t':
      return true;
    default:
      return false;
  }
}
1754 
SkipWhitespaceAndComments(const Char * start,const Char * end,const Char ** whitespace_end)1755   static void SkipWhitespaceAndComments(const Char* start,
1756                                         const Char* end,
1757                                         const Char** whitespace_end) {
1758     while (start < end) {
1759       if (IsSpaceOrNewLine(*start)) {
1760         ++start;
1761       } else if (*start == '/') {
1762         const Char* comment_end = nullptr;
1763         if (!SkipComment(start, end, &comment_end))
1764           break;
1765         start = comment_end;
1766       } else {
1767         break;
1768       }
1769     }
1770     *whitespace_end = start;
1771   }
1772 
ParseToken(const Char * start,const Char * end,const Char ** tokenStart,const Char ** token_end)1773   static Token ParseToken(const Char* start,
1774                           const Char* end,
1775                           const Char** tokenStart,
1776                           const Char** token_end) {
1777     SkipWhitespaceAndComments(start, end, tokenStart);
1778     start = *tokenStart;
1779 
1780     if (start == end)
1781       return NoInput;
1782 
1783     switch (*start) {
1784       case 'n':
1785         if (ParseConstToken(start, end, token_end, kNullString))
1786           return NullToken;
1787         break;
1788       case 't':
1789         if (ParseConstToken(start, end, token_end, kTrueString))
1790           return BoolTrue;
1791         break;
1792       case 'f':
1793         if (ParseConstToken(start, end, token_end, kFalseString))
1794           return BoolFalse;
1795         break;
1796       case '[':
1797         *token_end = start + 1;
1798         return ArrayBegin;
1799       case ']':
1800         *token_end = start + 1;
1801         return ArrayEnd;
1802       case ',':
1803         *token_end = start + 1;
1804         return ListSeparator;
1805       case '{':
1806         *token_end = start + 1;
1807         return ObjectBegin;
1808       case '}':
1809         *token_end = start + 1;
1810         return ObjectEnd;
1811       case ':':
1812         *token_end = start + 1;
1813         return ObjectPairSeparator;
1814       case '0':
1815       case '1':
1816       case '2':
1817       case '3':
1818       case '4':
1819       case '5':
1820       case '6':
1821       case '7':
1822       case '8':
1823       case '9':
1824       case '-':
1825         if (ParseNumberToken(start, end, token_end))
1826           return Number;
1827         break;
1828       case '"':
1829         if (ParseStringToken(start + 1, end, token_end))
1830           return StringLiteral;
1831         break;
1832     }
1833     return InvalidToken;
1834   }
1835 
// Returns the numeric value (0-15) of the hexadecimal digit |c|, accepting
// both upper and lower case. |c| must be a hex digit; anything else trips
// the assert, and yields 0 when asserts are compiled out.
static int HexToInt(Char c) {
  const bool is_decimal = c >= '0' && c <= '9';
  if (is_decimal)
    return c - '0';
  const bool is_upper_hex = c >= 'A' && c <= 'F';
  if (is_upper_hex)
    return 10 + (c - 'A');
  const bool is_lower_hex = c >= 'a' && c <= 'f';
  if (is_lower_hex)
    return 10 + (c - 'a');
  assert(false);  // Unreachable.
  return 0;
}
1846 
DecodeString(const Char * start,const Char * end,std::vector<uint16_t> * output)1847   static bool DecodeString(const Char* start,
1848                            const Char* end,
1849                            std::vector<uint16_t>* output) {
1850     if (start == end)
1851       return true;
1852     if (start > end)
1853       return false;
1854     output->reserve(end - start);
1855     while (start < end) {
1856       uint16_t c = *start++;
1857       // If the |Char| we're dealing with is really a byte, then
1858       // we have utf8 here, and we need to check for multibyte characters
1859       // and transcode them to utf16 (either one or two utf16 chars).
1860       if (sizeof(Char) == sizeof(uint8_t) && c > 0x7f) {
1861         // Inspect the leading byte to figure out how long the utf8
1862         // byte sequence is; while doing this initialize |codepoint|
1863         // with the first few bits.
1864         // See table in: https://en.wikipedia.org/wiki/UTF-8
1865         // byte one is 110x xxxx -> 2 byte utf8 sequence
1866         // byte one is 1110 xxxx -> 3 byte utf8 sequence
1867         // byte one is 1111 0xxx -> 4 byte utf8 sequence
1868         uint32_t codepoint;
1869         int num_bytes_left;
1870         if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
1871           num_bytes_left = 1;
1872           codepoint = c & 0x1f;
1873         } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
1874           num_bytes_left = 2;
1875           codepoint = c & 0x0f;
1876         } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
1877           codepoint = c & 0x07;
1878           num_bytes_left = 3;
1879         } else {
1880           return false;  // invalid leading byte
1881         }
1882 
1883         // If we have enough bytes in our inpput, decode the remaining ones
1884         // belonging to this Unicode character into |codepoint|.
1885         if (start + num_bytes_left > end)
1886           return false;
1887         while (num_bytes_left > 0) {
1888           c = *start++;
1889           --num_bytes_left;
1890           // Check the next byte is a continuation byte, that is 10xx xxxx.
1891           if ((c & 0xc0) != 0x80)
1892             return false;
1893           codepoint = (codepoint << 6) | (c & 0x3f);
1894         }
1895 
1896         // Disallow overlong encodings for ascii characters, as these
1897         // would include " and other characters significant to JSON
1898         // string termination / control.
1899         if (codepoint <= 0x7f)
1900           return false;
1901         // Invalid in UTF8, and can't be represented in UTF16 anyway.
1902         if (codepoint > 0x10ffff)
1903           return false;
1904 
1905         // So, now we transcode to UTF16,
1906         // using the math described at https://en.wikipedia.org/wiki/UTF-16,
1907         // for either one or two 16 bit characters.
1908         if (codepoint < 0xffff) {
1909           output->push_back(codepoint);
1910           continue;
1911         }
1912         codepoint -= 0x10000;
1913         output->push_back((codepoint >> 10) + 0xd800);    // high surrogate
1914         output->push_back((codepoint & 0x3ff) + 0xdc00);  // low surrogate
1915         continue;
1916       }
1917       if ('\\' != c) {
1918         output->push_back(c);
1919         continue;
1920       }
1921       if (start == end)
1922         return false;
1923       c = *start++;
1924 
1925       if (c == 'x') {
1926         // \x is not supported.
1927         return false;
1928       }
1929 
1930       switch (c) {
1931         case '"':
1932         case '/':
1933         case '\\':
1934           break;
1935         case 'b':
1936           c = '\b';
1937           break;
1938         case 'f':
1939           c = '\f';
1940           break;
1941         case 'n':
1942           c = '\n';
1943           break;
1944         case 'r':
1945           c = '\r';
1946           break;
1947         case 't':
1948           c = '\t';
1949           break;
1950         case 'v':
1951           c = '\v';
1952           break;
1953         case 'u':
1954           c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) +
1955               (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3));
1956           start += 4;
1957           break;
1958         default:
1959           return false;
1960       }
1961       output->push_back(c);
1962     }
1963     return true;
1964   }
1965 
ParseValue(const Char * start,const Char * end,const Char ** value_token_end,int depth)1966   void ParseValue(const Char* start,
1967                   const Char* end,
1968                   const Char** value_token_end,
1969                   int depth) {
1970     if (depth > kStackLimit) {
1971       HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start);
1972       return;
1973     }
1974     const Char* token_start = nullptr;
1975     const Char* token_end = nullptr;
1976     Token token = ParseToken(start, end, &token_start, &token_end);
1977     switch (token) {
1978       case NoInput:
1979         HandleError(Error::JSON_PARSER_NO_INPUT, token_start);
1980         return;
1981       case InvalidToken:
1982         HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start);
1983         return;
1984       case NullToken:
1985         handler_->HandleNull();
1986         break;
1987       case BoolTrue:
1988         handler_->HandleBool(true);
1989         break;
1990       case BoolFalse:
1991         handler_->HandleBool(false);
1992         break;
1993       case Number: {
1994         double value;
1995         if (!CharsToDouble(token_start, token_end - token_start, &value)) {
1996           HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start);
1997           return;
1998         }
1999         if (value >= std::numeric_limits<int32_t>::min() &&
2000             value <= std::numeric_limits<int32_t>::max() &&
2001             static_cast<int32_t>(value) == value)
2002           handler_->HandleInt32(static_cast<int32_t>(value));
2003         else
2004           handler_->HandleDouble(value);
2005         break;
2006       }
2007       case StringLiteral: {
2008         std::vector<uint16_t> value;
2009         bool ok = DecodeString(token_start + 1, token_end - 1, &value);
2010         if (!ok) {
2011           HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2012           return;
2013         }
2014         handler_->HandleString16(span<uint16_t>(value.data(), value.size()));
2015         break;
2016       }
2017       case ArrayBegin: {
2018         handler_->HandleArrayBegin();
2019         start = token_end;
2020         token = ParseToken(start, end, &token_start, &token_end);
2021         while (token != ArrayEnd) {
2022           ParseValue(start, end, &token_end, depth + 1);
2023           if (error_)
2024             return;
2025 
2026           // After a list value, we expect a comma or the end of the list.
2027           start = token_end;
2028           token = ParseToken(start, end, &token_start, &token_end);
2029           if (token == ListSeparator) {
2030             start = token_end;
2031             token = ParseToken(start, end, &token_start, &token_end);
2032             if (token == ArrayEnd) {
2033               HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start);
2034               return;
2035             }
2036           } else if (token != ArrayEnd) {
2037             // Unexpected value after list value. Bail out.
2038             HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED,
2039                         token_start);
2040             return;
2041           }
2042         }
2043         handler_->HandleArrayEnd();
2044         break;
2045       }
2046       case ObjectBegin: {
2047         handler_->HandleMapBegin();
2048         start = token_end;
2049         token = ParseToken(start, end, &token_start, &token_end);
2050         while (token != ObjectEnd) {
2051           if (token != StringLiteral) {
2052             HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED,
2053                         token_start);
2054             return;
2055           }
2056           std::vector<uint16_t> key;
2057           if (!DecodeString(token_start + 1, token_end - 1, &key)) {
2058             HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2059             return;
2060           }
2061           handler_->HandleString16(span<uint16_t>(key.data(), key.size()));
2062           start = token_end;
2063 
2064           token = ParseToken(start, end, &token_start, &token_end);
2065           if (token != ObjectPairSeparator) {
2066             HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start);
2067             return;
2068           }
2069           start = token_end;
2070 
2071           ParseValue(start, end, &token_end, depth + 1);
2072           if (error_)
2073             return;
2074           start = token_end;
2075 
2076           // After a key/value pair, we expect a comma or the end of the
2077           // object.
2078           token = ParseToken(start, end, &token_start, &token_end);
2079           if (token == ListSeparator) {
2080             start = token_end;
2081             token = ParseToken(start, end, &token_start, &token_end);
2082             if (token == ObjectEnd) {
2083               HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start);
2084               return;
2085             }
2086           } else if (token != ObjectEnd) {
2087             // Unexpected value after last object value. Bail out.
2088             HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED,
2089                         token_start);
2090             return;
2091           }
2092         }
2093         handler_->HandleMapEnd();
2094         break;
2095       }
2096 
2097       default:
2098         // We got a token that's not a value.
2099         HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start);
2100         return;
2101     }
2102 
2103     SkipWhitespaceAndComments(token_end, end, value_token_end);
2104   }
2105 
HandleError(Error error,const Char * pos)2106   void HandleError(Error error, const Char* pos) {
2107     assert(error != Error::OK);
2108     if (!error_) {
2109       handler_->HandleError(
2110           Status{error, static_cast<size_t>(pos - start_pos_)});
2111       error_ = true;
2112     }
2113   }
2114 
2115   const Char* start_pos_ = nullptr;
2116   bool error_ = false;
2117   const Platform* platform_;
2118   StreamingParserHandler* handler_;
2119 };
2120 }  // namespace
2121 
ParseJSON(const Platform & platform,span<uint8_t> chars,StreamingParserHandler * handler)2122 void ParseJSON(const Platform& platform,
2123                span<uint8_t> chars,
2124                StreamingParserHandler* handler) {
2125   JsonParser<uint8_t> parser(&platform, handler);
2126   parser.Parse(chars.data(), chars.size());
2127 }
2128 
ParseJSON(const Platform & platform,span<uint16_t> chars,StreamingParserHandler * handler)2129 void ParseJSON(const Platform& platform,
2130                span<uint16_t> chars,
2131                StreamingParserHandler* handler) {
2132   JsonParser<uint16_t> parser(&platform, handler);
2133   parser.Parse(chars.data(), chars.size());
2134 }
2135 
2136 // =============================================================================
2137 // json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
2138 // =============================================================================
2139 template <typename C>
ConvertCBORToJSONTmpl(const Platform & platform,span<uint8_t> cbor,C * json)2140 Status ConvertCBORToJSONTmpl(const Platform& platform,
2141                              span<uint8_t> cbor,
2142                              C* json) {
2143   Status status;
2144   std::unique_ptr<StreamingParserHandler> json_writer =
2145       NewJSONEncoder(&platform, json, &status);
2146   cbor::ParseCBOR(cbor, json_writer.get());
2147   return status;
2148 }
2149 
ConvertCBORToJSON(const Platform & platform,span<uint8_t> cbor,std::vector<uint8_t> * json)2150 Status ConvertCBORToJSON(const Platform& platform,
2151                          span<uint8_t> cbor,
2152                          std::vector<uint8_t>* json) {
2153   return ConvertCBORToJSONTmpl(platform, cbor, json);
2154 }
ConvertCBORToJSON(const Platform & platform,span<uint8_t> cbor,std::string * json)2155 Status ConvertCBORToJSON(const Platform& platform,
2156                          span<uint8_t> cbor,
2157                          std::string* json) {
2158   return ConvertCBORToJSONTmpl(platform, cbor, json);
2159 }
2160 
2161 template <typename T, typename C>
ConvertJSONToCBORTmpl(const Platform & platform,span<T> json,C * cbor)2162 Status ConvertJSONToCBORTmpl(const Platform& platform, span<T> json, C* cbor) {
2163   Status status;
2164   std::unique_ptr<StreamingParserHandler> encoder =
2165       cbor::NewCBOREncoder(cbor, &status);
2166   ParseJSON(platform, json, encoder.get());
2167   return status;
2168 }
ConvertJSONToCBOR(const Platform & platform,span<uint8_t> json,std::string * cbor)2169 Status ConvertJSONToCBOR(const Platform& platform,
2170                          span<uint8_t> json,
2171                          std::string* cbor) {
2172   return ConvertJSONToCBORTmpl(platform, json, cbor);
2173 }
ConvertJSONToCBOR(const Platform & platform,span<uint16_t> json,std::string * cbor)2174 Status ConvertJSONToCBOR(const Platform& platform,
2175                          span<uint16_t> json,
2176                          std::string* cbor) {
2177   return ConvertJSONToCBORTmpl(platform, json, cbor);
2178 }
ConvertJSONToCBOR(const Platform & platform,span<uint8_t> json,std::vector<uint8_t> * cbor)2179 Status ConvertJSONToCBOR(const Platform& platform,
2180                          span<uint8_t> json,
2181                          std::vector<uint8_t>* cbor) {
2182   return ConvertJSONToCBORTmpl(platform, json, cbor);
2183 }
ConvertJSONToCBOR(const Platform & platform,span<uint16_t> json,std::vector<uint8_t> * cbor)2184 Status ConvertJSONToCBOR(const Platform& platform,
2185                          span<uint16_t> json,
2186                          std::vector<uint8_t>* cbor) {
2187   return ConvertJSONToCBORTmpl(platform, json, cbor);
2188 }
2189 }  // namespace json
2190 }  // namespace v8_inspector_protocol_encoding
2191