• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "encoding.h"
6 
7 #include <algorithm>
8 #include <cassert>
9 #include <cmath>
10 #include <cstring>
11 #include <limits>
12 #include <stack>
13 
14 namespace v8_inspector_protocol_encoding {
15 // =============================================================================
16 // Status and Error codes
17 // =============================================================================
18 
ToASCIIString() const19 std::string Status::ToASCIIString() const {
20   switch (error) {
21     case Error::OK:
22       return "OK";
23     case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS:
24       return ToASCIIString("JSON: unprocessed input remains");
25     case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED:
26       return ToASCIIString("JSON: stack limit exceeded");
27     case Error::JSON_PARSER_NO_INPUT:
28       return ToASCIIString("JSON: no input");
29     case Error::JSON_PARSER_INVALID_TOKEN:
30       return ToASCIIString("JSON: invalid token");
31     case Error::JSON_PARSER_INVALID_NUMBER:
32       return ToASCIIString("JSON: invalid number");
33     case Error::JSON_PARSER_INVALID_STRING:
34       return ToASCIIString("JSON: invalid string");
35     case Error::JSON_PARSER_UNEXPECTED_ARRAY_END:
36       return ToASCIIString("JSON: unexpected array end");
37     case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED:
38       return ToASCIIString("JSON: comma or array end expected");
39     case Error::JSON_PARSER_STRING_LITERAL_EXPECTED:
40       return ToASCIIString("JSON: string literal expected");
41     case Error::JSON_PARSER_COLON_EXPECTED:
42       return ToASCIIString("JSON: colon expected");
43     case Error::JSON_PARSER_UNEXPECTED_MAP_END:
44       return ToASCIIString("JSON: unexpected map end");
45     case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED:
46       return ToASCIIString("JSON: comma or map end expected");
47     case Error::JSON_PARSER_VALUE_EXPECTED:
48       return ToASCIIString("JSON: value expected");
49 
50     case Error::CBOR_INVALID_INT32:
51       return ToASCIIString("CBOR: invalid int32");
52     case Error::CBOR_INVALID_DOUBLE:
53       return ToASCIIString("CBOR: invalid double");
54     case Error::CBOR_INVALID_ENVELOPE:
55       return ToASCIIString("CBOR: invalid envelope");
56     case Error::CBOR_INVALID_STRING8:
57       return ToASCIIString("CBOR: invalid string8");
58     case Error::CBOR_INVALID_STRING16:
59       return ToASCIIString("CBOR: invalid string16");
60     case Error::CBOR_INVALID_BINARY:
61       return ToASCIIString("CBOR: invalid binary");
62     case Error::CBOR_UNSUPPORTED_VALUE:
63       return ToASCIIString("CBOR: unsupported value");
64     case Error::CBOR_NO_INPUT:
65       return ToASCIIString("CBOR: no input");
66     case Error::CBOR_INVALID_START_BYTE:
67       return ToASCIIString("CBOR: invalid start byte");
68     case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE:
69       return ToASCIIString("CBOR: unexpected eof expected value");
70     case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY:
71       return ToASCIIString("CBOR: unexpected eof in array");
72     case Error::CBOR_UNEXPECTED_EOF_IN_MAP:
73       return ToASCIIString("CBOR: unexpected eof in map");
74     case Error::CBOR_INVALID_MAP_KEY:
75       return ToASCIIString("CBOR: invalid map key");
76     case Error::CBOR_STACK_LIMIT_EXCEEDED:
77       return ToASCIIString("CBOR: stack limit exceeded");
78     case Error::CBOR_TRAILING_JUNK:
79       return ToASCIIString("CBOR: trailing junk");
80     case Error::CBOR_MAP_START_EXPECTED:
81       return ToASCIIString("CBOR: map start expected");
82     case Error::CBOR_MAP_STOP_EXPECTED:
83       return ToASCIIString("CBOR: map stop expected");
84     case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED:
85       return ToASCIIString("CBOR: envelope size limit exceeded");
86   }
87   // Some compilers can't figure out that we can't get here.
88   return "INVALID ERROR CODE";
89 }
90 
ToASCIIString(const char * msg) const91 std::string Status::ToASCIIString(const char* msg) const {
92   return std::string(msg) + " at position " + std::to_string(pos);
93 }
94 
95 namespace cbor {
96 namespace {
// Layout of the CBOR "initial byte": the high-order 3 bits carry the major
// type, the low-order 5 bits carry the additional information.

// Number of bits the masked initial byte (see |kMajorTypeMask|) must be
// shifted to the right so that the major type ends up in the lowermost bits.
static constexpr uint8_t kMajorTypeBitShift{5};
// Selects the low-order 5 bits of the initial byte (additional information).
static constexpr uint8_t kAdditionalInformationMask{0x1f};
// Selects the high-order 3 bits of the initial byte (major type).
static constexpr uint8_t kMajorTypeMask{0xe0};

// Additional information values which say how many bytes following the
// initial byte hold the integer.
// The integer is in the following byte.
static constexpr uint8_t kAdditionalInformation1Byte{24};
// The integer is in the next 2 bytes.
static constexpr uint8_t kAdditionalInformation2Bytes{25};
// The integer is in the next 4 bytes.
static constexpr uint8_t kAdditionalInformation4Bytes{26};
// The integer is in the next 8 bytes.
static constexpr uint8_t kAdditionalInformation8Bytes{27};
115 
116 // Encodes the initial byte, consisting of the |type| in the first 3 bits
117 // followed by 5 bits of |additional_info|.
EncodeInitialByte(MajorType type,uint8_t additional_info)118 constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) {
119   return (static_cast<uint8_t>(type) << kMajorTypeBitShift) |
120          (additional_info & kAdditionalInformationMask);
121 }
122 
123 // TAG 24 indicates that what follows is a byte string which is
124 // encoded in CBOR format. We use this as a wrapper for
125 // maps and arrays, allowing us to skip them, because the
126 // byte string carries its size (byte length).
127 // https://tools.ietf.org/html/rfc7049#section-2.4.4.1
128 static constexpr uint8_t kInitialByteForEnvelope =
129     EncodeInitialByte(MajorType::TAG, 24);
130 // The initial byte for a byte string with at most 2^32 bytes
131 // of payload. This is used for envelope encoding, even if
132 // the byte string is shorter.
133 static constexpr uint8_t kInitialByteFor32BitLengthByteString =
134     EncodeInitialByte(MajorType::BYTE_STRING, 26);
135 
136 // See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional
137 // info = 31.
138 static constexpr uint8_t kInitialByteIndefiniteLengthArray =
139     EncodeInitialByte(MajorType::ARRAY, 31);
140 static constexpr uint8_t kInitialByteIndefiniteLengthMap =
141     EncodeInitialByte(MajorType::MAP, 31);
142 // See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite
143 // length maps / arrays.
144 static constexpr uint8_t kStopByte =
145     EncodeInitialByte(MajorType::SIMPLE_VALUE, 31);
146 
147 // See RFC 7049 Section 2.3, Table 2.
148 static constexpr uint8_t kEncodedTrue =
149     EncodeInitialByte(MajorType::SIMPLE_VALUE, 21);
150 static constexpr uint8_t kEncodedFalse =
151     EncodeInitialByte(MajorType::SIMPLE_VALUE, 20);
152 static constexpr uint8_t kEncodedNull =
153     EncodeInitialByte(MajorType::SIMPLE_VALUE, 22);
154 static constexpr uint8_t kInitialByteForDouble =
155     EncodeInitialByte(MajorType::SIMPLE_VALUE, 27);
156 
157 // See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for
158 // arbitrary binary data encoded as BYTE_STRING.
159 static constexpr uint8_t kExpectedConversionToBase64Tag =
160     EncodeInitialByte(MajorType::TAG, 22);
161 
// Appends the sizeof(T) bytes of |v| to |out| via push_back, beginning with
// the most significant byte and ending with the least significant one.
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T, class C>
void WriteBytesMostSignificantByteFirst(T v, C* out) {
  size_t bytes_left = sizeof(T);
  while (bytes_left-- > 0) {
    // |bytes_left| now is the index of the byte to emit, counted from the
    // least significant end.
    out->push_back(0xff & (v >> (bytes_left * 8)));
  }
}
169 
170 // Extracts sizeof(T) bytes from |in| to extract a value of type T
171 // (e.g. uint64_t, uint32_t, ...), most significant byte first.
172 // See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
173 template <typename T>
ReadBytesMostSignificantByteFirst(span<uint8_t> in)174 T ReadBytesMostSignificantByteFirst(span<uint8_t> in) {
175   assert(in.size() >= sizeof(T));
176   T result = 0;
177   for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes)
178     result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8);
179   return result;
180 }
181 }  // namespace
182 
183 namespace internals {
184 // Reads the start of a token with definitive size from |bytes|.
185 // |type| is the major type as specified in RFC 7049 Section 2.1.
186 // |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size
187 // (e.g. for BYTE_STRING).
188 // If successful, returns the number of bytes read. Otherwise returns -1.
189 // TODO(johannes): change return type to size_t and use 0 for error.
ReadTokenStart(span<uint8_t> bytes,MajorType * type,uint64_t * value)190 int8_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) {
191   if (bytes.empty())
192     return -1;
193   uint8_t initial_byte = bytes[0];
194   *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift);
195 
196   uint8_t additional_information = initial_byte & kAdditionalInformationMask;
197   if (additional_information < 24) {
198     // Values 0-23 are encoded directly into the additional info of the
199     // initial byte.
200     *value = additional_information;
201     return 1;
202   }
203   if (additional_information == kAdditionalInformation1Byte) {
204     // Values 24-255 are encoded with one initial byte, followed by the value.
205     if (bytes.size() < 2)
206       return -1;
207     *value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes.subspan(1));
208     return 2;
209   }
210   if (additional_information == kAdditionalInformation2Bytes) {
211     // Values 256-65535: 1 initial byte + 2 bytes payload.
212     if (bytes.size() < 1 + sizeof(uint16_t))
213       return -1;
214     *value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes.subspan(1));
215     return 3;
216   }
217   if (additional_information == kAdditionalInformation4Bytes) {
218     // 32 bit uint: 1 initial byte + 4 bytes payload.
219     if (bytes.size() < 1 + sizeof(uint32_t))
220       return -1;
221     *value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes.subspan(1));
222     return 5;
223   }
224   if (additional_information == kAdditionalInformation8Bytes) {
225     // 64 bit uint: 1 initial byte + 8 bytes payload.
226     if (bytes.size() < 1 + sizeof(uint64_t))
227       return -1;
228     *value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes.subspan(1));
229     return 9;
230   }
231   return -1;
232 }
233 
234 // Writes the start of a token with |type|. The |value| may indicate the size,
235 // or it may be the payload if the value is an unsigned integer.
236 template <typename C>
WriteTokenStartTmpl(MajorType type,uint64_t value,C * encoded)237 void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) {
238   if (value < 24) {
239     // Values 0-23 are encoded directly into the additional info of the
240     // initial byte.
241     encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value));
242     return;
243   }
244   if (value <= std::numeric_limits<uint8_t>::max()) {
245     // Values 24-255 are encoded with one initial byte, followed by the value.
246     encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte));
247     encoded->push_back(value);
248     return;
249   }
250   if (value <= std::numeric_limits<uint16_t>::max()) {
251     // Values 256-65535: 1 initial byte + 2 bytes payload.
252     encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes));
253     WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded);
254     return;
255   }
256   if (value <= std::numeric_limits<uint32_t>::max()) {
257     // 32 bit uint: 1 initial byte + 4 bytes payload.
258     encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes));
259     WriteBytesMostSignificantByteFirst<uint32_t>(static_cast<uint32_t>(value),
260                                                  encoded);
261     return;
262   }
263   // 64 bit uint: 1 initial byte + 8 bytes payload.
264   encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes));
265   WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded);
266 }
WriteTokenStart(MajorType type,uint64_t value,std::vector<uint8_t> * encoded)267 void WriteTokenStart(MajorType type,
268                      uint64_t value,
269                      std::vector<uint8_t>* encoded) {
270   WriteTokenStartTmpl(type, value, encoded);
271 }
WriteTokenStart(MajorType type,uint64_t value,std::string * encoded)272 void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) {
273   WriteTokenStartTmpl(type, value, encoded);
274 }
275 }  // namespace internals
276 
277 // =============================================================================
278 // Detecting CBOR content
279 // =============================================================================
280 
InitialByteForEnvelope()281 uint8_t InitialByteForEnvelope() {
282   return kInitialByteForEnvelope;
283 }
InitialByteFor32BitLengthByteString()284 uint8_t InitialByteFor32BitLengthByteString() {
285   return kInitialByteFor32BitLengthByteString;
286 }
IsCBORMessage(span<uint8_t> msg)287 bool IsCBORMessage(span<uint8_t> msg) {
288   return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() &&
289          msg[1] == InitialByteFor32BitLengthByteString();
290 }
291 
292 // =============================================================================
// Encoding individual CBOR items
294 // =============================================================================
295 
EncodeTrue()296 uint8_t EncodeTrue() {
297   return kEncodedTrue;
298 }
EncodeFalse()299 uint8_t EncodeFalse() {
300   return kEncodedFalse;
301 }
EncodeNull()302 uint8_t EncodeNull() {
303   return kEncodedNull;
304 }
305 
EncodeIndefiniteLengthArrayStart()306 uint8_t EncodeIndefiniteLengthArrayStart() {
307   return kInitialByteIndefiniteLengthArray;
308 }
309 
EncodeIndefiniteLengthMapStart()310 uint8_t EncodeIndefiniteLengthMapStart() {
311   return kInitialByteIndefiniteLengthMap;
312 }
313 
EncodeStop()314 uint8_t EncodeStop() {
315   return kStopByte;
316 }
317 
318 template <typename C>
EncodeInt32Tmpl(int32_t value,C * out)319 void EncodeInt32Tmpl(int32_t value, C* out) {
320   if (value >= 0) {
321     internals::WriteTokenStart(MajorType::UNSIGNED, value, out);
322   } else {
323     uint64_t representation = static_cast<uint64_t>(-(value + 1));
324     internals::WriteTokenStart(MajorType::NEGATIVE, representation, out);
325   }
326 }
EncodeInt32(int32_t value,std::vector<uint8_t> * out)327 void EncodeInt32(int32_t value, std::vector<uint8_t>* out) {
328   EncodeInt32Tmpl(value, out);
329 }
EncodeInt32(int32_t value,std::string * out)330 void EncodeInt32(int32_t value, std::string* out) {
331   EncodeInt32Tmpl(value, out);
332 }
333 
334 template <typename C>
EncodeString16Tmpl(span<uint16_t> in,C * out)335 void EncodeString16Tmpl(span<uint16_t> in, C* out) {
336   uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
337   internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
338   // When emitting UTF16 characters, we always write the least significant byte
339   // first; this is because it's the native representation for X86.
340   // TODO(johannes): Implement a more efficient thing here later, e.g.
341   // casting *iff* the machine has this byte order.
342   // The wire format for UTF16 chars will probably remain the same
343   // (least significant byte first) since this way we can have
344   // golden files, unittests, etc. that port easily and universally.
345   // See also:
346   // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
347   for (const uint16_t two_bytes : in) {
348     out->push_back(two_bytes);
349     out->push_back(two_bytes >> 8);
350   }
351 }
EncodeString16(span<uint16_t> in,std::vector<uint8_t> * out)352 void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) {
353   EncodeString16Tmpl(in, out);
354 }
EncodeString16(span<uint16_t> in,std::string * out)355 void EncodeString16(span<uint16_t> in, std::string* out) {
356   EncodeString16Tmpl(in, out);
357 }
358 
359 template <typename C>
EncodeString8Tmpl(span<uint8_t> in,C * out)360 void EncodeString8Tmpl(span<uint8_t> in, C* out) {
361   internals::WriteTokenStart(MajorType::STRING,
362                              static_cast<uint64_t>(in.size_bytes()), out);
363   out->insert(out->end(), in.begin(), in.end());
364 }
EncodeString8(span<uint8_t> in,std::vector<uint8_t> * out)365 void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) {
366   EncodeString8Tmpl(in, out);
367 }
EncodeString8(span<uint8_t> in,std::string * out)368 void EncodeString8(span<uint8_t> in, std::string* out) {
369   EncodeString8Tmpl(in, out);
370 }
371 
372 template <typename C>
EncodeFromLatin1Tmpl(span<uint8_t> latin1,C * out)373 void EncodeFromLatin1Tmpl(span<uint8_t> latin1, C* out) {
374   for (size_t ii = 0; ii < latin1.size(); ++ii) {
375     if (latin1[ii] <= 127)
376       continue;
377     // If there's at least one non-ASCII char, convert to UTF8.
378     std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + ii);
379     for (; ii < latin1.size(); ++ii) {
380       if (latin1[ii] <= 127) {
381         utf8.push_back(latin1[ii]);
382       } else {
383         // 0xC0 means it's a UTF8 sequence with 2 bytes.
384         utf8.push_back((latin1[ii] >> 6) | 0xc0);
385         utf8.push_back((latin1[ii] | 0x80) & 0xbf);
386       }
387     }
388     EncodeString8(SpanFrom(utf8), out);
389     return;
390   }
391   EncodeString8(latin1, out);
392 }
EncodeFromLatin1(span<uint8_t> latin1,std::vector<uint8_t> * out)393 void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) {
394   EncodeFromLatin1Tmpl(latin1, out);
395 }
EncodeFromLatin1(span<uint8_t> latin1,std::string * out)396 void EncodeFromLatin1(span<uint8_t> latin1, std::string* out) {
397   EncodeFromLatin1Tmpl(latin1, out);
398 }
399 
400 template <typename C>
EncodeFromUTF16Tmpl(span<uint16_t> utf16,C * out)401 void EncodeFromUTF16Tmpl(span<uint16_t> utf16, C* out) {
402   // If there's at least one non-ASCII char, encode as STRING16 (UTF16).
403   for (uint16_t ch : utf16) {
404     if (ch <= 127)
405       continue;
406     EncodeString16(utf16, out);
407     return;
408   }
409   // It's all US-ASCII, strip out every second byte and encode as UTF8.
410   internals::WriteTokenStart(MajorType::STRING,
411                              static_cast<uint64_t>(utf16.size()), out);
412   out->insert(out->end(), utf16.begin(), utf16.end());
413 }
EncodeFromUTF16(span<uint16_t> utf16,std::vector<uint8_t> * out)414 void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) {
415   EncodeFromUTF16Tmpl(utf16, out);
416 }
EncodeFromUTF16(span<uint16_t> utf16,std::string * out)417 void EncodeFromUTF16(span<uint16_t> utf16, std::string* out) {
418   EncodeFromUTF16Tmpl(utf16, out);
419 }
420 
421 template <typename C>
EncodeBinaryTmpl(span<uint8_t> in,C * out)422 void EncodeBinaryTmpl(span<uint8_t> in, C* out) {
423   out->push_back(kExpectedConversionToBase64Tag);
424   uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
425   internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
426   out->insert(out->end(), in.begin(), in.end());
427 }
EncodeBinary(span<uint8_t> in,std::vector<uint8_t> * out)428 void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) {
429   EncodeBinaryTmpl(in, out);
430 }
EncodeBinary(span<uint8_t> in,std::string * out)431 void EncodeBinary(span<uint8_t> in, std::string* out) {
432   EncodeBinaryTmpl(in, out);
433 }
434 
// Encoded size of a double: the initial byte (kInitialByteForDouble) followed
// by the 64 bits of payload for its value.
constexpr size_t kEncodedDoubleSize{sizeof(uint8_t) + sizeof(uint64_t)};

// Encoded size of an envelope header: the initial byte
// (kInitialByteForEnvelope), the start byte for a BYTE_STRING with a 32 bit
// wide length, and the 32 bit length of that string.
constexpr size_t kEncodedEnvelopeHeaderSize{2 * sizeof(uint8_t) +
                                            sizeof(uint32_t)};
443 
444 template <typename C>
EncodeDoubleTmpl(double value,C * out)445 void EncodeDoubleTmpl(double value, C* out) {
446   // The additional_info=27 indicates 64 bits for the double follow.
447   // See RFC 7049 Section 2.3, Table 1.
448   out->push_back(kInitialByteForDouble);
449   union {
450     double from_double;
451     uint64_t to_uint64;
452   } reinterpret;
453   reinterpret.from_double = value;
454   WriteBytesMostSignificantByteFirst<uint64_t>(reinterpret.to_uint64, out);
455 }
EncodeDouble(double value,std::vector<uint8_t> * out)456 void EncodeDouble(double value, std::vector<uint8_t>* out) {
457   EncodeDoubleTmpl(value, out);
458 }
EncodeDouble(double value,std::string * out)459 void EncodeDouble(double value, std::string* out) {
460   EncodeDoubleTmpl(value, out);
461 }
462 
463 // =============================================================================
464 // cbor::EnvelopeEncoder - for wrapping submessages
465 // =============================================================================
466 
467 template <typename C>
EncodeStartTmpl(C * out,size_t * byte_size_pos)468 void EncodeStartTmpl(C* out, size_t* byte_size_pos) {
469   assert(*byte_size_pos == 0);
470   out->push_back(kInitialByteForEnvelope);
471   out->push_back(kInitialByteFor32BitLengthByteString);
472   *byte_size_pos = out->size();
473   out->resize(out->size() + sizeof(uint32_t));
474 }
475 
EncodeStart(std::vector<uint8_t> * out)476 void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) {
477   EncodeStartTmpl<std::vector<uint8_t>>(out, &byte_size_pos_);
478 }
479 
EncodeStart(std::string * out)480 void EnvelopeEncoder::EncodeStart(std::string* out) {
481   EncodeStartTmpl<std::string>(out, &byte_size_pos_);
482 }
483 
484 template <typename C>
EncodeStopTmpl(C * out,size_t * byte_size_pos)485 bool EncodeStopTmpl(C* out, size_t* byte_size_pos) {
486   assert(*byte_size_pos != 0);
487   // The byte size is the size of the payload, that is, all the
488   // bytes that were written past the byte size position itself.
489   uint64_t byte_size = out->size() - (*byte_size_pos + sizeof(uint32_t));
490   // We store exactly 4 bytes, so at most INT32MAX, with most significant
491   // byte first.
492   if (byte_size > std::numeric_limits<uint32_t>::max())
493     return false;
494   for (int shift_bytes = sizeof(uint32_t) - 1; shift_bytes >= 0;
495        --shift_bytes) {
496     (*out)[(*byte_size_pos)++] = 0xff & (byte_size >> (shift_bytes * 8));
497   }
498   return true;
499 }
500 
EncodeStop(std::vector<uint8_t> * out)501 bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) {
502   return EncodeStopTmpl(out, &byte_size_pos_);
503 }
504 
EncodeStop(std::string * out)505 bool EnvelopeEncoder::EncodeStop(std::string* out) {
506   return EncodeStopTmpl(out, &byte_size_pos_);
507 }
508 
509 // =============================================================================
510 // cbor::NewCBOREncoder - for encoding from a streaming parser
511 // =============================================================================
512 
513 namespace {
514 template <typename C>
515 class CBOREncoder : public StreamingParserHandler {
516  public:
CBOREncoder(C * out,Status * status)517   CBOREncoder(C* out, Status* status) : out_(out), status_(status) {
518     *status_ = Status();
519   }
520 
HandleMapBegin()521   void HandleMapBegin() override {
522     if (!status_->ok())
523       return;
524     envelopes_.emplace_back();
525     envelopes_.back().EncodeStart(out_);
526     out_->push_back(kInitialByteIndefiniteLengthMap);
527   }
528 
HandleMapEnd()529   void HandleMapEnd() override {
530     if (!status_->ok())
531       return;
532     out_->push_back(kStopByte);
533     assert(!envelopes_.empty());
534     if (!envelopes_.back().EncodeStop(out_)) {
535       HandleError(
536           Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
537       return;
538     }
539     envelopes_.pop_back();
540   }
541 
HandleArrayBegin()542   void HandleArrayBegin() override {
543     if (!status_->ok())
544       return;
545     envelopes_.emplace_back();
546     envelopes_.back().EncodeStart(out_);
547     out_->push_back(kInitialByteIndefiniteLengthArray);
548   }
549 
HandleArrayEnd()550   void HandleArrayEnd() override {
551     if (!status_->ok())
552       return;
553     out_->push_back(kStopByte);
554     assert(!envelopes_.empty());
555     if (!envelopes_.back().EncodeStop(out_)) {
556       HandleError(
557           Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
558       return;
559     }
560     envelopes_.pop_back();
561   }
562 
HandleString8(span<uint8_t> chars)563   void HandleString8(span<uint8_t> chars) override {
564     if (!status_->ok())
565       return;
566     EncodeString8(chars, out_);
567   }
568 
HandleString16(span<uint16_t> chars)569   void HandleString16(span<uint16_t> chars) override {
570     if (!status_->ok())
571       return;
572     EncodeFromUTF16(chars, out_);
573   }
574 
HandleBinary(span<uint8_t> bytes)575   void HandleBinary(span<uint8_t> bytes) override {
576     if (!status_->ok())
577       return;
578     EncodeBinary(bytes, out_);
579   }
580 
HandleDouble(double value)581   void HandleDouble(double value) override {
582     if (!status_->ok())
583       return;
584     EncodeDouble(value, out_);
585   }
586 
HandleInt32(int32_t value)587   void HandleInt32(int32_t value) override {
588     if (!status_->ok())
589       return;
590     EncodeInt32(value, out_);
591   }
592 
HandleBool(bool value)593   void HandleBool(bool value) override {
594     if (!status_->ok())
595       return;
596     // See RFC 7049 Section 2.3, Table 2.
597     out_->push_back(value ? kEncodedTrue : kEncodedFalse);
598   }
599 
HandleNull()600   void HandleNull() override {
601     if (!status_->ok())
602       return;
603     // See RFC 7049 Section 2.3, Table 2.
604     out_->push_back(kEncodedNull);
605   }
606 
HandleError(Status error)607   void HandleError(Status error) override {
608     if (!status_->ok())
609       return;
610     *status_ = error;
611     out_->clear();
612   }
613 
614  private:
615   C* out_;
616   std::vector<EnvelopeEncoder> envelopes_;
617   Status* status_;
618 };
619 }  // namespace
620 
NewCBOREncoder(std::vector<uint8_t> * out,Status * status)621 std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
622     std::vector<uint8_t>* out,
623     Status* status) {
624   return std::unique_ptr<StreamingParserHandler>(
625       new CBOREncoder<std::vector<uint8_t>>(out, status));
626 }
NewCBOREncoder(std::string * out,Status * status)627 std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
628                                                        Status* status) {
629   return std::unique_ptr<StreamingParserHandler>(
630       new CBOREncoder<std::string>(out, status));
631 }
632 
633 // =============================================================================
634 // cbor::CBORTokenizer - for parsing individual CBOR items
635 // =============================================================================
636 
CBORTokenizer(span<uint8_t> bytes)637 CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : bytes_(bytes) {
638   ReadNextToken(/*enter_envelope=*/false);
639 }
~CBORTokenizer()640 CBORTokenizer::~CBORTokenizer() {}
641 
TokenTag() const642 CBORTokenTag CBORTokenizer::TokenTag() const {
643   return token_tag_;
644 }
645 
Next()646 void CBORTokenizer::Next() {
647   if (token_tag_ == CBORTokenTag::ERROR_VALUE ||
648       token_tag_ == CBORTokenTag::DONE)
649     return;
650   ReadNextToken(/*enter_envelope=*/false);
651 }
652 
EnterEnvelope()653 void CBORTokenizer::EnterEnvelope() {
654   assert(token_tag_ == CBORTokenTag::ENVELOPE);
655   ReadNextToken(/*enter_envelope=*/true);
656 }
657 
Status() const658 Status CBORTokenizer::Status() const {
659   return status_;
660 }
661 
662 // The following accessor functions ::GetInt32, ::GetDouble,
663 // ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents
664 // assume that a particular token was recognized in ::ReadNextToken.
665 // That's where all the error checking is done. By design,
666 // the accessors (assuming the token was recognized) never produce
667 // an error.
668 
GetInt32() const669 int32_t CBORTokenizer::GetInt32() const {
670   assert(token_tag_ == CBORTokenTag::INT32);
671   // The range checks happen in ::ReadNextToken().
672   return static_cast<int32_t>(
673       token_start_type_ == MajorType::UNSIGNED
674           ? token_start_internal_value_
675           : -static_cast<int64_t>(token_start_internal_value_) - 1);
676 }
677 
GetDouble() const678 double CBORTokenizer::GetDouble() const {
679   assert(token_tag_ == CBORTokenTag::DOUBLE);
680   union {
681     uint64_t from_uint64;
682     double to_double;
683   } reinterpret;
684   reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst<uint64_t>(
685       bytes_.subspan(status_.pos + 1));
686   return reinterpret.to_double;
687 }
688 
GetString8() const689 span<uint8_t> CBORTokenizer::GetString8() const {
690   assert(token_tag_ == CBORTokenTag::STRING8);
691   auto length = static_cast<size_t>(token_start_internal_value_);
692   return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
693 }
694 
GetString16WireRep() const695 span<uint8_t> CBORTokenizer::GetString16WireRep() const {
696   assert(token_tag_ == CBORTokenTag::STRING16);
697   auto length = static_cast<size_t>(token_start_internal_value_);
698   return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
699 }
700 
GetBinary() const701 span<uint8_t> CBORTokenizer::GetBinary() const {
702   assert(token_tag_ == CBORTokenTag::BINARY);
703   auto length = static_cast<size_t>(token_start_internal_value_);
704   return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
705 }
706 
GetEnvelopeContents() const707 span<uint8_t> CBORTokenizer::GetEnvelopeContents() const {
708   assert(token_tag_ == CBORTokenTag::ENVELOPE);
709   auto length = static_cast<size_t>(token_start_internal_value_);
710   return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length);
711 }
712 
713 // All error checking happens in ::ReadNextToken, so that the accessors
714 // can avoid having to carry an error return value.
715 //
716 // With respect to checking the encoded lengths of strings, arrays, etc:
717 // On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so
718 // we initially read them as uint64_t, usually into token_start_internal_value_.
719 //
720 // However, since these containers have a representation on the machine,
721 // we need to do corresponding size computations on the input byte array,
722 // output span (e.g. the payload for a string), etc., and size_t is
723 // machine specific (in practice either 32 bit or 64 bit).
724 //
725 // Further, we must avoid overflowing size_t. Therefore, we use this
726 // kMaxValidLength constant to:
727 // - Reject values that are larger than the architecture specific
728 //   max size_t (differs between 32 bit and 64 bit arch).
729 // - Reserve at least one bit so that we can check against overflows
730 //   when adding lengths (array / string length / etc.); we do this by
731 //   ensuring that the inputs to an addition are <= kMaxValidLength,
732 //   and then checking whether the sum went past it.
733 //
734 // See also
735 // https://chromium.googlesource.com/chromium/src/+/master/docs/security/integer-semantics.md
// Largest length accepted from the wire: a quarter of the uint64_t range,
// further capped by the largest value that size_t can hold.
static const uint64_t kMaxValidLength = std::min<uint64_t>(
    std::numeric_limits<uint64_t>::max() / 4,
    static_cast<uint64_t>(std::numeric_limits<size_t>::max()));
739 
ReadNextToken(bool enter_envelope)740 void CBORTokenizer::ReadNextToken(bool enter_envelope) {
741   if (enter_envelope) {
742     status_.pos += kEncodedEnvelopeHeaderSize;
743   } else {
744     status_.pos =
745         status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_;
746   }
747   status_.error = Error::OK;
748   if (status_.pos >= bytes_.size()) {
749     token_tag_ = CBORTokenTag::DONE;
750     return;
751   }
752   const size_t remaining_bytes = bytes_.size() - status_.pos;
753   switch (bytes_[status_.pos]) {
754     case kStopByte:
755       SetToken(CBORTokenTag::STOP, 1);
756       return;
757     case kInitialByteIndefiniteLengthMap:
758       SetToken(CBORTokenTag::MAP_START, 1);
759       return;
760     case kInitialByteIndefiniteLengthArray:
761       SetToken(CBORTokenTag::ARRAY_START, 1);
762       return;
763     case kEncodedTrue:
764       SetToken(CBORTokenTag::TRUE_VALUE, 1);
765       return;
766     case kEncodedFalse:
767       SetToken(CBORTokenTag::FALSE_VALUE, 1);
768       return;
769     case kEncodedNull:
770       SetToken(CBORTokenTag::NULL_VALUE, 1);
771       return;
772     case kExpectedConversionToBase64Tag: {  // BINARY
773       const int8_t bytes_read = internals::ReadTokenStart(
774           bytes_.subspan(status_.pos + 1), &token_start_type_,
775           &token_start_internal_value_);
776       if (bytes_read < 0 || token_start_type_ != MajorType::BYTE_STRING ||
777           token_start_internal_value_ > kMaxValidLength) {
778         SetError(Error::CBOR_INVALID_BINARY);
779         return;
780       }
781       const uint64_t token_byte_length = token_start_internal_value_ +
782                                          /* tag before token start: */ 1 +
783                                          /* token start: */ bytes_read;
784       if (token_byte_length > remaining_bytes) {
785         SetError(Error::CBOR_INVALID_BINARY);
786         return;
787       }
788       SetToken(CBORTokenTag::BINARY, static_cast<size_t>(token_byte_length));
789       return;
790     }
791     case kInitialByteForDouble: {  // DOUBLE
792       if (kEncodedDoubleSize > remaining_bytes) {
793         SetError(Error::CBOR_INVALID_DOUBLE);
794         return;
795       }
796       SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize);
797       return;
798     }
799     case kInitialByteForEnvelope: {  // ENVELOPE
800       if (kEncodedEnvelopeHeaderSize > remaining_bytes) {
801         SetError(Error::CBOR_INVALID_ENVELOPE);
802         return;
803       }
804       // The envelope must be a byte string with 32 bit length.
805       if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) {
806         SetError(Error::CBOR_INVALID_ENVELOPE);
807         return;
808       }
809       // Read the length of the byte string.
810       token_start_internal_value_ = ReadBytesMostSignificantByteFirst<uint32_t>(
811           bytes_.subspan(status_.pos + 2));
812       if (token_start_internal_value_ > kMaxValidLength) {
813         SetError(Error::CBOR_INVALID_ENVELOPE);
814         return;
815       }
816       uint64_t token_byte_length =
817           token_start_internal_value_ + kEncodedEnvelopeHeaderSize;
818       if (token_byte_length > remaining_bytes) {
819         SetError(Error::CBOR_INVALID_ENVELOPE);
820         return;
821       }
822       SetToken(CBORTokenTag::ENVELOPE, static_cast<size_t>(token_byte_length));
823       return;
824     }
825     default: {
826       const int8_t token_start_length = internals::ReadTokenStart(
827           bytes_.subspan(status_.pos), &token_start_type_,
828           &token_start_internal_value_);
829       const bool success = token_start_length >= 0;
830       switch (token_start_type_) {
831         case MajorType::UNSIGNED:  // INT32.
832           // INT32 is a signed int32 (int32 makes sense for the
833           // inspector_protocol, it's not a CBOR limitation), so we check
834           // against the signed max, so that the allowable values are
835           // 0, 1, 2, ... 2^31 - 1.
836           if (!success || std::numeric_limits<int32_t>::max() <
837                               token_start_internal_value_) {
838             SetError(Error::CBOR_INVALID_INT32);
839             return;
840           }
841           SetToken(CBORTokenTag::INT32, token_start_length);
842           return;
843         case MajorType::NEGATIVE: {  // INT32.
844           // INT32 is a signed int32 (int32 makes sense for the
845           // inspector_protocol, it's not a CBOR limitation); in CBOR,
846           // the negative values for INT32 are represented as NEGATIVE,
847           // that is, -1 INT32 is represented as 1 << 5 | 0 (major type 1,
848           // additional info value 0). So here, we compute the INT32 value
849           // and then check it against the INT32 min.
850           int64_t actual_value =
851               -static_cast<int64_t>(token_start_internal_value_) - 1;
852           if (!success || actual_value < std::numeric_limits<int32_t>::min()) {
853             SetError(Error::CBOR_INVALID_INT32);
854             return;
855           }
856           SetToken(CBORTokenTag::INT32, token_start_length);
857           return;
858         }
859         case MajorType::STRING: {  // STRING8.
860           if (!success || token_start_internal_value_ > kMaxValidLength) {
861             SetError(Error::CBOR_INVALID_STRING8);
862             return;
863           }
864           uint64_t token_byte_length =
865               token_start_internal_value_ + token_start_length;
866           if (token_byte_length > remaining_bytes) {
867             SetError(Error::CBOR_INVALID_STRING8);
868             return;
869           }
870           SetToken(CBORTokenTag::STRING8,
871                    static_cast<size_t>(token_byte_length));
872           return;
873         }
874         case MajorType::BYTE_STRING: {  // STRING16.
875           // Length must be divisible by 2 since UTF16 is 2 bytes per
876           // character, hence the &1 check.
877           if (!success || token_start_internal_value_ > kMaxValidLength ||
878               token_start_internal_value_ & 1) {
879             SetError(Error::CBOR_INVALID_STRING16);
880             return;
881           }
882           uint64_t token_byte_length =
883               token_start_internal_value_ + token_start_length;
884           if (token_byte_length > remaining_bytes) {
885             SetError(Error::CBOR_INVALID_STRING16);
886             return;
887           }
888           SetToken(CBORTokenTag::STRING16,
889                    static_cast<size_t>(token_byte_length));
890           return;
891         }
892         case MajorType::ARRAY:
893         case MajorType::MAP:
894         case MajorType::TAG:
895         case MajorType::SIMPLE_VALUE:
896           SetError(Error::CBOR_UNSUPPORTED_VALUE);
897           return;
898       }
899     }
900   }
901 }
902 
SetToken(CBORTokenTag token_tag,size_t token_byte_length)903 void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) {
904   token_tag_ = token_tag;
905   token_byte_length_ = token_byte_length;
906 }
907 
SetError(Error error)908 void CBORTokenizer::SetError(Error error) {
909   token_tag_ = CBORTokenTag::ERROR_VALUE;
910   status_.error = error;
911 }
912 
913 // =============================================================================
914 // cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
915 // =============================================================================
916 
917 namespace {
// Maximum nesting depth of maps and arrays that the CBOR parser accepts
// before reporting CBOR_STACK_LIMIT_EXCEEDED.
constexpr int kStackLimit = 300;
921 
922 // Below are three parsing routines for CBOR, which cover enough
923 // to roundtrip JSON messages.
924 bool ParseMap(int32_t stack_depth,
925               CBORTokenizer* tokenizer,
926               StreamingParserHandler* out);
927 bool ParseArray(int32_t stack_depth,
928                 CBORTokenizer* tokenizer,
929                 StreamingParserHandler* out);
930 bool ParseValue(int32_t stack_depth,
931                 CBORTokenizer* tokenizer,
932                 StreamingParserHandler* out);
933 
ParseUTF16String(CBORTokenizer * tokenizer,StreamingParserHandler * out)934 void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
935   std::vector<uint16_t> value;
936   span<uint8_t> rep = tokenizer->GetString16WireRep();
937   for (size_t ii = 0; ii < rep.size(); ii += 2)
938     value.push_back((rep[ii + 1] << 8) | rep[ii]);
939   out->HandleString16(span<uint16_t>(value.data(), value.size()));
940   tokenizer->Next();
941 }
942 
ParseUTF8String(CBORTokenizer * tokenizer,StreamingParserHandler * out)943 bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
944   assert(tokenizer->TokenTag() == CBORTokenTag::STRING8);
945   out->HandleString8(tokenizer->GetString8());
946   tokenizer->Next();
947   return true;
948 }
949 
ParseValue(int32_t stack_depth,CBORTokenizer * tokenizer,StreamingParserHandler * out)950 bool ParseValue(int32_t stack_depth,
951                 CBORTokenizer* tokenizer,
952                 StreamingParserHandler* out) {
953   if (stack_depth > kStackLimit) {
954     out->HandleError(
955         Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos});
956     return false;
957   }
958   // Skip past the envelope to get to what's inside.
959   if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE)
960     tokenizer->EnterEnvelope();
961   switch (tokenizer->TokenTag()) {
962     case CBORTokenTag::ERROR_VALUE:
963       out->HandleError(tokenizer->Status());
964       return false;
965     case CBORTokenTag::DONE:
966       out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE,
967                               tokenizer->Status().pos});
968       return false;
969     case CBORTokenTag::TRUE_VALUE:
970       out->HandleBool(true);
971       tokenizer->Next();
972       return true;
973     case CBORTokenTag::FALSE_VALUE:
974       out->HandleBool(false);
975       tokenizer->Next();
976       return true;
977     case CBORTokenTag::NULL_VALUE:
978       out->HandleNull();
979       tokenizer->Next();
980       return true;
981     case CBORTokenTag::INT32:
982       out->HandleInt32(tokenizer->GetInt32());
983       tokenizer->Next();
984       return true;
985     case CBORTokenTag::DOUBLE:
986       out->HandleDouble(tokenizer->GetDouble());
987       tokenizer->Next();
988       return true;
989     case CBORTokenTag::STRING8:
990       return ParseUTF8String(tokenizer, out);
991     case CBORTokenTag::STRING16:
992       ParseUTF16String(tokenizer, out);
993       return true;
994     case CBORTokenTag::BINARY: {
995       out->HandleBinary(tokenizer->GetBinary());
996       tokenizer->Next();
997       return true;
998     }
999     case CBORTokenTag::MAP_START:
1000       return ParseMap(stack_depth + 1, tokenizer, out);
1001     case CBORTokenTag::ARRAY_START:
1002       return ParseArray(stack_depth + 1, tokenizer, out);
1003     default:
1004       out->HandleError(
1005           Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos});
1006       return false;
1007   }
1008 }
1009 
1010 // |bytes| must start with the indefinite length array byte, so basically,
1011 // ParseArray may only be called after an indefinite length array has been
1012 // detected.
ParseArray(int32_t stack_depth,CBORTokenizer * tokenizer,StreamingParserHandler * out)1013 bool ParseArray(int32_t stack_depth,
1014                 CBORTokenizer* tokenizer,
1015                 StreamingParserHandler* out) {
1016   assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START);
1017   tokenizer->Next();
1018   out->HandleArrayBegin();
1019   while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1020     if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1021       out->HandleError(
1022           Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos});
1023       return false;
1024     }
1025     if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1026       out->HandleError(tokenizer->Status());
1027       return false;
1028     }
1029     // Parse value.
1030     if (!ParseValue(stack_depth, tokenizer, out))
1031       return false;
1032   }
1033   out->HandleArrayEnd();
1034   tokenizer->Next();
1035   return true;
1036 }
1037 
1038 // |bytes| must start with the indefinite length array byte, so basically,
1039 // ParseArray may only be called after an indefinite length array has been
1040 // detected.
ParseMap(int32_t stack_depth,CBORTokenizer * tokenizer,StreamingParserHandler * out)1041 bool ParseMap(int32_t stack_depth,
1042               CBORTokenizer* tokenizer,
1043               StreamingParserHandler* out) {
1044   assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START);
1045   out->HandleMapBegin();
1046   tokenizer->Next();
1047   while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1048     if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1049       out->HandleError(
1050           Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos});
1051       return false;
1052     }
1053     if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1054       out->HandleError(tokenizer->Status());
1055       return false;
1056     }
1057     // Parse key.
1058     if (tokenizer->TokenTag() == CBORTokenTag::STRING8) {
1059       if (!ParseUTF8String(tokenizer, out))
1060         return false;
1061     } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) {
1062       ParseUTF16String(tokenizer, out);
1063     } else {
1064       out->HandleError(
1065           Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos});
1066       return false;
1067     }
1068     // Parse value.
1069     if (!ParseValue(stack_depth, tokenizer, out))
1070       return false;
1071   }
1072   out->HandleMapEnd();
1073   tokenizer->Next();
1074   return true;
1075 }
1076 }  // namespace
1077 
ParseCBOR(span<uint8_t> bytes,StreamingParserHandler * out)1078 void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out) {
1079   if (bytes.empty()) {
1080     out->HandleError(Status{Error::CBOR_NO_INPUT, 0});
1081     return;
1082   }
1083   if (bytes[0] != kInitialByteForEnvelope) {
1084     out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0});
1085     return;
1086   }
1087   CBORTokenizer tokenizer(bytes);
1088   if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1089     out->HandleError(tokenizer.Status());
1090     return;
1091   }
1092   // We checked for the envelope start byte above, so the tokenizer
1093   // must agree here, since it's not an error.
1094   assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE);
1095   tokenizer.EnterEnvelope();
1096   if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) {
1097     out->HandleError(
1098         Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos});
1099     return;
1100   }
1101   if (!ParseMap(/*stack_depth=*/1, &tokenizer, out))
1102     return;
1103   if (tokenizer.TokenTag() == CBORTokenTag::DONE)
1104     return;
1105   if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1106     out->HandleError(tokenizer.Status());
1107     return;
1108   }
1109   out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos});
1110 }
1111 
1112 // =============================================================================
1113 // cbor::AppendString8EntryToMap - for limited in-place editing of messages
1114 // =============================================================================
1115 
1116 template <typename C>
AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key,span<uint8_t> string8_value,C * cbor)1117 Status AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key,
1118                                        span<uint8_t> string8_value,
1119                                        C* cbor) {
1120   // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since
1121   // it could be a char (signed!). Instead, use bytes.
1122   span<uint8_t> bytes(reinterpret_cast<const uint8_t*>(cbor->data()),
1123                       cbor->size());
1124   CBORTokenizer tokenizer(bytes);
1125   if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE)
1126     return tokenizer.Status();
1127   if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE)
1128     return Status(Error::CBOR_INVALID_ENVELOPE, 0);
1129   size_t envelope_size = tokenizer.GetEnvelopeContents().size();
1130   size_t old_size = cbor->size();
1131   if (old_size != envelope_size + kEncodedEnvelopeHeaderSize)
1132     return Status(Error::CBOR_INVALID_ENVELOPE, 0);
1133   if (envelope_size == 0 ||
1134       (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart()))
1135     return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize);
1136   if (bytes[bytes.size() - 1] != EncodeStop())
1137     return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1);
1138   cbor->pop_back();
1139   EncodeString8(string8_key, cbor);
1140   EncodeString8(string8_value, cbor);
1141   cbor->push_back(EncodeStop());
1142   size_t new_envelope_size = envelope_size + (cbor->size() - old_size);
1143   if (new_envelope_size > std::numeric_limits<uint32_t>::max())
1144     return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0);
1145   size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t);
1146   uint8_t* out = reinterpret_cast<uint8_t*>(&cbor->at(size_pos));
1147   *(out++) = (new_envelope_size >> 24) & 0xff;
1148   *(out++) = (new_envelope_size >> 16) & 0xff;
1149   *(out++) = (new_envelope_size >> 8) & 0xff;
1150   *(out) = new_envelope_size & 0xff;
1151   return Status();
1152 }
AppendString8EntryToCBORMap(span<uint8_t> string8_key,span<uint8_t> string8_value,std::vector<uint8_t> * cbor)1153 Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1154                                    span<uint8_t> string8_value,
1155                                    std::vector<uint8_t>* cbor) {
1156   return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1157 }
AppendString8EntryToCBORMap(span<uint8_t> string8_key,span<uint8_t> string8_value,std::string * cbor)1158 Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1159                                    span<uint8_t> string8_value,
1160                                    std::string* cbor) {
1161   return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1162 }
1163 }  // namespace cbor
1164 
1165 namespace json {
1166 
1167 // =============================================================================
1168 // json::NewJSONEncoder - for encoding streaming parser events as JSON
1169 // =============================================================================
1170 
1171 namespace {
// Appends |value| to |out| as exactly four lowercase hexadecimal digits,
// starting with the most significant one.
template <typename C>
void PrintHex(uint16_t value, C* out) {
  static constexpr char kHexDigits[] = "0123456789abcdef";
  for (int shift = 12; shift >= 0; shift -= 4)
    out->push_back(kHexDigits[(value >> shift) & 0xf]);
}
1180 
1181 // In the writer below, we maintain a stack of State instances.
1182 // It is just enough to emit the appropriate delimiters and brackets
1183 // in JSON.
// The kind of JSON container that a State instance describes.
enum class Container {
  NONE,   // The top-level, initial state (not inside any container).
  MAP,    // Inside a JSON object.
  ARRAY,  // Inside a JSON array.
};
1192 class State {
1193  public:
State(Container container)1194   explicit State(Container container) : container_(container) {}
StartElement(std::vector<uint8_t> * out)1195   void StartElement(std::vector<uint8_t>* out) { StartElementTmpl(out); }
StartElement(std::string * out)1196   void StartElement(std::string* out) { StartElementTmpl(out); }
container() const1197   Container container() const { return container_; }
1198 
1199  private:
1200   template <typename C>
StartElementTmpl(C * out)1201   void StartElementTmpl(C* out) {
1202     assert(container_ != Container::NONE || size_ == 0);
1203     if (size_ != 0) {
1204       char delim = (!(size_ & 1) || container_ == Container::ARRAY) ? ',' : ':';
1205       out->push_back(delim);
1206     }
1207     ++size_;
1208   }
1209 
1210   Container container_ = Container::NONE;
1211   int size_ = 0;
1212 };
1213 
// Maps a 6 bit value (0..63) to its character in the base64 alphabet.
constexpr char kBase64Table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";
1217 
1218 template <typename C>
Base64Encode(const span<uint8_t> & in,C * out)1219 void Base64Encode(const span<uint8_t>& in, C* out) {
1220   // The following three cases are based on the tables in the example
1221   // section in https://en.wikipedia.org/wiki/Base64. We process three
1222   // input bytes at a time, emitting 4 output bytes at a time.
1223   size_t ii = 0;
1224 
1225   // While possible, process three input bytes.
1226   for (; ii + 3 <= in.size(); ii += 3) {
1227     uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2];
1228     out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1229     out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1230     out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1231     out->push_back(kBase64Table[twentyfour_bits & 0x3f]);
1232   }
1233   if (ii + 2 <= in.size()) {  // Process two input bytes.
1234     uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8);
1235     out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1236     out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1237     out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1238     out->push_back('=');  // Emit padding.
1239     return;
1240   }
1241   if (ii + 1 <= in.size()) {  // Process a single input byte.
1242     uint32_t twentyfour_bits = (in[ii] << 16);
1243     out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1244     out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1245     out->push_back('=');  // Emit padding.
1246     out->push_back('=');  // Emit padding.
1247   }
1248 }
1249 
1250 // Implements a handler for JSON parser events to emit a JSON string.
1251 template <typename C>
1252 class JSONEncoder : public StreamingParserHandler {
1253  public:
JSONEncoder(const Platform * platform,C * out,Status * status)1254   JSONEncoder(const Platform* platform, C* out, Status* status)
1255       : platform_(platform), out_(out), status_(status) {
1256     *status_ = Status();
1257     state_.emplace(Container::NONE);
1258   }
1259 
HandleMapBegin()1260   void HandleMapBegin() override {
1261     if (!status_->ok())
1262       return;
1263     assert(!state_.empty());
1264     state_.top().StartElement(out_);
1265     state_.emplace(Container::MAP);
1266     Emit('{');
1267   }
1268 
HandleMapEnd()1269   void HandleMapEnd() override {
1270     if (!status_->ok())
1271       return;
1272     assert(state_.size() >= 2 && state_.top().container() == Container::MAP);
1273     state_.pop();
1274     Emit('}');
1275   }
1276 
HandleArrayBegin()1277   void HandleArrayBegin() override {
1278     if (!status_->ok())
1279       return;
1280     state_.top().StartElement(out_);
1281     state_.emplace(Container::ARRAY);
1282     Emit('[');
1283   }
1284 
HandleArrayEnd()1285   void HandleArrayEnd() override {
1286     if (!status_->ok())
1287       return;
1288     assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY);
1289     state_.pop();
1290     Emit(']');
1291   }
1292 
HandleString16(span<uint16_t> chars)1293   void HandleString16(span<uint16_t> chars) override {
1294     if (!status_->ok())
1295       return;
1296     state_.top().StartElement(out_);
1297     Emit('"');
1298     for (const uint16_t ch : chars) {
1299       if (ch == '"') {
1300         Emit("\\\"");
1301       } else if (ch == '\\') {
1302         Emit("\\\\");
1303       } else if (ch == '\b') {
1304         Emit("\\b");
1305       } else if (ch == '\f') {
1306         Emit("\\f");
1307       } else if (ch == '\n') {
1308         Emit("\\n");
1309       } else if (ch == '\r') {
1310         Emit("\\r");
1311       } else if (ch == '\t') {
1312         Emit("\\t");
1313       } else if (ch >= 32 && ch <= 126) {
1314         Emit(ch);
1315       } else {
1316         Emit("\\u");
1317         PrintHex(ch, out_);
1318       }
1319     }
1320     Emit('"');
1321   }
1322 
HandleString8(span<uint8_t> chars)1323   void HandleString8(span<uint8_t> chars) override {
1324     if (!status_->ok())
1325       return;
1326     state_.top().StartElement(out_);
1327     Emit('"');
1328     for (size_t ii = 0; ii < chars.size(); ++ii) {
1329       uint8_t c = chars[ii];
1330       if (c == '"') {
1331         Emit("\\\"");
1332       } else if (c == '\\') {
1333         Emit("\\\\");
1334       } else if (c == '\b') {
1335         Emit("\\b");
1336       } else if (c == '\f') {
1337         Emit("\\f");
1338       } else if (c == '\n') {
1339         Emit("\\n");
1340       } else if (c == '\r') {
1341         Emit("\\r");
1342       } else if (c == '\t') {
1343         Emit("\\t");
1344       } else if (c >= 32 && c <= 126) {
1345         Emit(c);
1346       } else if (c < 32) {
1347         Emit("\\u");
1348         PrintHex(static_cast<uint16_t>(c), out_);
1349       } else {
1350         // Inspect the leading byte to figure out how long the utf8
1351         // byte sequence is; while doing this initialize |codepoint|
1352         // with the first few bits.
1353         // See table in: https://en.wikipedia.org/wiki/UTF-8
1354         // byte one is 110x xxxx -> 2 byte utf8 sequence
1355         // byte one is 1110 xxxx -> 3 byte utf8 sequence
1356         // byte one is 1111 0xxx -> 4 byte utf8 sequence
1357         uint32_t codepoint;
1358         int num_bytes_left;
1359         if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
1360           num_bytes_left = 1;
1361           codepoint = c & 0x1f;
1362         } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
1363           num_bytes_left = 2;
1364           codepoint = c & 0x0f;
1365         } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
1366           codepoint = c & 0x07;
1367           num_bytes_left = 3;
1368         } else {
1369           continue;  // invalid leading byte
1370         }
1371 
1372         // If we have enough bytes in our input, decode the remaining ones
1373         // belonging to this Unicode character into |codepoint|.
1374         if (ii + num_bytes_left > chars.size())
1375           continue;
1376         while (num_bytes_left > 0) {
1377           c = chars[++ii];
1378           --num_bytes_left;
1379           // Check the next byte is a continuation byte, that is 10xx xxxx.
1380           if ((c & 0xc0) != 0x80)
1381             continue;
1382           codepoint = (codepoint << 6) | (c & 0x3f);
1383         }
1384 
1385         // Disallow overlong encodings for ascii characters, as these
1386         // would include " and other characters significant to JSON
1387         // string termination / control.
1388         if (codepoint < 0x7f)
1389           continue;
1390         // Invalid in UTF8, and can't be represented in UTF16 anyway.
1391         if (codepoint > 0x10ffff)
1392           continue;
1393 
1394         // So, now we transcode to UTF16,
1395         // using the math described at https://en.wikipedia.org/wiki/UTF-16,
1396         // for either one or two 16 bit characters.
1397         if (codepoint < 0xffff) {
1398           Emit("\\u");
1399           PrintHex(static_cast<uint16_t>(codepoint), out_);
1400           continue;
1401         }
1402         codepoint -= 0x10000;
1403         // high surrogate
1404         Emit("\\u");
1405         PrintHex(static_cast<uint16_t>((codepoint >> 10) + 0xd800), out_);
1406         // low surrogate
1407         Emit("\\u");
1408         PrintHex(static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00), out_);
1409       }
1410     }
1411     Emit('"');
1412   }
1413 
HandleBinary(span<uint8_t> bytes)1414   void HandleBinary(span<uint8_t> bytes) override {
1415     if (!status_->ok())
1416       return;
1417     state_.top().StartElement(out_);
1418     Emit('"');
1419     Base64Encode(bytes, out_);
1420     Emit('"');
1421   }
1422 
HandleDouble(double value)1423   void HandleDouble(double value) override {
1424     if (!status_->ok())
1425       return;
1426     state_.top().StartElement(out_);
1427     // JSON cannot represent NaN or Infinity. So, for compatibility,
1428     // we behave like the JSON object in web browsers: emit 'null'.
1429     if (!std::isfinite(value)) {
1430       Emit("null");
1431       return;
1432     }
1433     std::unique_ptr<char[]> str_value = platform_->DToStr(value);
1434 
1435     // DToStr may fail to emit a 0 before the decimal dot. E.g. this is
1436     // the case in base::NumberToString in Chromium (which is based on
1437     // dmg_fp). So, much like
1438     // https://cs.chromium.org/chromium/src/base/json/json_writer.cc
1439     // we probe for this and emit the leading 0 anyway if necessary.
1440     const char* chars = str_value.get();
1441     if (chars[0] == '.') {
1442       Emit('0');
1443     } else if (chars[0] == '-' && chars[1] == '.') {
1444       Emit("-0");
1445       ++chars;
1446     }
1447     Emit(chars);
1448   }
1449 
HandleInt32(int32_t value)1450   void HandleInt32(int32_t value) override {
1451     if (!status_->ok())
1452       return;
1453     state_.top().StartElement(out_);
1454     Emit(std::to_string(value));
1455   }
1456 
HandleBool(bool value)1457   void HandleBool(bool value) override {
1458     if (!status_->ok())
1459       return;
1460     state_.top().StartElement(out_);
1461     Emit(value ? "true" : "false");
1462   }
1463 
HandleNull()1464   void HandleNull() override {
1465     if (!status_->ok())
1466       return;
1467     state_.top().StartElement(out_);
1468     Emit("null");
1469   }
1470 
HandleError(Status error)1471   void HandleError(Status error) override {
1472     assert(!error.ok());
1473     *status_ = error;
1474     out_->clear();
1475   }
1476 
1477  private:
Emit(char c)1478   void Emit(char c) { out_->push_back(c); }
Emit(const char * str)1479   void Emit(const char* str) {
1480     out_->insert(out_->end(), str, str + strlen(str));
1481   }
Emit(const std::string & str)1482   void Emit(const std::string& str) {
1483     out_->insert(out_->end(), str.begin(), str.end());
1484   }
1485 
1486   const Platform* platform_;
1487   C* out_;
1488   Status* status_;
1489   std::stack<State> state_;
1490 };
1491 }  // namespace
1492 
NewJSONEncoder(const Platform * platform,std::vector<uint8_t> * out,Status * status)1493 std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
1494     const Platform* platform,
1495     std::vector<uint8_t>* out,
1496     Status* status) {
1497   return std::unique_ptr<StreamingParserHandler>(
1498       new JSONEncoder<std::vector<uint8_t>>(platform, out, status));
1499 }
NewJSONEncoder(const Platform * platform,std::string * out,Status * status)1500 std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
1501                                                        std::string* out,
1502                                                        Status* status) {
1503   return std::unique_ptr<StreamingParserHandler>(
1504       new JSONEncoder<std::string>(platform, out, status));
1505 }
1506 
1507 // =============================================================================
1508 // json::ParseJSON - for receiving streaming parser events for JSON.
1509 // =============================================================================
1510 
1511 namespace {
// Deepest value nesting ParseValue accepts; anything deeper is reported as
// JSON_PARSER_STACK_LIMIT_EXCEEDED.
constexpr int kStackLimit = 300;

// Lexical token kinds produced by JsonParser::ParseToken.
enum Token {
  ObjectBegin,          // '{'
  ObjectEnd,            // '}'
  ArrayBegin,           // '['
  ArrayEnd,             // ']'
  StringLiteral,        // "..."
  Number,               // [minus] int [frac] [exp]
  BoolTrue,             // true
  BoolFalse,            // false
  NullToken,            // null
  ListSeparator,        // ','
  ObjectPairSeparator,  // ':'
  InvalidToken,         // Input that does not form any token.
  NoInput               // Only whitespace / comments remained.
};

// Spellings of the three JSON literal names, matched by ParseConstToken.
constexpr const char* kNullString = "null";
constexpr const char* kTrueString = "true";
constexpr const char* kFalseString = "false";
1533 
1534 template <typename Char>
1535 class JsonParser {
1536  public:
JsonParser(const Platform * platform,StreamingParserHandler * handler)1537   JsonParser(const Platform* platform, StreamingParserHandler* handler)
1538       : platform_(platform), handler_(handler) {}
1539 
Parse(const Char * start,size_t length)1540   void Parse(const Char* start, size_t length) {
1541     start_pos_ = start;
1542     const Char* end = start + length;
1543     const Char* tokenEnd = nullptr;
1544     ParseValue(start, end, &tokenEnd, 0);
1545     if (error_)
1546       return;
1547     if (tokenEnd != end) {
1548       HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd);
1549     }
1550   }
1551 
1552  private:
CharsToDouble(const uint16_t * chars,size_t length,double * result)1553   bool CharsToDouble(const uint16_t* chars, size_t length, double* result) {
1554     std::string buffer;
1555     buffer.reserve(length + 1);
1556     for (size_t ii = 0; ii < length; ++ii) {
1557       bool is_ascii = !(chars[ii] & ~0x7F);
1558       if (!is_ascii)
1559         return false;
1560       buffer.push_back(static_cast<char>(chars[ii]));
1561     }
1562     return platform_->StrToD(buffer.c_str(), result);
1563   }
1564 
CharsToDouble(const uint8_t * chars,size_t length,double * result)1565   bool CharsToDouble(const uint8_t* chars, size_t length, double* result) {
1566     std::string buffer(reinterpret_cast<const char*>(chars), length);
1567     return platform_->StrToD(buffer.c_str(), result);
1568   }
1569 
// Checks whether the input at |start| begins with the literal |token|.
//
// |token| is \0 terminated; it's one of the constants at top of the file.
// On success, stores the position just past the literal in |*token_end| and
// returns true. Returns false, leaving |*token_end| untouched, if the input
// ends early or any character differs. Only the prefix is examined: whatever
// follows the literal is left for the caller to tokenize.
//
// Every character is compared before either cursor moves on. The previous
// loop advanced |token| even on a failed comparison, so a mismatch in the
// final character (e.g. "nulx" against "null") was accepted.
static bool ParseConstToken(const Char* start,
                            const Char* end,
                            const Char** token_end,
                            const char* token) {
  for (; *token != '\0'; ++token, ++start) {
    if (start == end || *start != *token)
      return false;
  }
  *token_end = start;
  return true;
}
1582 
// Consumes a run of one or more decimal digits beginning at |start|.
// On success stores the position after the last digit in |*token_end|.
// Fails (without writing |*token_end|) when there is no digit at all, or when
// |allow_leading_zeros| is false and a multi-digit run begins with '0'.
static bool ReadInt(const Char* start,
                    const Char* end,
                    const Char** token_end,
                    bool allow_leading_zeros) {
  const Char* cursor = start;
  while (cursor < end && *cursor >= '0' && *cursor <= '9')
    ++cursor;

  const auto digit_count = cursor - start;
  if (digit_count == 0)
    return false;
  // A lone "0" is fine; "012" is not unless leading zeros are allowed.
  if (digit_count > 1 && *start == '0' && !allow_leading_zeros)
    return false;

  *token_end = cursor;
  return true;
}
1602 
ParseNumberToken(const Char * start,const Char * end,const Char ** token_end)1603   static bool ParseNumberToken(const Char* start,
1604                                const Char* end,
1605                                const Char** token_end) {
1606     // We just grab the number here. We validate the size in DecodeNumber.
1607     // According to RFC4627, a valid number is: [minus] int [frac] [exp]
1608     if (start == end)
1609       return false;
1610     Char c = *start;
1611     if ('-' == c)
1612       ++start;
1613 
1614     if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false))
1615       return false;
1616     if (start == end) {
1617       *token_end = start;
1618       return true;
1619     }
1620 
1621     // Optional fraction part
1622     c = *start;
1623     if ('.' == c) {
1624       ++start;
1625       if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1626         return false;
1627       if (start == end) {
1628         *token_end = start;
1629         return true;
1630       }
1631       c = *start;
1632     }
1633 
1634     // Optional exponent part
1635     if ('e' == c || 'E' == c) {
1636       ++start;
1637       if (start == end)
1638         return false;
1639       c = *start;
1640       if ('-' == c || '+' == c) {
1641         ++start;
1642         if (start == end)
1643           return false;
1644       }
1645       if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1646         return false;
1647     }
1648 
1649     *token_end = start;
1650     return true;
1651   }
1652 
// Requires exactly |digits| hexadecimal characters at |start|.
// On success stores the position after them in |*token_end|; fails when the
// input is too short or any of the characters is not 0-9, a-f or A-F.
static bool ReadHexDigits(const Char* start,
                          const Char* end,
                          const Char** token_end,
                          int digits) {
  if (end - start < digits)
    return false;
  for (int remaining = digits; remaining > 0; --remaining, ++start) {
    const Char c = *start;
    const bool is_decimal = '0' <= c && c <= '9';
    const bool is_lower_hex = 'a' <= c && c <= 'f';
    const bool is_upper_hex = 'A' <= c && c <= 'F';
    if (!is_decimal && !is_lower_hex && !is_upper_hex)
      return false;
  }
  *token_end = start;
  return true;
}
1668 
ParseStringToken(const Char * start,const Char * end,const Char ** token_end)1669   static bool ParseStringToken(const Char* start,
1670                                const Char* end,
1671                                const Char** token_end) {
1672     while (start < end) {
1673       Char c = *start++;
1674       if ('\\' == c) {
1675         if (start == end)
1676           return false;
1677         c = *start++;
1678         // Make sure the escaped char is valid.
1679         switch (c) {
1680           case 'x':
1681             if (!ReadHexDigits(start, end, &start, 2))
1682               return false;
1683             break;
1684           case 'u':
1685             if (!ReadHexDigits(start, end, &start, 4))
1686               return false;
1687             break;
1688           case '\\':
1689           case '/':
1690           case 'b':
1691           case 'f':
1692           case 'n':
1693           case 'r':
1694           case 't':
1695           case 'v':
1696           case '"':
1697             break;
1698           default:
1699             return false;
1700         }
1701       } else if ('"' == c) {
1702         *token_end = start;
1703         return true;
1704       }
1705     }
1706     return false;
1707   }
1708 
// Skips one comment starting at |start|, which must begin with "//" or "/*".
// A line comment ends after the first '\n' or '\r', or at end of input. A
// block comment must be closed by "*/" before the input ends.
// On success |*comment_end| points just past the comment.
static bool SkipComment(const Char* start,
                        const Char* end,
                        const Char** comment_end) {
  // Need the '/' plus at least the character that selects the comment kind.
  if (end - start < 2 || start[0] != '/')
    return false;

  const Char kind = start[1];
  const Char* const body = start + 2;

  if (kind == '/') {
    // Single line comment, read to newline.
    const Char* pos = body;
    while (pos < end && *pos != '\n' && *pos != '\r')
      ++pos;
    // Comment reaching end-of-input is fine; otherwise step over the newline.
    *comment_end = (pos < end) ? pos + 1 : end;
    return true;
  }

  if (kind != '*')
    return false;

  // Block comment, read until end marker. The '*' that opened the comment
  // cannot double as the one that closes it, hence the |pos != body| test.
  for (const Char* pos = body; pos < end; ++pos) {
    if (pos != body && *(pos - 1) == '*' && *pos == '/') {
      *comment_end = pos + 1;
      return true;
    }
  }
  // Block comment must close before end-of-input.
  return false;
}
1747 
// True for the whitespace characters the parser skips between tokens:
// space, tab, line feed, carriage return, vertical tab (\v) and
// form feed / page break (\f).
static bool IsSpaceOrNewLine(Char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\v':
    case '\f':
      return true;
    default:
      return false;
  }
}
1753 
SkipWhitespaceAndComments(const Char * start,const Char * end,const Char ** whitespace_end)1754   static void SkipWhitespaceAndComments(const Char* start,
1755                                         const Char* end,
1756                                         const Char** whitespace_end) {
1757     while (start < end) {
1758       if (IsSpaceOrNewLine(*start)) {
1759         ++start;
1760       } else if (*start == '/') {
1761         const Char* comment_end = nullptr;
1762         if (!SkipComment(start, end, &comment_end))
1763           break;
1764         start = comment_end;
1765       } else {
1766         break;
1767       }
1768     }
1769     *whitespace_end = start;
1770   }
1771 
ParseToken(const Char * start,const Char * end,const Char ** tokenStart,const Char ** token_end)1772   static Token ParseToken(const Char* start,
1773                           const Char* end,
1774                           const Char** tokenStart,
1775                           const Char** token_end) {
1776     SkipWhitespaceAndComments(start, end, tokenStart);
1777     start = *tokenStart;
1778 
1779     if (start == end)
1780       return NoInput;
1781 
1782     switch (*start) {
1783       case 'n':
1784         if (ParseConstToken(start, end, token_end, kNullString))
1785           return NullToken;
1786         break;
1787       case 't':
1788         if (ParseConstToken(start, end, token_end, kTrueString))
1789           return BoolTrue;
1790         break;
1791       case 'f':
1792         if (ParseConstToken(start, end, token_end, kFalseString))
1793           return BoolFalse;
1794         break;
1795       case '[':
1796         *token_end = start + 1;
1797         return ArrayBegin;
1798       case ']':
1799         *token_end = start + 1;
1800         return ArrayEnd;
1801       case ',':
1802         *token_end = start + 1;
1803         return ListSeparator;
1804       case '{':
1805         *token_end = start + 1;
1806         return ObjectBegin;
1807       case '}':
1808         *token_end = start + 1;
1809         return ObjectEnd;
1810       case ':':
1811         *token_end = start + 1;
1812         return ObjectPairSeparator;
1813       case '0':
1814       case '1':
1815       case '2':
1816       case '3':
1817       case '4':
1818       case '5':
1819       case '6':
1820       case '7':
1821       case '8':
1822       case '9':
1823       case '-':
1824         if (ParseNumberToken(start, end, token_end))
1825           return Number;
1826         break;
1827       case '"':
1828         if (ParseStringToken(start + 1, end, token_end))
1829           return StringLiteral;
1830         break;
1831     }
1832     return InvalidToken;
1833   }
1834 
// Returns the numeric value (0-15) of the hexadecimal digit |c|.
// |c| must be one of 0-9, a-f, A-F; anything else trips the assert in debug
// builds and yields 0 when asserts are compiled out.
static int HexToInt(Char c) {
  if ('0' <= c && c <= '9')
    return c - '0';
  if ('A' <= c && c <= 'F')
    return c - 'A' + 10;
  if ('a' <= c && c <= 'f')
    return c - 'a' + 10;
  assert(false);  // Unreachable.
  return 0;
}

// Decodes the body of a JSON string literal, [start, end) without the
// surrounding quotes, appending UTF-16 code units to |output|.
//
// * When |Char| is a byte, the input is UTF-8: multi-byte sequences are
//   transcoded to one UTF-16 unit or a surrogate pair.
// * When |Char| is 16 bits wide, units are copied through unchanged.
// * Backslash escapes are resolved: \" \/ \\ \b \f \n \r \t \v and \uXXXX.
//   \x is not supported, even though the tokenizer lets it through.
//
// Returns false on malformed UTF-8, an unsupported or truncated escape, or
// |start| > |end|. |output| may hold partial results when false is returned.
static bool DecodeString(const Char* start,
                         const Char* end,
                         std::vector<uint16_t>* output) {
  if (start == end)
    return true;
  if (start > end)
    return false;
  output->reserve(end - start);
  while (start < end) {
    uint16_t c = *start++;
    // If the |Char| we're dealing with is really a byte, then
    // we have utf8 here, and we need to check for multibyte characters
    // and transcode them to utf16 (either one or two utf16 chars).
    // Only bytes above 0x7f start a multi-byte sequence; 0x7f itself (DEL) is
    // plain ASCII and is handled like any other single-byte character.
    if (sizeof(Char) == sizeof(uint8_t) && c > 0x7f) {
      // Inspect the leading byte to figure out how long the utf8
      // byte sequence is; while doing this initialize |codepoint|
      // with the first few bits.
      // See table in: https://en.wikipedia.org/wiki/UTF-8
      // byte one is 110x xxxx -> 2 byte utf8 sequence
      // byte one is 1110 xxxx -> 3 byte utf8 sequence
      // byte one is 1111 0xxx -> 4 byte utf8 sequence
      uint32_t codepoint;
      int num_bytes_left;
      if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
        num_bytes_left = 1;
        codepoint = c & 0x1f;
      } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
        num_bytes_left = 2;
        codepoint = c & 0x0f;
      } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
        num_bytes_left = 3;
        codepoint = c & 0x07;
      } else {
        return false;  // invalid leading byte
      }

      // If we have enough bytes in our input, decode the remaining ones
      // belonging to this Unicode character into |codepoint|. Compare
      // distances rather than forming a pointer that may lie beyond |end|.
      if (end - start < num_bytes_left)
        return false;
      while (num_bytes_left > 0) {
        c = *start++;
        --num_bytes_left;
        // Check the next byte is a continuation byte, that is 10xx xxxx.
        if ((c & 0xc0) != 0x80)
          return false;
        codepoint = (codepoint << 6) | (c & 0x3f);
      }

      // Disallow overlong encodings for ascii characters (0x00-0x7f
      // inclusive), as these would include " and other characters
      // significant to JSON string termination / control.
      if (codepoint <= 0x7f)
        return false;
      // Invalid in UTF8, and can't be represented in UTF16 anyway.
      if (codepoint > 0x10ffff)
        return false;

      // So, now we transcode to UTF16,
      // using the math described at https://en.wikipedia.org/wiki/UTF-16,
      // for either one or two 16 bit characters.
      // Everything up to and including U+FFFF fits in a single unit; only
      // code points from U+10000 upwards need a surrogate pair.
      if (codepoint <= 0xffff) {
        output->push_back(static_cast<uint16_t>(codepoint));
        continue;
      }
      codepoint -= 0x10000;
      output->push_back(
          static_cast<uint16_t>((codepoint >> 10) + 0xd800));  // high surrogate
      output->push_back(
          static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00));  // low surrogate
      continue;
    }
    if ('\\' != c) {
      output->push_back(c);
      continue;
    }

    // Escape sequence: a backslash must be followed by at least one char.
    if (start == end)
      return false;
    c = *start++;

    switch (c) {
      case '"':
      case '/':
      case '\\':
        break;
      case 'b':
        c = '\b';
        break;
      case 'f':
        c = '\f';
        break;
      case 'n':
        c = '\n';
        break;
      case 'r':
        c = '\r';
        break;
      case 't':
        c = '\t';
        break;
      case 'v':
        c = '\v';
        break;
      case 'u':
        // The tokenizer has already validated the four hex digits; the length
        // check keeps this function safe on its own as well.
        if (end - start < 4)
          return false;
        c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) +
            (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3));
        start += 4;
        break;
      default:
        // Includes 'x': \x is not supported.
        return false;
    }
    output->push_back(c);
  }
  return true;
}
1964 
ParseValue(const Char * start,const Char * end,const Char ** value_token_end,int depth)1965   void ParseValue(const Char* start,
1966                   const Char* end,
1967                   const Char** value_token_end,
1968                   int depth) {
1969     if (depth > kStackLimit) {
1970       HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start);
1971       return;
1972     }
1973     const Char* token_start = nullptr;
1974     const Char* token_end = nullptr;
1975     Token token = ParseToken(start, end, &token_start, &token_end);
1976     switch (token) {
1977       case NoInput:
1978         HandleError(Error::JSON_PARSER_NO_INPUT, token_start);
1979         return;
1980       case InvalidToken:
1981         HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start);
1982         return;
1983       case NullToken:
1984         handler_->HandleNull();
1985         break;
1986       case BoolTrue:
1987         handler_->HandleBool(true);
1988         break;
1989       case BoolFalse:
1990         handler_->HandleBool(false);
1991         break;
1992       case Number: {
1993         double value;
1994         if (!CharsToDouble(token_start, token_end - token_start, &value)) {
1995           HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start);
1996           return;
1997         }
1998         if (value >= std::numeric_limits<int32_t>::min() &&
1999             value <= std::numeric_limits<int32_t>::max() &&
2000             static_cast<int32_t>(value) == value)
2001           handler_->HandleInt32(static_cast<int32_t>(value));
2002         else
2003           handler_->HandleDouble(value);
2004         break;
2005       }
2006       case StringLiteral: {
2007         std::vector<uint16_t> value;
2008         bool ok = DecodeString(token_start + 1, token_end - 1, &value);
2009         if (!ok) {
2010           HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2011           return;
2012         }
2013         handler_->HandleString16(span<uint16_t>(value.data(), value.size()));
2014         break;
2015       }
2016       case ArrayBegin: {
2017         handler_->HandleArrayBegin();
2018         start = token_end;
2019         token = ParseToken(start, end, &token_start, &token_end);
2020         while (token != ArrayEnd) {
2021           ParseValue(start, end, &token_end, depth + 1);
2022           if (error_)
2023             return;
2024 
2025           // After a list value, we expect a comma or the end of the list.
2026           start = token_end;
2027           token = ParseToken(start, end, &token_start, &token_end);
2028           if (token == ListSeparator) {
2029             start = token_end;
2030             token = ParseToken(start, end, &token_start, &token_end);
2031             if (token == ArrayEnd) {
2032               HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start);
2033               return;
2034             }
2035           } else if (token != ArrayEnd) {
2036             // Unexpected value after list value. Bail out.
2037             HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED,
2038                         token_start);
2039             return;
2040           }
2041         }
2042         handler_->HandleArrayEnd();
2043         break;
2044       }
2045       case ObjectBegin: {
2046         handler_->HandleMapBegin();
2047         start = token_end;
2048         token = ParseToken(start, end, &token_start, &token_end);
2049         while (token != ObjectEnd) {
2050           if (token != StringLiteral) {
2051             HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED,
2052                         token_start);
2053             return;
2054           }
2055           std::vector<uint16_t> key;
2056           if (!DecodeString(token_start + 1, token_end - 1, &key)) {
2057             HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2058             return;
2059           }
2060           handler_->HandleString16(span<uint16_t>(key.data(), key.size()));
2061           start = token_end;
2062 
2063           token = ParseToken(start, end, &token_start, &token_end);
2064           if (token != ObjectPairSeparator) {
2065             HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start);
2066             return;
2067           }
2068           start = token_end;
2069 
2070           ParseValue(start, end, &token_end, depth + 1);
2071           if (error_)
2072             return;
2073           start = token_end;
2074 
2075           // After a key/value pair, we expect a comma or the end of the
2076           // object.
2077           token = ParseToken(start, end, &token_start, &token_end);
2078           if (token == ListSeparator) {
2079             start = token_end;
2080             token = ParseToken(start, end, &token_start, &token_end);
2081             if (token == ObjectEnd) {
2082               HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start);
2083               return;
2084             }
2085           } else if (token != ObjectEnd) {
2086             // Unexpected value after last object value. Bail out.
2087             HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED,
2088                         token_start);
2089             return;
2090           }
2091         }
2092         handler_->HandleMapEnd();
2093         break;
2094       }
2095 
2096       default:
2097         // We got a token that's not a value.
2098         HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start);
2099         return;
2100     }
2101 
2102     SkipWhitespaceAndComments(token_end, end, value_token_end);
2103   }
2104 
HandleError(Error error,const Char * pos)2105   void HandleError(Error error, const Char* pos) {
2106     assert(error != Error::OK);
2107     if (!error_) {
2108       handler_->HandleError(
2109           Status{error, static_cast<size_t>(pos - start_pos_)});
2110       error_ = true;
2111     }
2112   }
2113 
2114   const Char* start_pos_ = nullptr;
2115   bool error_ = false;
2116   const Platform* platform_;
2117   StreamingParserHandler* handler_;
2118 };
2119 }  // namespace
2120 
ParseJSON(const Platform & platform,span<uint8_t> chars,StreamingParserHandler * handler)2121 void ParseJSON(const Platform& platform,
2122                span<uint8_t> chars,
2123                StreamingParserHandler* handler) {
2124   JsonParser<uint8_t> parser(&platform, handler);
2125   parser.Parse(chars.data(), chars.size());
2126 }
2127 
ParseJSON(const Platform & platform,span<uint16_t> chars,StreamingParserHandler * handler)2128 void ParseJSON(const Platform& platform,
2129                span<uint16_t> chars,
2130                StreamingParserHandler* handler) {
2131   JsonParser<uint16_t> parser(&platform, handler);
2132   parser.Parse(chars.data(), chars.size());
2133 }
2134 
2135 // =============================================================================
2136 // json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
2137 // =============================================================================
2138 template <typename C>
ConvertCBORToJSONTmpl(const Platform & platform,span<uint8_t> cbor,C * json)2139 Status ConvertCBORToJSONTmpl(const Platform& platform,
2140                              span<uint8_t> cbor,
2141                              C* json) {
2142   Status status;
2143   std::unique_ptr<StreamingParserHandler> json_writer =
2144       NewJSONEncoder(&platform, json, &status);
2145   cbor::ParseCBOR(cbor, json_writer.get());
2146   return status;
2147 }
2148 
ConvertCBORToJSON(const Platform & platform,span<uint8_t> cbor,std::vector<uint8_t> * json)2149 Status ConvertCBORToJSON(const Platform& platform,
2150                          span<uint8_t> cbor,
2151                          std::vector<uint8_t>* json) {
2152   return ConvertCBORToJSONTmpl(platform, cbor, json);
2153 }
ConvertCBORToJSON(const Platform & platform,span<uint8_t> cbor,std::string * json)2154 Status ConvertCBORToJSON(const Platform& platform,
2155                          span<uint8_t> cbor,
2156                          std::string* json) {
2157   return ConvertCBORToJSONTmpl(platform, cbor, json);
2158 }
2159 
2160 template <typename T, typename C>
ConvertJSONToCBORTmpl(const Platform & platform,span<T> json,C * cbor)2161 Status ConvertJSONToCBORTmpl(const Platform& platform, span<T> json, C* cbor) {
2162   Status status;
2163   std::unique_ptr<StreamingParserHandler> encoder =
2164       cbor::NewCBOREncoder(cbor, &status);
2165   ParseJSON(platform, json, encoder.get());
2166   return status;
2167 }
ConvertJSONToCBOR(const Platform & platform,span<uint8_t> json,std::string * cbor)2168 Status ConvertJSONToCBOR(const Platform& platform,
2169                          span<uint8_t> json,
2170                          std::string* cbor) {
2171   return ConvertJSONToCBORTmpl(platform, json, cbor);
2172 }
ConvertJSONToCBOR(const Platform & platform,span<uint16_t> json,std::string * cbor)2173 Status ConvertJSONToCBOR(const Platform& platform,
2174                          span<uint16_t> json,
2175                          std::string* cbor) {
2176   return ConvertJSONToCBORTmpl(platform, json, cbor);
2177 }
ConvertJSONToCBOR(const Platform & platform,span<uint8_t> json,std::vector<uint8_t> * cbor)2178 Status ConvertJSONToCBOR(const Platform& platform,
2179                          span<uint8_t> json,
2180                          std::vector<uint8_t>* cbor) {
2181   return ConvertJSONToCBORTmpl(platform, json, cbor);
2182 }
ConvertJSONToCBOR(const Platform & platform,span<uint16_t> json,std::vector<uint8_t> * cbor)2183 Status ConvertJSONToCBOR(const Platform& platform,
2184                          span<uint16_t> json,
2185                          std::vector<uint8_t>* cbor) {
2186   return ConvertJSONToCBORTmpl(platform, json, cbor);
2187 }
2188 }  // namespace json
2189 }  // namespace v8_inspector_protocol_encoding
2190