• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1{# This template is generated by gen_cbor_templates.py. #}
2// Generated by lib/encoding_cpp.template.
3
4// Copyright 2019 The Chromium Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the LICENSE file.
7
8
9#include <algorithm>
10#include <cassert>
11#include <cmath>
12#include <cstring>
13#include <limits>
14#include <stack>
15
16{% for namespace in config.protocol.namespace %}
17namespace {{namespace}} {
18{% endfor %}
19
20// ===== encoding/encoding.cc =====
21
22// =============================================================================
23// Status and Error codes
24// =============================================================================
25
26std::string Status::ToASCIIString() const {
27  switch (error) {
28    case Error::OK:
29      return "OK";
30    case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS:
31      return ToASCIIString("JSON: unprocessed input remains");
32    case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED:
33      return ToASCIIString("JSON: stack limit exceeded");
34    case Error::JSON_PARSER_NO_INPUT:
35      return ToASCIIString("JSON: no input");
36    case Error::JSON_PARSER_INVALID_TOKEN:
37      return ToASCIIString("JSON: invalid token");
38    case Error::JSON_PARSER_INVALID_NUMBER:
39      return ToASCIIString("JSON: invalid number");
40    case Error::JSON_PARSER_INVALID_STRING:
41      return ToASCIIString("JSON: invalid string");
42    case Error::JSON_PARSER_UNEXPECTED_ARRAY_END:
43      return ToASCIIString("JSON: unexpected array end");
44    case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED:
45      return ToASCIIString("JSON: comma or array end expected");
46    case Error::JSON_PARSER_STRING_LITERAL_EXPECTED:
47      return ToASCIIString("JSON: string literal expected");
48    case Error::JSON_PARSER_COLON_EXPECTED:
49      return ToASCIIString("JSON: colon expected");
50    case Error::JSON_PARSER_UNEXPECTED_MAP_END:
51      return ToASCIIString("JSON: unexpected map end");
52    case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED:
53      return ToASCIIString("JSON: comma or map end expected");
54    case Error::JSON_PARSER_VALUE_EXPECTED:
55      return ToASCIIString("JSON: value expected");
56
57    case Error::CBOR_INVALID_INT32:
58      return ToASCIIString("CBOR: invalid int32");
59    case Error::CBOR_INVALID_DOUBLE:
60      return ToASCIIString("CBOR: invalid double");
61    case Error::CBOR_INVALID_ENVELOPE:
62      return ToASCIIString("CBOR: invalid envelope");
63    case Error::CBOR_INVALID_STRING8:
64      return ToASCIIString("CBOR: invalid string8");
65    case Error::CBOR_INVALID_STRING16:
66      return ToASCIIString("CBOR: invalid string16");
67    case Error::CBOR_INVALID_BINARY:
68      return ToASCIIString("CBOR: invalid binary");
69    case Error::CBOR_UNSUPPORTED_VALUE:
70      return ToASCIIString("CBOR: unsupported value");
71    case Error::CBOR_NO_INPUT:
72      return ToASCIIString("CBOR: no input");
73    case Error::CBOR_INVALID_START_BYTE:
74      return ToASCIIString("CBOR: invalid start byte");
75    case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE:
76      return ToASCIIString("CBOR: unexpected eof expected value");
77    case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY:
78      return ToASCIIString("CBOR: unexpected eof in array");
79    case Error::CBOR_UNEXPECTED_EOF_IN_MAP:
80      return ToASCIIString("CBOR: unexpected eof in map");
81    case Error::CBOR_INVALID_MAP_KEY:
82      return ToASCIIString("CBOR: invalid map key");
83    case Error::CBOR_STACK_LIMIT_EXCEEDED:
84      return ToASCIIString("CBOR: stack limit exceeded");
85    case Error::CBOR_TRAILING_JUNK:
86      return ToASCIIString("CBOR: trailing junk");
87    case Error::CBOR_MAP_START_EXPECTED:
88      return ToASCIIString("CBOR: map start expected");
89    case Error::CBOR_MAP_STOP_EXPECTED:
90      return ToASCIIString("CBOR: map stop expected");
91    case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED:
92      return ToASCIIString("CBOR: envelope size limit exceeded");
93  }
94  // Some compilers can't figure out that we can't get here.
95  return "INVALID ERROR CODE";
96}
97
98std::string Status::ToASCIIString(const char* msg) const {
99  return std::string(msg) + " at position " + std::to_string(pos);
100}
101
102namespace cbor {
103namespace {
// Number of bits by which the "initial byte", once masked with
// |kMajorTypeMask|, is shifted right so that the major type ends up in the
// lowermost bits.
constexpr uint8_t kMajorTypeBitShift = 5;
// Selects the 5 low-order bits of the "initial byte"; these carry the
// additional information.
constexpr uint8_t kAdditionalInformationMask = 0x1F;
// Selects the 3 high-order bits of the "initial byte"; these carry the major
// type of the encoded value.
constexpr uint8_t kMajorTypeMask = 0xE0;
// Additional information: the integer is stored in the following byte.
constexpr uint8_t kAdditionalInformation1Byte = 24;
// Additional information: the integer is stored in the next 2 bytes.
constexpr uint8_t kAdditionalInformation2Bytes = 25;
// Additional information: the integer is stored in the next 4 bytes.
constexpr uint8_t kAdditionalInformation4Bytes = 26;
// Additional information: the integer is stored in the next 8 bytes.
constexpr uint8_t kAdditionalInformation8Bytes = 27;
122
123// Encodes the initial byte, consisting of the |type| in the first 3 bits
124// followed by 5 bits of |additional_info|.
125constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) {
126  return (static_cast<uint8_t>(type) << kMajorTypeBitShift) |
127         (additional_info & kAdditionalInformationMask);
128}
129
130// TAG 24 indicates that what follows is a byte string which is
131// encoded in CBOR format. We use this as a wrapper for
132// maps and arrays, allowing us to skip them, because the
133// byte string carries its size (byte length).
134// https://tools.ietf.org/html/rfc7049#section-2.4.4.1
135static constexpr uint8_t kInitialByteForEnvelope =
136    EncodeInitialByte(MajorType::TAG, 24);
137// The initial byte for a byte string with at most 2^32 bytes
138// of payload. This is used for envelope encoding, even if
139// the byte string is shorter.
140static constexpr uint8_t kInitialByteFor32BitLengthByteString =
141    EncodeInitialByte(MajorType::BYTE_STRING, 26);
142
143// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional
144// info = 31.
145static constexpr uint8_t kInitialByteIndefiniteLengthArray =
146    EncodeInitialByte(MajorType::ARRAY, 31);
147static constexpr uint8_t kInitialByteIndefiniteLengthMap =
148    EncodeInitialByte(MajorType::MAP, 31);
149// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite
150// length maps / arrays.
151static constexpr uint8_t kStopByte =
152    EncodeInitialByte(MajorType::SIMPLE_VALUE, 31);
153
154// See RFC 7049 Section 2.3, Table 2.
155static constexpr uint8_t kEncodedTrue =
156    EncodeInitialByte(MajorType::SIMPLE_VALUE, 21);
157static constexpr uint8_t kEncodedFalse =
158    EncodeInitialByte(MajorType::SIMPLE_VALUE, 20);
159static constexpr uint8_t kEncodedNull =
160    EncodeInitialByte(MajorType::SIMPLE_VALUE, 22);
161static constexpr uint8_t kInitialByteForDouble =
162    EncodeInitialByte(MajorType::SIMPLE_VALUE, 27);
163
164// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for
165// arbitrary binary data encoded as BYTE_STRING.
166static constexpr uint8_t kExpectedConversionToBase64Tag =
167    EncodeInitialByte(MajorType::TAG, 22);
168
// Appends the sizeof(T) bytes of |v| to |out| via push_back, most significant
// byte first.
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T, class C>
void WriteBytesMostSignificantByteFirst(T v, C* out) {
  // |bytes_left| counts down from sizeof(T); the byte emitted in each round
  // is the one that sits (bytes_left - 1) bytes above the least significant.
  for (size_t bytes_left = sizeof(T); bytes_left > 0; --bytes_left)
    out->push_back(0xff & (v >> ((bytes_left - 1) * 8)));
}
176
177// Extracts sizeof(T) bytes from |in| to extract a value of type T
178// (e.g. uint64_t, uint32_t, ...), most significant byte first.
179// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
180template <typename T>
181T ReadBytesMostSignificantByteFirst(span<uint8_t> in) {
182  assert(in.size() >= sizeof(T));
183  T result = 0;
184  for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes)
185    result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8);
186  return result;
187}
188}  // namespace
189
190namespace internals {
191// Reads the start of a token with definitive size from |bytes|.
192// |type| is the major type as specified in RFC 7049 Section 2.1.
193// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size
194// (e.g. for BYTE_STRING).
195// If successful, returns the number of bytes read. Otherwise returns -1.
196// TODO(johannes): change return type to size_t and use 0 for error.
197int8_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) {
198  if (bytes.empty())
199    return -1;
200  uint8_t initial_byte = bytes[0];
201  *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift);
202
203  uint8_t additional_information = initial_byte & kAdditionalInformationMask;
204  if (additional_information < 24) {
205    // Values 0-23 are encoded directly into the additional info of the
206    // initial byte.
207    *value = additional_information;
208    return 1;
209  }
210  if (additional_information == kAdditionalInformation1Byte) {
211    // Values 24-255 are encoded with one initial byte, followed by the value.
212    if (bytes.size() < 2)
213      return -1;
214    *value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes.subspan(1));
215    return 2;
216  }
217  if (additional_information == kAdditionalInformation2Bytes) {
218    // Values 256-65535: 1 initial byte + 2 bytes payload.
219    if (bytes.size() < 1 + sizeof(uint16_t))
220      return -1;
221    *value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes.subspan(1));
222    return 3;
223  }
224  if (additional_information == kAdditionalInformation4Bytes) {
225    // 32 bit uint: 1 initial byte + 4 bytes payload.
226    if (bytes.size() < 1 + sizeof(uint32_t))
227      return -1;
228    *value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes.subspan(1));
229    return 5;
230  }
231  if (additional_information == kAdditionalInformation8Bytes) {
232    // 64 bit uint: 1 initial byte + 8 bytes payload.
233    if (bytes.size() < 1 + sizeof(uint64_t))
234      return -1;
235    *value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes.subspan(1));
236    return 9;
237  }
238  return -1;
239}
240
241// Writes the start of a token with |type|. The |value| may indicate the size,
242// or it may be the payload if the value is an unsigned integer.
243template <typename C>
244void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) {
245  if (value < 24) {
246    // Values 0-23 are encoded directly into the additional info of the
247    // initial byte.
248    encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value));
249    return;
250  }
251  if (value <= std::numeric_limits<uint8_t>::max()) {
252    // Values 24-255 are encoded with one initial byte, followed by the value.
253    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte));
254    encoded->push_back(value);
255    return;
256  }
257  if (value <= std::numeric_limits<uint16_t>::max()) {
258    // Values 256-65535: 1 initial byte + 2 bytes payload.
259    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes));
260    WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded);
261    return;
262  }
263  if (value <= std::numeric_limits<uint32_t>::max()) {
264    // 32 bit uint: 1 initial byte + 4 bytes payload.
265    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes));
266    WriteBytesMostSignificantByteFirst<uint32_t>(static_cast<uint32_t>(value),
267                                                 encoded);
268    return;
269  }
270  // 64 bit uint: 1 initial byte + 8 bytes payload.
271  encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes));
272  WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded);
273}
274void WriteTokenStart(MajorType type,
275                     uint64_t value,
276                     std::vector<uint8_t>* encoded) {
277  WriteTokenStartTmpl(type, value, encoded);
278}
279void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) {
280  WriteTokenStartTmpl(type, value, encoded);
281}
282}  // namespace internals
283
284// =============================================================================
285// Detecting CBOR content
286// =============================================================================
287
288uint8_t InitialByteForEnvelope() {
289  return kInitialByteForEnvelope;
290}
291uint8_t InitialByteFor32BitLengthByteString() {
292  return kInitialByteFor32BitLengthByteString;
293}
294bool IsCBORMessage(span<uint8_t> msg) {
295  return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() &&
296         msg[1] == InitialByteFor32BitLengthByteString();
297}
298
299// =============================================================================
300// Encoding individual CBOR items
301// =============================================================================
302
303uint8_t EncodeTrue() {
304  return kEncodedTrue;
305}
306uint8_t EncodeFalse() {
307  return kEncodedFalse;
308}
309uint8_t EncodeNull() {
310  return kEncodedNull;
311}
312
313uint8_t EncodeIndefiniteLengthArrayStart() {
314  return kInitialByteIndefiniteLengthArray;
315}
316
317uint8_t EncodeIndefiniteLengthMapStart() {
318  return kInitialByteIndefiniteLengthMap;
319}
320
321uint8_t EncodeStop() {
322  return kStopByte;
323}
324
325template <typename C>
326void EncodeInt32Tmpl(int32_t value, C* out) {
327  if (value >= 0) {
328    internals::WriteTokenStart(MajorType::UNSIGNED, value, out);
329  } else {
330    uint64_t representation = static_cast<uint64_t>(-(value + 1));
331    internals::WriteTokenStart(MajorType::NEGATIVE, representation, out);
332  }
333}
334void EncodeInt32(int32_t value, std::vector<uint8_t>* out) {
335  EncodeInt32Tmpl(value, out);
336}
337void EncodeInt32(int32_t value, std::string* out) {
338  EncodeInt32Tmpl(value, out);
339}
340
341template <typename C>
342void EncodeString16Tmpl(span<uint16_t> in, C* out) {
343  uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
344  internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
345  // When emitting UTF16 characters, we always write the least significant byte
346  // first; this is because it's the native representation for X86.
347  // TODO(johannes): Implement a more efficient thing here later, e.g.
348  // casting *iff* the machine has this byte order.
349  // The wire format for UTF16 chars will probably remain the same
350  // (least significant byte first) since this way we can have
351  // golden files, unittests, etc. that port easily and universally.
352  // See also:
353  // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
354  for (const uint16_t two_bytes : in) {
355    out->push_back(two_bytes);
356    out->push_back(two_bytes >> 8);
357  }
358}
359void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) {
360  EncodeString16Tmpl(in, out);
361}
362void EncodeString16(span<uint16_t> in, std::string* out) {
363  EncodeString16Tmpl(in, out);
364}
365
366template <typename C>
367void EncodeString8Tmpl(span<uint8_t> in, C* out) {
368  internals::WriteTokenStart(MajorType::STRING,
369                             static_cast<uint64_t>(in.size_bytes()), out);
370  out->insert(out->end(), in.begin(), in.end());
371}
372void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) {
373  EncodeString8Tmpl(in, out);
374}
375void EncodeString8(span<uint8_t> in, std::string* out) {
376  EncodeString8Tmpl(in, out);
377}
378
379template <typename C>
380void EncodeFromLatin1Tmpl(span<uint8_t> latin1, C* out) {
381  for (size_t ii = 0; ii < latin1.size(); ++ii) {
382    if (latin1[ii] <= 127)
383      continue;
384    // If there's at least one non-ASCII char, convert to UTF8.
385    std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + ii);
386    for (; ii < latin1.size(); ++ii) {
387      if (latin1[ii] <= 127) {
388        utf8.push_back(latin1[ii]);
389      } else {
390        // 0xC0 means it's a UTF8 sequence with 2 bytes.
391        utf8.push_back((latin1[ii] >> 6) | 0xc0);
392        utf8.push_back((latin1[ii] | 0x80) & 0xbf);
393      }
394    }
395    EncodeString8(SpanFrom(utf8), out);
396    return;
397  }
398  EncodeString8(latin1, out);
399}
400void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) {
401  EncodeFromLatin1Tmpl(latin1, out);
402}
403void EncodeFromLatin1(span<uint8_t> latin1, std::string* out) {
404  EncodeFromLatin1Tmpl(latin1, out);
405}
406
407template <typename C>
408void EncodeFromUTF16Tmpl(span<uint16_t> utf16, C* out) {
409  // If there's at least one non-ASCII char, encode as STRING16 (UTF16).
410  for (uint16_t ch : utf16) {
411    if (ch <= 127)
412      continue;
413    EncodeString16(utf16, out);
414    return;
415  }
416  // It's all US-ASCII, strip out every second byte and encode as UTF8.
417  internals::WriteTokenStart(MajorType::STRING,
418                             static_cast<uint64_t>(utf16.size()), out);
419  out->insert(out->end(), utf16.begin(), utf16.end());
420}
421void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) {
422  EncodeFromUTF16Tmpl(utf16, out);
423}
424void EncodeFromUTF16(span<uint16_t> utf16, std::string* out) {
425  EncodeFromUTF16Tmpl(utf16, out);
426}
427
428template <typename C>
429void EncodeBinaryTmpl(span<uint8_t> in, C* out) {
430  out->push_back(kExpectedConversionToBase64Tag);
431  uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
432  internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
433  out->insert(out->end(), in.begin(), in.end());
434}
435void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) {
436  EncodeBinaryTmpl(in, out);
437}
438void EncodeBinary(span<uint8_t> in, std::string* out) {
439  EncodeBinaryTmpl(in, out);
440}
441
// Encoded size of a double: the initial byte (kInitialByteForDouble) plus
// the 64 bits of payload holding its value.
constexpr size_t kEncodedDoubleSize = sizeof(uint8_t) + sizeof(uint64_t);

// Encoded size of an envelope header: the initial byte
// (kInitialByteForEnvelope), the start byte for a BYTE_STRING with a 32 bit
// wide length, and the 32 bit length of that string.
constexpr size_t kEncodedEnvelopeHeaderSize =
    sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint32_t);
450
451template <typename C>
452void EncodeDoubleTmpl(double value, C* out) {
453  // The additional_info=27 indicates 64 bits for the double follow.
454  // See RFC 7049 Section 2.3, Table 1.
455  out->push_back(kInitialByteForDouble);
456  union {
457    double from_double;
458    uint64_t to_uint64;
459  } reinterpret;
460  reinterpret.from_double = value;
461  WriteBytesMostSignificantByteFirst<uint64_t>(reinterpret.to_uint64, out);
462}
463void EncodeDouble(double value, std::vector<uint8_t>* out) {
464  EncodeDoubleTmpl(value, out);
465}
466void EncodeDouble(double value, std::string* out) {
467  EncodeDoubleTmpl(value, out);
468}
469
470// =============================================================================
471// cbor::EnvelopeEncoder - for wrapping submessages
472// =============================================================================
473
474template <typename C>
475void EncodeStartTmpl(C* out, size_t* byte_size_pos) {
476  assert(*byte_size_pos == 0);
477  out->push_back(kInitialByteForEnvelope);
478  out->push_back(kInitialByteFor32BitLengthByteString);
479  *byte_size_pos = out->size();
480  out->resize(out->size() + sizeof(uint32_t));
481}
482
483void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) {
484  EncodeStartTmpl<std::vector<uint8_t>>(out, &byte_size_pos_);
485}
486
487void EnvelopeEncoder::EncodeStart(std::string* out) {
488  EncodeStartTmpl<std::string>(out, &byte_size_pos_);
489}
490
// Fills in the four length bytes located at |*byte_size_pos| in |out| with
// the size of the payload, most significant byte first, and advances
// |*byte_size_pos| past them. |*byte_size_pos| must be non-zero (asserted).
// Returns false, leaving |out| and |*byte_size_pos| untouched, if the payload
// size does not fit into a uint32_t; returns true otherwise.
template <typename C>
bool EncodeStopTmpl(C* out, size_t* byte_size_pos) {
  assert(*byte_size_pos != 0);
  // The payload is everything written after the four length bytes.
  const uint64_t payload_size =
      out->size() - (*byte_size_pos + sizeof(uint32_t));
  // Exactly 4 bytes are available, so the largest size that can be stored is
  // the maximum of uint32_t.
  if (payload_size > std::numeric_limits<uint32_t>::max())
    return false;
  for (size_t bytes_left = sizeof(uint32_t); bytes_left > 0; --bytes_left) {
    (*out)[*byte_size_pos] = 0xff & (payload_size >> ((bytes_left - 1) * 8));
    ++*byte_size_pos;
  }
  return true;
}
507
508bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) {
509  return EncodeStopTmpl(out, &byte_size_pos_);
510}
511
512bool EnvelopeEncoder::EncodeStop(std::string* out) {
513  return EncodeStopTmpl(out, &byte_size_pos_);
514}
515
516// =============================================================================
517// cbor::NewCBOREncoder - for encoding from a streaming parser
518// =============================================================================
519
520namespace {
521template <typename C>
522class CBOREncoder : public StreamingParserHandler {
523 public:
524  CBOREncoder(C* out, Status* status) : out_(out), status_(status) {
525    *status_ = Status();
526  }
527
528  void HandleMapBegin() override {
529    if (!status_->ok())
530      return;
531    envelopes_.emplace_back();
532    envelopes_.back().EncodeStart(out_);
533    out_->push_back(kInitialByteIndefiniteLengthMap);
534  }
535
536  void HandleMapEnd() override {
537    if (!status_->ok())
538      return;
539    out_->push_back(kStopByte);
540    assert(!envelopes_.empty());
541    if (!envelopes_.back().EncodeStop(out_)) {
542      HandleError(
543          Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
544      return;
545    }
546    envelopes_.pop_back();
547  }
548
549  void HandleArrayBegin() override {
550    if (!status_->ok())
551      return;
552    envelopes_.emplace_back();
553    envelopes_.back().EncodeStart(out_);
554    out_->push_back(kInitialByteIndefiniteLengthArray);
555  }
556
557  void HandleArrayEnd() override {
558    if (!status_->ok())
559      return;
560    out_->push_back(kStopByte);
561    assert(!envelopes_.empty());
562    if (!envelopes_.back().EncodeStop(out_)) {
563      HandleError(
564          Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
565      return;
566    }
567    envelopes_.pop_back();
568  }
569
570  void HandleString8(span<uint8_t> chars) override {
571    if (!status_->ok())
572      return;
573    EncodeString8(chars, out_);
574  }
575
576  void HandleString16(span<uint16_t> chars) override {
577    if (!status_->ok())
578      return;
579    EncodeFromUTF16(chars, out_);
580  }
581
582  void HandleBinary(span<uint8_t> bytes) override {
583    if (!status_->ok())
584      return;
585    EncodeBinary(bytes, out_);
586  }
587
588  void HandleDouble(double value) override {
589    if (!status_->ok())
590      return;
591    EncodeDouble(value, out_);
592  }
593
594  void HandleInt32(int32_t value) override {
595    if (!status_->ok())
596      return;
597    EncodeInt32(value, out_);
598  }
599
600  void HandleBool(bool value) override {
601    if (!status_->ok())
602      return;
603    // See RFC 7049 Section 2.3, Table 2.
604    out_->push_back(value ? kEncodedTrue : kEncodedFalse);
605  }
606
607  void HandleNull() override {
608    if (!status_->ok())
609      return;
610    // See RFC 7049 Section 2.3, Table 2.
611    out_->push_back(kEncodedNull);
612  }
613
614  void HandleError(Status error) override {
615    if (!status_->ok())
616      return;
617    *status_ = error;
618    out_->clear();
619  }
620
621 private:
622  C* out_;
623  std::vector<EnvelopeEncoder> envelopes_;
624  Status* status_;
625};
626}  // namespace
627
628std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
629    std::vector<uint8_t>* out,
630    Status* status) {
631  return std::unique_ptr<StreamingParserHandler>(
632      new CBOREncoder<std::vector<uint8_t>>(out, status));
633}
634std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
635                                                       Status* status) {
636  return std::unique_ptr<StreamingParserHandler>(
637      new CBOREncoder<std::string>(out, status));
638}
639
640// =============================================================================
641// cbor::CBORTokenizer - for parsing individual CBOR items
642// =============================================================================
643
644CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : bytes_(bytes) {
645  ReadNextToken(/*enter_envelope=*/false);
646}
647CBORTokenizer::~CBORTokenizer() {}
648
649CBORTokenTag CBORTokenizer::TokenTag() const {
650  return token_tag_;
651}
652
653void CBORTokenizer::Next() {
654  if (token_tag_ == CBORTokenTag::ERROR_VALUE ||
655      token_tag_ == CBORTokenTag::DONE)
656    return;
657  ReadNextToken(/*enter_envelope=*/false);
658}
659
660void CBORTokenizer::EnterEnvelope() {
661  assert(token_tag_ == CBORTokenTag::ENVELOPE);
662  ReadNextToken(/*enter_envelope=*/true);
663}
664
665Status CBORTokenizer::Status() const {
666  return status_;
667}
668
669// The following accessor functions ::GetInt32, ::GetDouble,
670// ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents
671// assume that a particular token was recognized in ::ReadNextToken.
672// That's where all the error checking is done. By design,
673// the accessors (assuming the token was recognized) never produce
674// an error.
675
676int32_t CBORTokenizer::GetInt32() const {
677  assert(token_tag_ == CBORTokenTag::INT32);
678  // The range checks happen in ::ReadNextToken().
679  return static_cast<int32_t>(
680      token_start_type_ == MajorType::UNSIGNED
681          ? token_start_internal_value_
682          : -static_cast<int64_t>(token_start_internal_value_) - 1);
683}
684
685double CBORTokenizer::GetDouble() const {
686  assert(token_tag_ == CBORTokenTag::DOUBLE);
687  union {
688    uint64_t from_uint64;
689    double to_double;
690  } reinterpret;
691  reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst<uint64_t>(
692      bytes_.subspan(status_.pos + 1));
693  return reinterpret.to_double;
694}
695
696span<uint8_t> CBORTokenizer::GetString8() const {
697  assert(token_tag_ == CBORTokenTag::STRING8);
698  auto length = static_cast<size_t>(token_start_internal_value_);
699  return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
700}
701
702span<uint8_t> CBORTokenizer::GetString16WireRep() const {
703  assert(token_tag_ == CBORTokenTag::STRING16);
704  auto length = static_cast<size_t>(token_start_internal_value_);
705  return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
706}
707
708span<uint8_t> CBORTokenizer::GetBinary() const {
709  assert(token_tag_ == CBORTokenTag::BINARY);
710  auto length = static_cast<size_t>(token_start_internal_value_);
711  return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
712}
713
714span<uint8_t> CBORTokenizer::GetEnvelopeContents() const {
715  assert(token_tag_ == CBORTokenTag::ENVELOPE);
716  auto length = static_cast<size_t>(token_start_internal_value_);
717  return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length);
718}
719
720// All error checking happens in ::ReadNextToken, so that the accessors
721// can avoid having to carry an error return value.
722//
723// With respect to checking the encoded lengths of strings, arrays, etc:
724// On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so
725// we initially read them as uint64_t, usually into token_start_internal_value_.
726//
727// However, since these containers have a representation on the machine,
728// we need to do corresponding size computations on the input byte array,
729// output span (e.g. the payload for a string), etc., and size_t is
730// machine specific (in practice either 32 bit or 64 bit).
731//
732// Further, we must avoid overflowing size_t. Therefore, we use this
733// kMaxValidLength constant to:
734// - Reject values that are larger than the architecture specific
735//   max size_t (differs between 32 bit and 64 bit arch).
736// - Reserve at least one bit so that we can check against overflows
737//   when adding lengths (array / string length / etc.); we do this by
738//   ensuring that the inputs to an addition are <= kMaxValidLength,
739//   and then checking whether the sum went past it.
740//
741// See also
742// https://chromium.googlesource.com/chromium/src/+/master/docs/security/integer-semantics.md
// Largest length accepted from the wire: the smaller of the uint64_t maximum
// shifted right by two bits and the maximum of size_t.
static const uint64_t kMaxValidLength = std::min<uint64_t>(
    /*headroom for additions=*/std::numeric_limits<uint64_t>::max() >> 2,
    /*largest size_t=*/std::numeric_limits<size_t>::max());
746
747void CBORTokenizer::ReadNextToken(bool enter_envelope) {
748  if (enter_envelope) {
749    status_.pos += kEncodedEnvelopeHeaderSize;
750  } else {
751    status_.pos =
752        status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_;
753  }
754  status_.error = Error::OK;
755  if (status_.pos >= bytes_.size()) {
756    token_tag_ = CBORTokenTag::DONE;
757    return;
758  }
759  const size_t remaining_bytes = bytes_.size() - status_.pos;
760  switch (bytes_[status_.pos]) {
761    case kStopByte:
762      SetToken(CBORTokenTag::STOP, 1);
763      return;
764    case kInitialByteIndefiniteLengthMap:
765      SetToken(CBORTokenTag::MAP_START, 1);
766      return;
767    case kInitialByteIndefiniteLengthArray:
768      SetToken(CBORTokenTag::ARRAY_START, 1);
769      return;
770    case kEncodedTrue:
771      SetToken(CBORTokenTag::TRUE_VALUE, 1);
772      return;
773    case kEncodedFalse:
774      SetToken(CBORTokenTag::FALSE_VALUE, 1);
775      return;
776    case kEncodedNull:
777      SetToken(CBORTokenTag::NULL_VALUE, 1);
778      return;
779    case kExpectedConversionToBase64Tag: {  // BINARY
780      const int8_t bytes_read = internals::ReadTokenStart(
781          bytes_.subspan(status_.pos + 1), &token_start_type_,
782          &token_start_internal_value_);
783      if (bytes_read < 0 || token_start_type_ != MajorType::BYTE_STRING ||
784          token_start_internal_value_ > kMaxValidLength) {
785        SetError(Error::CBOR_INVALID_BINARY);
786        return;
787      }
788      const uint64_t token_byte_length = token_start_internal_value_ +
789                                         /* tag before token start: */ 1 +
790                                         /* token start: */ bytes_read;
791      if (token_byte_length > remaining_bytes) {
792        SetError(Error::CBOR_INVALID_BINARY);
793        return;
794      }
795      SetToken(CBORTokenTag::BINARY, static_cast<size_t>(token_byte_length));
796      return;
797    }
798    case kInitialByteForDouble: {  // DOUBLE
799      if (kEncodedDoubleSize > remaining_bytes) {
800        SetError(Error::CBOR_INVALID_DOUBLE);
801        return;
802      }
803      SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize);
804      return;
805    }
806    case kInitialByteForEnvelope: {  // ENVELOPE
807      if (kEncodedEnvelopeHeaderSize > remaining_bytes) {
808        SetError(Error::CBOR_INVALID_ENVELOPE);
809        return;
810      }
811      // The envelope must be a byte string with 32 bit length.
812      if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) {
813        SetError(Error::CBOR_INVALID_ENVELOPE);
814        return;
815      }
816      // Read the length of the byte string.
817      token_start_internal_value_ = ReadBytesMostSignificantByteFirst<uint32_t>(
818          bytes_.subspan(status_.pos + 2));
819      if (token_start_internal_value_ > kMaxValidLength) {
820        SetError(Error::CBOR_INVALID_ENVELOPE);
821        return;
822      }
823      uint64_t token_byte_length =
824          token_start_internal_value_ + kEncodedEnvelopeHeaderSize;
825      if (token_byte_length > remaining_bytes) {
826        SetError(Error::CBOR_INVALID_ENVELOPE);
827        return;
828      }
829      SetToken(CBORTokenTag::ENVELOPE, static_cast<size_t>(token_byte_length));
830      return;
831    }
832    default: {
833      const int8_t token_start_length = internals::ReadTokenStart(
834          bytes_.subspan(status_.pos), &token_start_type_,
835          &token_start_internal_value_);
836      const bool success = token_start_length >= 0;
837      switch (token_start_type_) {
838        case MajorType::UNSIGNED:  // INT32.
839          // INT32 is a signed int32 (int32 makes sense for the
840          // inspector_protocol, it's not a CBOR limitation), so we check
841          // against the signed max, so that the allowable values are
842          // 0, 1, 2, ... 2^31 - 1.
843          if (!success || std::numeric_limits<int32_t>::max() <
844                              token_start_internal_value_) {
845            SetError(Error::CBOR_INVALID_INT32);
846            return;
847          }
848          SetToken(CBORTokenTag::INT32, token_start_length);
849          return;
850        case MajorType::NEGATIVE: {  // INT32.
851          // INT32 is a signed int32 (int32 makes sense for the
852          // inspector_protocol, it's not a CBOR limitation); in CBOR,
853          // the negative values for INT32 are represented as NEGATIVE,
854          // that is, -1 INT32 is represented as 1 << 5 | 0 (major type 1,
855          // additional info value 0). So here, we compute the INT32 value
856          // and then check it against the INT32 min.
857          int64_t actual_value =
858              -static_cast<int64_t>(token_start_internal_value_) - 1;
859          if (!success || actual_value < std::numeric_limits<int32_t>::min()) {
860            SetError(Error::CBOR_INVALID_INT32);
861            return;
862          }
863          SetToken(CBORTokenTag::INT32, token_start_length);
864          return;
865        }
866        case MajorType::STRING: {  // STRING8.
867          if (!success || token_start_internal_value_ > kMaxValidLength) {
868            SetError(Error::CBOR_INVALID_STRING8);
869            return;
870          }
871          uint64_t token_byte_length =
872              token_start_internal_value_ + token_start_length;
873          if (token_byte_length > remaining_bytes) {
874            SetError(Error::CBOR_INVALID_STRING8);
875            return;
876          }
877          SetToken(CBORTokenTag::STRING8,
878                   static_cast<size_t>(token_byte_length));
879          return;
880        }
881        case MajorType::BYTE_STRING: {  // STRING16.
882          // Length must be divisible by 2 since UTF16 is 2 bytes per
883          // character, hence the &1 check.
884          if (!success || token_start_internal_value_ > kMaxValidLength ||
885              token_start_internal_value_ & 1) {
886            SetError(Error::CBOR_INVALID_STRING16);
887            return;
888          }
889          uint64_t token_byte_length =
890              token_start_internal_value_ + token_start_length;
891          if (token_byte_length > remaining_bytes) {
892            SetError(Error::CBOR_INVALID_STRING16);
893            return;
894          }
895          SetToken(CBORTokenTag::STRING16,
896                   static_cast<size_t>(token_byte_length));
897          return;
898        }
899        case MajorType::ARRAY:
900        case MajorType::MAP:
901        case MajorType::TAG:
902        case MajorType::SIMPLE_VALUE:
903          SetError(Error::CBOR_UNSUPPORTED_VALUE);
904          return;
905      }
906    }
907  }
908}
909
910void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) {
911  token_tag_ = token_tag;
912  token_byte_length_ = token_byte_length;
913}
914
915void CBORTokenizer::SetError(Error error) {
916  token_tag_ = CBORTokenTag::ERROR_VALUE;
917  status_.error = error;
918}
919
920// =============================================================================
921// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
922// =============================================================================
923
924namespace {
// Upper bound on the nesting depth of maps and arrays accepted while parsing
// CBOR; ParseValue reports CBOR_STACK_LIMIT_EXCEEDED beyond it.
constexpr int kStackLimit = 300;
928
929// Below are three parsing routines for CBOR, which cover enough
930// to roundtrip JSON messages.
931bool ParseMap(int32_t stack_depth,
932              CBORTokenizer* tokenizer,
933              StreamingParserHandler* out);
934bool ParseArray(int32_t stack_depth,
935                CBORTokenizer* tokenizer,
936                StreamingParserHandler* out);
937bool ParseValue(int32_t stack_depth,
938                CBORTokenizer* tokenizer,
939                StreamingParserHandler* out);
940
941void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
942  std::vector<uint16_t> value;
943  span<uint8_t> rep = tokenizer->GetString16WireRep();
944  for (size_t ii = 0; ii < rep.size(); ii += 2)
945    value.push_back((rep[ii + 1] << 8) | rep[ii]);
946  out->HandleString16(span<uint16_t>(value.data(), value.size()));
947  tokenizer->Next();
948}
949
950bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
951  assert(tokenizer->TokenTag() == CBORTokenTag::STRING8);
952  out->HandleString8(tokenizer->GetString8());
953  tokenizer->Next();
954  return true;
955}
956
957bool ParseValue(int32_t stack_depth,
958                CBORTokenizer* tokenizer,
959                StreamingParserHandler* out) {
960  if (stack_depth > kStackLimit) {
961    out->HandleError(
962        Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos});
963    return false;
964  }
965  // Skip past the envelope to get to what's inside.
966  if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE)
967    tokenizer->EnterEnvelope();
968  switch (tokenizer->TokenTag()) {
969    case CBORTokenTag::ERROR_VALUE:
970      out->HandleError(tokenizer->Status());
971      return false;
972    case CBORTokenTag::DONE:
973      out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE,
974                              tokenizer->Status().pos});
975      return false;
976    case CBORTokenTag::TRUE_VALUE:
977      out->HandleBool(true);
978      tokenizer->Next();
979      return true;
980    case CBORTokenTag::FALSE_VALUE:
981      out->HandleBool(false);
982      tokenizer->Next();
983      return true;
984    case CBORTokenTag::NULL_VALUE:
985      out->HandleNull();
986      tokenizer->Next();
987      return true;
988    case CBORTokenTag::INT32:
989      out->HandleInt32(tokenizer->GetInt32());
990      tokenizer->Next();
991      return true;
992    case CBORTokenTag::DOUBLE:
993      out->HandleDouble(tokenizer->GetDouble());
994      tokenizer->Next();
995      return true;
996    case CBORTokenTag::STRING8:
997      return ParseUTF8String(tokenizer, out);
998    case CBORTokenTag::STRING16:
999      ParseUTF16String(tokenizer, out);
1000      return true;
1001    case CBORTokenTag::BINARY: {
1002      out->HandleBinary(tokenizer->GetBinary());
1003      tokenizer->Next();
1004      return true;
1005    }
1006    case CBORTokenTag::MAP_START:
1007      return ParseMap(stack_depth + 1, tokenizer, out);
1008    case CBORTokenTag::ARRAY_START:
1009      return ParseArray(stack_depth + 1, tokenizer, out);
1010    default:
1011      out->HandleError(
1012          Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos});
1013      return false;
1014  }
1015}
1016
1017// |bytes| must start with the indefinite length array byte, so basically,
1018// ParseArray may only be called after an indefinite length array has been
1019// detected.
1020bool ParseArray(int32_t stack_depth,
1021                CBORTokenizer* tokenizer,
1022                StreamingParserHandler* out) {
1023  assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START);
1024  tokenizer->Next();
1025  out->HandleArrayBegin();
1026  while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1027    if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1028      out->HandleError(
1029          Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos});
1030      return false;
1031    }
1032    if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1033      out->HandleError(tokenizer->Status());
1034      return false;
1035    }
1036    // Parse value.
1037    if (!ParseValue(stack_depth, tokenizer, out))
1038      return false;
1039  }
1040  out->HandleArrayEnd();
1041  tokenizer->Next();
1042  return true;
1043}
1044
1045// |bytes| must start with the indefinite length array byte, so basically,
1046// ParseArray may only be called after an indefinite length array has been
1047// detected.
1048bool ParseMap(int32_t stack_depth,
1049              CBORTokenizer* tokenizer,
1050              StreamingParserHandler* out) {
1051  assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START);
1052  out->HandleMapBegin();
1053  tokenizer->Next();
1054  while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1055    if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1056      out->HandleError(
1057          Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos});
1058      return false;
1059    }
1060    if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1061      out->HandleError(tokenizer->Status());
1062      return false;
1063    }
1064    // Parse key.
1065    if (tokenizer->TokenTag() == CBORTokenTag::STRING8) {
1066      if (!ParseUTF8String(tokenizer, out))
1067        return false;
1068    } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) {
1069      ParseUTF16String(tokenizer, out);
1070    } else {
1071      out->HandleError(
1072          Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos});
1073      return false;
1074    }
1075    // Parse value.
1076    if (!ParseValue(stack_depth, tokenizer, out))
1077      return false;
1078  }
1079  out->HandleMapEnd();
1080  tokenizer->Next();
1081  return true;
1082}
1083}  // namespace
1084
1085void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out) {
1086  if (bytes.empty()) {
1087    out->HandleError(Status{Error::CBOR_NO_INPUT, 0});
1088    return;
1089  }
1090  if (bytes[0] != kInitialByteForEnvelope) {
1091    out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0});
1092    return;
1093  }
1094  CBORTokenizer tokenizer(bytes);
1095  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1096    out->HandleError(tokenizer.Status());
1097    return;
1098  }
1099  // We checked for the envelope start byte above, so the tokenizer
1100  // must agree here, since it's not an error.
1101  assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE);
1102  tokenizer.EnterEnvelope();
1103  if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) {
1104    out->HandleError(
1105        Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos});
1106    return;
1107  }
1108  if (!ParseMap(/*stack_depth=*/1, &tokenizer, out))
1109    return;
1110  if (tokenizer.TokenTag() == CBORTokenTag::DONE)
1111    return;
1112  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1113    out->HandleError(tokenizer.Status());
1114    return;
1115  }
1116  out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos});
1117}
1118
1119// =============================================================================
// cbor::AppendString8EntryToCBORMap - for limited in-place editing of messages
1121// =============================================================================
1122
1123template <typename C>
1124Status AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key,
1125                                       span<uint8_t> string8_value,
1126                                       C* cbor) {
1127  // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since
1128  // it could be a char (signed!). Instead, use bytes.
1129  span<uint8_t> bytes(reinterpret_cast<const uint8_t*>(cbor->data()),
1130                      cbor->size());
1131  CBORTokenizer tokenizer(bytes);
1132  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE)
1133    return tokenizer.Status();
1134  if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE)
1135    return Status(Error::CBOR_INVALID_ENVELOPE, 0);
1136  size_t envelope_size = tokenizer.GetEnvelopeContents().size();
1137  size_t old_size = cbor->size();
1138  if (old_size != envelope_size + kEncodedEnvelopeHeaderSize)
1139    return Status(Error::CBOR_INVALID_ENVELOPE, 0);
1140  if (envelope_size == 0 ||
1141      (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart()))
1142    return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize);
1143  if (bytes[bytes.size() - 1] != EncodeStop())
1144    return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1);
1145  cbor->pop_back();
1146  EncodeString8(string8_key, cbor);
1147  EncodeString8(string8_value, cbor);
1148  cbor->push_back(EncodeStop());
1149  size_t new_envelope_size = envelope_size + (cbor->size() - old_size);
1150  if (new_envelope_size > std::numeric_limits<uint32_t>::max())
1151    return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0);
1152  size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t);
1153  uint8_t* out = reinterpret_cast<uint8_t*>(&cbor->at(size_pos));
1154  *(out++) = (new_envelope_size >> 24) & 0xff;
1155  *(out++) = (new_envelope_size >> 16) & 0xff;
1156  *(out++) = (new_envelope_size >> 8) & 0xff;
1157  *(out) = new_envelope_size & 0xff;
1158  return Status();
1159}
1160Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1161                                   span<uint8_t> string8_value,
1162                                   std::vector<uint8_t>* cbor) {
1163  return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1164}
1165Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1166                                   span<uint8_t> string8_value,
1167                                   std::string* cbor) {
1168  return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1169}
1170}  // namespace cbor
1171
1172namespace json {
1173
1174// =============================================================================
1175// json::NewJSONEncoder - for encoding streaming parser events as JSON
1176// =============================================================================
1177
1178namespace {
// Appends |value| to |out| as exactly four lowercase hexadecimal digits,
// starting with the most significant nibble.
template <typename C>
void PrintHex(uint16_t value, C* out) {
  for (int shift = 12; shift >= 0; shift -= 4) {
    const int nibble = (value >> shift) & 0xf;
    const int base = (nibble < 10) ? '0' : ('a' - 10);
    out->push_back(nibble + base);
  }
}
1187
1188// In the writer below, we maintain a stack of State instances.
1189// It is just enough to emit the appropriate delimiters and brackets
1190// in JSON.
// The kind of JSON container an element is being written into.
enum class Container {
  NONE,   // Top-level, initial state.
  MAP,    // Inside a JSON object.
  ARRAY,  // Inside a JSON array.
};
1199class State {
1200 public:
1201  explicit State(Container container) : container_(container) {}
1202  void StartElement(std::vector<uint8_t>* out) { StartElementTmpl(out); }
1203  void StartElement(std::string* out) { StartElementTmpl(out); }
1204  Container container() const { return container_; }
1205
1206 private:
1207  template <typename C>
1208  void StartElementTmpl(C* out) {
1209    assert(container_ != Container::NONE || size_ == 0);
1210    if (size_ != 0) {
1211      char delim = (!(size_ & 1) || container_ == Container::ARRAY) ? ',' : ':';
1212      out->push_back(delim);
1213    }
1214    ++size_;
1215  }
1216
1217  Container container_ = Container::NONE;
1218  int size_ = 0;
1219};
1220
// The Base64 alphabet used by Base64Encode, indexed by 6 bit value.
constexpr char kBase64Table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1224
1225template <typename C>
1226void Base64Encode(const span<uint8_t>& in, C* out) {
1227  // The following three cases are based on the tables in the example
1228  // section in https://en.wikipedia.org/wiki/Base64. We process three
1229  // input bytes at a time, emitting 4 output bytes at a time.
1230  size_t ii = 0;
1231
1232  // While possible, process three input bytes.
1233  for (; ii + 3 <= in.size(); ii += 3) {
1234    uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2];
1235    out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1236    out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1237    out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1238    out->push_back(kBase64Table[twentyfour_bits & 0x3f]);
1239  }
1240  if (ii + 2 <= in.size()) {  // Process two input bytes.
1241    uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8);
1242    out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1243    out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1244    out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1245    out->push_back('=');  // Emit padding.
1246    return;
1247  }
1248  if (ii + 1 <= in.size()) {  // Process a single input byte.
1249    uint32_t twentyfour_bits = (in[ii] << 16);
1250    out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1251    out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1252    out->push_back('=');  // Emit padding.
1253    out->push_back('=');  // Emit padding.
1254  }
1255}
1256
1257// Implements a handler for JSON parser events to emit a JSON string.
1258template <typename C>
1259class JSONEncoder : public StreamingParserHandler {
1260 public:
1261  JSONEncoder(const Platform* platform, C* out, Status* status)
1262      : platform_(platform), out_(out), status_(status) {
1263    *status_ = Status();
1264    state_.emplace(Container::NONE);
1265  }
1266
1267  void HandleMapBegin() override {
1268    if (!status_->ok())
1269      return;
1270    assert(!state_.empty());
1271    state_.top().StartElement(out_);
1272    state_.emplace(Container::MAP);
1273    Emit('{');
1274  }
1275
1276  void HandleMapEnd() override {
1277    if (!status_->ok())
1278      return;
1279    assert(state_.size() >= 2 && state_.top().container() == Container::MAP);
1280    state_.pop();
1281    Emit('}');
1282  }
1283
1284  void HandleArrayBegin() override {
1285    if (!status_->ok())
1286      return;
1287    state_.top().StartElement(out_);
1288    state_.emplace(Container::ARRAY);
1289    Emit('[');
1290  }
1291
1292  void HandleArrayEnd() override {
1293    if (!status_->ok())
1294      return;
1295    assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY);
1296    state_.pop();
1297    Emit(']');
1298  }
1299
1300  void HandleString16(span<uint16_t> chars) override {
1301    if (!status_->ok())
1302      return;
1303    state_.top().StartElement(out_);
1304    Emit('"');
1305    for (const uint16_t ch : chars) {
1306      if (ch == '"') {
1307        Emit("\\\"");
1308      } else if (ch == '\\') {
1309        Emit("\\\\");
1310      } else if (ch == '\b') {
1311        Emit("\\b");
1312      } else if (ch == '\f') {
1313        Emit("\\f");
1314      } else if (ch == '\n') {
1315        Emit("\\n");
1316      } else if (ch == '\r') {
1317        Emit("\\r");
1318      } else if (ch == '\t') {
1319        Emit("\\t");
1320      } else if (ch >= 32 && ch <= 126) {
1321        Emit(ch);
1322      } else {
1323        Emit("\\u");
1324        PrintHex(ch, out_);
1325      }
1326    }
1327    Emit('"');
1328  }
1329
1330  void HandleString8(span<uint8_t> chars) override {
1331    if (!status_->ok())
1332      return;
1333    state_.top().StartElement(out_);
1334    Emit('"');
1335    for (size_t ii = 0; ii < chars.size(); ++ii) {
1336      uint8_t c = chars[ii];
1337      if (c == '"') {
1338        Emit("\\\"");
1339      } else if (c == '\\') {
1340        Emit("\\\\");
1341      } else if (c == '\b') {
1342        Emit("\\b");
1343      } else if (c == '\f') {
1344        Emit("\\f");
1345      } else if (c == '\n') {
1346        Emit("\\n");
1347      } else if (c == '\r') {
1348        Emit("\\r");
1349      } else if (c == '\t') {
1350        Emit("\\t");
1351      } else if (c >= 32 && c <= 126) {
1352        Emit(c);
1353      } else if (c < 32) {
1354        Emit("\\u");
1355        PrintHex(static_cast<uint16_t>(c), out_);
1356      } else {
1357        // Inspect the leading byte to figure out how long the utf8
1358        // byte sequence is; while doing this initialize |codepoint|
1359        // with the first few bits.
1360        // See table in: https://en.wikipedia.org/wiki/UTF-8
1361        // byte one is 110x xxxx -> 2 byte utf8 sequence
1362        // byte one is 1110 xxxx -> 3 byte utf8 sequence
1363        // byte one is 1111 0xxx -> 4 byte utf8 sequence
1364        uint32_t codepoint;
1365        int num_bytes_left;
1366        if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
1367          num_bytes_left = 1;
1368          codepoint = c & 0x1f;
1369        } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
1370          num_bytes_left = 2;
1371          codepoint = c & 0x0f;
1372        } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
1373          codepoint = c & 0x07;
1374          num_bytes_left = 3;
1375        } else {
1376          continue;  // invalid leading byte
1377        }
1378
1379        // If we have enough bytes in our input, decode the remaining ones
1380        // belonging to this Unicode character into |codepoint|.
1381        if (ii + num_bytes_left > chars.size())
1382          continue;
1383        while (num_bytes_left > 0) {
1384          c = chars[++ii];
1385          --num_bytes_left;
1386          // Check the next byte is a continuation byte, that is 10xx xxxx.
1387          if ((c & 0xc0) != 0x80)
1388            continue;
1389          codepoint = (codepoint << 6) | (c & 0x3f);
1390        }
1391
1392        // Disallow overlong encodings for ascii characters, as these
1393        // would include " and other characters significant to JSON
1394        // string termination / control.
1395        if (codepoint < 0x7f)
1396          continue;
1397        // Invalid in UTF8, and can't be represented in UTF16 anyway.
1398        if (codepoint > 0x10ffff)
1399          continue;
1400
1401        // So, now we transcode to UTF16,
1402        // using the math described at https://en.wikipedia.org/wiki/UTF-16,
1403        // for either one or two 16 bit characters.
1404        if (codepoint < 0xffff) {
1405          Emit("\\u");
1406          PrintHex(static_cast<uint16_t>(codepoint), out_);
1407          continue;
1408        }
1409        codepoint -= 0x10000;
1410        // high surrogate
1411        Emit("\\u");
1412        PrintHex(static_cast<uint16_t>((codepoint >> 10) + 0xd800), out_);
1413        // low surrogate
1414        Emit("\\u");
1415        PrintHex(static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00), out_);
1416      }
1417    }
1418    Emit('"');
1419  }
1420
1421  void HandleBinary(span<uint8_t> bytes) override {
1422    if (!status_->ok())
1423      return;
1424    state_.top().StartElement(out_);
1425    Emit('"');
1426    Base64Encode(bytes, out_);
1427    Emit('"');
1428  }
1429
1430  void HandleDouble(double value) override {
1431    if (!status_->ok())
1432      return;
1433    state_.top().StartElement(out_);
1434    // JSON cannot represent NaN or Infinity. So, for compatibility,
1435    // we behave like the JSON object in web browsers: emit 'null'.
1436    if (!std::isfinite(value)) {
1437      Emit("null");
1438      return;
1439    }
1440    std::unique_ptr<char[]> str_value = platform_->DToStr(value);
1441
1442    // DToStr may fail to emit a 0 before the decimal dot. E.g. this is
1443    // the case in base::NumberToString in Chromium (which is based on
1444    // dmg_fp). So, much like
1445    // https://cs.chromium.org/chromium/src/base/json/json_writer.cc
1446    // we probe for this and emit the leading 0 anyway if necessary.
1447    const char* chars = str_value.get();
1448    if (chars[0] == '.') {
1449      Emit('0');
1450    } else if (chars[0] == '-' && chars[1] == '.') {
1451      Emit("-0");
1452      ++chars;
1453    }
1454    Emit(chars);
1455  }
1456
1457  void HandleInt32(int32_t value) override {
1458    if (!status_->ok())
1459      return;
1460    state_.top().StartElement(out_);
1461    Emit(std::to_string(value));
1462  }
1463
1464  void HandleBool(bool value) override {
1465    if (!status_->ok())
1466      return;
1467    state_.top().StartElement(out_);
1468    Emit(value ? "true" : "false");
1469  }
1470
1471  void HandleNull() override {
1472    if (!status_->ok())
1473      return;
1474    state_.top().StartElement(out_);
1475    Emit("null");
1476  }
1477
1478  void HandleError(Status error) override {
1479    assert(!error.ok());
1480    *status_ = error;
1481    out_->clear();
1482  }
1483
1484 private:
1485  void Emit(char c) { out_->push_back(c); }
1486  void Emit(const char* str) {
1487    out_->insert(out_->end(), str, str + strlen(str));
1488  }
1489  void Emit(const std::string& str) {
1490    out_->insert(out_->end(), str.begin(), str.end());
1491  }
1492
1493  const Platform* platform_;
1494  C* out_;
1495  Status* status_;
1496  std::stack<State> state_;
1497};
1498}  // namespace
1499
1500std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
1501    const Platform* platform,
1502    std::vector<uint8_t>* out,
1503    Status* status) {
1504  return std::unique_ptr<StreamingParserHandler>(
1505      new JSONEncoder<std::vector<uint8_t>>(platform, out, status));
1506}
1507std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
1508                                                       std::string* out,
1509                                                       Status* status) {
1510  return std::unique_ptr<StreamingParserHandler>(
1511      new JSONEncoder<std::string>(platform, out, status));
1512}
1513
1514// =============================================================================
1515// json::ParseJSON - for receiving streaming parser events for JSON.
1516// =============================================================================
1517
1518namespace {
// NOTE(review): the use of this limit lies outside this excerpt; presumably
// it bounds the nesting depth accepted by the JSON parser below - confirm.
constexpr int kStackLimit = 300;
1520
// Token kinds distinguished by the JSON parser.
enum Token {
  // Container delimiters.
  ObjectBegin,
  ObjectEnd,
  ArrayBegin,
  ArrayEnd,
  // Values.
  StringLiteral,
  Number,
  BoolTrue,
  BoolFalse,
  NullToken,
  // Separators.
  ListSeparator,
  ObjectPairSeparator,
  // Not actual tokens: unrecognized input / no input at all.
  InvalidToken,
  NoInput,
};
1536
// Spellings of the JSON literals, \0 terminated.
constexpr const char* kNullString = "null";
constexpr const char* kTrueString = "true";
constexpr const char* kFalseString = "false";
1540
1541template <typename Char>
1542class JsonParser {
1543 public:
1544  JsonParser(const Platform* platform, StreamingParserHandler* handler)
1545      : platform_(platform), handler_(handler) {}
1546
1547  void Parse(const Char* start, size_t length) {
1548    start_pos_ = start;
1549    const Char* end = start + length;
1550    const Char* tokenEnd = nullptr;
1551    ParseValue(start, end, &tokenEnd, 0);
1552    if (error_)
1553      return;
1554    if (tokenEnd != end) {
1555      HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd);
1556    }
1557  }
1558
1559 private:
1560  bool CharsToDouble(const uint16_t* chars, size_t length, double* result) {
1561    std::string buffer;
1562    buffer.reserve(length + 1);
1563    for (size_t ii = 0; ii < length; ++ii) {
1564      bool is_ascii = !(chars[ii] & ~0x7F);
1565      if (!is_ascii)
1566        return false;
1567      buffer.push_back(static_cast<char>(chars[ii]));
1568    }
1569    return platform_->StrToD(buffer.c_str(), result);
1570  }
1571
1572  bool CharsToDouble(const uint8_t* chars, size_t length, double* result) {
1573    std::string buffer(reinterpret_cast<const char*>(chars), length);
1574    return platform_->StrToD(buffer.c_str(), result);
1575  }
1576
1577  static bool ParseConstToken(const Char* start,
1578                              const Char* end,
1579                              const Char** token_end,
1580                              const char* token) {
1581    // |token| is \0 terminated, it's one of the constants at top of the file.
1582    while (start < end && *token != '\0' && *start++ == *token++) {
1583    }
1584    if (*token != '\0')
1585      return false;
1586    *token_end = start;
1587    return true;
1588  }
1589
1590  static bool ReadInt(const Char* start,
1591                      const Char* end,
1592                      const Char** token_end,
1593                      bool allow_leading_zeros) {
1594    if (start == end)
1595      return false;
1596    bool has_leading_zero = '0' == *start;
1597    int length = 0;
1598    while (start < end && '0' <= *start && *start <= '9') {
1599      ++start;
1600      ++length;
1601    }
1602    if (!length)
1603      return false;
1604    if (!allow_leading_zeros && length > 1 && has_leading_zero)
1605      return false;
1606    *token_end = start;
1607    return true;
1608  }
1609
1610  static bool ParseNumberToken(const Char* start,
1611                               const Char* end,
1612                               const Char** token_end) {
1613    // We just grab the number here. We validate the size in DecodeNumber.
1614    // According to RFC4627, a valid number is: [minus] int [frac] [exp]
1615    if (start == end)
1616      return false;
1617    Char c = *start;
1618    if ('-' == c)
1619      ++start;
1620
1621    if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false))
1622      return false;
1623    if (start == end) {
1624      *token_end = start;
1625      return true;
1626    }
1627
1628    // Optional fraction part
1629    c = *start;
1630    if ('.' == c) {
1631      ++start;
1632      if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1633        return false;
1634      if (start == end) {
1635        *token_end = start;
1636        return true;
1637      }
1638      c = *start;
1639    }
1640
1641    // Optional exponent part
1642    if ('e' == c || 'E' == c) {
1643      ++start;
1644      if (start == end)
1645        return false;
1646      c = *start;
1647      if ('-' == c || '+' == c) {
1648        ++start;
1649        if (start == end)
1650          return false;
1651      }
1652      if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1653        return false;
1654    }
1655
1656    *token_end = start;
1657    return true;
1658  }
1659
1660  static bool ReadHexDigits(const Char* start,
1661                            const Char* end,
1662                            const Char** token_end,
1663                            int digits) {
1664    if (end - start < digits)
1665      return false;
1666    for (int i = 0; i < digits; ++i) {
1667      Char c = *start++;
1668      if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
1669            ('A' <= c && c <= 'F')))
1670        return false;
1671    }
1672    *token_end = start;
1673    return true;
1674  }
1675
1676  static bool ParseStringToken(const Char* start,
1677                               const Char* end,
1678                               const Char** token_end) {
1679    while (start < end) {
1680      Char c = *start++;
1681      if ('\\' == c) {
1682        if (start == end)
1683          return false;
1684        c = *start++;
1685        // Make sure the escaped char is valid.
1686        switch (c) {
1687          case 'x':
1688            if (!ReadHexDigits(start, end, &start, 2))
1689              return false;
1690            break;
1691          case 'u':
1692            if (!ReadHexDigits(start, end, &start, 4))
1693              return false;
1694            break;
1695          case '\\':
1696          case '/':
1697          case 'b':
1698          case 'f':
1699          case 'n':
1700          case 'r':
1701          case 't':
1702          case 'v':
1703          case '"':
1704            break;
1705          default:
1706            return false;
1707        }
1708      } else if ('"' == c) {
1709        *token_end = start;
1710        return true;
1711      }
1712    }
1713    return false;
1714  }
1715
1716  static bool SkipComment(const Char* start,
1717                          const Char* end,
1718                          const Char** comment_end) {
1719    if (start == end)
1720      return false;
1721
1722    if (*start != '/' || start + 1 >= end)
1723      return false;
1724    ++start;
1725
1726    if (*start == '/') {
1727      // Single line comment, read to newline.
1728      for (++start; start < end; ++start) {
1729        if (*start == '\n' || *start == '\r') {
1730          *comment_end = start + 1;
1731          return true;
1732        }
1733      }
1734      *comment_end = end;
1735      // Comment reaches end-of-input, which is fine.
1736      return true;
1737    }
1738
1739    if (*start == '*') {
1740      Char previous = '\0';
1741      // Block comment, read until end marker.
1742      for (++start; start < end; previous = *start++) {
1743        if (previous == '*' && *start == '/') {
1744          *comment_end = start + 1;
1745          return true;
1746        }
1747      }
1748      // Block comment must close before end-of-input.
1749      return false;
1750    }
1751
1752    return false;
1753  }
1754
1755  static bool IsSpaceOrNewLine(Char c) {
1756    // \v = vertial tab; \f = form feed page break.
1757    return c == ' ' || c == '\n' || c == '\v' || c == '\f' || c == '\r' ||
1758           c == '\t';
1759  }
1760
1761  static void SkipWhitespaceAndComments(const Char* start,
1762                                        const Char* end,
1763                                        const Char** whitespace_end) {
1764    while (start < end) {
1765      if (IsSpaceOrNewLine(*start)) {
1766        ++start;
1767      } else if (*start == '/') {
1768        const Char* comment_end = nullptr;
1769        if (!SkipComment(start, end, &comment_end))
1770          break;
1771        start = comment_end;
1772      } else {
1773        break;
1774      }
1775    }
1776    *whitespace_end = start;
1777  }
1778
1779  static Token ParseToken(const Char* start,
1780                          const Char* end,
1781                          const Char** tokenStart,
1782                          const Char** token_end) {
1783    SkipWhitespaceAndComments(start, end, tokenStart);
1784    start = *tokenStart;
1785
1786    if (start == end)
1787      return NoInput;
1788
1789    switch (*start) {
1790      case 'n':
1791        if (ParseConstToken(start, end, token_end, kNullString))
1792          return NullToken;
1793        break;
1794      case 't':
1795        if (ParseConstToken(start, end, token_end, kTrueString))
1796          return BoolTrue;
1797        break;
1798      case 'f':
1799        if (ParseConstToken(start, end, token_end, kFalseString))
1800          return BoolFalse;
1801        break;
1802      case '[':
1803        *token_end = start + 1;
1804        return ArrayBegin;
1805      case ']':
1806        *token_end = start + 1;
1807        return ArrayEnd;
1808      case ',':
1809        *token_end = start + 1;
1810        return ListSeparator;
1811      case '{':
1812        *token_end = start + 1;
1813        return ObjectBegin;
1814      case '}':
1815        *token_end = start + 1;
1816        return ObjectEnd;
1817      case ':':
1818        *token_end = start + 1;
1819        return ObjectPairSeparator;
1820      case '0':
1821      case '1':
1822      case '2':
1823      case '3':
1824      case '4':
1825      case '5':
1826      case '6':
1827      case '7':
1828      case '8':
1829      case '9':
1830      case '-':
1831        if (ParseNumberToken(start, end, token_end))
1832          return Number;
1833        break;
1834      case '"':
1835        if (ParseStringToken(start + 1, end, token_end))
1836          return StringLiteral;
1837        break;
1838    }
1839    return InvalidToken;
1840  }
1841
1842  static int HexToInt(Char c) {
1843    if ('0' <= c && c <= '9')
1844      return c - '0';
1845    if ('A' <= c && c <= 'F')
1846      return c - 'A' + 10;
1847    if ('a' <= c && c <= 'f')
1848      return c - 'a' + 10;
1849    assert(false);  // Unreachable.
1850    return 0;
1851  }
1852
1853  static bool DecodeString(const Char* start,
1854                           const Char* end,
1855                           std::vector<uint16_t>* output) {
1856    if (start == end)
1857      return true;
1858    if (start > end)
1859      return false;
1860    output->reserve(end - start);
1861    while (start < end) {
1862      uint16_t c = *start++;
1863      // If the |Char| we're dealing with is really a byte, then
1864      // we have utf8 here, and we need to check for multibyte characters
1865      // and transcode them to utf16 (either one or two utf16 chars).
1866      if (sizeof(Char) == sizeof(uint8_t) && c >= 0x7f) {
1867        // Inspect the leading byte to figure out how long the utf8
1868        // byte sequence is; while doing this initialize |codepoint|
1869        // with the first few bits.
1870        // See table in: https://en.wikipedia.org/wiki/UTF-8
1871        // byte one is 110x xxxx -> 2 byte utf8 sequence
1872        // byte one is 1110 xxxx -> 3 byte utf8 sequence
1873        // byte one is 1111 0xxx -> 4 byte utf8 sequence
1874        uint32_t codepoint;
1875        int num_bytes_left;
1876        if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
1877          num_bytes_left = 1;
1878          codepoint = c & 0x1f;
1879        } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
1880          num_bytes_left = 2;
1881          codepoint = c & 0x0f;
1882        } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
1883          codepoint = c & 0x07;
1884          num_bytes_left = 3;
1885        } else {
1886          return false;  // invalid leading byte
1887        }
1888
1889        // If we have enough bytes in our inpput, decode the remaining ones
1890        // belonging to this Unicode character into |codepoint|.
1891        if (start + num_bytes_left > end)
1892          return false;
1893        while (num_bytes_left > 0) {
1894          c = *start++;
1895          --num_bytes_left;
1896          // Check the next byte is a continuation byte, that is 10xx xxxx.
1897          if ((c & 0xc0) != 0x80)
1898            return false;
1899          codepoint = (codepoint << 6) | (c & 0x3f);
1900        }
1901
1902        // Disallow overlong encodings for ascii characters, as these
1903        // would include " and other characters significant to JSON
1904        // string termination / control.
1905        if (codepoint < 0x7f)
1906          return false;
1907        // Invalid in UTF8, and can't be represented in UTF16 anyway.
1908        if (codepoint > 0x10ffff)
1909          return false;
1910
1911        // So, now we transcode to UTF16,
1912        // using the math described at https://en.wikipedia.org/wiki/UTF-16,
1913        // for either one or two 16 bit characters.
1914        if (codepoint < 0xffff) {
1915          output->push_back(codepoint);
1916          continue;
1917        }
1918        codepoint -= 0x10000;
1919        output->push_back((codepoint >> 10) + 0xd800);    // high surrogate
1920        output->push_back((codepoint & 0x3ff) + 0xdc00);  // low surrogate
1921        continue;
1922      }
1923      if ('\\' != c) {
1924        output->push_back(c);
1925        continue;
1926      }
1927      if (start == end)
1928        return false;
1929      c = *start++;
1930
1931      if (c == 'x') {
1932        // \x is not supported.
1933        return false;
1934      }
1935
1936      switch (c) {
1937        case '"':
1938        case '/':
1939        case '\\':
1940          break;
1941        case 'b':
1942          c = '\b';
1943          break;
1944        case 'f':
1945          c = '\f';
1946          break;
1947        case 'n':
1948          c = '\n';
1949          break;
1950        case 'r':
1951          c = '\r';
1952          break;
1953        case 't':
1954          c = '\t';
1955          break;
1956        case 'v':
1957          c = '\v';
1958          break;
1959        case 'u':
1960          c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) +
1961              (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3));
1962          start += 4;
1963          break;
1964        default:
1965          return false;
1966      }
1967      output->push_back(c);
1968    }
1969    return true;
1970  }
1971
1972  void ParseValue(const Char* start,
1973                  const Char* end,
1974                  const Char** value_token_end,
1975                  int depth) {
1976    if (depth > kStackLimit) {
1977      HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start);
1978      return;
1979    }
1980    const Char* token_start = nullptr;
1981    const Char* token_end = nullptr;
1982    Token token = ParseToken(start, end, &token_start, &token_end);
1983    switch (token) {
1984      case NoInput:
1985        HandleError(Error::JSON_PARSER_NO_INPUT, token_start);
1986        return;
1987      case InvalidToken:
1988        HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start);
1989        return;
1990      case NullToken:
1991        handler_->HandleNull();
1992        break;
1993      case BoolTrue:
1994        handler_->HandleBool(true);
1995        break;
1996      case BoolFalse:
1997        handler_->HandleBool(false);
1998        break;
1999      case Number: {
2000        double value;
2001        if (!CharsToDouble(token_start, token_end - token_start, &value)) {
2002          HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start);
2003          return;
2004        }
2005        if (value >= std::numeric_limits<int32_t>::min() &&
2006            value <= std::numeric_limits<int32_t>::max() &&
2007            static_cast<int32_t>(value) == value)
2008          handler_->HandleInt32(static_cast<int32_t>(value));
2009        else
2010          handler_->HandleDouble(value);
2011        break;
2012      }
2013      case StringLiteral: {
2014        std::vector<uint16_t> value;
2015        bool ok = DecodeString(token_start + 1, token_end - 1, &value);
2016        if (!ok) {
2017          HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2018          return;
2019        }
2020        handler_->HandleString16(span<uint16_t>(value.data(), value.size()));
2021        break;
2022      }
2023      case ArrayBegin: {
2024        handler_->HandleArrayBegin();
2025        start = token_end;
2026        token = ParseToken(start, end, &token_start, &token_end);
2027        while (token != ArrayEnd) {
2028          ParseValue(start, end, &token_end, depth + 1);
2029          if (error_)
2030            return;
2031
2032          // After a list value, we expect a comma or the end of the list.
2033          start = token_end;
2034          token = ParseToken(start, end, &token_start, &token_end);
2035          if (token == ListSeparator) {
2036            start = token_end;
2037            token = ParseToken(start, end, &token_start, &token_end);
2038            if (token == ArrayEnd) {
2039              HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start);
2040              return;
2041            }
2042          } else if (token != ArrayEnd) {
2043            // Unexpected value after list value. Bail out.
2044            HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED,
2045                        token_start);
2046            return;
2047          }
2048        }
2049        handler_->HandleArrayEnd();
2050        break;
2051      }
2052      case ObjectBegin: {
2053        handler_->HandleMapBegin();
2054        start = token_end;
2055        token = ParseToken(start, end, &token_start, &token_end);
2056        while (token != ObjectEnd) {
2057          if (token != StringLiteral) {
2058            HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED,
2059                        token_start);
2060            return;
2061          }
2062          std::vector<uint16_t> key;
2063          if (!DecodeString(token_start + 1, token_end - 1, &key)) {
2064            HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2065            return;
2066          }
2067          handler_->HandleString16(span<uint16_t>(key.data(), key.size()));
2068          start = token_end;
2069
2070          token = ParseToken(start, end, &token_start, &token_end);
2071          if (token != ObjectPairSeparator) {
2072            HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start);
2073            return;
2074          }
2075          start = token_end;
2076
2077          ParseValue(start, end, &token_end, depth + 1);
2078          if (error_)
2079            return;
2080          start = token_end;
2081
2082          // After a key/value pair, we expect a comma or the end of the
2083          // object.
2084          token = ParseToken(start, end, &token_start, &token_end);
2085          if (token == ListSeparator) {
2086            start = token_end;
2087            token = ParseToken(start, end, &token_start, &token_end);
2088            if (token == ObjectEnd) {
2089              HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start);
2090              return;
2091            }
2092          } else if (token != ObjectEnd) {
2093            // Unexpected value after last object value. Bail out.
2094            HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED,
2095                        token_start);
2096            return;
2097          }
2098        }
2099        handler_->HandleMapEnd();
2100        break;
2101      }
2102
2103      default:
2104        // We got a token that's not a value.
2105        HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start);
2106        return;
2107    }
2108
2109    SkipWhitespaceAndComments(token_end, end, value_token_end);
2110  }
2111
2112  void HandleError(Error error, const Char* pos) {
2113    assert(error != Error::OK);
2114    if (!error_) {
2115      handler_->HandleError(
2116          Status{error, static_cast<size_t>(pos - start_pos_)});
2117      error_ = true;
2118    }
2119  }
2120
2121  const Char* start_pos_ = nullptr;
2122  bool error_ = false;
2123  const Platform* platform_;
2124  StreamingParserHandler* handler_;
2125};
2126}  // namespace
2127
2128void ParseJSON(const Platform& platform,
2129               span<uint8_t> chars,
2130               StreamingParserHandler* handler) {
2131  JsonParser<uint8_t> parser(&platform, handler);
2132  parser.Parse(chars.data(), chars.size());
2133}
2134
2135void ParseJSON(const Platform& platform,
2136               span<uint16_t> chars,
2137               StreamingParserHandler* handler) {
2138  JsonParser<uint16_t> parser(&platform, handler);
2139  parser.Parse(chars.data(), chars.size());
2140}
2141
2142// =============================================================================
2143// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
2144// =============================================================================
2145template <typename C>
2146Status ConvertCBORToJSONTmpl(const Platform& platform,
2147                             span<uint8_t> cbor,
2148                             C* json) {
2149  Status status;
2150  std::unique_ptr<StreamingParserHandler> json_writer =
2151      NewJSONEncoder(&platform, json, &status);
2152  cbor::ParseCBOR(cbor, json_writer.get());
2153  return status;
2154}
2155
2156Status ConvertCBORToJSON(const Platform& platform,
2157                         span<uint8_t> cbor,
2158                         std::vector<uint8_t>* json) {
2159  return ConvertCBORToJSONTmpl(platform, cbor, json);
2160}
2161Status ConvertCBORToJSON(const Platform& platform,
2162                         span<uint8_t> cbor,
2163                         std::string* json) {
2164  return ConvertCBORToJSONTmpl(platform, cbor, json);
2165}
2166
2167template <typename T, typename C>
2168Status ConvertJSONToCBORTmpl(const Platform& platform, span<T> json, C* cbor) {
2169  Status status;
2170  std::unique_ptr<StreamingParserHandler> encoder =
2171      cbor::NewCBOREncoder(cbor, &status);
2172  ParseJSON(platform, json, encoder.get());
2173  return status;
2174}
2175Status ConvertJSONToCBOR(const Platform& platform,
2176                         span<uint8_t> json,
2177                         std::string* cbor) {
2178  return ConvertJSONToCBORTmpl(platform, json, cbor);
2179}
2180Status ConvertJSONToCBOR(const Platform& platform,
2181                         span<uint16_t> json,
2182                         std::string* cbor) {
2183  return ConvertJSONToCBORTmpl(platform, json, cbor);
2184}
2185Status ConvertJSONToCBOR(const Platform& platform,
2186                         span<uint8_t> json,
2187                         std::vector<uint8_t>* cbor) {
2188  return ConvertJSONToCBORTmpl(platform, json, cbor);
2189}
2190Status ConvertJSONToCBOR(const Platform& platform,
2191                         span<uint16_t> json,
2192                         std::vector<uint8_t>* cbor) {
2193  return ConvertJSONToCBORTmpl(platform, json, cbor);
2194}
2195}  // namespace json
2196
2197{% for namespace in config.protocol.namespace %}
2198} // namespace {{namespace}}
2199{% endfor %}
2200