• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1{# This template is generated by gen_cbor_templates.py. #}
2// Generated by lib/encoding_cpp.template.
3
4// Copyright 2019 The Chromium Authors. All rights reserved.
5// Use of this source code is governed by a BSD-style license that can be
6// found in the LICENSE file.
7
8{% if config.encoding_lib.header == "" %}
9
10#include <algorithm>
11#include <cassert>
12#include <cmath>
13#include <cstring>
14#include <limits>
15#include <stack>
16
17{% for namespace in config.protocol.namespace %}
18namespace {{namespace}} {
19{% endfor %}
20
21// ===== encoding/encoding.cc =====
22
23// =============================================================================
24// Status and Error codes
25// =============================================================================
26
27std::string Status::ToASCIIString() const {
28  switch (error) {
29    case Error::OK:
30      return "OK";
31    case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS:
32      return ToASCIIString("JSON: unprocessed input remains");
33    case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED:
34      return ToASCIIString("JSON: stack limit exceeded");
35    case Error::JSON_PARSER_NO_INPUT:
36      return ToASCIIString("JSON: no input");
37    case Error::JSON_PARSER_INVALID_TOKEN:
38      return ToASCIIString("JSON: invalid token");
39    case Error::JSON_PARSER_INVALID_NUMBER:
40      return ToASCIIString("JSON: invalid number");
41    case Error::JSON_PARSER_INVALID_STRING:
42      return ToASCIIString("JSON: invalid string");
43    case Error::JSON_PARSER_UNEXPECTED_ARRAY_END:
44      return ToASCIIString("JSON: unexpected array end");
45    case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED:
46      return ToASCIIString("JSON: comma or array end expected");
47    case Error::JSON_PARSER_STRING_LITERAL_EXPECTED:
48      return ToASCIIString("JSON: string literal expected");
49    case Error::JSON_PARSER_COLON_EXPECTED:
50      return ToASCIIString("JSON: colon expected");
51    case Error::JSON_PARSER_UNEXPECTED_MAP_END:
52      return ToASCIIString("JSON: unexpected map end");
53    case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED:
54      return ToASCIIString("JSON: comma or map end expected");
55    case Error::JSON_PARSER_VALUE_EXPECTED:
56      return ToASCIIString("JSON: value expected");
57
58    case Error::CBOR_INVALID_INT32:
59      return ToASCIIString("CBOR: invalid int32");
60    case Error::CBOR_INVALID_DOUBLE:
61      return ToASCIIString("CBOR: invalid double");
62    case Error::CBOR_INVALID_ENVELOPE:
63      return ToASCIIString("CBOR: invalid envelope");
64    case Error::CBOR_INVALID_STRING8:
65      return ToASCIIString("CBOR: invalid string8");
66    case Error::CBOR_INVALID_STRING16:
67      return ToASCIIString("CBOR: invalid string16");
68    case Error::CBOR_INVALID_BINARY:
69      return ToASCIIString("CBOR: invalid binary");
70    case Error::CBOR_UNSUPPORTED_VALUE:
71      return ToASCIIString("CBOR: unsupported value");
72    case Error::CBOR_NO_INPUT:
73      return ToASCIIString("CBOR: no input");
74    case Error::CBOR_INVALID_START_BYTE:
75      return ToASCIIString("CBOR: invalid start byte");
76    case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE:
77      return ToASCIIString("CBOR: unexpected eof expected value");
78    case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY:
79      return ToASCIIString("CBOR: unexpected eof in array");
80    case Error::CBOR_UNEXPECTED_EOF_IN_MAP:
81      return ToASCIIString("CBOR: unexpected eof in map");
82    case Error::CBOR_INVALID_MAP_KEY:
83      return ToASCIIString("CBOR: invalid map key");
84    case Error::CBOR_STACK_LIMIT_EXCEEDED:
85      return ToASCIIString("CBOR: stack limit exceeded");
86    case Error::CBOR_TRAILING_JUNK:
87      return ToASCIIString("CBOR: trailing junk");
88    case Error::CBOR_MAP_START_EXPECTED:
89      return ToASCIIString("CBOR: map start expected");
90    case Error::CBOR_MAP_STOP_EXPECTED:
91      return ToASCIIString("CBOR: map stop expected");
92    case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED:
93      return ToASCIIString("CBOR: envelope size limit exceeded");
94  }
95  // Some compilers can't figure out that we can't get here.
96  return "INVALID ERROR CODE";
97}
98
99std::string Status::ToASCIIString(const char* msg) const {
100  return std::string(msg) + " at position " + std::to_string(pos);
101}
102
103namespace cbor {
104namespace {
// Number of bits by which the "initial byte" is shifted to the right, after
// masking with |kMajorTypeMask|, to obtain the major type in the lowermost
// bits.
constexpr uint8_t kMajorTypeBitShift = 5u;
// Selects the low-order 5 bits of the "initial byte"; these hold the
// additional information.
constexpr uint8_t kAdditionalInformationMask = 0x1f;
// Selects the high-order 3 bits of the "initial byte"; these hold the major
// type of the encoded value.
constexpr uint8_t kMajorTypeMask = 0xe0;
// Additional information values announcing that the integer is stored in the
// 1, 2, 4 or 8 bytes following the initial byte.
constexpr uint8_t kAdditionalInformation1Byte = 24u;
constexpr uint8_t kAdditionalInformation2Bytes = 25u;
constexpr uint8_t kAdditionalInformation4Bytes = 26u;
constexpr uint8_t kAdditionalInformation8Bytes = 27u;
123
124// Encodes the initial byte, consisting of the |type| in the first 3 bits
125// followed by 5 bits of |additional_info|.
126constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) {
127  return (static_cast<uint8_t>(type) << kMajorTypeBitShift) |
128         (additional_info & kAdditionalInformationMask);
129}
130
131// TAG 24 indicates that what follows is a byte string which is
132// encoded in CBOR format. We use this as a wrapper for
133// maps and arrays, allowing us to skip them, because the
134// byte string carries its size (byte length).
135// https://tools.ietf.org/html/rfc7049#section-2.4.4.1
136static constexpr uint8_t kInitialByteForEnvelope =
137    EncodeInitialByte(MajorType::TAG, 24);
138// The initial byte for a byte string with at most 2^32 bytes
139// of payload. This is used for envelope encoding, even if
140// the byte string is shorter.
141static constexpr uint8_t kInitialByteFor32BitLengthByteString =
142    EncodeInitialByte(MajorType::BYTE_STRING, 26);
143
144// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional
145// info = 31.
146static constexpr uint8_t kInitialByteIndefiniteLengthArray =
147    EncodeInitialByte(MajorType::ARRAY, 31);
148static constexpr uint8_t kInitialByteIndefiniteLengthMap =
149    EncodeInitialByte(MajorType::MAP, 31);
150// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite
151// length maps / arrays.
152static constexpr uint8_t kStopByte =
153    EncodeInitialByte(MajorType::SIMPLE_VALUE, 31);
154
155// See RFC 7049 Section 2.3, Table 2.
156static constexpr uint8_t kEncodedTrue =
157    EncodeInitialByte(MajorType::SIMPLE_VALUE, 21);
158static constexpr uint8_t kEncodedFalse =
159    EncodeInitialByte(MajorType::SIMPLE_VALUE, 20);
160static constexpr uint8_t kEncodedNull =
161    EncodeInitialByte(MajorType::SIMPLE_VALUE, 22);
162static constexpr uint8_t kInitialByteForDouble =
163    EncodeInitialByte(MajorType::SIMPLE_VALUE, 27);
164
165// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for
166// arbitrary binary data encoded as BYTE_STRING.
167static constexpr uint8_t kExpectedConversionToBase64Tag =
168    EncodeInitialByte(MajorType::TAG, 22);
169
// Appends the sizeof(T) bytes of |v| to |out|, most significant byte first.
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T, class C>
void WriteBytesMostSignificantByteFirst(T v, C* out) {
  // |remaining| counts the bytes still to be written; the next byte to emit
  // sits (remaining - 1) bytes above the least significant one.
  for (size_t remaining = sizeof(T); remaining > 0; --remaining) {
    const size_t shift_bits = (remaining - 1) * 8;
    out->push_back(0xff & (v >> shift_bits));
  }
}
177
178// Extracts sizeof(T) bytes from |in| to extract a value of type T
179// (e.g. uint64_t, uint32_t, ...), most significant byte first.
180// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
181template <typename T>
182T ReadBytesMostSignificantByteFirst(span<uint8_t> in) {
183  assert(in.size() >= sizeof(T));
184  T result = 0;
185  for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes)
186    result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8);
187  return result;
188}
189}  // namespace
190
191namespace internals {
192// Reads the start of a token with definitive size from |bytes|.
193// |type| is the major type as specified in RFC 7049 Section 2.1.
194// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size
195// (e.g. for BYTE_STRING).
196// If successful, returns the number of bytes read. Otherwise returns 0.
197size_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) {
198  if (bytes.empty())
199    return 0;
200  uint8_t initial_byte = bytes[0];
201  *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift);
202
203  uint8_t additional_information = initial_byte & kAdditionalInformationMask;
204  if (additional_information < 24) {
205    // Values 0-23 are encoded directly into the additional info of the
206    // initial byte.
207    *value = additional_information;
208    return 1;
209  }
210  if (additional_information == kAdditionalInformation1Byte) {
211    // Values 24-255 are encoded with one initial byte, followed by the value.
212    if (bytes.size() < 2)
213      return 0;
214    *value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes.subspan(1));
215    return 2;
216  }
217  if (additional_information == kAdditionalInformation2Bytes) {
218    // Values 256-65535: 1 initial byte + 2 bytes payload.
219    if (bytes.size() < 1 + sizeof(uint16_t))
220      return 0;
221    *value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes.subspan(1));
222    return 3;
223  }
224  if (additional_information == kAdditionalInformation4Bytes) {
225    // 32 bit uint: 1 initial byte + 4 bytes payload.
226    if (bytes.size() < 1 + sizeof(uint32_t))
227      return 0;
228    *value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes.subspan(1));
229    return 5;
230  }
231  if (additional_information == kAdditionalInformation8Bytes) {
232    // 64 bit uint: 1 initial byte + 8 bytes payload.
233    if (bytes.size() < 1 + sizeof(uint64_t))
234      return 0;
235    *value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes.subspan(1));
236    return 9;
237  }
238  return 0;
239}
240
241// Writes the start of a token with |type|. The |value| may indicate the size,
242// or it may be the payload if the value is an unsigned integer.
243template <typename C>
244void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) {
245  if (value < 24) {
246    // Values 0-23 are encoded directly into the additional info of the
247    // initial byte.
248    encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value));
249    return;
250  }
251  if (value <= std::numeric_limits<uint8_t>::max()) {
252    // Values 24-255 are encoded with one initial byte, followed by the value.
253    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte));
254    encoded->push_back(value);
255    return;
256  }
257  if (value <= std::numeric_limits<uint16_t>::max()) {
258    // Values 256-65535: 1 initial byte + 2 bytes payload.
259    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes));
260    WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded);
261    return;
262  }
263  if (value <= std::numeric_limits<uint32_t>::max()) {
264    // 32 bit uint: 1 initial byte + 4 bytes payload.
265    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes));
266    WriteBytesMostSignificantByteFirst<uint32_t>(static_cast<uint32_t>(value),
267                                                 encoded);
268    return;
269  }
270  // 64 bit uint: 1 initial byte + 8 bytes payload.
271  encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes));
272  WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded);
273}
274void WriteTokenStart(MajorType type,
275                     uint64_t value,
276                     std::vector<uint8_t>* encoded) {
277  WriteTokenStartTmpl(type, value, encoded);
278}
279void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) {
280  WriteTokenStartTmpl(type, value, encoded);
281}
282}  // namespace internals
283
284// =============================================================================
285// Detecting CBOR content
286// =============================================================================
287
288uint8_t InitialByteForEnvelope() {
289  return kInitialByteForEnvelope;
290}
291uint8_t InitialByteFor32BitLengthByteString() {
292  return kInitialByteFor32BitLengthByteString;
293}
294bool IsCBORMessage(span<uint8_t> msg) {
295  return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() &&
296         msg[1] == InitialByteFor32BitLengthByteString();
297}
298
299// =============================================================================
// Encoding individual CBOR items
301// =============================================================================
302
303uint8_t EncodeTrue() {
304  return kEncodedTrue;
305}
306uint8_t EncodeFalse() {
307  return kEncodedFalse;
308}
309uint8_t EncodeNull() {
310  return kEncodedNull;
311}
312
313uint8_t EncodeIndefiniteLengthArrayStart() {
314  return kInitialByteIndefiniteLengthArray;
315}
316
317uint8_t EncodeIndefiniteLengthMapStart() {
318  return kInitialByteIndefiniteLengthMap;
319}
320
321uint8_t EncodeStop() {
322  return kStopByte;
323}
324
325template <typename C>
326void EncodeInt32Tmpl(int32_t value, C* out) {
327  if (value >= 0) {
328    internals::WriteTokenStart(MajorType::UNSIGNED, value, out);
329  } else {
330    uint64_t representation = static_cast<uint64_t>(-(value + 1));
331    internals::WriteTokenStart(MajorType::NEGATIVE, representation, out);
332  }
333}
334void EncodeInt32(int32_t value, std::vector<uint8_t>* out) {
335  EncodeInt32Tmpl(value, out);
336}
337void EncodeInt32(int32_t value, std::string* out) {
338  EncodeInt32Tmpl(value, out);
339}
340
341template <typename C>
342void EncodeString16Tmpl(span<uint16_t> in, C* out) {
343  uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
344  internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
345  // When emitting UTF16 characters, we always write the least significant byte
346  // first; this is because it's the native representation for X86.
347  // TODO(johannes): Implement a more efficient thing here later, e.g.
348  // casting *iff* the machine has this byte order.
349  // The wire format for UTF16 chars will probably remain the same
350  // (least significant byte first) since this way we can have
351  // golden files, unittests, etc. that port easily and universally.
352  // See also:
353  // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
354  for (const uint16_t two_bytes : in) {
355    out->push_back(two_bytes);
356    out->push_back(two_bytes >> 8);
357  }
358}
359void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) {
360  EncodeString16Tmpl(in, out);
361}
362void EncodeString16(span<uint16_t> in, std::string* out) {
363  EncodeString16Tmpl(in, out);
364}
365
366template <typename C>
367void EncodeString8Tmpl(span<uint8_t> in, C* out) {
368  internals::WriteTokenStart(MajorType::STRING,
369                             static_cast<uint64_t>(in.size_bytes()), out);
370  out->insert(out->end(), in.begin(), in.end());
371}
372void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) {
373  EncodeString8Tmpl(in, out);
374}
375void EncodeString8(span<uint8_t> in, std::string* out) {
376  EncodeString8Tmpl(in, out);
377}
378
379template <typename C>
380void EncodeFromLatin1Tmpl(span<uint8_t> latin1, C* out) {
381  for (size_t ii = 0; ii < latin1.size(); ++ii) {
382    if (latin1[ii] <= 127)
383      continue;
384    // If there's at least one non-ASCII char, convert to UTF8.
385    std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + ii);
386    for (; ii < latin1.size(); ++ii) {
387      if (latin1[ii] <= 127) {
388        utf8.push_back(latin1[ii]);
389      } else {
390        // 0xC0 means it's a UTF8 sequence with 2 bytes.
391        utf8.push_back((latin1[ii] >> 6) | 0xc0);
392        utf8.push_back((latin1[ii] | 0x80) & 0xbf);
393      }
394    }
395    EncodeString8(SpanFrom(utf8), out);
396    return;
397  }
398  EncodeString8(latin1, out);
399}
400void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) {
401  EncodeFromLatin1Tmpl(latin1, out);
402}
403void EncodeFromLatin1(span<uint8_t> latin1, std::string* out) {
404  EncodeFromLatin1Tmpl(latin1, out);
405}
406
407template <typename C>
408void EncodeFromUTF16Tmpl(span<uint16_t> utf16, C* out) {
409  // If there's at least one non-ASCII char, encode as STRING16 (UTF16).
410  for (uint16_t ch : utf16) {
411    if (ch <= 127)
412      continue;
413    EncodeString16(utf16, out);
414    return;
415  }
416  // It's all US-ASCII, strip out every second byte and encode as UTF8.
417  internals::WriteTokenStart(MajorType::STRING,
418                             static_cast<uint64_t>(utf16.size()), out);
419  out->insert(out->end(), utf16.begin(), utf16.end());
420}
421void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) {
422  EncodeFromUTF16Tmpl(utf16, out);
423}
424void EncodeFromUTF16(span<uint16_t> utf16, std::string* out) {
425  EncodeFromUTF16Tmpl(utf16, out);
426}
427
428template <typename C>
429void EncodeBinaryTmpl(span<uint8_t> in, C* out) {
430  out->push_back(kExpectedConversionToBase64Tag);
431  uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
432  internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
433  out->insert(out->end(), in.begin(), in.end());
434}
435void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) {
436  EncodeBinaryTmpl(in, out);
437}
438void EncodeBinary(span<uint8_t> in, std::string* out) {
439  EncodeBinaryTmpl(in, out);
440}
441
// Encoded size of a double: the initial byte (kInitialByteForDouble) plus
// the 64 bits of payload for its value.
constexpr size_t kEncodedDoubleSize = sizeof(uint64_t) + 1;

// Encoded size of an envelope header: the initial byte
// (kInitialByteForEnvelope), the start byte for a BYTE_STRING with a 32 bit
// wide length, and the 32 bit length of that string.
constexpr size_t kEncodedEnvelopeHeaderSize = sizeof(uint32_t) + 2;
450
451template <typename C>
452void EncodeDoubleTmpl(double value, C* out) {
453  // The additional_info=27 indicates 64 bits for the double follow.
454  // See RFC 7049 Section 2.3, Table 1.
455  out->push_back(kInitialByteForDouble);
456  union {
457    double from_double;
458    uint64_t to_uint64;
459  } reinterpret;
460  reinterpret.from_double = value;
461  WriteBytesMostSignificantByteFirst<uint64_t>(reinterpret.to_uint64, out);
462}
463void EncodeDouble(double value, std::vector<uint8_t>* out) {
464  EncodeDoubleTmpl(value, out);
465}
466void EncodeDouble(double value, std::string* out) {
467  EncodeDoubleTmpl(value, out);
468}
469
470// =============================================================================
471// cbor::EnvelopeEncoder - for wrapping submessages
472// =============================================================================
473
474template <typename C>
475void EncodeStartTmpl(C* out, size_t* byte_size_pos) {
476  assert(*byte_size_pos == 0);
477  out->push_back(kInitialByteForEnvelope);
478  out->push_back(kInitialByteFor32BitLengthByteString);
479  *byte_size_pos = out->size();
480  out->resize(out->size() + sizeof(uint32_t));
481}
482
483void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) {
484  EncodeStartTmpl<std::vector<uint8_t>>(out, &byte_size_pos_);
485}
486
487void EnvelopeEncoder::EncodeStart(std::string* out) {
488  EncodeStartTmpl<std::string>(out, &byte_size_pos_);
489}
490
// Writes the payload size of the envelope into the four bytes at
// |*byte_size_pos| in |out|, most significant byte first, advancing
// |*byte_size_pos| past them. Returns false, leaving |out| untouched, if the
// size does not fit into 32 bits. |*byte_size_pos| must not be 0 on entry.
template <typename C>
bool EncodeStopTmpl(C* out, size_t* byte_size_pos) {
  assert(*byte_size_pos != 0);
  // The payload consists of all the bytes that were written past the four
  // byte size field itself.
  const uint64_t payload_size =
      out->size() - (*byte_size_pos + sizeof(uint32_t));
  // Exactly 4 bytes are stored, so the size can be at most the uint32_t
  // maximum.
  if (payload_size > std::numeric_limits<uint32_t>::max())
    return false;
  for (size_t written = 0; written < sizeof(uint32_t); ++written) {
    const size_t shift_bits = (sizeof(uint32_t) - 1 - written) * 8;
    (*out)[*byte_size_pos] = 0xff & (payload_size >> shift_bits);
    ++*byte_size_pos;
  }
  return true;
}
507
508bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) {
509  return EncodeStopTmpl(out, &byte_size_pos_);
510}
511
512bool EnvelopeEncoder::EncodeStop(std::string* out) {
513  return EncodeStopTmpl(out, &byte_size_pos_);
514}
515
516// =============================================================================
517// cbor::NewCBOREncoder - for encoding from a streaming parser
518// =============================================================================
519
520namespace {
521template <typename C>
522class CBOREncoder : public StreamingParserHandler {
523 public:
524  CBOREncoder(C* out, Status* status) : out_(out), status_(status) {
525    *status_ = Status();
526  }
527
528  void HandleMapBegin() override {
529    if (!status_->ok())
530      return;
531    envelopes_.emplace_back();
532    envelopes_.back().EncodeStart(out_);
533    out_->push_back(kInitialByteIndefiniteLengthMap);
534  }
535
536  void HandleMapEnd() override {
537    if (!status_->ok())
538      return;
539    out_->push_back(kStopByte);
540    assert(!envelopes_.empty());
541    if (!envelopes_.back().EncodeStop(out_)) {
542      HandleError(
543          Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
544      return;
545    }
546    envelopes_.pop_back();
547  }
548
549  void HandleArrayBegin() override {
550    if (!status_->ok())
551      return;
552    envelopes_.emplace_back();
553    envelopes_.back().EncodeStart(out_);
554    out_->push_back(kInitialByteIndefiniteLengthArray);
555  }
556
557  void HandleArrayEnd() override {
558    if (!status_->ok())
559      return;
560    out_->push_back(kStopByte);
561    assert(!envelopes_.empty());
562    if (!envelopes_.back().EncodeStop(out_)) {
563      HandleError(
564          Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
565      return;
566    }
567    envelopes_.pop_back();
568  }
569
570  void HandleString8(span<uint8_t> chars) override {
571    if (!status_->ok())
572      return;
573    EncodeString8(chars, out_);
574  }
575
576  void HandleString16(span<uint16_t> chars) override {
577    if (!status_->ok())
578      return;
579    EncodeFromUTF16(chars, out_);
580  }
581
582  void HandleBinary(span<uint8_t> bytes) override {
583    if (!status_->ok())
584      return;
585    EncodeBinary(bytes, out_);
586  }
587
588  void HandleDouble(double value) override {
589    if (!status_->ok())
590      return;
591    EncodeDouble(value, out_);
592  }
593
594  void HandleInt32(int32_t value) override {
595    if (!status_->ok())
596      return;
597    EncodeInt32(value, out_);
598  }
599
600  void HandleBool(bool value) override {
601    if (!status_->ok())
602      return;
603    // See RFC 7049 Section 2.3, Table 2.
604    out_->push_back(value ? kEncodedTrue : kEncodedFalse);
605  }
606
607  void HandleNull() override {
608    if (!status_->ok())
609      return;
610    // See RFC 7049 Section 2.3, Table 2.
611    out_->push_back(kEncodedNull);
612  }
613
614  void HandleError(Status error) override {
615    if (!status_->ok())
616      return;
617    *status_ = error;
618    out_->clear();
619  }
620
621 private:
622  C* out_;
623  std::vector<EnvelopeEncoder> envelopes_;
624  Status* status_;
625};
626}  // namespace
627
628std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
629    std::vector<uint8_t>* out,
630    Status* status) {
631  return std::unique_ptr<StreamingParserHandler>(
632      new CBOREncoder<std::vector<uint8_t>>(out, status));
633}
634std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
635                                                       Status* status) {
636  return std::unique_ptr<StreamingParserHandler>(
637      new CBOREncoder<std::string>(out, status));
638}
639
640// =============================================================================
641// cbor::CBORTokenizer - for parsing individual CBOR items
642// =============================================================================
643
644CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : bytes_(bytes) {
645  ReadNextToken(/*enter_envelope=*/false);
646}
647CBORTokenizer::~CBORTokenizer() {}
648
649CBORTokenTag CBORTokenizer::TokenTag() const {
650  return token_tag_;
651}
652
653void CBORTokenizer::Next() {
654  if (token_tag_ == CBORTokenTag::ERROR_VALUE ||
655      token_tag_ == CBORTokenTag::DONE)
656    return;
657  ReadNextToken(/*enter_envelope=*/false);
658}
659
660void CBORTokenizer::EnterEnvelope() {
661  assert(token_tag_ == CBORTokenTag::ENVELOPE);
662  ReadNextToken(/*enter_envelope=*/true);
663}
664
665Status CBORTokenizer::Status() const {
666  return status_;
667}
668
669// The following accessor functions ::GetInt32, ::GetDouble,
670// ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents
671// assume that a particular token was recognized in ::ReadNextToken.
672// That's where all the error checking is done. By design,
673// the accessors (assuming the token was recognized) never produce
674// an error.
675
676int32_t CBORTokenizer::GetInt32() const {
677  assert(token_tag_ == CBORTokenTag::INT32);
678  // The range checks happen in ::ReadNextToken().
679  return static_cast<int32_t>(
680      token_start_type_ == MajorType::UNSIGNED
681          ? token_start_internal_value_
682          : -static_cast<int64_t>(token_start_internal_value_) - 1);
683}
684
685double CBORTokenizer::GetDouble() const {
686  assert(token_tag_ == CBORTokenTag::DOUBLE);
687  union {
688    uint64_t from_uint64;
689    double to_double;
690  } reinterpret;
691  reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst<uint64_t>(
692      bytes_.subspan(status_.pos + 1));
693  return reinterpret.to_double;
694}
695
696span<uint8_t> CBORTokenizer::GetString8() const {
697  assert(token_tag_ == CBORTokenTag::STRING8);
698  auto length = static_cast<size_t>(token_start_internal_value_);
699  return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
700}
701
702span<uint8_t> CBORTokenizer::GetString16WireRep() const {
703  assert(token_tag_ == CBORTokenTag::STRING16);
704  auto length = static_cast<size_t>(token_start_internal_value_);
705  return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
706}
707
708span<uint8_t> CBORTokenizer::GetBinary() const {
709  assert(token_tag_ == CBORTokenTag::BINARY);
710  auto length = static_cast<size_t>(token_start_internal_value_);
711  return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
712}
713
714span<uint8_t> CBORTokenizer::GetEnvelopeContents() const {
715  assert(token_tag_ == CBORTokenTag::ENVELOPE);
716  auto length = static_cast<size_t>(token_start_internal_value_);
717  return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length);
718}
719
720// All error checking happens in ::ReadNextToken, so that the accessors
721// can avoid having to carry an error return value.
722//
723// With respect to checking the encoded lengths of strings, arrays, etc:
724// On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so
725// we initially read them as uint64_t, usually into token_start_internal_value_.
726//
727// However, since these containers have a representation on the machine,
728// we need to do corresponding size computations on the input byte array,
729// output span (e.g. the payload for a string), etc., and size_t is
730// machine specific (in practice either 32 bit or 64 bit).
731//
732// Further, we must avoid overflowing size_t. Therefore, we use this
733// kMaxValidLength constant to:
734// - Reject values that are larger than the architecture specific
735//   max size_t (differs between 32 bit and 64 bit arch).
736// - Reserve at least one bit so that we can check against overflows
737//   when adding lengths (array / string length / etc.); we do this by
738//   ensuring that the inputs to an addition are <= kMaxValidLength,
739//   and then checking whether the sum went past it.
740//
741// See also
742// https://chromium.googlesource.com/chromium/src/+/HEAD/docs/security/integer-semantics.md
// The smaller of the largest size_t value and a quarter of the uint64_t
// range.
static const uint64_t kMaxValidLength =
    std::min<uint64_t>(std::numeric_limits<size_t>::max(),
                       std::numeric_limits<uint64_t>::max() >> 2);
746
747void CBORTokenizer::ReadNextToken(bool enter_envelope) {
748  if (enter_envelope) {
749    status_.pos += kEncodedEnvelopeHeaderSize;
750  } else {
751    status_.pos =
752        status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_;
753  }
754  status_.error = Error::OK;
755  if (status_.pos >= bytes_.size()) {
756    token_tag_ = CBORTokenTag::DONE;
757    return;
758  }
759  const size_t remaining_bytes = bytes_.size() - status_.pos;
760  switch (bytes_[status_.pos]) {
761    case kStopByte:
762      SetToken(CBORTokenTag::STOP, 1);
763      return;
764    case kInitialByteIndefiniteLengthMap:
765      SetToken(CBORTokenTag::MAP_START, 1);
766      return;
767    case kInitialByteIndefiniteLengthArray:
768      SetToken(CBORTokenTag::ARRAY_START, 1);
769      return;
770    case kEncodedTrue:
771      SetToken(CBORTokenTag::TRUE_VALUE, 1);
772      return;
773    case kEncodedFalse:
774      SetToken(CBORTokenTag::FALSE_VALUE, 1);
775      return;
776    case kEncodedNull:
777      SetToken(CBORTokenTag::NULL_VALUE, 1);
778      return;
779    case kExpectedConversionToBase64Tag: {  // BINARY
780      const size_t bytes_read = internals::ReadTokenStart(
781          bytes_.subspan(status_.pos + 1), &token_start_type_,
782          &token_start_internal_value_);
783      if (!bytes_read || token_start_type_ != MajorType::BYTE_STRING ||
784          token_start_internal_value_ > kMaxValidLength) {
785        SetError(Error::CBOR_INVALID_BINARY);
786        return;
787      }
788      const uint64_t token_byte_length = token_start_internal_value_ +
789                                         /* tag before token start: */ 1 +
790                                         /* token start: */ bytes_read;
791      if (token_byte_length > remaining_bytes) {
792        SetError(Error::CBOR_INVALID_BINARY);
793        return;
794      }
795      SetToken(CBORTokenTag::BINARY, static_cast<size_t>(token_byte_length));
796      return;
797    }
798    case kInitialByteForDouble: {  // DOUBLE
799      if (kEncodedDoubleSize > remaining_bytes) {
800        SetError(Error::CBOR_INVALID_DOUBLE);
801        return;
802      }
803      SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize);
804      return;
805    }
806    case kInitialByteForEnvelope: {  // ENVELOPE
807      if (kEncodedEnvelopeHeaderSize > remaining_bytes) {
808        SetError(Error::CBOR_INVALID_ENVELOPE);
809        return;
810      }
811      // The envelope must be a byte string with 32 bit length.
812      if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) {
813        SetError(Error::CBOR_INVALID_ENVELOPE);
814        return;
815      }
816      // Read the length of the byte string.
817      token_start_internal_value_ = ReadBytesMostSignificantByteFirst<uint32_t>(
818          bytes_.subspan(status_.pos + 2));
819      if (token_start_internal_value_ > kMaxValidLength) {
820        SetError(Error::CBOR_INVALID_ENVELOPE);
821        return;
822      }
823      uint64_t token_byte_length =
824          token_start_internal_value_ + kEncodedEnvelopeHeaderSize;
825      if (token_byte_length > remaining_bytes) {
826        SetError(Error::CBOR_INVALID_ENVELOPE);
827        return;
828      }
829      SetToken(CBORTokenTag::ENVELOPE, static_cast<size_t>(token_byte_length));
830      return;
831    }
832    default: {
833      const size_t bytes_read = internals::ReadTokenStart(
834          bytes_.subspan(status_.pos), &token_start_type_,
835          &token_start_internal_value_);
836      switch (token_start_type_) {
837        case MajorType::UNSIGNED:  // INT32.
838          // INT32 is a signed int32 (int32 makes sense for the
839          // inspector_protocol, it's not a CBOR limitation), so we check
840          // against the signed max, so that the allowable values are
841          // 0, 1, 2, ... 2^31 - 1.
842          if (!bytes_read ||
843                static_cast<int64_t>(std::numeric_limits<int32_t>::max()) <
844                  static_cast<int64_t>(token_start_internal_value_)) {
845            SetError(Error::CBOR_INVALID_INT32);
846            return;
847          }
848          SetToken(CBORTokenTag::INT32, bytes_read);
849          return;
850        case MajorType::NEGATIVE: {  // INT32.
851          // INT32 is a signed int32 (int32 makes sense for the
852          // inspector_protocol, it's not a CBOR limitation); in CBOR, the
853          // negative values for INT32 are represented as NEGATIVE, that is, -1
854          // INT32 is represented as 1 << 5 | 0 (major type 1, additional info
855          // value 0).
856          // The represented allowed values range is -1 to -2^31.
857          // They are mapped into the encoded range of 0 to 2^31-1.
858          // We check the the payload in token_start_internal_value_ against
859          // that range (2^31-1 is also known as
860          // std::numeric_limits<int32_t>::max()).
861          if (!bytes_read ||
862	        static_cast<int64_t>(token_start_internal_value_) >
863                  static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
864            SetError(Error::CBOR_INVALID_INT32);
865            return;
866          }
867          SetToken(CBORTokenTag::INT32, bytes_read);
868          return;
869        }
870        case MajorType::STRING: {  // STRING8.
871          if (!bytes_read || token_start_internal_value_ > kMaxValidLength) {
872            SetError(Error::CBOR_INVALID_STRING8);
873            return;
874          }
875          uint64_t token_byte_length = token_start_internal_value_ + bytes_read;
876          if (token_byte_length > remaining_bytes) {
877            SetError(Error::CBOR_INVALID_STRING8);
878            return;
879          }
880          SetToken(CBORTokenTag::STRING8,
881                   static_cast<size_t>(token_byte_length));
882          return;
883        }
884        case MajorType::BYTE_STRING: {  // STRING16.
885          // Length must be divisible by 2 since UTF16 is 2 bytes per
886          // character, hence the &1 check.
887          if (!bytes_read || token_start_internal_value_ > kMaxValidLength ||
888              token_start_internal_value_ & 1) {
889            SetError(Error::CBOR_INVALID_STRING16);
890            return;
891          }
892          uint64_t token_byte_length = token_start_internal_value_ + bytes_read;
893          if (token_byte_length > remaining_bytes) {
894            SetError(Error::CBOR_INVALID_STRING16);
895            return;
896          }
897          SetToken(CBORTokenTag::STRING16,
898                   static_cast<size_t>(token_byte_length));
899          return;
900        }
901        case MajorType::ARRAY:
902        case MajorType::MAP:
903        case MajorType::TAG:
904        case MajorType::SIMPLE_VALUE:
905          SetError(Error::CBOR_UNSUPPORTED_VALUE);
906          return;
907      }
908    }
909  }
910}
911
912void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) {
913  token_tag_ = token_tag;
914  token_byte_length_ = token_byte_length;
915}
916
917void CBORTokenizer::SetError(Error error) {
918  token_tag_ = CBORTokenTag::ERROR_VALUE;
919  status_.error = error;
920}
921
922// =============================================================================
923// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
924// =============================================================================
925
926namespace {
// When parsing CBOR, we limit recursion depth for objects and arrays
// to this constant. ParseValue reports CBOR_STACK_LIMIT_EXCEEDED once the
// depth handed to it is greater than this value.
static constexpr int kStackLimit = 300;
930
// Below are three parsing routines for CBOR, which cover enough
// to roundtrip JSON messages. They are declared up front because they are
// mutually recursive: ParseValue dispatches to ParseMap / ParseArray, which
// call ParseValue again for the values they contain. Each returns false once
// an error has been reported to |out|, and true otherwise.
bool ParseMap(int32_t stack_depth,
              CBORTokenizer* tokenizer,
              StreamingParserHandler* out);
bool ParseArray(int32_t stack_depth,
                CBORTokenizer* tokenizer,
                StreamingParserHandler* out);
bool ParseValue(int32_t stack_depth,
                CBORTokenizer* tokenizer,
                StreamingParserHandler* out);
942
943void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
944  std::vector<uint16_t> value;
945  span<uint8_t> rep = tokenizer->GetString16WireRep();
946  for (size_t ii = 0; ii < rep.size(); ii += 2)
947    value.push_back((rep[ii + 1] << 8) | rep[ii]);
948  out->HandleString16(span<uint16_t>(value.data(), value.size()));
949  tokenizer->Next();
950}
951
952bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
953  assert(tokenizer->TokenTag() == CBORTokenTag::STRING8);
954  out->HandleString8(tokenizer->GetString8());
955  tokenizer->Next();
956  return true;
957}
958
959bool ParseValue(int32_t stack_depth,
960                CBORTokenizer* tokenizer,
961                StreamingParserHandler* out) {
962  if (stack_depth > kStackLimit) {
963    out->HandleError(
964        Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos});
965    return false;
966  }
967  // Skip past the envelope to get to what's inside.
968  if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE)
969    tokenizer->EnterEnvelope();
970  switch (tokenizer->TokenTag()) {
971    case CBORTokenTag::ERROR_VALUE:
972      out->HandleError(tokenizer->Status());
973      return false;
974    case CBORTokenTag::DONE:
975      out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE,
976                              tokenizer->Status().pos});
977      return false;
978    case CBORTokenTag::TRUE_VALUE:
979      out->HandleBool(true);
980      tokenizer->Next();
981      return true;
982    case CBORTokenTag::FALSE_VALUE:
983      out->HandleBool(false);
984      tokenizer->Next();
985      return true;
986    case CBORTokenTag::NULL_VALUE:
987      out->HandleNull();
988      tokenizer->Next();
989      return true;
990    case CBORTokenTag::INT32:
991      out->HandleInt32(tokenizer->GetInt32());
992      tokenizer->Next();
993      return true;
994    case CBORTokenTag::DOUBLE:
995      out->HandleDouble(tokenizer->GetDouble());
996      tokenizer->Next();
997      return true;
998    case CBORTokenTag::STRING8:
999      return ParseUTF8String(tokenizer, out);
1000    case CBORTokenTag::STRING16:
1001      ParseUTF16String(tokenizer, out);
1002      return true;
1003    case CBORTokenTag::BINARY: {
1004      out->HandleBinary(tokenizer->GetBinary());
1005      tokenizer->Next();
1006      return true;
1007    }
1008    case CBORTokenTag::MAP_START:
1009      return ParseMap(stack_depth + 1, tokenizer, out);
1010    case CBORTokenTag::ARRAY_START:
1011      return ParseArray(stack_depth + 1, tokenizer, out);
1012    default:
1013      out->HandleError(
1014          Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos});
1015      return false;
1016  }
1017}
1018
1019// |bytes| must start with the indefinite length array byte, so basically,
1020// ParseArray may only be called after an indefinite length array has been
1021// detected.
1022bool ParseArray(int32_t stack_depth,
1023                CBORTokenizer* tokenizer,
1024                StreamingParserHandler* out) {
1025  assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START);
1026  tokenizer->Next();
1027  out->HandleArrayBegin();
1028  while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1029    if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1030      out->HandleError(
1031          Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos});
1032      return false;
1033    }
1034    if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1035      out->HandleError(tokenizer->Status());
1036      return false;
1037    }
1038    // Parse value.
1039    if (!ParseValue(stack_depth, tokenizer, out))
1040      return false;
1041  }
1042  out->HandleArrayEnd();
1043  tokenizer->Next();
1044  return true;
1045}
1046
1047// |bytes| must start with the indefinite length array byte, so basically,
1048// ParseArray may only be called after an indefinite length array has been
1049// detected.
1050bool ParseMap(int32_t stack_depth,
1051              CBORTokenizer* tokenizer,
1052              StreamingParserHandler* out) {
1053  assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START);
1054  out->HandleMapBegin();
1055  tokenizer->Next();
1056  while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1057    if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1058      out->HandleError(
1059          Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos});
1060      return false;
1061    }
1062    if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1063      out->HandleError(tokenizer->Status());
1064      return false;
1065    }
1066    // Parse key.
1067    if (tokenizer->TokenTag() == CBORTokenTag::STRING8) {
1068      if (!ParseUTF8String(tokenizer, out))
1069        return false;
1070    } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) {
1071      ParseUTF16String(tokenizer, out);
1072    } else {
1073      out->HandleError(
1074          Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos});
1075      return false;
1076    }
1077    // Parse value.
1078    if (!ParseValue(stack_depth, tokenizer, out))
1079      return false;
1080  }
1081  out->HandleMapEnd();
1082  tokenizer->Next();
1083  return true;
1084}
1085}  // namespace
1086
1087void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out) {
1088  if (bytes.empty()) {
1089    out->HandleError(Status{Error::CBOR_NO_INPUT, 0});
1090    return;
1091  }
1092  if (bytes[0] != kInitialByteForEnvelope) {
1093    out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0});
1094    return;
1095  }
1096  CBORTokenizer tokenizer(bytes);
1097  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1098    out->HandleError(tokenizer.Status());
1099    return;
1100  }
1101  // We checked for the envelope start byte above, so the tokenizer
1102  // must agree here, since it's not an error.
1103  assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE);
1104  tokenizer.EnterEnvelope();
1105  if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) {
1106    out->HandleError(
1107        Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos});
1108    return;
1109  }
1110  if (!ParseMap(/*stack_depth=*/1, &tokenizer, out))
1111    return;
1112  if (tokenizer.TokenTag() == CBORTokenTag::DONE)
1113    return;
1114  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1115    out->HandleError(tokenizer.Status());
1116    return;
1117  }
1118  out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos});
1119}
1120
1121// =============================================================================
// cbor::AppendString8EntryToCBORMap - for limited in-place editing of messages
1123// =============================================================================
1124
1125template <typename C>
1126Status AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key,
1127                                       span<uint8_t> string8_value,
1128                                       C* cbor) {
1129  // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since
1130  // it could be a char (signed!). Instead, use bytes.
1131  span<uint8_t> bytes(reinterpret_cast<const uint8_t*>(cbor->data()),
1132                      cbor->size());
1133  CBORTokenizer tokenizer(bytes);
1134  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE)
1135    return tokenizer.Status();
1136  if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE)
1137    return Status(Error::CBOR_INVALID_ENVELOPE, 0);
1138  size_t envelope_size = tokenizer.GetEnvelopeContents().size();
1139  size_t old_size = cbor->size();
1140  if (old_size != envelope_size + kEncodedEnvelopeHeaderSize)
1141    return Status(Error::CBOR_INVALID_ENVELOPE, 0);
1142  if (envelope_size == 0 ||
1143      (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart()))
1144    return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize);
1145  if (bytes[bytes.size() - 1] != EncodeStop())
1146    return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1);
1147  cbor->pop_back();
1148  EncodeString8(string8_key, cbor);
1149  EncodeString8(string8_value, cbor);
1150  cbor->push_back(EncodeStop());
1151  size_t new_envelope_size = envelope_size + (cbor->size() - old_size);
1152  if (new_envelope_size > std::numeric_limits<uint32_t>::max())
1153    return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0);
1154  size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t);
1155  uint8_t* out = reinterpret_cast<uint8_t*>(&cbor->at(size_pos));
1156  *(out++) = (new_envelope_size >> 24) & 0xff;
1157  *(out++) = (new_envelope_size >> 16) & 0xff;
1158  *(out++) = (new_envelope_size >> 8) & 0xff;
1159  *(out) = new_envelope_size & 0xff;
1160  return Status();
1161}
1162Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1163                                   span<uint8_t> string8_value,
1164                                   std::vector<uint8_t>* cbor) {
1165  return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1166}
1167Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1168                                   span<uint8_t> string8_value,
1169                                   std::string* cbor) {
1170  return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1171}
1172}  // namespace cbor
1173
1174namespace json {
1175
1176// =============================================================================
1177// json::NewJSONEncoder - for encoding streaming parser events as JSON
1178// =============================================================================
1179
1180namespace {
// Appends |value| to |out| as exactly four lowercase hexadecimal digits,
// most significant digit first (0x1a2b is appended as "1a2b").
template <typename C>
void PrintHex(uint16_t value, C* out) {
  static constexpr char kHexDigits[] = "0123456789abcdef";
  for (int shift = 12; shift >= 0; shift -= 4)
    out->push_back(kHexDigits[(value >> shift) & 0xf]);
}
1189
// The JSON writer below keeps a stack of State instances, one per open
// container. A State knows which kind of container it describes and how many
// elements have been started in it, which is all that is needed to emit the
// right delimiter before each element.
enum class Container {
  // The top-level, initial state (not inside any JSON container).
  NONE,
  // Inside a JSON object.
  MAP,
  // Inside a JSON array.
  ARRAY
};
class State {
 public:
  explicit State(Container container) : container_(container) {}

  // Emits the delimiter (if any) that must precede the next element and
  // counts that element.
  void StartElement(std::vector<uint8_t>* out) { StartElementTmpl(out); }
  void StartElement(std::string* out) { StartElementTmpl(out); }

  Container container() const { return container_; }

 private:
  // Nothing precedes the first element. Afterwards arrays always use ','
  // while maps alternate: ':' before a value (odd count so far), ',' before
  // the next key (even count so far).
  template <typename C>
  void StartElementTmpl(C* out) {
    assert(container_ != Container::NONE || size_ == 0);
    const int started_so_far = size_++;
    if (started_so_far == 0)
      return;
    const bool before_map_value =
        container_ != Container::ARRAY && (started_so_far & 1);
    out->push_back(before_map_value ? ':' : ',');
  }

  Container container_ = Container::NONE;
  int size_ = 0;
};
1222
1223constexpr char kBase64Table[] =
1224    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1225    "abcdefghijklmnopqrstuvwxyz0123456789+/";
1226
1227template <typename C>
1228void Base64Encode(const span<uint8_t>& in, C* out) {
1229  // The following three cases are based on the tables in the example
1230  // section in https://en.wikipedia.org/wiki/Base64. We process three
1231  // input bytes at a time, emitting 4 output bytes at a time.
1232  size_t ii = 0;
1233
1234  // While possible, process three input bytes.
1235  for (; ii + 3 <= in.size(); ii += 3) {
1236    uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2];
1237    out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1238    out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1239    out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1240    out->push_back(kBase64Table[twentyfour_bits & 0x3f]);
1241  }
1242  if (ii + 2 <= in.size()) {  // Process two input bytes.
1243    uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8);
1244    out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1245    out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1246    out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1247    out->push_back('=');  // Emit padding.
1248    return;
1249  }
1250  if (ii + 1 <= in.size()) {  // Process a single input byte.
1251    uint32_t twentyfour_bits = (in[ii] << 16);
1252    out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1253    out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1254    out->push_back('=');  // Emit padding.
1255    out->push_back('=');  // Emit padding.
1256  }
1257}
1258
1259// Implements a handler for JSON parser events to emit a JSON string.
1260template <typename C>
1261class JSONEncoder : public StreamingParserHandler {
1262 public:
1263  JSONEncoder(const Platform* platform, C* out, Status* status)
1264      : platform_(platform), out_(out), status_(status) {
1265    *status_ = Status();
1266    state_.emplace(Container::NONE);
1267  }
1268
1269  void HandleMapBegin() override {
1270    if (!status_->ok())
1271      return;
1272    assert(!state_.empty());
1273    state_.top().StartElement(out_);
1274    state_.emplace(Container::MAP);
1275    Emit('{');
1276  }
1277
1278  void HandleMapEnd() override {
1279    if (!status_->ok())
1280      return;
1281    assert(state_.size() >= 2 && state_.top().container() == Container::MAP);
1282    state_.pop();
1283    Emit('}');
1284  }
1285
1286  void HandleArrayBegin() override {
1287    if (!status_->ok())
1288      return;
1289    state_.top().StartElement(out_);
1290    state_.emplace(Container::ARRAY);
1291    Emit('[');
1292  }
1293
1294  void HandleArrayEnd() override {
1295    if (!status_->ok())
1296      return;
1297    assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY);
1298    state_.pop();
1299    Emit(']');
1300  }
1301
1302  void HandleString16(span<uint16_t> chars) override {
1303    if (!status_->ok())
1304      return;
1305    state_.top().StartElement(out_);
1306    Emit('"');
1307    for (const uint16_t ch : chars) {
1308      if (ch == '"') {
1309        Emit("\\\"");
1310      } else if (ch == '\\') {
1311        Emit("\\\\");
1312      } else if (ch == '\b') {
1313        Emit("\\b");
1314      } else if (ch == '\f') {
1315        Emit("\\f");
1316      } else if (ch == '\n') {
1317        Emit("\\n");
1318      } else if (ch == '\r') {
1319        Emit("\\r");
1320      } else if (ch == '\t') {
1321        Emit("\\t");
1322      } else if (ch >= 32 && ch <= 126) {
1323        Emit(ch);
1324      } else {
1325        Emit("\\u");
1326        PrintHex(ch, out_);
1327      }
1328    }
1329    Emit('"');
1330  }
1331
1332  void HandleString8(span<uint8_t> chars) override {
1333    if (!status_->ok())
1334      return;
1335    state_.top().StartElement(out_);
1336    Emit('"');
1337    for (size_t ii = 0; ii < chars.size(); ++ii) {
1338      uint8_t c = chars[ii];
1339      if (c == '"') {
1340        Emit("\\\"");
1341      } else if (c == '\\') {
1342        Emit("\\\\");
1343      } else if (c == '\b') {
1344        Emit("\\b");
1345      } else if (c == '\f') {
1346        Emit("\\f");
1347      } else if (c == '\n') {
1348        Emit("\\n");
1349      } else if (c == '\r') {
1350        Emit("\\r");
1351      } else if (c == '\t') {
1352        Emit("\\t");
1353      } else if (c >= 32 && c <= 126) {
1354        Emit(c);
1355      } else if (c < 32) {
1356        Emit("\\u");
1357        PrintHex(static_cast<uint16_t>(c), out_);
1358      } else {
1359        // Inspect the leading byte to figure out how long the utf8
1360        // byte sequence is; while doing this initialize |codepoint|
1361        // with the first few bits.
1362        // See table in: https://en.wikipedia.org/wiki/UTF-8
1363        // byte one is 110x xxxx -> 2 byte utf8 sequence
1364        // byte one is 1110 xxxx -> 3 byte utf8 sequence
1365        // byte one is 1111 0xxx -> 4 byte utf8 sequence
1366        uint32_t codepoint;
1367        int num_bytes_left;
1368        if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
1369          num_bytes_left = 1;
1370          codepoint = c & 0x1f;
1371        } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
1372          num_bytes_left = 2;
1373          codepoint = c & 0x0f;
1374        } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
1375          codepoint = c & 0x07;
1376          num_bytes_left = 3;
1377        } else {
1378          continue;  // invalid leading byte
1379        }
1380
1381        // If we have enough bytes in our input, decode the remaining ones
1382        // belonging to this Unicode character into |codepoint|.
1383        if (ii + num_bytes_left > chars.size())
1384          continue;
1385        while (num_bytes_left > 0) {
1386          c = chars[++ii];
1387          --num_bytes_left;
1388          // Check the next byte is a continuation byte, that is 10xx xxxx.
1389          if ((c & 0xc0) != 0x80)
1390            continue;
1391          codepoint = (codepoint << 6) | (c & 0x3f);
1392        }
1393
1394        // Disallow overlong encodings for ascii characters, as these
1395        // would include " and other characters significant to JSON
1396        // string termination / control.
1397        if (codepoint < 0x7f)
1398          continue;
1399        // Invalid in UTF8, and can't be represented in UTF16 anyway.
1400        if (codepoint > 0x10ffff)
1401          continue;
1402
1403        // So, now we transcode to UTF16,
1404        // using the math described at https://en.wikipedia.org/wiki/UTF-16,
1405        // for either one or two 16 bit characters.
1406        if (codepoint < 0xffff) {
1407          Emit("\\u");
1408          PrintHex(static_cast<uint16_t>(codepoint), out_);
1409          continue;
1410        }
1411        codepoint -= 0x10000;
1412        // high surrogate
1413        Emit("\\u");
1414        PrintHex(static_cast<uint16_t>((codepoint >> 10) + 0xd800), out_);
1415        // low surrogate
1416        Emit("\\u");
1417        PrintHex(static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00), out_);
1418      }
1419    }
1420    Emit('"');
1421  }
1422
1423  void HandleBinary(span<uint8_t> bytes) override {
1424    if (!status_->ok())
1425      return;
1426    state_.top().StartElement(out_);
1427    Emit('"');
1428    Base64Encode(bytes, out_);
1429    Emit('"');
1430  }
1431
1432  void HandleDouble(double value) override {
1433    if (!status_->ok())
1434      return;
1435    state_.top().StartElement(out_);
1436    // JSON cannot represent NaN or Infinity. So, for compatibility,
1437    // we behave like the JSON object in web browsers: emit 'null'.
1438    if (!std::isfinite(value)) {
1439      Emit("null");
1440      return;
1441    }
1442    std::unique_ptr<char[]> str_value = platform_->DToStr(value);
1443
1444    // DToStr may fail to emit a 0 before the decimal dot. E.g. this is
1445    // the case in base::NumberToString in Chromium (which is based on
1446    // dmg_fp). So, much like
1447    // https://cs.chromium.org/chromium/src/base/json/json_writer.cc
1448    // we probe for this and emit the leading 0 anyway if necessary.
1449    const char* chars = str_value.get();
1450    if (chars[0] == '.') {
1451      Emit('0');
1452    } else if (chars[0] == '-' && chars[1] == '.') {
1453      Emit("-0");
1454      ++chars;
1455    }
1456    Emit(chars);
1457  }
1458
1459  void HandleInt32(int32_t value) override {
1460    if (!status_->ok())
1461      return;
1462    state_.top().StartElement(out_);
1463    Emit(std::to_string(value));
1464  }
1465
1466  void HandleBool(bool value) override {
1467    if (!status_->ok())
1468      return;
1469    state_.top().StartElement(out_);
1470    Emit(value ? "true" : "false");
1471  }
1472
1473  void HandleNull() override {
1474    if (!status_->ok())
1475      return;
1476    state_.top().StartElement(out_);
1477    Emit("null");
1478  }
1479
1480  void HandleError(Status error) override {
1481    assert(!error.ok());
1482    *status_ = error;
1483    out_->clear();
1484  }
1485
1486 private:
1487  void Emit(char c) { out_->push_back(c); }
1488  void Emit(const char* str) {
1489    out_->insert(out_->end(), str, str + strlen(str));
1490  }
1491  void Emit(const std::string& str) {
1492    out_->insert(out_->end(), str.begin(), str.end());
1493  }
1494
1495  const Platform* platform_;
1496  C* out_;
1497  Status* status_;
1498  std::stack<State> state_;
1499};
1500}  // namespace
1501
1502std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
1503    const Platform* platform,
1504    std::vector<uint8_t>* out,
1505    Status* status) {
1506  return std::unique_ptr<StreamingParserHandler>(
1507      new JSONEncoder<std::vector<uint8_t>>(platform, out, status));
1508}
1509std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
1510                                                       std::string* out,
1511                                                       Status* status) {
1512  return std::unique_ptr<StreamingParserHandler>(
1513      new JSONEncoder<std::string>(platform, out, status));
1514}
1515
1516// =============================================================================
1517// json::ParseJSON - for receiving streaming parser events for JSON.
1518// =============================================================================
1519
1520namespace {
// NOTE(review): presumably the maximum nesting depth JsonParser accepts
// before reporting JSON_PARSER_STACK_LIMIT_EXCEEDED; the code using it is
// outside this excerpt - confirm.
const int kStackLimit = 300;
1522
// Kinds of lexical tokens distinguished by JsonParser.
// NOTE(review): the code producing and consuming these values is outside
// this excerpt; the meaning of each value is taken from its name - confirm.
enum Token {
  ObjectBegin,
  ObjectEnd,
  ArrayBegin,
  ArrayEnd,
  StringLiteral,
  Number,
  BoolTrue,
  BoolFalse,
  NullToken,
  ListSeparator,
  ObjectPairSeparator,
  InvalidToken,
  NoInput
};
1538
// Spellings of the JSON literals, as \0 terminated strings. These are the
// constants JsonParser::ParseConstToken expects as its |token| argument.
const char* const kNullString = "null";
const char* const kTrueString = "true";
const char* const kFalseString = "false";
1542
1543template <typename Char>
1544class JsonParser {
1545 public:
  // Stores |platform| (used for string-to-double conversion) and |handler|
  // for later use; the pointers are kept as-is.
  JsonParser(const Platform* platform, StreamingParserHandler* handler)
      : platform_(platform), handler_(handler) {}
1548
1549  void Parse(const Char* start, size_t length) {
1550    start_pos_ = start;
1551    const Char* end = start + length;
1552    const Char* tokenEnd = nullptr;
1553    ParseValue(start, end, &tokenEnd, 0);
1554    if (error_)
1555      return;
1556    if (tokenEnd != end) {
1557      HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd);
1558    }
1559  }
1560
1561 private:
1562  bool CharsToDouble(const uint16_t* chars, size_t length, double* result) {
1563    std::string buffer;
1564    buffer.reserve(length + 1);
1565    for (size_t ii = 0; ii < length; ++ii) {
1566      bool is_ascii = !(chars[ii] & ~0x7F);
1567      if (!is_ascii)
1568        return false;
1569      buffer.push_back(static_cast<char>(chars[ii]));
1570    }
1571    return platform_->StrToD(buffer.c_str(), result);
1572  }
1573
1574  bool CharsToDouble(const uint8_t* chars, size_t length, double* result) {
1575    std::string buffer(reinterpret_cast<const char*>(chars), length);
1576    return platform_->StrToD(buffer.c_str(), result);
1577  }
1578
1579  static bool ParseConstToken(const Char* start,
1580                              const Char* end,
1581                              const Char** token_end,
1582                              const char* token) {
1583    // |token| is \0 terminated, it's one of the constants at top of the file.
1584    while (start < end && *token != '\0' && *start++ == *token++) {
1585    }
1586    if (*token != '\0')
1587      return false;
1588    *token_end = start;
1589    return true;
1590  }
1591
1592  static bool ReadInt(const Char* start,
1593                      const Char* end,
1594                      const Char** token_end,
1595                      bool allow_leading_zeros) {
1596    if (start == end)
1597      return false;
1598    bool has_leading_zero = '0' == *start;
1599    int length = 0;
1600    while (start < end && '0' <= *start && *start <= '9') {
1601      ++start;
1602      ++length;
1603    }
1604    if (!length)
1605      return false;
1606    if (!allow_leading_zeros && length > 1 && has_leading_zero)
1607      return false;
1608    *token_end = start;
1609    return true;
1610  }
1611
1612  static bool ParseNumberToken(const Char* start,
1613                               const Char* end,
1614                               const Char** token_end) {
1615    // We just grab the number here. We validate the size in DecodeNumber.
1616    // According to RFC4627, a valid number is: [minus] int [frac] [exp]
1617    if (start == end)
1618      return false;
1619    Char c = *start;
1620    if ('-' == c)
1621      ++start;
1622
1623    if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false))
1624      return false;
1625    if (start == end) {
1626      *token_end = start;
1627      return true;
1628    }
1629
1630    // Optional fraction part
1631    c = *start;
1632    if ('.' == c) {
1633      ++start;
1634      if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1635        return false;
1636      if (start == end) {
1637        *token_end = start;
1638        return true;
1639      }
1640      c = *start;
1641    }
1642
1643    // Optional exponent part
1644    if ('e' == c || 'E' == c) {
1645      ++start;
1646      if (start == end)
1647        return false;
1648      c = *start;
1649      if ('-' == c || '+' == c) {
1650        ++start;
1651        if (start == end)
1652          return false;
1653      }
1654      if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1655        return false;
1656    }
1657
1658    *token_end = start;
1659    return true;
1660  }
1661
1662  static bool ReadHexDigits(const Char* start,
1663                            const Char* end,
1664                            const Char** token_end,
1665                            int digits) {
1666    if (end - start < digits)
1667      return false;
1668    for (int i = 0; i < digits; ++i) {
1669      Char c = *start++;
1670      if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
1671            ('A' <= c && c <= 'F')))
1672        return false;
1673    }
1674    *token_end = start;
1675    return true;
1676  }
1677
1678  static bool ParseStringToken(const Char* start,
1679                               const Char* end,
1680                               const Char** token_end) {
1681    while (start < end) {
1682      Char c = *start++;
1683      if ('\\' == c) {
1684        if (start == end)
1685          return false;
1686        c = *start++;
1687        // Make sure the escaped char is valid.
1688        switch (c) {
1689          case 'x':
1690            if (!ReadHexDigits(start, end, &start, 2))
1691              return false;
1692            break;
1693          case 'u':
1694            if (!ReadHexDigits(start, end, &start, 4))
1695              return false;
1696            break;
1697          case '\\':
1698          case '/':
1699          case 'b':
1700          case 'f':
1701          case 'n':
1702          case 'r':
1703          case 't':
1704          case 'v':
1705          case '"':
1706            break;
1707          default:
1708            return false;
1709        }
1710      } else if ('"' == c) {
1711        *token_end = start;
1712        return true;
1713      }
1714    }
1715    return false;
1716  }
1717
1718  static bool SkipComment(const Char* start,
1719                          const Char* end,
1720                          const Char** comment_end) {
1721    if (start == end)
1722      return false;
1723
1724    if (*start != '/' || start + 1 >= end)
1725      return false;
1726    ++start;
1727
1728    if (*start == '/') {
1729      // Single line comment, read to newline.
1730      for (++start; start < end; ++start) {
1731        if (*start == '\n' || *start == '\r') {
1732          *comment_end = start + 1;
1733          return true;
1734        }
1735      }
1736      *comment_end = end;
1737      // Comment reaches end-of-input, which is fine.
1738      return true;
1739    }
1740
1741    if (*start == '*') {
1742      Char previous = '\0';
1743      // Block comment, read until end marker.
1744      for (++start; start < end; previous = *start++) {
1745        if (previous == '*' && *start == '/') {
1746          *comment_end = start + 1;
1747          return true;
1748        }
1749      }
1750      // Block comment must close before end-of-input.
1751      return false;
1752    }
1753
1754    return false;
1755  }
1756
1757  static bool IsSpaceOrNewLine(Char c) {
1758    // \v = vertial tab; \f = form feed page break.
1759    return c == ' ' || c == '\n' || c == '\v' || c == '\f' || c == '\r' ||
1760           c == '\t';
1761  }
1762
1763  static void SkipWhitespaceAndComments(const Char* start,
1764                                        const Char* end,
1765                                        const Char** whitespace_end) {
1766    while (start < end) {
1767      if (IsSpaceOrNewLine(*start)) {
1768        ++start;
1769      } else if (*start == '/') {
1770        const Char* comment_end = nullptr;
1771        if (!SkipComment(start, end, &comment_end))
1772          break;
1773        start = comment_end;
1774      } else {
1775        break;
1776      }
1777    }
1778    *whitespace_end = start;
1779  }
1780
1781  static Token ParseToken(const Char* start,
1782                          const Char* end,
1783                          const Char** tokenStart,
1784                          const Char** token_end) {
1785    SkipWhitespaceAndComments(start, end, tokenStart);
1786    start = *tokenStart;
1787
1788    if (start == end)
1789      return NoInput;
1790
1791    switch (*start) {
1792      case 'n':
1793        if (ParseConstToken(start, end, token_end, kNullString))
1794          return NullToken;
1795        break;
1796      case 't':
1797        if (ParseConstToken(start, end, token_end, kTrueString))
1798          return BoolTrue;
1799        break;
1800      case 'f':
1801        if (ParseConstToken(start, end, token_end, kFalseString))
1802          return BoolFalse;
1803        break;
1804      case '[':
1805        *token_end = start + 1;
1806        return ArrayBegin;
1807      case ']':
1808        *token_end = start + 1;
1809        return ArrayEnd;
1810      case ',':
1811        *token_end = start + 1;
1812        return ListSeparator;
1813      case '{':
1814        *token_end = start + 1;
1815        return ObjectBegin;
1816      case '}':
1817        *token_end = start + 1;
1818        return ObjectEnd;
1819      case ':':
1820        *token_end = start + 1;
1821        return ObjectPairSeparator;
1822      case '0':
1823      case '1':
1824      case '2':
1825      case '3':
1826      case '4':
1827      case '5':
1828      case '6':
1829      case '7':
1830      case '8':
1831      case '9':
1832      case '-':
1833        if (ParseNumberToken(start, end, token_end))
1834          return Number;
1835        break;
1836      case '"':
1837        if (ParseStringToken(start + 1, end, token_end))
1838          return StringLiteral;
1839        break;
1840    }
1841    return InvalidToken;
1842  }
1843
1844  static int HexToInt(Char c) {
1845    if ('0' <= c && c <= '9')
1846      return c - '0';
1847    if ('A' <= c && c <= 'F')
1848      return c - 'A' + 10;
1849    if ('a' <= c && c <= 'f')
1850      return c - 'a' + 10;
1851    assert(false);  // Unreachable.
1852    return 0;
1853  }
1854
1855  static bool DecodeString(const Char* start,
1856                           const Char* end,
1857                           std::vector<uint16_t>* output) {
1858    if (start == end)
1859      return true;
1860    if (start > end)
1861      return false;
1862    output->reserve(end - start);
1863    while (start < end) {
1864      uint16_t c = *start++;
1865      // If the |Char| we're dealing with is really a byte, then
1866      // we have utf8 here, and we need to check for multibyte characters
1867      // and transcode them to utf16 (either one or two utf16 chars).
1868      if (sizeof(Char) == sizeof(uint8_t) && c > 0x7f) {
1869        // Inspect the leading byte to figure out how long the utf8
1870        // byte sequence is; while doing this initialize |codepoint|
1871        // with the first few bits.
1872        // See table in: https://en.wikipedia.org/wiki/UTF-8
1873        // byte one is 110x xxxx -> 2 byte utf8 sequence
1874        // byte one is 1110 xxxx -> 3 byte utf8 sequence
1875        // byte one is 1111 0xxx -> 4 byte utf8 sequence
1876        uint32_t codepoint;
1877        int num_bytes_left;
1878        if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
1879          num_bytes_left = 1;
1880          codepoint = c & 0x1f;
1881        } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
1882          num_bytes_left = 2;
1883          codepoint = c & 0x0f;
1884        } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
1885          codepoint = c & 0x07;
1886          num_bytes_left = 3;
1887        } else {
1888          return false;  // invalid leading byte
1889        }
1890
1891        // If we have enough bytes in our inpput, decode the remaining ones
1892        // belonging to this Unicode character into |codepoint|.
1893        if (start + num_bytes_left > end)
1894          return false;
1895        while (num_bytes_left > 0) {
1896          c = *start++;
1897          --num_bytes_left;
1898          // Check the next byte is a continuation byte, that is 10xx xxxx.
1899          if ((c & 0xc0) != 0x80)
1900            return false;
1901          codepoint = (codepoint << 6) | (c & 0x3f);
1902        }
1903
1904        // Disallow overlong encodings for ascii characters, as these
1905        // would include " and other characters significant to JSON
1906        // string termination / control.
1907        if (codepoint <= 0x7f)
1908          return false;
1909        // Invalid in UTF8, and can't be represented in UTF16 anyway.
1910        if (codepoint > 0x10ffff)
1911          return false;
1912
1913        // So, now we transcode to UTF16,
1914        // using the math described at https://en.wikipedia.org/wiki/UTF-16,
1915        // for either one or two 16 bit characters.
1916        if (codepoint < 0xffff) {
1917          output->push_back(codepoint);
1918          continue;
1919        }
1920        codepoint -= 0x10000;
1921        output->push_back((codepoint >> 10) + 0xd800);    // high surrogate
1922        output->push_back((codepoint & 0x3ff) + 0xdc00);  // low surrogate
1923        continue;
1924      }
1925      if ('\\' != c) {
1926        output->push_back(c);
1927        continue;
1928      }
1929      if (start == end)
1930        return false;
1931      c = *start++;
1932
1933      if (c == 'x') {
1934        // \x is not supported.
1935        return false;
1936      }
1937
1938      switch (c) {
1939        case '"':
1940        case '/':
1941        case '\\':
1942          break;
1943        case 'b':
1944          c = '\b';
1945          break;
1946        case 'f':
1947          c = '\f';
1948          break;
1949        case 'n':
1950          c = '\n';
1951          break;
1952        case 'r':
1953          c = '\r';
1954          break;
1955        case 't':
1956          c = '\t';
1957          break;
1958        case 'v':
1959          c = '\v';
1960          break;
1961        case 'u':
1962          c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) +
1963              (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3));
1964          start += 4;
1965          break;
1966        default:
1967          return false;
1968      }
1969      output->push_back(c);
1970    }
1971    return true;
1972  }
1973
1974  void ParseValue(const Char* start,
1975                  const Char* end,
1976                  const Char** value_token_end,
1977                  int depth) {
1978    if (depth > kStackLimit) {
1979      HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start);
1980      return;
1981    }
1982    const Char* token_start = nullptr;
1983    const Char* token_end = nullptr;
1984    Token token = ParseToken(start, end, &token_start, &token_end);
1985    switch (token) {
1986      case NoInput:
1987        HandleError(Error::JSON_PARSER_NO_INPUT, token_start);
1988        return;
1989      case InvalidToken:
1990        HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start);
1991        return;
1992      case NullToken:
1993        handler_->HandleNull();
1994        break;
1995      case BoolTrue:
1996        handler_->HandleBool(true);
1997        break;
1998      case BoolFalse:
1999        handler_->HandleBool(false);
2000        break;
2001      case Number: {
2002        double value;
2003        if (!CharsToDouble(token_start, token_end - token_start, &value)) {
2004          HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start);
2005          return;
2006        }
2007        if (value >= std::numeric_limits<int32_t>::min() &&
2008            value <= std::numeric_limits<int32_t>::max() &&
2009            static_cast<int32_t>(value) == value)
2010          handler_->HandleInt32(static_cast<int32_t>(value));
2011        else
2012          handler_->HandleDouble(value);
2013        break;
2014      }
2015      case StringLiteral: {
2016        std::vector<uint16_t> value;
2017        bool ok = DecodeString(token_start + 1, token_end - 1, &value);
2018        if (!ok) {
2019          HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2020          return;
2021        }
2022        handler_->HandleString16(span<uint16_t>(value.data(), value.size()));
2023        break;
2024      }
2025      case ArrayBegin: {
2026        handler_->HandleArrayBegin();
2027        start = token_end;
2028        token = ParseToken(start, end, &token_start, &token_end);
2029        while (token != ArrayEnd) {
2030          ParseValue(start, end, &token_end, depth + 1);
2031          if (error_)
2032            return;
2033
2034          // After a list value, we expect a comma or the end of the list.
2035          start = token_end;
2036          token = ParseToken(start, end, &token_start, &token_end);
2037          if (token == ListSeparator) {
2038            start = token_end;
2039            token = ParseToken(start, end, &token_start, &token_end);
2040            if (token == ArrayEnd) {
2041              HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start);
2042              return;
2043            }
2044          } else if (token != ArrayEnd) {
2045            // Unexpected value after list value. Bail out.
2046            HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED,
2047                        token_start);
2048            return;
2049          }
2050        }
2051        handler_->HandleArrayEnd();
2052        break;
2053      }
2054      case ObjectBegin: {
2055        handler_->HandleMapBegin();
2056        start = token_end;
2057        token = ParseToken(start, end, &token_start, &token_end);
2058        while (token != ObjectEnd) {
2059          if (token != StringLiteral) {
2060            HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED,
2061                        token_start);
2062            return;
2063          }
2064          std::vector<uint16_t> key;
2065          if (!DecodeString(token_start + 1, token_end - 1, &key)) {
2066            HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2067            return;
2068          }
2069          handler_->HandleString16(span<uint16_t>(key.data(), key.size()));
2070          start = token_end;
2071
2072          token = ParseToken(start, end, &token_start, &token_end);
2073          if (token != ObjectPairSeparator) {
2074            HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start);
2075            return;
2076          }
2077          start = token_end;
2078
2079          ParseValue(start, end, &token_end, depth + 1);
2080          if (error_)
2081            return;
2082          start = token_end;
2083
2084          // After a key/value pair, we expect a comma or the end of the
2085          // object.
2086          token = ParseToken(start, end, &token_start, &token_end);
2087          if (token == ListSeparator) {
2088            start = token_end;
2089            token = ParseToken(start, end, &token_start, &token_end);
2090            if (token == ObjectEnd) {
2091              HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start);
2092              return;
2093            }
2094          } else if (token != ObjectEnd) {
2095            // Unexpected value after last object value. Bail out.
2096            HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED,
2097                        token_start);
2098            return;
2099          }
2100        }
2101        handler_->HandleMapEnd();
2102        break;
2103      }
2104
2105      default:
2106        // We got a token that's not a value.
2107        HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start);
2108        return;
2109    }
2110
2111    SkipWhitespaceAndComments(token_end, end, value_token_end);
2112  }
2113
2114  void HandleError(Error error, const Char* pos) {
2115    assert(error != Error::OK);
2116    if (!error_) {
2117      handler_->HandleError(
2118          Status{error, static_cast<size_t>(pos - start_pos_)});
2119      error_ = true;
2120    }
2121  }
2122
2123  const Char* start_pos_ = nullptr;
2124  bool error_ = false;
2125  const Platform* platform_;
2126  StreamingParserHandler* handler_;
2127};
2128}  // namespace
2129
2130void ParseJSON(const Platform& platform,
2131               span<uint8_t> chars,
2132               StreamingParserHandler* handler) {
2133  JsonParser<uint8_t> parser(&platform, handler);
2134  parser.Parse(chars.data(), chars.size());
2135}
2136
2137void ParseJSON(const Platform& platform,
2138               span<uint16_t> chars,
2139               StreamingParserHandler* handler) {
2140  JsonParser<uint16_t> parser(&platform, handler);
2141  parser.Parse(chars.data(), chars.size());
2142}
2143
2144// =============================================================================
2145// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
2146// =============================================================================
2147template <typename C>
2148Status ConvertCBORToJSONTmpl(const Platform& platform,
2149                             span<uint8_t> cbor,
2150                             C* json) {
2151  Status status;
2152  std::unique_ptr<StreamingParserHandler> json_writer =
2153      NewJSONEncoder(&platform, json, &status);
2154  cbor::ParseCBOR(cbor, json_writer.get());
2155  return status;
2156}
2157
2158Status ConvertCBORToJSON(const Platform& platform,
2159                         span<uint8_t> cbor,
2160                         std::vector<uint8_t>* json) {
2161  return ConvertCBORToJSONTmpl(platform, cbor, json);
2162}
2163Status ConvertCBORToJSON(const Platform& platform,
2164                         span<uint8_t> cbor,
2165                         std::string* json) {
2166  return ConvertCBORToJSONTmpl(platform, cbor, json);
2167}
2168
2169template <typename T, typename C>
2170Status ConvertJSONToCBORTmpl(const Platform& platform, span<T> json, C* cbor) {
2171  Status status;
2172  std::unique_ptr<StreamingParserHandler> encoder =
2173      cbor::NewCBOREncoder(cbor, &status);
2174  ParseJSON(platform, json, encoder.get());
2175  return status;
2176}
2177Status ConvertJSONToCBOR(const Platform& platform,
2178                         span<uint8_t> json,
2179                         std::string* cbor) {
2180  return ConvertJSONToCBORTmpl(platform, json, cbor);
2181}
2182Status ConvertJSONToCBOR(const Platform& platform,
2183                         span<uint16_t> json,
2184                         std::string* cbor) {
2185  return ConvertJSONToCBORTmpl(platform, json, cbor);
2186}
2187Status ConvertJSONToCBOR(const Platform& platform,
2188                         span<uint8_t> json,
2189                         std::vector<uint8_t>* cbor) {
2190  return ConvertJSONToCBORTmpl(platform, json, cbor);
2191}
2192Status ConvertJSONToCBOR(const Platform& platform,
2193                         span<uint16_t> json,
2194                         std::vector<uint8_t>* cbor) {
2195  return ConvertJSONToCBORTmpl(platform, json, cbor);
2196}
2197}  // namespace json
2198
2199{% for namespace in config.protocol.namespace %}
2200} // namespace {{namespace}}
2201{% endfor %}
2202
2203{% endif %}
2204