1{# This template is generated by gen_cbor_templates.py. #} 2// Generated by lib/encoding_cpp.template. 3 4// Copyright 2019 The Chromium Authors. All rights reserved. 5// Use of this source code is governed by a BSD-style license that can be 6// found in the LICENSE file. 7 8{% if config.encoding_lib.header == "" %} 9 10#include <algorithm> 11#include <cassert> 12#include <cmath> 13#include <cstring> 14#include <limits> 15#include <stack> 16 17{% for namespace in config.protocol.namespace %} 18namespace {{namespace}} { 19{% endfor %} 20 21// ===== encoding/encoding.cc ===== 22 23// ============================================================================= 24// Status and Error codes 25// ============================================================================= 26 27std::string Status::ToASCIIString() const { 28 switch (error) { 29 case Error::OK: 30 return "OK"; 31 case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS: 32 return ToASCIIString("JSON: unprocessed input remains"); 33 case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED: 34 return ToASCIIString("JSON: stack limit exceeded"); 35 case Error::JSON_PARSER_NO_INPUT: 36 return ToASCIIString("JSON: no input"); 37 case Error::JSON_PARSER_INVALID_TOKEN: 38 return ToASCIIString("JSON: invalid token"); 39 case Error::JSON_PARSER_INVALID_NUMBER: 40 return ToASCIIString("JSON: invalid number"); 41 case Error::JSON_PARSER_INVALID_STRING: 42 return ToASCIIString("JSON: invalid string"); 43 case Error::JSON_PARSER_UNEXPECTED_ARRAY_END: 44 return ToASCIIString("JSON: unexpected array end"); 45 case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED: 46 return ToASCIIString("JSON: comma or array end expected"); 47 case Error::JSON_PARSER_STRING_LITERAL_EXPECTED: 48 return ToASCIIString("JSON: string literal expected"); 49 case Error::JSON_PARSER_COLON_EXPECTED: 50 return ToASCIIString("JSON: colon expected"); 51 case Error::JSON_PARSER_UNEXPECTED_MAP_END: 52 return ToASCIIString("JSON: unexpected map end"); 53 case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED: 54 return ToASCIIString("JSON: comma or map end expected"); 55 case Error::JSON_PARSER_VALUE_EXPECTED: 56 return ToASCIIString("JSON: value expected"); 57 58 case Error::CBOR_INVALID_INT32: 59 return ToASCIIString("CBOR: invalid int32"); 60 case Error::CBOR_INVALID_DOUBLE: 61 return ToASCIIString("CBOR: invalid double"); 62 case Error::CBOR_INVALID_ENVELOPE: 63 return ToASCIIString("CBOR: invalid envelope"); 64 case Error::CBOR_INVALID_STRING8: 65 return ToASCIIString("CBOR: invalid string8"); 66 case Error::CBOR_INVALID_STRING16: 67 return ToASCIIString("CBOR: invalid string16"); 68 case Error::CBOR_INVALID_BINARY: 69 return ToASCIIString("CBOR: invalid binary"); 70 case Error::CBOR_UNSUPPORTED_VALUE: 71 return ToASCIIString("CBOR: unsupported value"); 72 case Error::CBOR_NO_INPUT: 73 return ToASCIIString("CBOR: no input"); 74 case Error::CBOR_INVALID_START_BYTE: 75 return ToASCIIString("CBOR: invalid start byte"); 76 case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE: 77 return ToASCIIString("CBOR: unexpected eof expected value"); 78 case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY: 79 return ToASCIIString("CBOR: unexpected eof in array"); 80 case Error::CBOR_UNEXPECTED_EOF_IN_MAP: 81 return ToASCIIString("CBOR: unexpected eof in map"); 82 case Error::CBOR_INVALID_MAP_KEY: 83 return ToASCIIString("CBOR: invalid map key"); 84 case Error::CBOR_STACK_LIMIT_EXCEEDED: 85 return ToASCIIString("CBOR: stack limit exceeded"); 86 case Error::CBOR_TRAILING_JUNK: 87 return ToASCIIString("CBOR: trailing junk"); 88 case Error::CBOR_MAP_START_EXPECTED: 89 return ToASCIIString("CBOR: map start expected"); 90 case Error::CBOR_MAP_STOP_EXPECTED: 91 return ToASCIIString("CBOR: map stop expected"); 92 case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED: 93 return ToASCIIString("CBOR: envelope size limit exceeded"); 94 } 95 // Some compilers can't figure out that we can't get here. 96 return "INVALID ERROR CODE"; 97} 98 99std::string Status::ToASCIIString(const char* msg) const { 100 return std::string(msg) + " at position " + std::to_string(pos); 101} 102 103namespace cbor { 104namespace { 105// Indicates the number of bits the "initial byte" needs to be shifted to the 106// right after applying |kMajorTypeMask| to produce the major type in the 107// lowermost bits. 108static constexpr uint8_t kMajorTypeBitShift = 5u; 109// Mask selecting the low-order 5 bits of the "initial byte", which is where 110// the additional information is encoded. 111static constexpr uint8_t kAdditionalInformationMask = 0x1f; 112// Mask selecting the high-order 3 bits of the "initial byte", which indicates 113// the major type of the encoded value. 114static constexpr uint8_t kMajorTypeMask = 0xe0; 115// Indicates the integer is in the following byte. 116static constexpr uint8_t kAdditionalInformation1Byte = 24u; 117// Indicates the integer is in the next 2 bytes. 118static constexpr uint8_t kAdditionalInformation2Bytes = 25u; 119// Indicates the integer is in the next 4 bytes. 120static constexpr uint8_t kAdditionalInformation4Bytes = 26u; 121// Indicates the integer is in the next 8 bytes. 122static constexpr uint8_t kAdditionalInformation8Bytes = 27u; 123 124// Encodes the initial byte, consisting of the |type| in the first 3 bits 125// followed by 5 bits of |additional_info|. 126constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) { 127 return (static_cast<uint8_t>(type) << kMajorTypeBitShift) | 128 (additional_info & kAdditionalInformationMask); 129} 130 131// TAG 24 indicates that what follows is a byte string which is 132// encoded in CBOR format. We use this as a wrapper for 133// maps and arrays, allowing us to skip them, because the 134// byte string carries its size (byte length). 135// https://tools.ietf.org/html/rfc7049#section-2.4.4.1 136static constexpr uint8_t kInitialByteForEnvelope = 137 EncodeInitialByte(MajorType::TAG, 24); 138// The initial byte for a byte string with at most 2^32 bytes 139// of payload. This is used for envelope encoding, even if 140// the byte string is shorter. 141static constexpr uint8_t kInitialByteFor32BitLengthByteString = 142 EncodeInitialByte(MajorType::BYTE_STRING, 26); 143 144// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional 145// info = 31. 146static constexpr uint8_t kInitialByteIndefiniteLengthArray = 147 EncodeInitialByte(MajorType::ARRAY, 31); 148static constexpr uint8_t kInitialByteIndefiniteLengthMap = 149 EncodeInitialByte(MajorType::MAP, 31); 150// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite 151// length maps / arrays. 152static constexpr uint8_t kStopByte = 153 EncodeInitialByte(MajorType::SIMPLE_VALUE, 31); 154 155// See RFC 7049 Section 2.3, Table 2. 156static constexpr uint8_t kEncodedTrue = 157 EncodeInitialByte(MajorType::SIMPLE_VALUE, 21); 158static constexpr uint8_t kEncodedFalse = 159 EncodeInitialByte(MajorType::SIMPLE_VALUE, 20); 160static constexpr uint8_t kEncodedNull = 161 EncodeInitialByte(MajorType::SIMPLE_VALUE, 22); 162static constexpr uint8_t kInitialByteForDouble = 163 EncodeInitialByte(MajorType::SIMPLE_VALUE, 27); 164 165// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for 166// arbitrary binary data encoded as BYTE_STRING. 167static constexpr uint8_t kExpectedConversionToBase64Tag = 168 EncodeInitialByte(MajorType::TAG, 22); 169 170// Writes the bytes for |v| to |out|, starting with the most significant byte. 171// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html 172template <typename T, class C> 173void WriteBytesMostSignificantByteFirst(T v, C* out) { 174 for (int shift_bytes = sizeof(T) - 1; shift_bytes >= 0; --shift_bytes) 175 out->push_back(0xff & (v >> (shift_bytes * 8))); 176} 177 178// Extracts sizeof(T) bytes from |in| to extract a value of type T 179// (e.g. uint64_t, uint32_t, ...), most significant byte first. 180// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html 181template <typename T> 182T ReadBytesMostSignificantByteFirst(span<uint8_t> in) { 183 assert(in.size() >= sizeof(T)); 184 T result = 0; 185 for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes) 186 result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8); 187 return result; 188} 189} // namespace 190 191namespace internals { 192// Reads the start of a token with definitive size from |bytes|. 193// |type| is the major type as specified in RFC 7049 Section 2.1. 194// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size 195// (e.g. for BYTE_STRING). 196// If successful, returns the number of bytes read. Otherwise returns 0. 197size_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) { 198 if (bytes.empty()) 199 return 0; 200 uint8_t initial_byte = bytes[0]; 201 *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift); 202 203 uint8_t additional_information = initial_byte & kAdditionalInformationMask; 204 if (additional_information < 24) { 205 // Values 0-23 are encoded directly into the additional info of the 206 // initial byte. 207 *value = additional_information; 208 return 1; 209 } 210 if (additional_information == kAdditionalInformation1Byte) { 211 // Values 24-255 are encoded with one initial byte, followed by the value. 212 if (bytes.size() < 2) 213 return 0; 214 *value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes.subspan(1)); 215 return 2; 216 } 217 if (additional_information == kAdditionalInformation2Bytes) { 218 // Values 256-65535: 1 initial byte + 2 bytes payload. 219 if (bytes.size() < 1 + sizeof(uint16_t)) 220 return 0; 221 *value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes.subspan(1)); 222 return 3; 223 } 224 if (additional_information == kAdditionalInformation4Bytes) { 225 // 32 bit uint: 1 initial byte + 4 bytes payload. 226 if (bytes.size() < 1 + sizeof(uint32_t)) 227 return 0; 228 *value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes.subspan(1)); 229 return 5; 230 } 231 if (additional_information == kAdditionalInformation8Bytes) { 232 // 64 bit uint: 1 initial byte + 8 bytes payload. 233 if (bytes.size() < 1 + sizeof(uint64_t)) 234 return 0; 235 *value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes.subspan(1)); 236 return 9; 237 } 238 return 0; 239} 240 241// Writes the start of a token with |type|. The |value| may indicate the size, 242// or it may be the payload if the value is an unsigned integer. 243template <typename C> 244void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) { 245 if (value < 24) { 246 // Values 0-23 are encoded directly into the additional info of the 247 // initial byte. 248 encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value)); 249 return; 250 } 251 if (value <= std::numeric_limits<uint8_t>::max()) { 252 // Values 24-255 are encoded with one initial byte, followed by the value. 253 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte)); 254 encoded->push_back(value); 255 return; 256 } 257 if (value <= std::numeric_limits<uint16_t>::max()) { 258 // Values 256-65535: 1 initial byte + 2 bytes payload. 259 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes)); 260 WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded); 261 return; 262 } 263 if (value <= std::numeric_limits<uint32_t>::max()) { 264 // 32 bit uint: 1 initial byte + 4 bytes payload. 265 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes)); 266 WriteBytesMostSignificantByteFirst<uint32_t>(static_cast<uint32_t>(value), 267 encoded); 268 return; 269 } 270 // 64 bit uint: 1 initial byte + 8 bytes payload. 271 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes)); 272 WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded); 273} 274void WriteTokenStart(MajorType type, 275 uint64_t value, 276 std::vector<uint8_t>* encoded) { 277 WriteTokenStartTmpl(type, value, encoded); 278} 279void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) { 280 WriteTokenStartTmpl(type, value, encoded); 281} 282} // namespace internals 283 284// ============================================================================= 285// Detecting CBOR content 286// ============================================================================= 287 288uint8_t InitialByteForEnvelope() { 289 return kInitialByteForEnvelope; 290} 291uint8_t InitialByteFor32BitLengthByteString() { 292 return kInitialByteFor32BitLengthByteString; 293} 294bool IsCBORMessage(span<uint8_t> msg) { 295 return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() && 296 msg[1] == InitialByteFor32BitLengthByteString(); 297} 298 299// ============================================================================= 300// Encoding invidiual CBOR items 301// ============================================================================= 302 303uint8_t EncodeTrue() { 304 return kEncodedTrue; 305} 306uint8_t EncodeFalse() { 307 return kEncodedFalse; 308} 309uint8_t EncodeNull() { 310 return kEncodedNull; 311} 312 313uint8_t EncodeIndefiniteLengthArrayStart() { 314 return kInitialByteIndefiniteLengthArray; 315} 316 317uint8_t EncodeIndefiniteLengthMapStart() { 318 return kInitialByteIndefiniteLengthMap; 319} 320 321uint8_t EncodeStop() { 322 return kStopByte; 323} 324 325template <typename C> 326void EncodeInt32Tmpl(int32_t value, C* out) { 327 if (value >= 0) { 328 internals::WriteTokenStart(MajorType::UNSIGNED, value, out); 329 } else { 330 uint64_t representation = static_cast<uint64_t>(-(value + 1)); 331 internals::WriteTokenStart(MajorType::NEGATIVE, representation, out); 332 } 333} 334void EncodeInt32(int32_t value, std::vector<uint8_t>* out) { 335 EncodeInt32Tmpl(value, out); 336} 337void EncodeInt32(int32_t value, std::string* out) { 338 EncodeInt32Tmpl(value, out); 339} 340 341template <typename C> 342void EncodeString16Tmpl(span<uint16_t> in, C* out) { 343 uint64_t byte_length = static_cast<uint64_t>(in.size_bytes()); 344 internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); 345 // When emitting UTF16 characters, we always write the least significant byte 346 // first; this is because it's the native representation for X86. 347 // TODO(johannes): Implement a more efficient thing here later, e.g. 348 // casting *iff* the machine has this byte order. 349 // The wire format for UTF16 chars will probably remain the same 350 // (least significant byte first) since this way we can have 351 // golden files, unittests, etc. that port easily and universally. 352 // See also: 353 // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html 354 for (const uint16_t two_bytes : in) { 355 out->push_back(two_bytes); 356 out->push_back(two_bytes >> 8); 357 } 358} 359void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) { 360 EncodeString16Tmpl(in, out); 361} 362void EncodeString16(span<uint16_t> in, std::string* out) { 363 EncodeString16Tmpl(in, out); 364} 365 366template <typename C> 367void EncodeString8Tmpl(span<uint8_t> in, C* out) { 368 internals::WriteTokenStart(MajorType::STRING, 369 static_cast<uint64_t>(in.size_bytes()), out); 370 out->insert(out->end(), in.begin(), in.end()); 371} 372void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) { 373 EncodeString8Tmpl(in, out); 374} 375void EncodeString8(span<uint8_t> in, std::string* out) { 376 EncodeString8Tmpl(in, out); 377} 378 379template <typename C> 380void EncodeFromLatin1Tmpl(span<uint8_t> latin1, C* out) { 381 for (size_t ii = 0; ii < latin1.size(); ++ii) { 382 if (latin1[ii] <= 127) 383 continue; 384 // If there's at least one non-ASCII char, convert to UTF8. 385 std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + ii); 386 for (; ii < latin1.size(); ++ii) { 387 if (latin1[ii] <= 127) { 388 utf8.push_back(latin1[ii]); 389 } else { 390 // 0xC0 means it's a UTF8 sequence with 2 bytes. 391 utf8.push_back((latin1[ii] >> 6) | 0xc0); 392 utf8.push_back((latin1[ii] | 0x80) & 0xbf); 393 } 394 } 395 EncodeString8(SpanFrom(utf8), out); 396 return; 397 } 398 EncodeString8(latin1, out); 399} 400void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) { 401 EncodeFromLatin1Tmpl(latin1, out); 402} 403void EncodeFromLatin1(span<uint8_t> latin1, std::string* out) { 404 EncodeFromLatin1Tmpl(latin1, out); 405} 406 407template <typename C> 408void EncodeFromUTF16Tmpl(span<uint16_t> utf16, C* out) { 409 // If there's at least one non-ASCII char, encode as STRING16 (UTF16). 410 for (uint16_t ch : utf16) { 411 if (ch <= 127) 412 continue; 413 EncodeString16(utf16, out); 414 return; 415 } 416 // It's all US-ASCII, strip out every second byte and encode as UTF8. 417 internals::WriteTokenStart(MajorType::STRING, 418 static_cast<uint64_t>(utf16.size()), out); 419 out->insert(out->end(), utf16.begin(), utf16.end()); 420} 421void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) { 422 EncodeFromUTF16Tmpl(utf16, out); 423} 424void EncodeFromUTF16(span<uint16_t> utf16, std::string* out) { 425 EncodeFromUTF16Tmpl(utf16, out); 426} 427 428template <typename C> 429void EncodeBinaryTmpl(span<uint8_t> in, C* out) { 430 out->push_back(kExpectedConversionToBase64Tag); 431 uint64_t byte_length = static_cast<uint64_t>(in.size_bytes()); 432 internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); 433 out->insert(out->end(), in.begin(), in.end()); 434} 435void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) { 436 EncodeBinaryTmpl(in, out); 437} 438void EncodeBinary(span<uint8_t> in, std::string* out) { 439 EncodeBinaryTmpl(in, out); 440} 441 442// A double is encoded with a specific initial byte 443// (kInitialByteForDouble) plus the 64 bits of payload for its value. 444constexpr size_t kEncodedDoubleSize = 1 + sizeof(uint64_t); 445 446// An envelope is encoded with a specific initial byte 447// (kInitialByteForEnvelope), plus the start byte for a BYTE_STRING with a 32 448// bit wide length, plus a 32 bit length for that string. 449constexpr size_t kEncodedEnvelopeHeaderSize = 1 + 1 + sizeof(uint32_t); 450 451template <typename C> 452void EncodeDoubleTmpl(double value, C* out) { 453 // The additional_info=27 indicates 64 bits for the double follow. 454 // See RFC 7049 Section 2.3, Table 1. 455 out->push_back(kInitialByteForDouble); 456 union { 457 double from_double; 458 uint64_t to_uint64; 459 } reinterpret; 460 reinterpret.from_double = value; 461 WriteBytesMostSignificantByteFirst<uint64_t>(reinterpret.to_uint64, out); 462} 463void EncodeDouble(double value, std::vector<uint8_t>* out) { 464 EncodeDoubleTmpl(value, out); 465} 466void EncodeDouble(double value, std::string* out) { 467 EncodeDoubleTmpl(value, out); 468} 469 470// ============================================================================= 471// cbor::EnvelopeEncoder - for wrapping submessages 472// ============================================================================= 473 474template <typename C> 475void EncodeStartTmpl(C* out, size_t* byte_size_pos) { 476 assert(*byte_size_pos == 0); 477 out->push_back(kInitialByteForEnvelope); 478 out->push_back(kInitialByteFor32BitLengthByteString); 479 *byte_size_pos = out->size(); 480 out->resize(out->size() + sizeof(uint32_t)); 481} 482 483void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) { 484 EncodeStartTmpl<std::vector<uint8_t>>(out, &byte_size_pos_); 485} 486 487void EnvelopeEncoder::EncodeStart(std::string* out) { 488 EncodeStartTmpl<std::string>(out, &byte_size_pos_); 489} 490 491template <typename C> 492bool EncodeStopTmpl(C* out, size_t* byte_size_pos) { 493 assert(*byte_size_pos != 0); 494 // The byte size is the size of the payload, that is, all the 495 // bytes that were written past the byte size position itself. 496 uint64_t byte_size = out->size() - (*byte_size_pos + sizeof(uint32_t)); 497 // We store exactly 4 bytes, so at most INT32MAX, with most significant 498 // byte first. 499 if (byte_size > std::numeric_limits<uint32_t>::max()) 500 return false; 501 for (int shift_bytes = sizeof(uint32_t) - 1; shift_bytes >= 0; 502 --shift_bytes) { 503 (*out)[(*byte_size_pos)++] = 0xff & (byte_size >> (shift_bytes * 8)); 504 } 505 return true; 506} 507 508bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) { 509 return EncodeStopTmpl(out, &byte_size_pos_); 510} 511 512bool EnvelopeEncoder::EncodeStop(std::string* out) { 513 return EncodeStopTmpl(out, &byte_size_pos_); 514} 515 516// ============================================================================= 517// cbor::NewCBOREncoder - for encoding from a streaming parser 518// ============================================================================= 519 520namespace { 521template <typename C> 522class CBOREncoder : public StreamingParserHandler { 523 public: 524 CBOREncoder(C* out, Status* status) : out_(out), status_(status) { 525 *status_ = Status(); 526 } 527 528 void HandleMapBegin() override { 529 if (!status_->ok()) 530 return; 531 envelopes_.emplace_back(); 532 envelopes_.back().EncodeStart(out_); 533 out_->push_back(kInitialByteIndefiniteLengthMap); 534 } 535 536 void HandleMapEnd() override { 537 if (!status_->ok()) 538 return; 539 out_->push_back(kStopByte); 540 assert(!envelopes_.empty()); 541 if (!envelopes_.back().EncodeStop(out_)) { 542 HandleError( 543 Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); 544 return; 545 } 546 envelopes_.pop_back(); 547 } 548 549 void HandleArrayBegin() override { 550 if (!status_->ok()) 551 return; 552 envelopes_.emplace_back(); 553 envelopes_.back().EncodeStart(out_); 554 out_->push_back(kInitialByteIndefiniteLengthArray); 555 } 556 557 void HandleArrayEnd() override { 558 if (!status_->ok()) 559 return; 560 out_->push_back(kStopByte); 561 assert(!envelopes_.empty()); 562 if (!envelopes_.back().EncodeStop(out_)) { 563 HandleError( 564 Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); 565 return; 566 } 567 envelopes_.pop_back(); 568 } 569 570 void HandleString8(span<uint8_t> chars) override { 571 if (!status_->ok()) 572 return; 573 EncodeString8(chars, out_); 574 } 575 576 void HandleString16(span<uint16_t> chars) override { 577 if (!status_->ok()) 578 return; 579 EncodeFromUTF16(chars, out_); 580 } 581 582 void HandleBinary(span<uint8_t> bytes) override { 583 if (!status_->ok()) 584 return; 585 EncodeBinary(bytes, out_); 586 } 587 588 void HandleDouble(double value) override { 589 if (!status_->ok()) 590 return; 591 EncodeDouble(value, out_); 592 } 593 594 void HandleInt32(int32_t value) override { 595 if (!status_->ok()) 596 return; 597 EncodeInt32(value, out_); 598 } 599 600 void HandleBool(bool value) override { 601 if (!status_->ok()) 602 return; 603 // See RFC 7049 Section 2.3, Table 2. 604 out_->push_back(value ? kEncodedTrue : kEncodedFalse); 605 } 606 607 void HandleNull() override { 608 if (!status_->ok()) 609 return; 610 // See RFC 7049 Section 2.3, Table 2. 611 out_->push_back(kEncodedNull); 612 } 613 614 void HandleError(Status error) override { 615 if (!status_->ok()) 616 return; 617 *status_ = error; 618 out_->clear(); 619 } 620 621 private: 622 C* out_; 623 std::vector<EnvelopeEncoder> envelopes_; 624 Status* status_; 625}; 626} // namespace 627 628std::unique_ptr<StreamingParserHandler> NewCBOREncoder( 629 std::vector<uint8_t>* out, 630 Status* status) { 631 return std::unique_ptr<StreamingParserHandler>( 632 new CBOREncoder<std::vector<uint8_t>>(out, status)); 633} 634std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out, 635 Status* status) { 636 return std::unique_ptr<StreamingParserHandler>( 637 new CBOREncoder<std::string>(out, status)); 638} 639 640// ============================================================================= 641// cbor::CBORTokenizer - for parsing individual CBOR items 642// ============================================================================= 643 644CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : bytes_(bytes) { 645 ReadNextToken(/*enter_envelope=*/false); 646} 647CBORTokenizer::~CBORTokenizer() {} 648 649CBORTokenTag CBORTokenizer::TokenTag() const { 650 return token_tag_; 651} 652 653void CBORTokenizer::Next() { 654 if (token_tag_ == CBORTokenTag::ERROR_VALUE || 655 token_tag_ == CBORTokenTag::DONE) 656 return; 657 ReadNextToken(/*enter_envelope=*/false); 658} 659 660void CBORTokenizer::EnterEnvelope() { 661 assert(token_tag_ == CBORTokenTag::ENVELOPE); 662 ReadNextToken(/*enter_envelope=*/true); 663} 664 665Status CBORTokenizer::Status() const { 666 return status_; 667} 668 669// The following accessor functions ::GetInt32, ::GetDouble, 670// ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents 671// assume that a particular token was recognized in ::ReadNextToken. 672// That's where all the error checking is done. By design, 673// the accessors (assuming the token was recognized) never produce 674// an error. 675 676int32_t CBORTokenizer::GetInt32() const { 677 assert(token_tag_ == CBORTokenTag::INT32); 678 // The range checks happen in ::ReadNextToken(). 679 return static_cast<int32_t>( 680 token_start_type_ == MajorType::UNSIGNED 681 ? token_start_internal_value_ 682 : -static_cast<int64_t>(token_start_internal_value_) - 1); 683} 684 685double CBORTokenizer::GetDouble() const { 686 assert(token_tag_ == CBORTokenTag::DOUBLE); 687 union { 688 uint64_t from_uint64; 689 double to_double; 690 } reinterpret; 691 reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst<uint64_t>( 692 bytes_.subspan(status_.pos + 1)); 693 return reinterpret.to_double; 694} 695 696span<uint8_t> CBORTokenizer::GetString8() const { 697 assert(token_tag_ == CBORTokenTag::STRING8); 698 auto length = static_cast<size_t>(token_start_internal_value_); 699 return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); 700} 701 702span<uint8_t> CBORTokenizer::GetString16WireRep() const { 703 assert(token_tag_ == CBORTokenTag::STRING16); 704 auto length = static_cast<size_t>(token_start_internal_value_); 705 return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); 706} 707 708span<uint8_t> CBORTokenizer::GetBinary() const { 709 assert(token_tag_ == CBORTokenTag::BINARY); 710 auto length = static_cast<size_t>(token_start_internal_value_); 711 return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); 712} 713 714span<uint8_t> CBORTokenizer::GetEnvelopeContents() const { 715 assert(token_tag_ == CBORTokenTag::ENVELOPE); 716 auto length = static_cast<size_t>(token_start_internal_value_); 717 return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length); 718} 719 720// All error checking happens in ::ReadNextToken, so that the accessors 721// can avoid having to carry an error return value. 722// 723// With respect to checking the encoded lengths of strings, arrays, etc: 724// On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so 725// we initially read them as uint64_t, usually into token_start_internal_value_. 726// 727// However, since these containers have a representation on the machine, 728// we need to do corresponding size computations on the input byte array, 729// output span (e.g. the payload for a string), etc., and size_t is 730// machine specific (in practice either 32 bit or 64 bit). 731// 732// Further, we must avoid overflowing size_t. Therefore, we use this 733// kMaxValidLength constant to: 734// - Reject values that are larger than the architecture specific 735// max size_t (differs between 32 bit and 64 bit arch). 736// - Reserve at least one bit so that we can check against overflows 737// when adding lengths (array / string length / etc.); we do this by 738// ensuring that the inputs to an addition are <= kMaxValidLength, 739// and then checking whether the sum went past it. 740// 741// See also 742// https://chromium.googlesource.com/chromium/src/+/HEAD/docs/security/integer-semantics.md 743static const uint64_t kMaxValidLength = 744 std::min<uint64_t>(std::numeric_limits<uint64_t>::max() >> 2, 745 std::numeric_limits<size_t>::max()); 746 747void CBORTokenizer::ReadNextToken(bool enter_envelope) { 748 if (enter_envelope) { 749 status_.pos += kEncodedEnvelopeHeaderSize; 750 } else { 751 status_.pos = 752 status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_; 753 } 754 status_.error = Error::OK; 755 if (status_.pos >= bytes_.size()) { 756 token_tag_ = CBORTokenTag::DONE; 757 return; 758 } 759 const size_t remaining_bytes = bytes_.size() - status_.pos; 760 switch (bytes_[status_.pos]) { 761 case kStopByte: 762 SetToken(CBORTokenTag::STOP, 1); 763 return; 764 case kInitialByteIndefiniteLengthMap: 765 SetToken(CBORTokenTag::MAP_START, 1); 766 return; 767 case kInitialByteIndefiniteLengthArray: 768 SetToken(CBORTokenTag::ARRAY_START, 1); 769 return; 770 case kEncodedTrue: 771 SetToken(CBORTokenTag::TRUE_VALUE, 1); 772 return; 773 case kEncodedFalse: 774 SetToken(CBORTokenTag::FALSE_VALUE, 1); 775 return; 776 case kEncodedNull: 777 SetToken(CBORTokenTag::NULL_VALUE, 1); 778 return; 779 case kExpectedConversionToBase64Tag: { // BINARY 780 const size_t bytes_read = internals::ReadTokenStart( 781 bytes_.subspan(status_.pos + 1), &token_start_type_, 782 &token_start_internal_value_); 783 if (!bytes_read || token_start_type_ != MajorType::BYTE_STRING || 784 token_start_internal_value_ > kMaxValidLength) { 785 SetError(Error::CBOR_INVALID_BINARY); 786 return; 787 } 788 const uint64_t token_byte_length = token_start_internal_value_ + 789 /* tag before token start: */ 1 + 790 /* token start: */ bytes_read; 791 if (token_byte_length > remaining_bytes) { 792 SetError(Error::CBOR_INVALID_BINARY); 793 return; 794 } 795 SetToken(CBORTokenTag::BINARY, static_cast<size_t>(token_byte_length)); 796 return; 797 } 798 case kInitialByteForDouble: { // DOUBLE 799 if (kEncodedDoubleSize > remaining_bytes) { 800 SetError(Error::CBOR_INVALID_DOUBLE); 801 return; 802 } 803 SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize); 804 return; 805 } 806 case kInitialByteForEnvelope: { // ENVELOPE 807 if (kEncodedEnvelopeHeaderSize > remaining_bytes) { 808 SetError(Error::CBOR_INVALID_ENVELOPE); 809 return; 810 } 811 // The envelope must be a byte string with 32 bit length. 812 if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) { 813 SetError(Error::CBOR_INVALID_ENVELOPE); 814 return; 815 } 816 // Read the length of the byte string. 817 token_start_internal_value_ = ReadBytesMostSignificantByteFirst<uint32_t>( 818 bytes_.subspan(status_.pos + 2)); 819 if (token_start_internal_value_ > kMaxValidLength) { 820 SetError(Error::CBOR_INVALID_ENVELOPE); 821 return; 822 } 823 uint64_t token_byte_length = 824 token_start_internal_value_ + kEncodedEnvelopeHeaderSize; 825 if (token_byte_length > remaining_bytes) { 826 SetError(Error::CBOR_INVALID_ENVELOPE); 827 return; 828 } 829 SetToken(CBORTokenTag::ENVELOPE, static_cast<size_t>(token_byte_length)); 830 return; 831 } 832 default: { 833 const size_t bytes_read = internals::ReadTokenStart( 834 bytes_.subspan(status_.pos), &token_start_type_, 835 &token_start_internal_value_); 836 switch (token_start_type_) { 837 case MajorType::UNSIGNED: // INT32. 838 // INT32 is a signed int32 (int32 makes sense for the 839 // inspector_protocol, it's not a CBOR limitation), so we check 840 // against the signed max, so that the allowable values are 841 // 0, 1, 2, ... 2^31 - 1. 842 if (!bytes_read || 843 static_cast<int64_t>(std::numeric_limits<int32_t>::max()) < 844 static_cast<int64_t>(token_start_internal_value_)) { 845 SetError(Error::CBOR_INVALID_INT32); 846 return; 847 } 848 SetToken(CBORTokenTag::INT32, bytes_read); 849 return; 850 case MajorType::NEGATIVE: { // INT32. 851 // INT32 is a signed int32 (int32 makes sense for the 852 // inspector_protocol, it's not a CBOR limitation); in CBOR, the 853 // negative values for INT32 are represented as NEGATIVE, that is, -1 854 // INT32 is represented as 1 << 5 | 0 (major type 1, additional info 855 // value 0). 856 // The represented allowed values range is -1 to -2^31. 857 // They are mapped into the encoded range of 0 to 2^31-1. 858 // We check the the payload in token_start_internal_value_ against 859 // that range (2^31-1 is also known as 860 // std::numeric_limits<int32_t>::max()). 861 if (!bytes_read || 862 static_cast<int64_t>(token_start_internal_value_) > 863 static_cast<int64_t>(std::numeric_limits<int32_t>::max())) { 864 SetError(Error::CBOR_INVALID_INT32); 865 return; 866 } 867 SetToken(CBORTokenTag::INT32, bytes_read); 868 return; 869 } 870 case MajorType::STRING: { // STRING8. 871 if (!bytes_read || token_start_internal_value_ > kMaxValidLength) { 872 SetError(Error::CBOR_INVALID_STRING8); 873 return; 874 } 875 uint64_t token_byte_length = token_start_internal_value_ + bytes_read; 876 if (token_byte_length > remaining_bytes) { 877 SetError(Error::CBOR_INVALID_STRING8); 878 return; 879 } 880 SetToken(CBORTokenTag::STRING8, 881 static_cast<size_t>(token_byte_length)); 882 return; 883 } 884 case MajorType::BYTE_STRING: { // STRING16. 885 // Length must be divisible by 2 since UTF16 is 2 bytes per 886 // character, hence the &1 check. 887 if (!bytes_read || token_start_internal_value_ > kMaxValidLength || 888 token_start_internal_value_ & 1) { 889 SetError(Error::CBOR_INVALID_STRING16); 890 return; 891 } 892 uint64_t token_byte_length = token_start_internal_value_ + bytes_read; 893 if (token_byte_length > remaining_bytes) { 894 SetError(Error::CBOR_INVALID_STRING16); 895 return; 896 } 897 SetToken(CBORTokenTag::STRING16, 898 static_cast<size_t>(token_byte_length)); 899 return; 900 } 901 case MajorType::ARRAY: 902 case MajorType::MAP: 903 case MajorType::TAG: 904 case MajorType::SIMPLE_VALUE: 905 SetError(Error::CBOR_UNSUPPORTED_VALUE); 906 return; 907 } 908 } 909 } 910} 911 912void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) { 913 token_tag_ = token_tag; 914 token_byte_length_ = token_byte_length; 915} 916 917void CBORTokenizer::SetError(Error error) { 918 token_tag_ = CBORTokenTag::ERROR_VALUE; 919 status_.error = error; 920} 921 922// ============================================================================= 923// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages 924// ============================================================================= 925 926namespace { 927// When parsing CBOR, we limit recursion depth for objects and arrays 928// to this constant. 929static constexpr int kStackLimit = 300; 930 931// Below are three parsing routines for CBOR, which cover enough 932// to roundtrip JSON messages. 933bool ParseMap(int32_t stack_depth, 934 CBORTokenizer* tokenizer, 935 StreamingParserHandler* out); 936bool ParseArray(int32_t stack_depth, 937 CBORTokenizer* tokenizer, 938 StreamingParserHandler* out); 939bool ParseValue(int32_t stack_depth, 940 CBORTokenizer* tokenizer, 941 StreamingParserHandler* out); 942 943void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { 944 std::vector<uint16_t> value; 945 span<uint8_t> rep = tokenizer->GetString16WireRep(); 946 for (size_t ii = 0; ii < rep.size(); ii += 2) 947 value.push_back((rep[ii + 1] << 8) | rep[ii]); 948 out->HandleString16(span<uint16_t>(value.data(), value.size())); 949 tokenizer->Next(); 950} 951 952bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { 953 assert(tokenizer->TokenTag() == CBORTokenTag::STRING8); 954 out->HandleString8(tokenizer->GetString8()); 955 tokenizer->Next(); 956 return true; 957} 958 959bool ParseValue(int32_t stack_depth, 960 CBORTokenizer* tokenizer, 961 StreamingParserHandler* out) { 962 if (stack_depth > kStackLimit) { 963 out->HandleError( 964 Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos}); 965 return false; 966 } 967 // Skip past the envelope to get to what's inside. 968 if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE) 969 tokenizer->EnterEnvelope(); 970 switch (tokenizer->TokenTag()) { 971 case CBORTokenTag::ERROR_VALUE: 972 out->HandleError(tokenizer->Status()); 973 return false; 974 case CBORTokenTag::DONE: 975 out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE, 976 tokenizer->Status().pos}); 977 return false; 978 case CBORTokenTag::TRUE_VALUE: 979 out->HandleBool(true); 980 tokenizer->Next(); 981 return true; 982 case CBORTokenTag::FALSE_VALUE: 983 out->HandleBool(false); 984 tokenizer->Next(); 985 return true; 986 case CBORTokenTag::NULL_VALUE: 987 out->HandleNull(); 988 tokenizer->Next(); 989 return true; 990 case CBORTokenTag::INT32: 991 out->HandleInt32(tokenizer->GetInt32()); 992 tokenizer->Next(); 993 return true; 994 case CBORTokenTag::DOUBLE: 995 out->HandleDouble(tokenizer->GetDouble()); 996 tokenizer->Next(); 997 return true; 998 case CBORTokenTag::STRING8: 999 return ParseUTF8String(tokenizer, out); 1000 case CBORTokenTag::STRING16: 1001 ParseUTF16String(tokenizer, out); 1002 return true; 1003 case CBORTokenTag::BINARY: { 1004 out->HandleBinary(tokenizer->GetBinary()); 1005 tokenizer->Next(); 1006 return true; 1007 } 1008 case CBORTokenTag::MAP_START: 1009 return ParseMap(stack_depth + 1, tokenizer, out); 1010 case CBORTokenTag::ARRAY_START: 1011 return ParseArray(stack_depth + 1, tokenizer, out); 1012 default: 1013 out->HandleError( 1014 Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos}); 1015 return false; 1016 } 1017} 1018 1019// |bytes| must start with the indefinite length array byte, so basically, 1020// ParseArray may only be called after an indefinite length array has been 1021// detected. 1022bool ParseArray(int32_t stack_depth, 1023 CBORTokenizer* tokenizer, 1024 StreamingParserHandler* out) { 1025 assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START); 1026 tokenizer->Next(); 1027 out->HandleArrayBegin(); 1028 while (tokenizer->TokenTag() != CBORTokenTag::STOP) { 1029 if (tokenizer->TokenTag() == CBORTokenTag::DONE) { 1030 out->HandleError( 1031 Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos}); 1032 return false; 1033 } 1034 if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { 1035 out->HandleError(tokenizer->Status()); 1036 return false; 1037 } 1038 // Parse value. 1039 if (!ParseValue(stack_depth, tokenizer, out)) 1040 return false; 1041 } 1042 out->HandleArrayEnd(); 1043 tokenizer->Next(); 1044 return true; 1045} 1046 1047// |bytes| must start with the indefinite length array byte, so basically, 1048// ParseArray may only be called after an indefinite length array has been 1049// detected. 1050bool ParseMap(int32_t stack_depth, 1051 CBORTokenizer* tokenizer, 1052 StreamingParserHandler* out) { 1053 assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START); 1054 out->HandleMapBegin(); 1055 tokenizer->Next(); 1056 while (tokenizer->TokenTag() != CBORTokenTag::STOP) { 1057 if (tokenizer->TokenTag() == CBORTokenTag::DONE) { 1058 out->HandleError( 1059 Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos}); 1060 return false; 1061 } 1062 if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { 1063 out->HandleError(tokenizer->Status()); 1064 return false; 1065 } 1066 // Parse key. 1067 if (tokenizer->TokenTag() == CBORTokenTag::STRING8) { 1068 if (!ParseUTF8String(tokenizer, out)) 1069 return false; 1070 } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) { 1071 ParseUTF16String(tokenizer, out); 1072 } else { 1073 out->HandleError( 1074 Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos}); 1075 return false; 1076 } 1077 // Parse value. 1078 if (!ParseValue(stack_depth, tokenizer, out)) 1079 return false; 1080 } 1081 out->HandleMapEnd(); 1082 tokenizer->Next(); 1083 return true; 1084} 1085} // namespace 1086 1087void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out) { 1088 if (bytes.empty()) { 1089 out->HandleError(Status{Error::CBOR_NO_INPUT, 0}); 1090 return; 1091 } 1092 if (bytes[0] != kInitialByteForEnvelope) { 1093 out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0}); 1094 return; 1095 } 1096 CBORTokenizer tokenizer(bytes); 1097 if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { 1098 out->HandleError(tokenizer.Status()); 1099 return; 1100 } 1101 // We checked for the envelope start byte above, so the tokenizer 1102 // must agree here, since it's not an error. 1103 assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE); 1104 tokenizer.EnterEnvelope(); 1105 if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) { 1106 out->HandleError( 1107 Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos}); 1108 return; 1109 } 1110 if (!ParseMap(/*stack_depth=*/1, &tokenizer, out)) 1111 return; 1112 if (tokenizer.TokenTag() == CBORTokenTag::DONE) 1113 return; 1114 if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { 1115 out->HandleError(tokenizer.Status()); 1116 return; 1117 } 1118 out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos}); 1119} 1120 1121// ============================================================================= 1122// cbor::AppendString8EntryToMap - for limited in-place editing of messages 1123// ============================================================================= 1124 1125template <typename C> 1126Status AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key, 1127 span<uint8_t> string8_value, 1128 C* cbor) { 1129 // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since 1130 // it could be a char (signed!). Instead, use bytes. 1131 span<uint8_t> bytes(reinterpret_cast<const uint8_t*>(cbor->data()), 1132 cbor->size()); 1133 CBORTokenizer tokenizer(bytes); 1134 if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) 1135 return tokenizer.Status(); 1136 if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE) 1137 return Status(Error::CBOR_INVALID_ENVELOPE, 0); 1138 size_t envelope_size = tokenizer.GetEnvelopeContents().size(); 1139 size_t old_size = cbor->size(); 1140 if (old_size != envelope_size + kEncodedEnvelopeHeaderSize) 1141 return Status(Error::CBOR_INVALID_ENVELOPE, 0); 1142 if (envelope_size == 0 || 1143 (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart())) 1144 return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize); 1145 if (bytes[bytes.size() - 1] != EncodeStop()) 1146 return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1); 1147 cbor->pop_back(); 1148 EncodeString8(string8_key, cbor); 1149 EncodeString8(string8_value, cbor); 1150 cbor->push_back(EncodeStop()); 1151 size_t new_envelope_size = envelope_size + (cbor->size() - old_size); 1152 if (new_envelope_size > std::numeric_limits<uint32_t>::max()) 1153 return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0); 1154 size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t); 1155 uint8_t* out = reinterpret_cast<uint8_t*>(&cbor->at(size_pos)); 1156 *(out++) = (new_envelope_size >> 24) & 0xff; 1157 *(out++) = (new_envelope_size >> 16) & 0xff; 1158 *(out++) = (new_envelope_size >> 8) & 0xff; 1159 *(out) = new_envelope_size & 0xff; 1160 return Status(); 1161} 1162Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 1163 span<uint8_t> string8_value, 1164 std::vector<uint8_t>* cbor) { 1165 return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); 1166} 1167Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 1168 span<uint8_t> string8_value, 1169 std::string* cbor) { 1170 return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); 1171} 1172} // namespace cbor 1173 1174namespace json { 1175 1176// ============================================================================= 1177// json::NewJSONEncoder - for encoding streaming parser events as JSON 1178// ============================================================================= 1179 1180namespace { 1181// Prints |value| to |out| with 4 hex digits, most significant chunk first. 1182template <typename C> 1183void PrintHex(uint16_t value, C* out) { 1184 for (int ii = 3; ii >= 0; --ii) { 1185 int four_bits = 0xf & (value >> (4 * ii)); 1186 out->push_back(four_bits + ((four_bits <= 9) ? '0' : ('a' - 10))); 1187 } 1188} 1189 1190// In the writer below, we maintain a stack of State instances. 1191// It is just enough to emit the appropriate delimiters and brackets 1192// in JSON. 1193enum class Container { 1194 // Used for the top-level, initial state. 1195 NONE, 1196 // Inside a JSON object. 1197 MAP, 1198 // Inside a JSON array. 1199 ARRAY 1200}; 1201class State { 1202 public: 1203 explicit State(Container container) : container_(container) {} 1204 void StartElement(std::vector<uint8_t>* out) { StartElementTmpl(out); } 1205 void StartElement(std::string* out) { StartElementTmpl(out); } 1206 Container container() const { return container_; } 1207 1208 private: 1209 template <typename C> 1210 void StartElementTmpl(C* out) { 1211 assert(container_ != Container::NONE || size_ == 0); 1212 if (size_ != 0) { 1213 char delim = (!(size_ & 1) || container_ == Container::ARRAY) ? ',' : ':'; 1214 out->push_back(delim); 1215 } 1216 ++size_; 1217 } 1218 1219 Container container_ = Container::NONE; 1220 int size_ = 0; 1221}; 1222 1223constexpr char kBase64Table[] = 1224 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 1225 "abcdefghijklmnopqrstuvwxyz0123456789+/"; 1226 1227template <typename C> 1228void Base64Encode(const span<uint8_t>& in, C* out) { 1229 // The following three cases are based on the tables in the example 1230 // section in https://en.wikipedia.org/wiki/Base64. We process three 1231 // input bytes at a time, emitting 4 output bytes at a time. 1232 size_t ii = 0; 1233 1234 // While possible, process three input bytes. 1235 for (; ii + 3 <= in.size(); ii += 3) { 1236 uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2]; 1237 out->push_back(kBase64Table[(twentyfour_bits >> 18)]); 1238 out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); 1239 out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); 1240 out->push_back(kBase64Table[twentyfour_bits & 0x3f]); 1241 } 1242 if (ii + 2 <= in.size()) { // Process two input bytes. 1243 uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8); 1244 out->push_back(kBase64Table[(twentyfour_bits >> 18)]); 1245 out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); 1246 out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); 1247 out->push_back('='); // Emit padding. 1248 return; 1249 } 1250 if (ii + 1 <= in.size()) { // Process a single input byte. 1251 uint32_t twentyfour_bits = (in[ii] << 16); 1252 out->push_back(kBase64Table[(twentyfour_bits >> 18)]); 1253 out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); 1254 out->push_back('='); // Emit padding. 1255 out->push_back('='); // Emit padding. 1256 } 1257} 1258 1259// Implements a handler for JSON parser events to emit a JSON string. 1260template <typename C> 1261class JSONEncoder : public StreamingParserHandler { 1262 public: 1263 JSONEncoder(const Platform* platform, C* out, Status* status) 1264 : platform_(platform), out_(out), status_(status) { 1265 *status_ = Status(); 1266 state_.emplace(Container::NONE); 1267 } 1268 1269 void HandleMapBegin() override { 1270 if (!status_->ok()) 1271 return; 1272 assert(!state_.empty()); 1273 state_.top().StartElement(out_); 1274 state_.emplace(Container::MAP); 1275 Emit('{'); 1276 } 1277 1278 void HandleMapEnd() override { 1279 if (!status_->ok()) 1280 return; 1281 assert(state_.size() >= 2 && state_.top().container() == Container::MAP); 1282 state_.pop(); 1283 Emit('}'); 1284 } 1285 1286 void HandleArrayBegin() override { 1287 if (!status_->ok()) 1288 return; 1289 state_.top().StartElement(out_); 1290 state_.emplace(Container::ARRAY); 1291 Emit('['); 1292 } 1293 1294 void HandleArrayEnd() override { 1295 if (!status_->ok()) 1296 return; 1297 assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY); 1298 state_.pop(); 1299 Emit(']'); 1300 } 1301 1302 void HandleString16(span<uint16_t> chars) override { 1303 if (!status_->ok()) 1304 return; 1305 state_.top().StartElement(out_); 1306 Emit('"'); 1307 for (const uint16_t ch : chars) { 1308 if (ch == '"') { 1309 Emit("\\\""); 1310 } else if (ch == '\\') { 1311 Emit("\\\\"); 1312 } else if (ch == '\b') { 1313 Emit("\\b"); 1314 } else if (ch == '\f') { 1315 Emit("\\f"); 1316 } else if (ch == '\n') { 1317 Emit("\\n"); 1318 } else if (ch == '\r') { 1319 Emit("\\r"); 1320 } else if (ch == '\t') { 1321 Emit("\\t"); 1322 } else if (ch >= 32 && ch <= 126) { 1323 Emit(ch); 1324 } else { 1325 Emit("\\u"); 1326 PrintHex(ch, out_); 1327 } 1328 } 1329 Emit('"'); 1330 } 1331 1332 void HandleString8(span<uint8_t> chars) override { 1333 if (!status_->ok()) 1334 return; 1335 state_.top().StartElement(out_); 1336 Emit('"'); 1337 for (size_t ii = 0; ii < chars.size(); ++ii) { 1338 uint8_t c = chars[ii]; 1339 if (c == '"') { 1340 Emit("\\\""); 1341 } else if (c == '\\') { 1342 Emit("\\\\"); 1343 } else if (c == '\b') { 1344 Emit("\\b"); 1345 } else if (c == '\f') { 1346 Emit("\\f"); 1347 } else if (c == '\n') { 1348 Emit("\\n"); 1349 } else if (c == '\r') { 1350 Emit("\\r"); 1351 } else if (c == '\t') { 1352 Emit("\\t"); 1353 } else if (c >= 32 && c <= 126) { 1354 Emit(c); 1355 } else if (c < 32) { 1356 Emit("\\u"); 1357 PrintHex(static_cast<uint16_t>(c), out_); 1358 } else { 1359 // Inspect the leading byte to figure out how long the utf8 1360 // byte sequence is; while doing this initialize |codepoint| 1361 // with the first few bits. 1362 // See table in: https://en.wikipedia.org/wiki/UTF-8 1363 // byte one is 110x xxxx -> 2 byte utf8 sequence 1364 // byte one is 1110 xxxx -> 3 byte utf8 sequence 1365 // byte one is 1111 0xxx -> 4 byte utf8 sequence 1366 uint32_t codepoint; 1367 int num_bytes_left; 1368 if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence 1369 num_bytes_left = 1; 1370 codepoint = c & 0x1f; 1371 } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence 1372 num_bytes_left = 2; 1373 codepoint = c & 0x0f; 1374 } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence 1375 codepoint = c & 0x07; 1376 num_bytes_left = 3; 1377 } else { 1378 continue; // invalid leading byte 1379 } 1380 1381 // If we have enough bytes in our input, decode the remaining ones 1382 // belonging to this Unicode character into |codepoint|. 1383 if (ii + num_bytes_left > chars.size()) 1384 continue; 1385 while (num_bytes_left > 0) { 1386 c = chars[++ii]; 1387 --num_bytes_left; 1388 // Check the next byte is a continuation byte, that is 10xx xxxx. 1389 if ((c & 0xc0) != 0x80) 1390 continue; 1391 codepoint = (codepoint << 6) | (c & 0x3f); 1392 } 1393 1394 // Disallow overlong encodings for ascii characters, as these 1395 // would include " and other characters significant to JSON 1396 // string termination / control. 1397 if (codepoint < 0x7f) 1398 continue; 1399 // Invalid in UTF8, and can't be represented in UTF16 anyway. 1400 if (codepoint > 0x10ffff) 1401 continue; 1402 1403 // So, now we transcode to UTF16, 1404 // using the math described at https://en.wikipedia.org/wiki/UTF-16, 1405 // for either one or two 16 bit characters. 1406 if (codepoint < 0xffff) { 1407 Emit("\\u"); 1408 PrintHex(static_cast<uint16_t>(codepoint), out_); 1409 continue; 1410 } 1411 codepoint -= 0x10000; 1412 // high surrogate 1413 Emit("\\u"); 1414 PrintHex(static_cast<uint16_t>((codepoint >> 10) + 0xd800), out_); 1415 // low surrogate 1416 Emit("\\u"); 1417 PrintHex(static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00), out_); 1418 } 1419 } 1420 Emit('"'); 1421 } 1422 1423 void HandleBinary(span<uint8_t> bytes) override { 1424 if (!status_->ok()) 1425 return; 1426 state_.top().StartElement(out_); 1427 Emit('"'); 1428 Base64Encode(bytes, out_); 1429 Emit('"'); 1430 } 1431 1432 void HandleDouble(double value) override { 1433 if (!status_->ok()) 1434 return; 1435 state_.top().StartElement(out_); 1436 // JSON cannot represent NaN or Infinity. So, for compatibility, 1437 // we behave like the JSON object in web browsers: emit 'null'. 1438 if (!std::isfinite(value)) { 1439 Emit("null"); 1440 return; 1441 } 1442 std::unique_ptr<char[]> str_value = platform_->DToStr(value); 1443 1444 // DToStr may fail to emit a 0 before the decimal dot. E.g. this is 1445 // the case in base::NumberToString in Chromium (which is based on 1446 // dmg_fp). So, much like 1447 // https://cs.chromium.org/chromium/src/base/json/json_writer.cc 1448 // we probe for this and emit the leading 0 anyway if necessary. 1449 const char* chars = str_value.get(); 1450 if (chars[0] == '.') { 1451 Emit('0'); 1452 } else if (chars[0] == '-' && chars[1] == '.') { 1453 Emit("-0"); 1454 ++chars; 1455 } 1456 Emit(chars); 1457 } 1458 1459 void HandleInt32(int32_t value) override { 1460 if (!status_->ok()) 1461 return; 1462 state_.top().StartElement(out_); 1463 Emit(std::to_string(value)); 1464 } 1465 1466 void HandleBool(bool value) override { 1467 if (!status_->ok()) 1468 return; 1469 state_.top().StartElement(out_); 1470 Emit(value ? "true" : "false"); 1471 } 1472 1473 void HandleNull() override { 1474 if (!status_->ok()) 1475 return; 1476 state_.top().StartElement(out_); 1477 Emit("null"); 1478 } 1479 1480 void HandleError(Status error) override { 1481 assert(!error.ok()); 1482 *status_ = error; 1483 out_->clear(); 1484 } 1485 1486 private: 1487 void Emit(char c) { out_->push_back(c); } 1488 void Emit(const char* str) { 1489 out_->insert(out_->end(), str, str + strlen(str)); 1490 } 1491 void Emit(const std::string& str) { 1492 out_->insert(out_->end(), str.begin(), str.end()); 1493 } 1494 1495 const Platform* platform_; 1496 C* out_; 1497 Status* status_; 1498 std::stack<State> state_; 1499}; 1500} // namespace 1501 1502std::unique_ptr<StreamingParserHandler> NewJSONEncoder( 1503 const Platform* platform, 1504 std::vector<uint8_t>* out, 1505 Status* status) { 1506 return std::unique_ptr<StreamingParserHandler>( 1507 new JSONEncoder<std::vector<uint8_t>>(platform, out, status)); 1508} 1509std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform, 1510 std::string* out, 1511 Status* status) { 1512 return std::unique_ptr<StreamingParserHandler>( 1513 new JSONEncoder<std::string>(platform, out, status)); 1514} 1515 1516// ============================================================================= 1517// json::ParseJSON - for receiving streaming parser events for JSON. 1518// ============================================================================= 1519 1520namespace { 1521const int kStackLimit = 300; 1522 1523enum Token { 1524 ObjectBegin, 1525 ObjectEnd, 1526 ArrayBegin, 1527 ArrayEnd, 1528 StringLiteral, 1529 Number, 1530 BoolTrue, 1531 BoolFalse, 1532 NullToken, 1533 ListSeparator, 1534 ObjectPairSeparator, 1535 InvalidToken, 1536 NoInput 1537}; 1538 1539const char* const kNullString = "null"; 1540const char* const kTrueString = "true"; 1541const char* const kFalseString = "false"; 1542 1543template <typename Char> 1544class JsonParser { 1545 public: 1546 JsonParser(const Platform* platform, StreamingParserHandler* handler) 1547 : platform_(platform), handler_(handler) {} 1548 1549 void Parse(const Char* start, size_t length) { 1550 start_pos_ = start; 1551 const Char* end = start + length; 1552 const Char* tokenEnd = nullptr; 1553 ParseValue(start, end, &tokenEnd, 0); 1554 if (error_) 1555 return; 1556 if (tokenEnd != end) { 1557 HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd); 1558 } 1559 } 1560 1561 private: 1562 bool CharsToDouble(const uint16_t* chars, size_t length, double* result) { 1563 std::string buffer; 1564 buffer.reserve(length + 1); 1565 for (size_t ii = 0; ii < length; ++ii) { 1566 bool is_ascii = !(chars[ii] & ~0x7F); 1567 if (!is_ascii) 1568 return false; 1569 buffer.push_back(static_cast<char>(chars[ii])); 1570 } 1571 return platform_->StrToD(buffer.c_str(), result); 1572 } 1573 1574 bool CharsToDouble(const uint8_t* chars, size_t length, double* result) { 1575 std::string buffer(reinterpret_cast<const char*>(chars), length); 1576 return platform_->StrToD(buffer.c_str(), result); 1577 } 1578 1579 static bool ParseConstToken(const Char* start, 1580 const Char* end, 1581 const Char** token_end, 1582 const char* token) { 1583 // |token| is \0 terminated, it's one of the constants at top of the file. 1584 while (start < end && *token != '\0' && *start++ == *token++) { 1585 } 1586 if (*token != '\0') 1587 return false; 1588 *token_end = start; 1589 return true; 1590 } 1591 1592 static bool ReadInt(const Char* start, 1593 const Char* end, 1594 const Char** token_end, 1595 bool allow_leading_zeros) { 1596 if (start == end) 1597 return false; 1598 bool has_leading_zero = '0' == *start; 1599 int length = 0; 1600 while (start < end && '0' <= *start && *start <= '9') { 1601 ++start; 1602 ++length; 1603 } 1604 if (!length) 1605 return false; 1606 if (!allow_leading_zeros && length > 1 && has_leading_zero) 1607 return false; 1608 *token_end = start; 1609 return true; 1610 } 1611 1612 static bool ParseNumberToken(const Char* start, 1613 const Char* end, 1614 const Char** token_end) { 1615 // We just grab the number here. We validate the size in DecodeNumber. 1616 // According to RFC4627, a valid number is: [minus] int [frac] [exp] 1617 if (start == end) 1618 return false; 1619 Char c = *start; 1620 if ('-' == c) 1621 ++start; 1622 1623 if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false)) 1624 return false; 1625 if (start == end) { 1626 *token_end = start; 1627 return true; 1628 } 1629 1630 // Optional fraction part 1631 c = *start; 1632 if ('.' == c) { 1633 ++start; 1634 if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) 1635 return false; 1636 if (start == end) { 1637 *token_end = start; 1638 return true; 1639 } 1640 c = *start; 1641 } 1642 1643 // Optional exponent part 1644 if ('e' == c || 'E' == c) { 1645 ++start; 1646 if (start == end) 1647 return false; 1648 c = *start; 1649 if ('-' == c || '+' == c) { 1650 ++start; 1651 if (start == end) 1652 return false; 1653 } 1654 if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) 1655 return false; 1656 } 1657 1658 *token_end = start; 1659 return true; 1660 } 1661 1662 static bool ReadHexDigits(const Char* start, 1663 const Char* end, 1664 const Char** token_end, 1665 int digits) { 1666 if (end - start < digits) 1667 return false; 1668 for (int i = 0; i < digits; ++i) { 1669 Char c = *start++; 1670 if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || 1671 ('A' <= c && c <= 'F'))) 1672 return false; 1673 } 1674 *token_end = start; 1675 return true; 1676 } 1677 1678 static bool ParseStringToken(const Char* start, 1679 const Char* end, 1680 const Char** token_end) { 1681 while (start < end) { 1682 Char c = *start++; 1683 if ('\\' == c) { 1684 if (start == end) 1685 return false; 1686 c = *start++; 1687 // Make sure the escaped char is valid. 1688 switch (c) { 1689 case 'x': 1690 if (!ReadHexDigits(start, end, &start, 2)) 1691 return false; 1692 break; 1693 case 'u': 1694 if (!ReadHexDigits(start, end, &start, 4)) 1695 return false; 1696 break; 1697 case '\\': 1698 case '/': 1699 case 'b': 1700 case 'f': 1701 case 'n': 1702 case 'r': 1703 case 't': 1704 case 'v': 1705 case '"': 1706 break; 1707 default: 1708 return false; 1709 } 1710 } else if ('"' == c) { 1711 *token_end = start; 1712 return true; 1713 } 1714 } 1715 return false; 1716 } 1717 1718 static bool SkipComment(const Char* start, 1719 const Char* end, 1720 const Char** comment_end) { 1721 if (start == end) 1722 return false; 1723 1724 if (*start != '/' || start + 1 >= end) 1725 return false; 1726 ++start; 1727 1728 if (*start == '/') { 1729 // Single line comment, read to newline. 1730 for (++start; start < end; ++start) { 1731 if (*start == '\n' || *start == '\r') { 1732 *comment_end = start + 1; 1733 return true; 1734 } 1735 } 1736 *comment_end = end; 1737 // Comment reaches end-of-input, which is fine. 1738 return true; 1739 } 1740 1741 if (*start == '*') { 1742 Char previous = '\0'; 1743 // Block comment, read until end marker. 1744 for (++start; start < end; previous = *start++) { 1745 if (previous == '*' && *start == '/') { 1746 *comment_end = start + 1; 1747 return true; 1748 } 1749 } 1750 // Block comment must close before end-of-input. 1751 return false; 1752 } 1753 1754 return false; 1755 } 1756 1757 static bool IsSpaceOrNewLine(Char c) { 1758 // \v = vertial tab; \f = form feed page break. 1759 return c == ' ' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || 1760 c == '\t'; 1761 } 1762 1763 static void SkipWhitespaceAndComments(const Char* start, 1764 const Char* end, 1765 const Char** whitespace_end) { 1766 while (start < end) { 1767 if (IsSpaceOrNewLine(*start)) { 1768 ++start; 1769 } else if (*start == '/') { 1770 const Char* comment_end = nullptr; 1771 if (!SkipComment(start, end, &comment_end)) 1772 break; 1773 start = comment_end; 1774 } else { 1775 break; 1776 } 1777 } 1778 *whitespace_end = start; 1779 } 1780 1781 static Token ParseToken(const Char* start, 1782 const Char* end, 1783 const Char** tokenStart, 1784 const Char** token_end) { 1785 SkipWhitespaceAndComments(start, end, tokenStart); 1786 start = *tokenStart; 1787 1788 if (start == end) 1789 return NoInput; 1790 1791 switch (*start) { 1792 case 'n': 1793 if (ParseConstToken(start, end, token_end, kNullString)) 1794 return NullToken; 1795 break; 1796 case 't': 1797 if (ParseConstToken(start, end, token_end, kTrueString)) 1798 return BoolTrue; 1799 break; 1800 case 'f': 1801 if (ParseConstToken(start, end, token_end, kFalseString)) 1802 return BoolFalse; 1803 break; 1804 case '[': 1805 *token_end = start + 1; 1806 return ArrayBegin; 1807 case ']': 1808 *token_end = start + 1; 1809 return ArrayEnd; 1810 case ',': 1811 *token_end = start + 1; 1812 return ListSeparator; 1813 case '{': 1814 *token_end = start + 1; 1815 return ObjectBegin; 1816 case '}': 1817 *token_end = start + 1; 1818 return ObjectEnd; 1819 case ':': 1820 *token_end = start + 1; 1821 return ObjectPairSeparator; 1822 case '0': 1823 case '1': 1824 case '2': 1825 case '3': 1826 case '4': 1827 case '5': 1828 case '6': 1829 case '7': 1830 case '8': 1831 case '9': 1832 case '-': 1833 if (ParseNumberToken(start, end, token_end)) 1834 return Number; 1835 break; 1836 case '"': 1837 if (ParseStringToken(start + 1, end, token_end)) 1838 return StringLiteral; 1839 break; 1840 } 1841 return InvalidToken; 1842 } 1843 1844 static int HexToInt(Char c) { 1845 if ('0' <= c && c <= '9') 1846 return c - '0'; 1847 if ('A' <= c && c <= 'F') 1848 return c - 'A' + 10; 1849 if ('a' <= c && c <= 'f') 1850 return c - 'a' + 10; 1851 assert(false); // Unreachable. 1852 return 0; 1853 } 1854 1855 static bool DecodeString(const Char* start, 1856 const Char* end, 1857 std::vector<uint16_t>* output) { 1858 if (start == end) 1859 return true; 1860 if (start > end) 1861 return false; 1862 output->reserve(end - start); 1863 while (start < end) { 1864 uint16_t c = *start++; 1865 // If the |Char| we're dealing with is really a byte, then 1866 // we have utf8 here, and we need to check for multibyte characters 1867 // and transcode them to utf16 (either one or two utf16 chars). 1868 if (sizeof(Char) == sizeof(uint8_t) && c > 0x7f) { 1869 // Inspect the leading byte to figure out how long the utf8 1870 // byte sequence is; while doing this initialize |codepoint| 1871 // with the first few bits. 1872 // See table in: https://en.wikipedia.org/wiki/UTF-8 1873 // byte one is 110x xxxx -> 2 byte utf8 sequence 1874 // byte one is 1110 xxxx -> 3 byte utf8 sequence 1875 // byte one is 1111 0xxx -> 4 byte utf8 sequence 1876 uint32_t codepoint; 1877 int num_bytes_left; 1878 if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence 1879 num_bytes_left = 1; 1880 codepoint = c & 0x1f; 1881 } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence 1882 num_bytes_left = 2; 1883 codepoint = c & 0x0f; 1884 } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence 1885 codepoint = c & 0x07; 1886 num_bytes_left = 3; 1887 } else { 1888 return false; // invalid leading byte 1889 } 1890 1891 // If we have enough bytes in our inpput, decode the remaining ones 1892 // belonging to this Unicode character into |codepoint|. 1893 if (start + num_bytes_left > end) 1894 return false; 1895 while (num_bytes_left > 0) { 1896 c = *start++; 1897 --num_bytes_left; 1898 // Check the next byte is a continuation byte, that is 10xx xxxx. 1899 if ((c & 0xc0) != 0x80) 1900 return false; 1901 codepoint = (codepoint << 6) | (c & 0x3f); 1902 } 1903 1904 // Disallow overlong encodings for ascii characters, as these 1905 // would include " and other characters significant to JSON 1906 // string termination / control. 1907 if (codepoint <= 0x7f) 1908 return false; 1909 // Invalid in UTF8, and can't be represented in UTF16 anyway. 1910 if (codepoint > 0x10ffff) 1911 return false; 1912 1913 // So, now we transcode to UTF16, 1914 // using the math described at https://en.wikipedia.org/wiki/UTF-16, 1915 // for either one or two 16 bit characters. 1916 if (codepoint < 0xffff) { 1917 output->push_back(codepoint); 1918 continue; 1919 } 1920 codepoint -= 0x10000; 1921 output->push_back((codepoint >> 10) + 0xd800); // high surrogate 1922 output->push_back((codepoint & 0x3ff) + 0xdc00); // low surrogate 1923 continue; 1924 } 1925 if ('\\' != c) { 1926 output->push_back(c); 1927 continue; 1928 } 1929 if (start == end) 1930 return false; 1931 c = *start++; 1932 1933 if (c == 'x') { 1934 // \x is not supported. 1935 return false; 1936 } 1937 1938 switch (c) { 1939 case '"': 1940 case '/': 1941 case '\\': 1942 break; 1943 case 'b': 1944 c = '\b'; 1945 break; 1946 case 'f': 1947 c = '\f'; 1948 break; 1949 case 'n': 1950 c = '\n'; 1951 break; 1952 case 'r': 1953 c = '\r'; 1954 break; 1955 case 't': 1956 c = '\t'; 1957 break; 1958 case 'v': 1959 c = '\v'; 1960 break; 1961 case 'u': 1962 c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) + 1963 (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3)); 1964 start += 4; 1965 break; 1966 default: 1967 return false; 1968 } 1969 output->push_back(c); 1970 } 1971 return true; 1972 } 1973 1974 void ParseValue(const Char* start, 1975 const Char* end, 1976 const Char** value_token_end, 1977 int depth) { 1978 if (depth > kStackLimit) { 1979 HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start); 1980 return; 1981 } 1982 const Char* token_start = nullptr; 1983 const Char* token_end = nullptr; 1984 Token token = ParseToken(start, end, &token_start, &token_end); 1985 switch (token) { 1986 case NoInput: 1987 HandleError(Error::JSON_PARSER_NO_INPUT, token_start); 1988 return; 1989 case InvalidToken: 1990 HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start); 1991 return; 1992 case NullToken: 1993 handler_->HandleNull(); 1994 break; 1995 case BoolTrue: 1996 handler_->HandleBool(true); 1997 break; 1998 case BoolFalse: 1999 handler_->HandleBool(false); 2000 break; 2001 case Number: { 2002 double value; 2003 if (!CharsToDouble(token_start, token_end - token_start, &value)) { 2004 HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start); 2005 return; 2006 } 2007 if (value >= std::numeric_limits<int32_t>::min() && 2008 value <= std::numeric_limits<int32_t>::max() && 2009 static_cast<int32_t>(value) == value) 2010 handler_->HandleInt32(static_cast<int32_t>(value)); 2011 else 2012 handler_->HandleDouble(value); 2013 break; 2014 } 2015 case StringLiteral: { 2016 std::vector<uint16_t> value; 2017 bool ok = DecodeString(token_start + 1, token_end - 1, &value); 2018 if (!ok) { 2019 HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); 2020 return; 2021 } 2022 handler_->HandleString16(span<uint16_t>(value.data(), value.size())); 2023 break; 2024 } 2025 case ArrayBegin: { 2026 handler_->HandleArrayBegin(); 2027 start = token_end; 2028 token = ParseToken(start, end, &token_start, &token_end); 2029 while (token != ArrayEnd) { 2030 ParseValue(start, end, &token_end, depth + 1); 2031 if (error_) 2032 return; 2033 2034 // After a list value, we expect a comma or the end of the list. 2035 start = token_end; 2036 token = ParseToken(start, end, &token_start, &token_end); 2037 if (token == ListSeparator) { 2038 start = token_end; 2039 token = ParseToken(start, end, &token_start, &token_end); 2040 if (token == ArrayEnd) { 2041 HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start); 2042 return; 2043 } 2044 } else if (token != ArrayEnd) { 2045 // Unexpected value after list value. Bail out. 2046 HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED, 2047 token_start); 2048 return; 2049 } 2050 } 2051 handler_->HandleArrayEnd(); 2052 break; 2053 } 2054 case ObjectBegin: { 2055 handler_->HandleMapBegin(); 2056 start = token_end; 2057 token = ParseToken(start, end, &token_start, &token_end); 2058 while (token != ObjectEnd) { 2059 if (token != StringLiteral) { 2060 HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED, 2061 token_start); 2062 return; 2063 } 2064 std::vector<uint16_t> key; 2065 if (!DecodeString(token_start + 1, token_end - 1, &key)) { 2066 HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); 2067 return; 2068 } 2069 handler_->HandleString16(span<uint16_t>(key.data(), key.size())); 2070 start = token_end; 2071 2072 token = ParseToken(start, end, &token_start, &token_end); 2073 if (token != ObjectPairSeparator) { 2074 HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start); 2075 return; 2076 } 2077 start = token_end; 2078 2079 ParseValue(start, end, &token_end, depth + 1); 2080 if (error_) 2081 return; 2082 start = token_end; 2083 2084 // After a key/value pair, we expect a comma or the end of the 2085 // object. 2086 token = ParseToken(start, end, &token_start, &token_end); 2087 if (token == ListSeparator) { 2088 start = token_end; 2089 token = ParseToken(start, end, &token_start, &token_end); 2090 if (token == ObjectEnd) { 2091 HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start); 2092 return; 2093 } 2094 } else if (token != ObjectEnd) { 2095 // Unexpected value after last object value. Bail out. 2096 HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED, 2097 token_start); 2098 return; 2099 } 2100 } 2101 handler_->HandleMapEnd(); 2102 break; 2103 } 2104 2105 default: 2106 // We got a token that's not a value. 2107 HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start); 2108 return; 2109 } 2110 2111 SkipWhitespaceAndComments(token_end, end, value_token_end); 2112 } 2113 2114 void HandleError(Error error, const Char* pos) { 2115 assert(error != Error::OK); 2116 if (!error_) { 2117 handler_->HandleError( 2118 Status{error, static_cast<size_t>(pos - start_pos_)}); 2119 error_ = true; 2120 } 2121 } 2122 2123 const Char* start_pos_ = nullptr; 2124 bool error_ = false; 2125 const Platform* platform_; 2126 StreamingParserHandler* handler_; 2127}; 2128} // namespace 2129 2130void ParseJSON(const Platform& platform, 2131 span<uint8_t> chars, 2132 StreamingParserHandler* handler) { 2133 JsonParser<uint8_t> parser(&platform, handler); 2134 parser.Parse(chars.data(), chars.size()); 2135} 2136 2137void ParseJSON(const Platform& platform, 2138 span<uint16_t> chars, 2139 StreamingParserHandler* handler) { 2140 JsonParser<uint16_t> parser(&platform, handler); 2141 parser.Parse(chars.data(), chars.size()); 2142} 2143 2144// ============================================================================= 2145// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding 2146// ============================================================================= 2147template <typename C> 2148Status ConvertCBORToJSONTmpl(const Platform& platform, 2149 span<uint8_t> cbor, 2150 C* json) { 2151 Status status; 2152 std::unique_ptr<StreamingParserHandler> json_writer = 2153 NewJSONEncoder(&platform, json, &status); 2154 cbor::ParseCBOR(cbor, json_writer.get()); 2155 return status; 2156} 2157 2158Status ConvertCBORToJSON(const Platform& platform, 2159 span<uint8_t> cbor, 2160 std::vector<uint8_t>* json) { 2161 return ConvertCBORToJSONTmpl(platform, cbor, json); 2162} 2163Status ConvertCBORToJSON(const Platform& platform, 2164 span<uint8_t> cbor, 2165 std::string* json) { 2166 return ConvertCBORToJSONTmpl(platform, cbor, json); 2167} 2168 2169template <typename T, typename C> 2170Status ConvertJSONToCBORTmpl(const Platform& platform, span<T> json, C* cbor) { 2171 Status status; 2172 std::unique_ptr<StreamingParserHandler> encoder = 2173 cbor::NewCBOREncoder(cbor, &status); 2174 ParseJSON(platform, json, encoder.get()); 2175 return status; 2176} 2177Status ConvertJSONToCBOR(const Platform& platform, 2178 span<uint8_t> json, 2179 std::string* cbor) { 2180 return ConvertJSONToCBORTmpl(platform, json, cbor); 2181} 2182Status ConvertJSONToCBOR(const Platform& platform, 2183 span<uint16_t> json, 2184 std::string* cbor) { 2185 return ConvertJSONToCBORTmpl(platform, json, cbor); 2186} 2187Status ConvertJSONToCBOR(const Platform& platform, 2188 span<uint8_t> json, 2189 std::vector<uint8_t>* cbor) { 2190 return ConvertJSONToCBORTmpl(platform, json, cbor); 2191} 2192Status ConvertJSONToCBOR(const Platform& platform, 2193 span<uint16_t> json, 2194 std::vector<uint8_t>* cbor) { 2195 return ConvertJSONToCBORTmpl(platform, json, cbor); 2196} 2197} // namespace json 2198 2199{% for namespace in config.protocol.namespace %} 2200} // namespace {{namespace}} 2201{% endfor %} 2202 2203{% endif %} 2204