1{# This template is generated by gen_cbor_templates.py. #} 2// Generated by lib/encoding_cpp.template. 3 4// Copyright 2019 The Chromium Authors. All rights reserved. 5// Use of this source code is governed by a BSD-style license that can be 6// found in the LICENSE file. 7 8 9#include <algorithm> 10#include <cassert> 11#include <cmath> 12#include <cstring> 13#include <limits> 14#include <stack> 15 16{% for namespace in config.protocol.namespace %} 17namespace {{namespace}} { 18{% endfor %} 19 20// ===== encoding/encoding.cc ===== 21 22// ============================================================================= 23// Status and Error codes 24// ============================================================================= 25 26std::string Status::ToASCIIString() const { 27 switch (error) { 28 case Error::OK: 29 return "OK"; 30 case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS: 31 return ToASCIIString("JSON: unprocessed input remains"); 32 case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED: 33 return ToASCIIString("JSON: stack limit exceeded"); 34 case Error::JSON_PARSER_NO_INPUT: 35 return ToASCIIString("JSON: no input"); 36 case Error::JSON_PARSER_INVALID_TOKEN: 37 return ToASCIIString("JSON: invalid token"); 38 case Error::JSON_PARSER_INVALID_NUMBER: 39 return ToASCIIString("JSON: invalid number"); 40 case Error::JSON_PARSER_INVALID_STRING: 41 return ToASCIIString("JSON: invalid string"); 42 case Error::JSON_PARSER_UNEXPECTED_ARRAY_END: 43 return ToASCIIString("JSON: unexpected array end"); 44 case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED: 45 return ToASCIIString("JSON: comma or array end expected"); 46 case Error::JSON_PARSER_STRING_LITERAL_EXPECTED: 47 return ToASCIIString("JSON: string literal expected"); 48 case Error::JSON_PARSER_COLON_EXPECTED: 49 return ToASCIIString("JSON: colon expected"); 50 case Error::JSON_PARSER_UNEXPECTED_MAP_END: 51 return ToASCIIString("JSON: unexpected map end"); 52 case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED: 53 return ToASCIIString("JSON: comma or map end expected"); 54 case Error::JSON_PARSER_VALUE_EXPECTED: 55 return ToASCIIString("JSON: value expected"); 56 57 case Error::CBOR_INVALID_INT32: 58 return ToASCIIString("CBOR: invalid int32"); 59 case Error::CBOR_INVALID_DOUBLE: 60 return ToASCIIString("CBOR: invalid double"); 61 case Error::CBOR_INVALID_ENVELOPE: 62 return ToASCIIString("CBOR: invalid envelope"); 63 case Error::CBOR_INVALID_STRING8: 64 return ToASCIIString("CBOR: invalid string8"); 65 case Error::CBOR_INVALID_STRING16: 66 return ToASCIIString("CBOR: invalid string16"); 67 case Error::CBOR_INVALID_BINARY: 68 return ToASCIIString("CBOR: invalid binary"); 69 case Error::CBOR_UNSUPPORTED_VALUE: 70 return ToASCIIString("CBOR: unsupported value"); 71 case Error::CBOR_NO_INPUT: 72 return ToASCIIString("CBOR: no input"); 73 case Error::CBOR_INVALID_START_BYTE: 74 return ToASCIIString("CBOR: invalid start byte"); 75 case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE: 76 return ToASCIIString("CBOR: unexpected eof expected value"); 77 case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY: 78 return ToASCIIString("CBOR: unexpected eof in array"); 79 case Error::CBOR_UNEXPECTED_EOF_IN_MAP: 80 return ToASCIIString("CBOR: unexpected eof in map"); 81 case Error::CBOR_INVALID_MAP_KEY: 82 return ToASCIIString("CBOR: invalid map key"); 83 case Error::CBOR_STACK_LIMIT_EXCEEDED: 84 return ToASCIIString("CBOR: stack limit exceeded"); 85 case Error::CBOR_TRAILING_JUNK: 86 return ToASCIIString("CBOR: trailing junk"); 87 case Error::CBOR_MAP_START_EXPECTED: 88 return ToASCIIString("CBOR: map start expected"); 89 case Error::CBOR_MAP_STOP_EXPECTED: 90 return ToASCIIString("CBOR: map stop expected"); 91 case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED: 92 return ToASCIIString("CBOR: envelope size limit exceeded"); 93 } 94 // Some compilers can't figure out that we can't get here. 95 return "INVALID ERROR CODE"; 96} 97 98std::string Status::ToASCIIString(const char* msg) const { 99 return std::string(msg) + " at position " + std::to_string(pos); 100} 101 102namespace cbor { 103namespace { 104// Indicates the number of bits the "initial byte" needs to be shifted to the 105// right after applying |kMajorTypeMask| to produce the major type in the 106// lowermost bits. 107static constexpr uint8_t kMajorTypeBitShift = 5u; 108// Mask selecting the low-order 5 bits of the "initial byte", which is where 109// the additional information is encoded. 110static constexpr uint8_t kAdditionalInformationMask = 0x1f; 111// Mask selecting the high-order 3 bits of the "initial byte", which indicates 112// the major type of the encoded value. 113static constexpr uint8_t kMajorTypeMask = 0xe0; 114// Indicates the integer is in the following byte. 115static constexpr uint8_t kAdditionalInformation1Byte = 24u; 116// Indicates the integer is in the next 2 bytes. 117static constexpr uint8_t kAdditionalInformation2Bytes = 25u; 118// Indicates the integer is in the next 4 bytes. 119static constexpr uint8_t kAdditionalInformation4Bytes = 26u; 120// Indicates the integer is in the next 8 bytes. 121static constexpr uint8_t kAdditionalInformation8Bytes = 27u; 122 123// Encodes the initial byte, consisting of the |type| in the first 3 bits 124// followed by 5 bits of |additional_info|. 125constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) { 126 return (static_cast<uint8_t>(type) << kMajorTypeBitShift) | 127 (additional_info & kAdditionalInformationMask); 128} 129 130// TAG 24 indicates that what follows is a byte string which is 131// encoded in CBOR format. We use this as a wrapper for 132// maps and arrays, allowing us to skip them, because the 133// byte string carries its size (byte length). 134// https://tools.ietf.org/html/rfc7049#section-2.4.4.1 135static constexpr uint8_t kInitialByteForEnvelope = 136 EncodeInitialByte(MajorType::TAG, 24); 137// The initial byte for a byte string with at most 2^32 bytes 138// of payload. This is used for envelope encoding, even if 139// the byte string is shorter. 140static constexpr uint8_t kInitialByteFor32BitLengthByteString = 141 EncodeInitialByte(MajorType::BYTE_STRING, 26); 142 143// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional 144// info = 31. 145static constexpr uint8_t kInitialByteIndefiniteLengthArray = 146 EncodeInitialByte(MajorType::ARRAY, 31); 147static constexpr uint8_t kInitialByteIndefiniteLengthMap = 148 EncodeInitialByte(MajorType::MAP, 31); 149// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite 150// length maps / arrays. 151static constexpr uint8_t kStopByte = 152 EncodeInitialByte(MajorType::SIMPLE_VALUE, 31); 153 154// See RFC 7049 Section 2.3, Table 2. 155static constexpr uint8_t kEncodedTrue = 156 EncodeInitialByte(MajorType::SIMPLE_VALUE, 21); 157static constexpr uint8_t kEncodedFalse = 158 EncodeInitialByte(MajorType::SIMPLE_VALUE, 20); 159static constexpr uint8_t kEncodedNull = 160 EncodeInitialByte(MajorType::SIMPLE_VALUE, 22); 161static constexpr uint8_t kInitialByteForDouble = 162 EncodeInitialByte(MajorType::SIMPLE_VALUE, 27); 163 164// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for 165// arbitrary binary data encoded as BYTE_STRING. 166static constexpr uint8_t kExpectedConversionToBase64Tag = 167 EncodeInitialByte(MajorType::TAG, 22); 168 169// Writes the bytes for |v| to |out|, starting with the most significant byte. 170// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html 171template <typename T, class C> 172void WriteBytesMostSignificantByteFirst(T v, C* out) { 173 for (int shift_bytes = sizeof(T) - 1; shift_bytes >= 0; --shift_bytes) 174 out->push_back(0xff & (v >> (shift_bytes * 8))); 175} 176 177// Extracts sizeof(T) bytes from |in| to extract a value of type T 178// (e.g. uint64_t, uint32_t, ...), most significant byte first. 179// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html 180template <typename T> 181T ReadBytesMostSignificantByteFirst(span<uint8_t> in) { 182 assert(in.size() >= sizeof(T)); 183 T result = 0; 184 for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes) 185 result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8); 186 return result; 187} 188} // namespace 189 190namespace internals { 191// Reads the start of a token with definitive size from |bytes|. 192// |type| is the major type as specified in RFC 7049 Section 2.1. 193// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size 194// (e.g. for BYTE_STRING). 195// If successful, returns the number of bytes read. Otherwise returns -1. 196// TODO(johannes): change return type to size_t and use 0 for error. 197int8_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) { 198 if (bytes.empty()) 199 return -1; 200 uint8_t initial_byte = bytes[0]; 201 *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift); 202 203 uint8_t additional_information = initial_byte & kAdditionalInformationMask; 204 if (additional_information < 24) { 205 // Values 0-23 are encoded directly into the additional info of the 206 // initial byte. 207 *value = additional_information; 208 return 1; 209 } 210 if (additional_information == kAdditionalInformation1Byte) { 211 // Values 24-255 are encoded with one initial byte, followed by the value. 212 if (bytes.size() < 2) 213 return -1; 214 *value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes.subspan(1)); 215 return 2; 216 } 217 if (additional_information == kAdditionalInformation2Bytes) { 218 // Values 256-65535: 1 initial byte + 2 bytes payload. 219 if (bytes.size() < 1 + sizeof(uint16_t)) 220 return -1; 221 *value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes.subspan(1)); 222 return 3; 223 } 224 if (additional_information == kAdditionalInformation4Bytes) { 225 // 32 bit uint: 1 initial byte + 4 bytes payload. 226 if (bytes.size() < 1 + sizeof(uint32_t)) 227 return -1; 228 *value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes.subspan(1)); 229 return 5; 230 } 231 if (additional_information == kAdditionalInformation8Bytes) { 232 // 64 bit uint: 1 initial byte + 8 bytes payload. 233 if (bytes.size() < 1 + sizeof(uint64_t)) 234 return -1; 235 *value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes.subspan(1)); 236 return 9; 237 } 238 return -1; 239} 240 241// Writes the start of a token with |type|. The |value| may indicate the size, 242// or it may be the payload if the value is an unsigned integer. 243template <typename C> 244void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) { 245 if (value < 24) { 246 // Values 0-23 are encoded directly into the additional info of the 247 // initial byte. 248 encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value)); 249 return; 250 } 251 if (value <= std::numeric_limits<uint8_t>::max()) { 252 // Values 24-255 are encoded with one initial byte, followed by the value. 253 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte)); 254 encoded->push_back(value); 255 return; 256 } 257 if (value <= std::numeric_limits<uint16_t>::max()) { 258 // Values 256-65535: 1 initial byte + 2 bytes payload. 259 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes)); 260 WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded); 261 return; 262 } 263 if (value <= std::numeric_limits<uint32_t>::max()) { 264 // 32 bit uint: 1 initial byte + 4 bytes payload. 265 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes)); 266 WriteBytesMostSignificantByteFirst<uint32_t>(static_cast<uint32_t>(value), 267 encoded); 268 return; 269 } 270 // 64 bit uint: 1 initial byte + 8 bytes payload. 271 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes)); 272 WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded); 273} 274void WriteTokenStart(MajorType type, 275 uint64_t value, 276 std::vector<uint8_t>* encoded) { 277 WriteTokenStartTmpl(type, value, encoded); 278} 279void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) { 280 WriteTokenStartTmpl(type, value, encoded); 281} 282} // namespace internals 283 284// ============================================================================= 285// Detecting CBOR content 286// ============================================================================= 287 288uint8_t InitialByteForEnvelope() { 289 return kInitialByteForEnvelope; 290} 291uint8_t InitialByteFor32BitLengthByteString() { 292 return kInitialByteFor32BitLengthByteString; 293} 294bool IsCBORMessage(span<uint8_t> msg) { 295 return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() && 296 msg[1] == InitialByteFor32BitLengthByteString(); 297} 298 299// ============================================================================= 300// Encoding invidiual CBOR items 301// ============================================================================= 302 303uint8_t EncodeTrue() { 304 return kEncodedTrue; 305} 306uint8_t EncodeFalse() { 307 return kEncodedFalse; 308} 309uint8_t EncodeNull() { 310 return kEncodedNull; 311} 312 313uint8_t EncodeIndefiniteLengthArrayStart() { 314 return kInitialByteIndefiniteLengthArray; 315} 316 317uint8_t EncodeIndefiniteLengthMapStart() { 318 return kInitialByteIndefiniteLengthMap; 319} 320 321uint8_t EncodeStop() { 322 return kStopByte; 323} 324 325template <typename C> 326void EncodeInt32Tmpl(int32_t value, C* out) { 327 if (value >= 0) { 328 internals::WriteTokenStart(MajorType::UNSIGNED, value, out); 329 } else { 330 uint64_t representation = static_cast<uint64_t>(-(value + 1)); 331 internals::WriteTokenStart(MajorType::NEGATIVE, representation, out); 332 } 333} 334void EncodeInt32(int32_t value, std::vector<uint8_t>* out) { 335 EncodeInt32Tmpl(value, out); 336} 337void EncodeInt32(int32_t value, std::string* out) { 338 EncodeInt32Tmpl(value, out); 339} 340 341template <typename C> 342void EncodeString16Tmpl(span<uint16_t> in, C* out) { 343 uint64_t byte_length = static_cast<uint64_t>(in.size_bytes()); 344 internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); 345 // When emitting UTF16 characters, we always write the least significant byte 346 // first; this is because it's the native representation for X86. 347 // TODO(johannes): Implement a more efficient thing here later, e.g. 348 // casting *iff* the machine has this byte order. 349 // The wire format for UTF16 chars will probably remain the same 350 // (least significant byte first) since this way we can have 351 // golden files, unittests, etc. that port easily and universally. 352 // See also: 353 // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html 354 for (const uint16_t two_bytes : in) { 355 out->push_back(two_bytes); 356 out->push_back(two_bytes >> 8); 357 } 358} 359void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) { 360 EncodeString16Tmpl(in, out); 361} 362void EncodeString16(span<uint16_t> in, std::string* out) { 363 EncodeString16Tmpl(in, out); 364} 365 366template <typename C> 367void EncodeString8Tmpl(span<uint8_t> in, C* out) { 368 internals::WriteTokenStart(MajorType::STRING, 369 static_cast<uint64_t>(in.size_bytes()), out); 370 out->insert(out->end(), in.begin(), in.end()); 371} 372void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) { 373 EncodeString8Tmpl(in, out); 374} 375void EncodeString8(span<uint8_t> in, std::string* out) { 376 EncodeString8Tmpl(in, out); 377} 378 379template <typename C> 380void EncodeFromLatin1Tmpl(span<uint8_t> latin1, C* out) { 381 for (size_t ii = 0; ii < latin1.size(); ++ii) { 382 if (latin1[ii] <= 127) 383 continue; 384 // If there's at least one non-ASCII char, convert to UTF8. 385 std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + ii); 386 for (; ii < latin1.size(); ++ii) { 387 if (latin1[ii] <= 127) { 388 utf8.push_back(latin1[ii]); 389 } else { 390 // 0xC0 means it's a UTF8 sequence with 2 bytes. 391 utf8.push_back((latin1[ii] >> 6) | 0xc0); 392 utf8.push_back((latin1[ii] | 0x80) & 0xbf); 393 } 394 } 395 EncodeString8(SpanFrom(utf8), out); 396 return; 397 } 398 EncodeString8(latin1, out); 399} 400void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) { 401 EncodeFromLatin1Tmpl(latin1, out); 402} 403void EncodeFromLatin1(span<uint8_t> latin1, std::string* out) { 404 EncodeFromLatin1Tmpl(latin1, out); 405} 406 407template <typename C> 408void EncodeFromUTF16Tmpl(span<uint16_t> utf16, C* out) { 409 // If there's at least one non-ASCII char, encode as STRING16 (UTF16). 410 for (uint16_t ch : utf16) { 411 if (ch <= 127) 412 continue; 413 EncodeString16(utf16, out); 414 return; 415 } 416 // It's all US-ASCII, strip out every second byte and encode as UTF8. 417 internals::WriteTokenStart(MajorType::STRING, 418 static_cast<uint64_t>(utf16.size()), out); 419 out->insert(out->end(), utf16.begin(), utf16.end()); 420} 421void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) { 422 EncodeFromUTF16Tmpl(utf16, out); 423} 424void EncodeFromUTF16(span<uint16_t> utf16, std::string* out) { 425 EncodeFromUTF16Tmpl(utf16, out); 426} 427 428template <typename C> 429void EncodeBinaryTmpl(span<uint8_t> in, C* out) { 430 out->push_back(kExpectedConversionToBase64Tag); 431 uint64_t byte_length = static_cast<uint64_t>(in.size_bytes()); 432 internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); 433 out->insert(out->end(), in.begin(), in.end()); 434} 435void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) { 436 EncodeBinaryTmpl(in, out); 437} 438void EncodeBinary(span<uint8_t> in, std::string* out) { 439 EncodeBinaryTmpl(in, out); 440} 441 442// A double is encoded with a specific initial byte 443// (kInitialByteForDouble) plus the 64 bits of payload for its value. 444constexpr size_t kEncodedDoubleSize = 1 + sizeof(uint64_t); 445 446// An envelope is encoded with a specific initial byte 447// (kInitialByteForEnvelope), plus the start byte for a BYTE_STRING with a 32 448// bit wide length, plus a 32 bit length for that string. 449constexpr size_t kEncodedEnvelopeHeaderSize = 1 + 1 + sizeof(uint32_t); 450 451template <typename C> 452void EncodeDoubleTmpl(double value, C* out) { 453 // The additional_info=27 indicates 64 bits for the double follow. 454 // See RFC 7049 Section 2.3, Table 1. 455 out->push_back(kInitialByteForDouble); 456 union { 457 double from_double; 458 uint64_t to_uint64; 459 } reinterpret; 460 reinterpret.from_double = value; 461 WriteBytesMostSignificantByteFirst<uint64_t>(reinterpret.to_uint64, out); 462} 463void EncodeDouble(double value, std::vector<uint8_t>* out) { 464 EncodeDoubleTmpl(value, out); 465} 466void EncodeDouble(double value, std::string* out) { 467 EncodeDoubleTmpl(value, out); 468} 469 470// ============================================================================= 471// cbor::EnvelopeEncoder - for wrapping submessages 472// ============================================================================= 473 474template <typename C> 475void EncodeStartTmpl(C* out, size_t* byte_size_pos) { 476 assert(*byte_size_pos == 0); 477 out->push_back(kInitialByteForEnvelope); 478 out->push_back(kInitialByteFor32BitLengthByteString); 479 *byte_size_pos = out->size(); 480 out->resize(out->size() + sizeof(uint32_t)); 481} 482 483void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) { 484 EncodeStartTmpl<std::vector<uint8_t>>(out, &byte_size_pos_); 485} 486 487void EnvelopeEncoder::EncodeStart(std::string* out) { 488 EncodeStartTmpl<std::string>(out, &byte_size_pos_); 489} 490 491template <typename C> 492bool EncodeStopTmpl(C* out, size_t* byte_size_pos) { 493 assert(*byte_size_pos != 0); 494 // The byte size is the size of the payload, that is, all the 495 // bytes that were written past the byte size position itself. 496 uint64_t byte_size = out->size() - (*byte_size_pos + sizeof(uint32_t)); 497 // We store exactly 4 bytes, so at most INT32MAX, with most significant 498 // byte first. 499 if (byte_size > std::numeric_limits<uint32_t>::max()) 500 return false; 501 for (int shift_bytes = sizeof(uint32_t) - 1; shift_bytes >= 0; 502 --shift_bytes) { 503 (*out)[(*byte_size_pos)++] = 0xff & (byte_size >> (shift_bytes * 8)); 504 } 505 return true; 506} 507 508bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) { 509 return EncodeStopTmpl(out, &byte_size_pos_); 510} 511 512bool EnvelopeEncoder::EncodeStop(std::string* out) { 513 return EncodeStopTmpl(out, &byte_size_pos_); 514} 515 516// ============================================================================= 517// cbor::NewCBOREncoder - for encoding from a streaming parser 518// ============================================================================= 519 520namespace { 521template <typename C> 522class CBOREncoder : public StreamingParserHandler { 523 public: 524 CBOREncoder(C* out, Status* status) : out_(out), status_(status) { 525 *status_ = Status(); 526 } 527 528 void HandleMapBegin() override { 529 if (!status_->ok()) 530 return; 531 envelopes_.emplace_back(); 532 envelopes_.back().EncodeStart(out_); 533 out_->push_back(kInitialByteIndefiniteLengthMap); 534 } 535 536 void HandleMapEnd() override { 537 if (!status_->ok()) 538 return; 539 out_->push_back(kStopByte); 540 assert(!envelopes_.empty()); 541 if (!envelopes_.back().EncodeStop(out_)) { 542 HandleError( 543 Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); 544 return; 545 } 546 envelopes_.pop_back(); 547 } 548 549 void HandleArrayBegin() override { 550 if (!status_->ok()) 551 return; 552 envelopes_.emplace_back(); 553 envelopes_.back().EncodeStart(out_); 554 out_->push_back(kInitialByteIndefiniteLengthArray); 555 } 556 557 void HandleArrayEnd() override { 558 if (!status_->ok()) 559 return; 560 out_->push_back(kStopByte); 561 assert(!envelopes_.empty()); 562 if (!envelopes_.back().EncodeStop(out_)) { 563 HandleError( 564 Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); 565 return; 566 } 567 envelopes_.pop_back(); 568 } 569 570 void HandleString8(span<uint8_t> chars) override { 571 if (!status_->ok()) 572 return; 573 EncodeString8(chars, out_); 574 } 575 576 void HandleString16(span<uint16_t> chars) override { 577 if (!status_->ok()) 578 return; 579 EncodeFromUTF16(chars, out_); 580 } 581 582 void HandleBinary(span<uint8_t> bytes) override { 583 if (!status_->ok()) 584 return; 585 EncodeBinary(bytes, out_); 586 } 587 588 void HandleDouble(double value) override { 589 if (!status_->ok()) 590 return; 591 EncodeDouble(value, out_); 592 } 593 594 void HandleInt32(int32_t value) override { 595 if (!status_->ok()) 596 return; 597 EncodeInt32(value, out_); 598 } 599 600 void HandleBool(bool value) override { 601 if (!status_->ok()) 602 return; 603 // See RFC 7049 Section 2.3, Table 2. 604 out_->push_back(value ? kEncodedTrue : kEncodedFalse); 605 } 606 607 void HandleNull() override { 608 if (!status_->ok()) 609 return; 610 // See RFC 7049 Section 2.3, Table 2. 611 out_->push_back(kEncodedNull); 612 } 613 614 void HandleError(Status error) override { 615 if (!status_->ok()) 616 return; 617 *status_ = error; 618 out_->clear(); 619 } 620 621 private: 622 C* out_; 623 std::vector<EnvelopeEncoder> envelopes_; 624 Status* status_; 625}; 626} // namespace 627 628std::unique_ptr<StreamingParserHandler> NewCBOREncoder( 629 std::vector<uint8_t>* out, 630 Status* status) { 631 return std::unique_ptr<StreamingParserHandler>( 632 new CBOREncoder<std::vector<uint8_t>>(out, status)); 633} 634std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out, 635 Status* status) { 636 return std::unique_ptr<StreamingParserHandler>( 637 new CBOREncoder<std::string>(out, status)); 638} 639 640// ============================================================================= 641// cbor::CBORTokenizer - for parsing individual CBOR items 642// ============================================================================= 643 644CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : bytes_(bytes) { 645 ReadNextToken(/*enter_envelope=*/false); 646} 647CBORTokenizer::~CBORTokenizer() {} 648 649CBORTokenTag CBORTokenizer::TokenTag() const { 650 return token_tag_; 651} 652 653void CBORTokenizer::Next() { 654 if (token_tag_ == CBORTokenTag::ERROR_VALUE || 655 token_tag_ == CBORTokenTag::DONE) 656 return; 657 ReadNextToken(/*enter_envelope=*/false); 658} 659 660void CBORTokenizer::EnterEnvelope() { 661 assert(token_tag_ == CBORTokenTag::ENVELOPE); 662 ReadNextToken(/*enter_envelope=*/true); 663} 664 665Status CBORTokenizer::Status() const { 666 return status_; 667} 668 669// The following accessor functions ::GetInt32, ::GetDouble, 670// ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents 671// assume that a particular token was recognized in ::ReadNextToken. 672// That's where all the error checking is done. By design, 673// the accessors (assuming the token was recognized) never produce 674// an error. 675 676int32_t CBORTokenizer::GetInt32() const { 677 assert(token_tag_ == CBORTokenTag::INT32); 678 // The range checks happen in ::ReadNextToken(). 679 return static_cast<int32_t>( 680 token_start_type_ == MajorType::UNSIGNED 681 ? token_start_internal_value_ 682 : -static_cast<int64_t>(token_start_internal_value_) - 1); 683} 684 685double CBORTokenizer::GetDouble() const { 686 assert(token_tag_ == CBORTokenTag::DOUBLE); 687 union { 688 uint64_t from_uint64; 689 double to_double; 690 } reinterpret; 691 reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst<uint64_t>( 692 bytes_.subspan(status_.pos + 1)); 693 return reinterpret.to_double; 694} 695 696span<uint8_t> CBORTokenizer::GetString8() const { 697 assert(token_tag_ == CBORTokenTag::STRING8); 698 auto length = static_cast<size_t>(token_start_internal_value_); 699 return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); 700} 701 702span<uint8_t> CBORTokenizer::GetString16WireRep() const { 703 assert(token_tag_ == CBORTokenTag::STRING16); 704 auto length = static_cast<size_t>(token_start_internal_value_); 705 return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); 706} 707 708span<uint8_t> CBORTokenizer::GetBinary() const { 709 assert(token_tag_ == CBORTokenTag::BINARY); 710 auto length = static_cast<size_t>(token_start_internal_value_); 711 return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); 712} 713 714span<uint8_t> CBORTokenizer::GetEnvelopeContents() const { 715 assert(token_tag_ == CBORTokenTag::ENVELOPE); 716 auto length = static_cast<size_t>(token_start_internal_value_); 717 return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length); 718} 719 720// All error checking happens in ::ReadNextToken, so that the accessors 721// can avoid having to carry an error return value. 722// 723// With respect to checking the encoded lengths of strings, arrays, etc: 724// On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so 725// we initially read them as uint64_t, usually into token_start_internal_value_. 726// 727// However, since these containers have a representation on the machine, 728// we need to do corresponding size computations on the input byte array, 729// output span (e.g. the payload for a string), etc., and size_t is 730// machine specific (in practice either 32 bit or 64 bit). 731// 732// Further, we must avoid overflowing size_t. Therefore, we use this 733// kMaxValidLength constant to: 734// - Reject values that are larger than the architecture specific 735// max size_t (differs between 32 bit and 64 bit arch). 736// - Reserve at least one bit so that we can check against overflows 737// when adding lengths (array / string length / etc.); we do this by 738// ensuring that the inputs to an addition are <= kMaxValidLength, 739// and then checking whether the sum went past it. 740// 741// See also 742// https://chromium.googlesource.com/chromium/src/+/master/docs/security/integer-semantics.md 743static const uint64_t kMaxValidLength = 744 std::min<uint64_t>(std::numeric_limits<uint64_t>::max() >> 2, 745 std::numeric_limits<size_t>::max()); 746 747void CBORTokenizer::ReadNextToken(bool enter_envelope) { 748 if (enter_envelope) { 749 status_.pos += kEncodedEnvelopeHeaderSize; 750 } else { 751 status_.pos = 752 status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_; 753 } 754 status_.error = Error::OK; 755 if (status_.pos >= bytes_.size()) { 756 token_tag_ = CBORTokenTag::DONE; 757 return; 758 } 759 const size_t remaining_bytes = bytes_.size() - status_.pos; 760 switch (bytes_[status_.pos]) { 761 case kStopByte: 762 SetToken(CBORTokenTag::STOP, 1); 763 return; 764 case kInitialByteIndefiniteLengthMap: 765 SetToken(CBORTokenTag::MAP_START, 1); 766 return; 767 case kInitialByteIndefiniteLengthArray: 768 SetToken(CBORTokenTag::ARRAY_START, 1); 769 return; 770 case kEncodedTrue: 771 SetToken(CBORTokenTag::TRUE_VALUE, 1); 772 return; 773 case kEncodedFalse: 774 SetToken(CBORTokenTag::FALSE_VALUE, 1); 775 return; 776 case kEncodedNull: 777 SetToken(CBORTokenTag::NULL_VALUE, 1); 778 return; 779 case kExpectedConversionToBase64Tag: { // BINARY 780 const int8_t bytes_read = internals::ReadTokenStart( 781 bytes_.subspan(status_.pos + 1), &token_start_type_, 782 &token_start_internal_value_); 783 if (bytes_read < 0 || token_start_type_ != MajorType::BYTE_STRING || 784 token_start_internal_value_ > kMaxValidLength) { 785 SetError(Error::CBOR_INVALID_BINARY); 786 return; 787 } 788 const uint64_t token_byte_length = token_start_internal_value_ + 789 /* tag before token start: */ 1 + 790 /* token start: */ bytes_read; 791 if (token_byte_length > remaining_bytes) { 792 SetError(Error::CBOR_INVALID_BINARY); 793 return; 794 } 795 SetToken(CBORTokenTag::BINARY, static_cast<size_t>(token_byte_length)); 796 return; 797 } 798 case kInitialByteForDouble: { // DOUBLE 799 if (kEncodedDoubleSize > remaining_bytes) { 800 SetError(Error::CBOR_INVALID_DOUBLE); 801 return; 802 } 803 SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize); 804 return; 805 } 806 case kInitialByteForEnvelope: { // ENVELOPE 807 if (kEncodedEnvelopeHeaderSize > remaining_bytes) { 808 SetError(Error::CBOR_INVALID_ENVELOPE); 809 return; 810 } 811 // The envelope must be a byte string with 32 bit length. 812 if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) { 813 SetError(Error::CBOR_INVALID_ENVELOPE); 814 return; 815 } 816 // Read the length of the byte string. 817 token_start_internal_value_ = ReadBytesMostSignificantByteFirst<uint32_t>( 818 bytes_.subspan(status_.pos + 2)); 819 if (token_start_internal_value_ > kMaxValidLength) { 820 SetError(Error::CBOR_INVALID_ENVELOPE); 821 return; 822 } 823 uint64_t token_byte_length = 824 token_start_internal_value_ + kEncodedEnvelopeHeaderSize; 825 if (token_byte_length > remaining_bytes) { 826 SetError(Error::CBOR_INVALID_ENVELOPE); 827 return; 828 } 829 SetToken(CBORTokenTag::ENVELOPE, static_cast<size_t>(token_byte_length)); 830 return; 831 } 832 default: { 833 const int8_t token_start_length = internals::ReadTokenStart( 834 bytes_.subspan(status_.pos), &token_start_type_, 835 &token_start_internal_value_); 836 const bool success = token_start_length >= 0; 837 switch (token_start_type_) { 838 case MajorType::UNSIGNED: // INT32. 839 // INT32 is a signed int32 (int32 makes sense for the 840 // inspector_protocol, it's not a CBOR limitation), so we check 841 // against the signed max, so that the allowable values are 842 // 0, 1, 2, ... 2^31 - 1. 843 if (!success || std::numeric_limits<int32_t>::max() < 844 token_start_internal_value_) { 845 SetError(Error::CBOR_INVALID_INT32); 846 return; 847 } 848 SetToken(CBORTokenTag::INT32, token_start_length); 849 return; 850 case MajorType::NEGATIVE: { // INT32. 851 // INT32 is a signed int32 (int32 makes sense for the 852 // inspector_protocol, it's not a CBOR limitation); in CBOR, 853 // the negative values for INT32 are represented as NEGATIVE, 854 // that is, -1 INT32 is represented as 1 << 5 | 0 (major type 1, 855 // additional info value 0). So here, we compute the INT32 value 856 // and then check it against the INT32 min. 857 int64_t actual_value = 858 -static_cast<int64_t>(token_start_internal_value_) - 1; 859 if (!success || actual_value < std::numeric_limits<int32_t>::min()) { 860 SetError(Error::CBOR_INVALID_INT32); 861 return; 862 } 863 SetToken(CBORTokenTag::INT32, token_start_length); 864 return; 865 } 866 case MajorType::STRING: { // STRING8. 867 if (!success || token_start_internal_value_ > kMaxValidLength) { 868 SetError(Error::CBOR_INVALID_STRING8); 869 return; 870 } 871 uint64_t token_byte_length = 872 token_start_internal_value_ + token_start_length; 873 if (token_byte_length > remaining_bytes) { 874 SetError(Error::CBOR_INVALID_STRING8); 875 return; 876 } 877 SetToken(CBORTokenTag::STRING8, 878 static_cast<size_t>(token_byte_length)); 879 return; 880 } 881 case MajorType::BYTE_STRING: { // STRING16. 882 // Length must be divisible by 2 since UTF16 is 2 bytes per 883 // character, hence the &1 check. 884 if (!success || token_start_internal_value_ > kMaxValidLength || 885 token_start_internal_value_ & 1) { 886 SetError(Error::CBOR_INVALID_STRING16); 887 return; 888 } 889 uint64_t token_byte_length = 890 token_start_internal_value_ + token_start_length; 891 if (token_byte_length > remaining_bytes) { 892 SetError(Error::CBOR_INVALID_STRING16); 893 return; 894 } 895 SetToken(CBORTokenTag::STRING16, 896 static_cast<size_t>(token_byte_length)); 897 return; 898 } 899 case MajorType::ARRAY: 900 case MajorType::MAP: 901 case MajorType::TAG: 902 case MajorType::SIMPLE_VALUE: 903 SetError(Error::CBOR_UNSUPPORTED_VALUE); 904 return; 905 } 906 } 907 } 908} 909 910void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) { 911 token_tag_ = token_tag; 912 token_byte_length_ = token_byte_length; 913} 914 915void CBORTokenizer::SetError(Error error) { 916 token_tag_ = CBORTokenTag::ERROR_VALUE; 917 status_.error = error; 918} 919 920// ============================================================================= 921// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages 922// ============================================================================= 923 924namespace { 925// When parsing CBOR, we limit recursion depth for objects and arrays 926// to this constant. 927static constexpr int kStackLimit = 300; 928 929// Below are three parsing routines for CBOR, which cover enough 930// to roundtrip JSON messages. 931bool ParseMap(int32_t stack_depth, 932 CBORTokenizer* tokenizer, 933 StreamingParserHandler* out); 934bool ParseArray(int32_t stack_depth, 935 CBORTokenizer* tokenizer, 936 StreamingParserHandler* out); 937bool ParseValue(int32_t stack_depth, 938 CBORTokenizer* tokenizer, 939 StreamingParserHandler* out); 940 941void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { 942 std::vector<uint16_t> value; 943 span<uint8_t> rep = tokenizer->GetString16WireRep(); 944 for (size_t ii = 0; ii < rep.size(); ii += 2) 945 value.push_back((rep[ii + 1] << 8) | rep[ii]); 946 out->HandleString16(span<uint16_t>(value.data(), value.size())); 947 tokenizer->Next(); 948} 949 950bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { 951 assert(tokenizer->TokenTag() == CBORTokenTag::STRING8); 952 out->HandleString8(tokenizer->GetString8()); 953 tokenizer->Next(); 954 return true; 955} 956 957bool ParseValue(int32_t stack_depth, 958 CBORTokenizer* tokenizer, 959 StreamingParserHandler* out) { 960 if (stack_depth > kStackLimit) { 961 out->HandleError( 962 Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos}); 963 return false; 964 } 965 // Skip past the envelope to get to what's inside. 966 if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE) 967 tokenizer->EnterEnvelope(); 968 switch (tokenizer->TokenTag()) { 969 case CBORTokenTag::ERROR_VALUE: 970 out->HandleError(tokenizer->Status()); 971 return false; 972 case CBORTokenTag::DONE: 973 out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE, 974 tokenizer->Status().pos}); 975 return false; 976 case CBORTokenTag::TRUE_VALUE: 977 out->HandleBool(true); 978 tokenizer->Next(); 979 return true; 980 case CBORTokenTag::FALSE_VALUE: 981 out->HandleBool(false); 982 tokenizer->Next(); 983 return true; 984 case CBORTokenTag::NULL_VALUE: 985 out->HandleNull(); 986 tokenizer->Next(); 987 return true; 988 case CBORTokenTag::INT32: 989 out->HandleInt32(tokenizer->GetInt32()); 990 tokenizer->Next(); 991 return true; 992 case CBORTokenTag::DOUBLE: 993 out->HandleDouble(tokenizer->GetDouble()); 994 tokenizer->Next(); 995 return true; 996 case CBORTokenTag::STRING8: 997 return ParseUTF8String(tokenizer, out); 998 case CBORTokenTag::STRING16: 999 ParseUTF16String(tokenizer, out); 1000 return true; 1001 case CBORTokenTag::BINARY: { 1002 out->HandleBinary(tokenizer->GetBinary()); 1003 tokenizer->Next(); 1004 return true; 1005 } 1006 case CBORTokenTag::MAP_START: 1007 return ParseMap(stack_depth + 1, tokenizer, out); 1008 case CBORTokenTag::ARRAY_START: 1009 return ParseArray(stack_depth + 1, tokenizer, out); 1010 default: 1011 out->HandleError( 1012 Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos}); 1013 return false; 1014 } 1015} 1016 1017// |bytes| must start with the indefinite length array byte, so basically, 1018// ParseArray may only be called after an indefinite length array has been 1019// detected. 1020bool ParseArray(int32_t stack_depth, 1021 CBORTokenizer* tokenizer, 1022 StreamingParserHandler* out) { 1023 assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START); 1024 tokenizer->Next(); 1025 out->HandleArrayBegin(); 1026 while (tokenizer->TokenTag() != CBORTokenTag::STOP) { 1027 if (tokenizer->TokenTag() == CBORTokenTag::DONE) { 1028 out->HandleError( 1029 Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos}); 1030 return false; 1031 } 1032 if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { 1033 out->HandleError(tokenizer->Status()); 1034 return false; 1035 } 1036 // Parse value. 1037 if (!ParseValue(stack_depth, tokenizer, out)) 1038 return false; 1039 } 1040 out->HandleArrayEnd(); 1041 tokenizer->Next(); 1042 return true; 1043} 1044 1045// |bytes| must start with the indefinite length array byte, so basically, 1046// ParseArray may only be called after an indefinite length array has been 1047// detected. 1048bool ParseMap(int32_t stack_depth, 1049 CBORTokenizer* tokenizer, 1050 StreamingParserHandler* out) { 1051 assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START); 1052 out->HandleMapBegin(); 1053 tokenizer->Next(); 1054 while (tokenizer->TokenTag() != CBORTokenTag::STOP) { 1055 if (tokenizer->TokenTag() == CBORTokenTag::DONE) { 1056 out->HandleError( 1057 Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos}); 1058 return false; 1059 } 1060 if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { 1061 out->HandleError(tokenizer->Status()); 1062 return false; 1063 } 1064 // Parse key. 1065 if (tokenizer->TokenTag() == CBORTokenTag::STRING8) { 1066 if (!ParseUTF8String(tokenizer, out)) 1067 return false; 1068 } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) { 1069 ParseUTF16String(tokenizer, out); 1070 } else { 1071 out->HandleError( 1072 Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos}); 1073 return false; 1074 } 1075 // Parse value. 1076 if (!ParseValue(stack_depth, tokenizer, out)) 1077 return false; 1078 } 1079 out->HandleMapEnd(); 1080 tokenizer->Next(); 1081 return true; 1082} 1083} // namespace 1084 1085void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out) { 1086 if (bytes.empty()) { 1087 out->HandleError(Status{Error::CBOR_NO_INPUT, 0}); 1088 return; 1089 } 1090 if (bytes[0] != kInitialByteForEnvelope) { 1091 out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0}); 1092 return; 1093 } 1094 CBORTokenizer tokenizer(bytes); 1095 if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { 1096 out->HandleError(tokenizer.Status()); 1097 return; 1098 } 1099 // We checked for the envelope start byte above, so the tokenizer 1100 // must agree here, since it's not an error. 1101 assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE); 1102 tokenizer.EnterEnvelope(); 1103 if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) { 1104 out->HandleError( 1105 Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos}); 1106 return; 1107 } 1108 if (!ParseMap(/*stack_depth=*/1, &tokenizer, out)) 1109 return; 1110 if (tokenizer.TokenTag() == CBORTokenTag::DONE) 1111 return; 1112 if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { 1113 out->HandleError(tokenizer.Status()); 1114 return; 1115 } 1116 out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos}); 1117} 1118 1119// ============================================================================= 1120// cbor::AppendString8EntryToMap - for limited in-place editing of messages 1121// ============================================================================= 1122 1123template <typename C> 1124Status AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key, 1125 span<uint8_t> string8_value, 1126 C* cbor) { 1127 // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since 1128 // it could be a char (signed!). Instead, use bytes. 1129 span<uint8_t> bytes(reinterpret_cast<const uint8_t*>(cbor->data()), 1130 cbor->size()); 1131 CBORTokenizer tokenizer(bytes); 1132 if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) 1133 return tokenizer.Status(); 1134 if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE) 1135 return Status(Error::CBOR_INVALID_ENVELOPE, 0); 1136 size_t envelope_size = tokenizer.GetEnvelopeContents().size(); 1137 size_t old_size = cbor->size(); 1138 if (old_size != envelope_size + kEncodedEnvelopeHeaderSize) 1139 return Status(Error::CBOR_INVALID_ENVELOPE, 0); 1140 if (envelope_size == 0 || 1141 (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart())) 1142 return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize); 1143 if (bytes[bytes.size() - 1] != EncodeStop()) 1144 return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1); 1145 cbor->pop_back(); 1146 EncodeString8(string8_key, cbor); 1147 EncodeString8(string8_value, cbor); 1148 cbor->push_back(EncodeStop()); 1149 size_t new_envelope_size = envelope_size + (cbor->size() - old_size); 1150 if (new_envelope_size > std::numeric_limits<uint32_t>::max()) 1151 return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0); 1152 size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t); 1153 uint8_t* out = reinterpret_cast<uint8_t*>(&cbor->at(size_pos)); 1154 *(out++) = (new_envelope_size >> 24) & 0xff; 1155 *(out++) = (new_envelope_size >> 16) & 0xff; 1156 *(out++) = (new_envelope_size >> 8) & 0xff; 1157 *(out) = new_envelope_size & 0xff; 1158 return Status(); 1159} 1160Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 1161 span<uint8_t> string8_value, 1162 std::vector<uint8_t>* cbor) { 1163 return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); 1164} 1165Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 1166 span<uint8_t> string8_value, 1167 std::string* cbor) { 1168 return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); 1169} 1170} // namespace cbor 1171 1172namespace json { 1173 1174// ============================================================================= 1175// json::NewJSONEncoder - for encoding streaming parser events as JSON 1176// ============================================================================= 1177 1178namespace { 1179// Prints |value| to |out| with 4 hex digits, most significant chunk first. 1180template <typename C> 1181void PrintHex(uint16_t value, C* out) { 1182 for (int ii = 3; ii >= 0; --ii) { 1183 int four_bits = 0xf & (value >> (4 * ii)); 1184 out->push_back(four_bits + ((four_bits <= 9) ? '0' : ('a' - 10))); 1185 } 1186} 1187 1188// In the writer below, we maintain a stack of State instances. 1189// It is just enough to emit the appropriate delimiters and brackets 1190// in JSON. 1191enum class Container { 1192 // Used for the top-level, initial state. 1193 NONE, 1194 // Inside a JSON object. 1195 MAP, 1196 // Inside a JSON array. 1197 ARRAY 1198}; 1199class State { 1200 public: 1201 explicit State(Container container) : container_(container) {} 1202 void StartElement(std::vector<uint8_t>* out) { StartElementTmpl(out); } 1203 void StartElement(std::string* out) { StartElementTmpl(out); } 1204 Container container() const { return container_; } 1205 1206 private: 1207 template <typename C> 1208 void StartElementTmpl(C* out) { 1209 assert(container_ != Container::NONE || size_ == 0); 1210 if (size_ != 0) { 1211 char delim = (!(size_ & 1) || container_ == Container::ARRAY) ? ',' : ':'; 1212 out->push_back(delim); 1213 } 1214 ++size_; 1215 } 1216 1217 Container container_ = Container::NONE; 1218 int size_ = 0; 1219}; 1220 1221constexpr char kBase64Table[] = 1222 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 1223 "abcdefghijklmnopqrstuvwxyz0123456789+/"; 1224 1225template <typename C> 1226void Base64Encode(const span<uint8_t>& in, C* out) { 1227 // The following three cases are based on the tables in the example 1228 // section in https://en.wikipedia.org/wiki/Base64. We process three 1229 // input bytes at a time, emitting 4 output bytes at a time. 1230 size_t ii = 0; 1231 1232 // While possible, process three input bytes. 1233 for (; ii + 3 <= in.size(); ii += 3) { 1234 uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2]; 1235 out->push_back(kBase64Table[(twentyfour_bits >> 18)]); 1236 out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); 1237 out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); 1238 out->push_back(kBase64Table[twentyfour_bits & 0x3f]); 1239 } 1240 if (ii + 2 <= in.size()) { // Process two input bytes. 1241 uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8); 1242 out->push_back(kBase64Table[(twentyfour_bits >> 18)]); 1243 out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); 1244 out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); 1245 out->push_back('='); // Emit padding. 1246 return; 1247 } 1248 if (ii + 1 <= in.size()) { // Process a single input byte. 1249 uint32_t twentyfour_bits = (in[ii] << 16); 1250 out->push_back(kBase64Table[(twentyfour_bits >> 18)]); 1251 out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); 1252 out->push_back('='); // Emit padding. 1253 out->push_back('='); // Emit padding. 1254 } 1255} 1256 1257// Implements a handler for JSON parser events to emit a JSON string. 1258template <typename C> 1259class JSONEncoder : public StreamingParserHandler { 1260 public: 1261 JSONEncoder(const Platform* platform, C* out, Status* status) 1262 : platform_(platform), out_(out), status_(status) { 1263 *status_ = Status(); 1264 state_.emplace(Container::NONE); 1265 } 1266 1267 void HandleMapBegin() override { 1268 if (!status_->ok()) 1269 return; 1270 assert(!state_.empty()); 1271 state_.top().StartElement(out_); 1272 state_.emplace(Container::MAP); 1273 Emit('{'); 1274 } 1275 1276 void HandleMapEnd() override { 1277 if (!status_->ok()) 1278 return; 1279 assert(state_.size() >= 2 && state_.top().container() == Container::MAP); 1280 state_.pop(); 1281 Emit('}'); 1282 } 1283 1284 void HandleArrayBegin() override { 1285 if (!status_->ok()) 1286 return; 1287 state_.top().StartElement(out_); 1288 state_.emplace(Container::ARRAY); 1289 Emit('['); 1290 } 1291 1292 void HandleArrayEnd() override { 1293 if (!status_->ok()) 1294 return; 1295 assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY); 1296 state_.pop(); 1297 Emit(']'); 1298 } 1299 1300 void HandleString16(span<uint16_t> chars) override { 1301 if (!status_->ok()) 1302 return; 1303 state_.top().StartElement(out_); 1304 Emit('"'); 1305 for (const uint16_t ch : chars) { 1306 if (ch == '"') { 1307 Emit("\\\""); 1308 } else if (ch == '\\') { 1309 Emit("\\\\"); 1310 } else if (ch == '\b') { 1311 Emit("\\b"); 1312 } else if (ch == '\f') { 1313 Emit("\\f"); 1314 } else if (ch == '\n') { 1315 Emit("\\n"); 1316 } else if (ch == '\r') { 1317 Emit("\\r"); 1318 } else if (ch == '\t') { 1319 Emit("\\t"); 1320 } else if (ch >= 32 && ch <= 126) { 1321 Emit(ch); 1322 } else { 1323 Emit("\\u"); 1324 PrintHex(ch, out_); 1325 } 1326 } 1327 Emit('"'); 1328 } 1329 1330 void HandleString8(span<uint8_t> chars) override { 1331 if (!status_->ok()) 1332 return; 1333 state_.top().StartElement(out_); 1334 Emit('"'); 1335 for (size_t ii = 0; ii < chars.size(); ++ii) { 1336 uint8_t c = chars[ii]; 1337 if (c == '"') { 1338 Emit("\\\""); 1339 } else if (c == '\\') { 1340 Emit("\\\\"); 1341 } else if (c == '\b') { 1342 Emit("\\b"); 1343 } else if (c == '\f') { 1344 Emit("\\f"); 1345 } else if (c == '\n') { 1346 Emit("\\n"); 1347 } else if (c == '\r') { 1348 Emit("\\r"); 1349 } else if (c == '\t') { 1350 Emit("\\t"); 1351 } else if (c >= 32 && c <= 126) { 1352 Emit(c); 1353 } else if (c < 32) { 1354 Emit("\\u"); 1355 PrintHex(static_cast<uint16_t>(c), out_); 1356 } else { 1357 // Inspect the leading byte to figure out how long the utf8 1358 // byte sequence is; while doing this initialize |codepoint| 1359 // with the first few bits. 1360 // See table in: https://en.wikipedia.org/wiki/UTF-8 1361 // byte one is 110x xxxx -> 2 byte utf8 sequence 1362 // byte one is 1110 xxxx -> 3 byte utf8 sequence 1363 // byte one is 1111 0xxx -> 4 byte utf8 sequence 1364 uint32_t codepoint; 1365 int num_bytes_left; 1366 if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence 1367 num_bytes_left = 1; 1368 codepoint = c & 0x1f; 1369 } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence 1370 num_bytes_left = 2; 1371 codepoint = c & 0x0f; 1372 } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence 1373 codepoint = c & 0x07; 1374 num_bytes_left = 3; 1375 } else { 1376 continue; // invalid leading byte 1377 } 1378 1379 // If we have enough bytes in our input, decode the remaining ones 1380 // belonging to this Unicode character into |codepoint|. 1381 if (ii + num_bytes_left > chars.size()) 1382 continue; 1383 while (num_bytes_left > 0) { 1384 c = chars[++ii]; 1385 --num_bytes_left; 1386 // Check the next byte is a continuation byte, that is 10xx xxxx. 1387 if ((c & 0xc0) != 0x80) 1388 continue; 1389 codepoint = (codepoint << 6) | (c & 0x3f); 1390 } 1391 1392 // Disallow overlong encodings for ascii characters, as these 1393 // would include " and other characters significant to JSON 1394 // string termination / control. 1395 if (codepoint < 0x7f) 1396 continue; 1397 // Invalid in UTF8, and can't be represented in UTF16 anyway. 1398 if (codepoint > 0x10ffff) 1399 continue; 1400 1401 // So, now we transcode to UTF16, 1402 // using the math described at https://en.wikipedia.org/wiki/UTF-16, 1403 // for either one or two 16 bit characters. 1404 if (codepoint < 0xffff) { 1405 Emit("\\u"); 1406 PrintHex(static_cast<uint16_t>(codepoint), out_); 1407 continue; 1408 } 1409 codepoint -= 0x10000; 1410 // high surrogate 1411 Emit("\\u"); 1412 PrintHex(static_cast<uint16_t>((codepoint >> 10) + 0xd800), out_); 1413 // low surrogate 1414 Emit("\\u"); 1415 PrintHex(static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00), out_); 1416 } 1417 } 1418 Emit('"'); 1419 } 1420 1421 void HandleBinary(span<uint8_t> bytes) override { 1422 if (!status_->ok()) 1423 return; 1424 state_.top().StartElement(out_); 1425 Emit('"'); 1426 Base64Encode(bytes, out_); 1427 Emit('"'); 1428 } 1429 1430 void HandleDouble(double value) override { 1431 if (!status_->ok()) 1432 return; 1433 state_.top().StartElement(out_); 1434 // JSON cannot represent NaN or Infinity. So, for compatibility, 1435 // we behave like the JSON object in web browsers: emit 'null'. 1436 if (!std::isfinite(value)) { 1437 Emit("null"); 1438 return; 1439 } 1440 std::unique_ptr<char[]> str_value = platform_->DToStr(value); 1441 1442 // DToStr may fail to emit a 0 before the decimal dot. E.g. this is 1443 // the case in base::NumberToString in Chromium (which is based on 1444 // dmg_fp). So, much like 1445 // https://cs.chromium.org/chromium/src/base/json/json_writer.cc 1446 // we probe for this and emit the leading 0 anyway if necessary. 1447 const char* chars = str_value.get(); 1448 if (chars[0] == '.') { 1449 Emit('0'); 1450 } else if (chars[0] == '-' && chars[1] == '.') { 1451 Emit("-0"); 1452 ++chars; 1453 } 1454 Emit(chars); 1455 } 1456 1457 void HandleInt32(int32_t value) override { 1458 if (!status_->ok()) 1459 return; 1460 state_.top().StartElement(out_); 1461 Emit(std::to_string(value)); 1462 } 1463 1464 void HandleBool(bool value) override { 1465 if (!status_->ok()) 1466 return; 1467 state_.top().StartElement(out_); 1468 Emit(value ? "true" : "false"); 1469 } 1470 1471 void HandleNull() override { 1472 if (!status_->ok()) 1473 return; 1474 state_.top().StartElement(out_); 1475 Emit("null"); 1476 } 1477 1478 void HandleError(Status error) override { 1479 assert(!error.ok()); 1480 *status_ = error; 1481 out_->clear(); 1482 } 1483 1484 private: 1485 void Emit(char c) { out_->push_back(c); } 1486 void Emit(const char* str) { 1487 out_->insert(out_->end(), str, str + strlen(str)); 1488 } 1489 void Emit(const std::string& str) { 1490 out_->insert(out_->end(), str.begin(), str.end()); 1491 } 1492 1493 const Platform* platform_; 1494 C* out_; 1495 Status* status_; 1496 std::stack<State> state_; 1497}; 1498} // namespace 1499 1500std::unique_ptr<StreamingParserHandler> NewJSONEncoder( 1501 const Platform* platform, 1502 std::vector<uint8_t>* out, 1503 Status* status) { 1504 return std::unique_ptr<StreamingParserHandler>( 1505 new JSONEncoder<std::vector<uint8_t>>(platform, out, status)); 1506} 1507std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform, 1508 std::string* out, 1509 Status* status) { 1510 return std::unique_ptr<StreamingParserHandler>( 1511 new JSONEncoder<std::string>(platform, out, status)); 1512} 1513 1514// ============================================================================= 1515// json::ParseJSON - for receiving streaming parser events for JSON. 1516// ============================================================================= 1517 1518namespace { 1519const int kStackLimit = 300; 1520 1521enum Token { 1522 ObjectBegin, 1523 ObjectEnd, 1524 ArrayBegin, 1525 ArrayEnd, 1526 StringLiteral, 1527 Number, 1528 BoolTrue, 1529 BoolFalse, 1530 NullToken, 1531 ListSeparator, 1532 ObjectPairSeparator, 1533 InvalidToken, 1534 NoInput 1535}; 1536 1537const char* const kNullString = "null"; 1538const char* const kTrueString = "true"; 1539const char* const kFalseString = "false"; 1540 1541template <typename Char> 1542class JsonParser { 1543 public: 1544 JsonParser(const Platform* platform, StreamingParserHandler* handler) 1545 : platform_(platform), handler_(handler) {} 1546 1547 void Parse(const Char* start, size_t length) { 1548 start_pos_ = start; 1549 const Char* end = start + length; 1550 const Char* tokenEnd = nullptr; 1551 ParseValue(start, end, &tokenEnd, 0); 1552 if (error_) 1553 return; 1554 if (tokenEnd != end) { 1555 HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd); 1556 } 1557 } 1558 1559 private: 1560 bool CharsToDouble(const uint16_t* chars, size_t length, double* result) { 1561 std::string buffer; 1562 buffer.reserve(length + 1); 1563 for (size_t ii = 0; ii < length; ++ii) { 1564 bool is_ascii = !(chars[ii] & ~0x7F); 1565 if (!is_ascii) 1566 return false; 1567 buffer.push_back(static_cast<char>(chars[ii])); 1568 } 1569 return platform_->StrToD(buffer.c_str(), result); 1570 } 1571 1572 bool CharsToDouble(const uint8_t* chars, size_t length, double* result) { 1573 std::string buffer(reinterpret_cast<const char*>(chars), length); 1574 return platform_->StrToD(buffer.c_str(), result); 1575 } 1576 1577 static bool ParseConstToken(const Char* start, 1578 const Char* end, 1579 const Char** token_end, 1580 const char* token) { 1581 // |token| is \0 terminated, it's one of the constants at top of the file. 1582 while (start < end && *token != '\0' && *start++ == *token++) { 1583 } 1584 if (*token != '\0') 1585 return false; 1586 *token_end = start; 1587 return true; 1588 } 1589 1590 static bool ReadInt(const Char* start, 1591 const Char* end, 1592 const Char** token_end, 1593 bool allow_leading_zeros) { 1594 if (start == end) 1595 return false; 1596 bool has_leading_zero = '0' == *start; 1597 int length = 0; 1598 while (start < end && '0' <= *start && *start <= '9') { 1599 ++start; 1600 ++length; 1601 } 1602 if (!length) 1603 return false; 1604 if (!allow_leading_zeros && length > 1 && has_leading_zero) 1605 return false; 1606 *token_end = start; 1607 return true; 1608 } 1609 1610 static bool ParseNumberToken(const Char* start, 1611 const Char* end, 1612 const Char** token_end) { 1613 // We just grab the number here. We validate the size in DecodeNumber. 1614 // According to RFC4627, a valid number is: [minus] int [frac] [exp] 1615 if (start == end) 1616 return false; 1617 Char c = *start; 1618 if ('-' == c) 1619 ++start; 1620 1621 if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false)) 1622 return false; 1623 if (start == end) { 1624 *token_end = start; 1625 return true; 1626 } 1627 1628 // Optional fraction part 1629 c = *start; 1630 if ('.' == c) { 1631 ++start; 1632 if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) 1633 return false; 1634 if (start == end) { 1635 *token_end = start; 1636 return true; 1637 } 1638 c = *start; 1639 } 1640 1641 // Optional exponent part 1642 if ('e' == c || 'E' == c) { 1643 ++start; 1644 if (start == end) 1645 return false; 1646 c = *start; 1647 if ('-' == c || '+' == c) { 1648 ++start; 1649 if (start == end) 1650 return false; 1651 } 1652 if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) 1653 return false; 1654 } 1655 1656 *token_end = start; 1657 return true; 1658 } 1659 1660 static bool ReadHexDigits(const Char* start, 1661 const Char* end, 1662 const Char** token_end, 1663 int digits) { 1664 if (end - start < digits) 1665 return false; 1666 for (int i = 0; i < digits; ++i) { 1667 Char c = *start++; 1668 if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || 1669 ('A' <= c && c <= 'F'))) 1670 return false; 1671 } 1672 *token_end = start; 1673 return true; 1674 } 1675 1676 static bool ParseStringToken(const Char* start, 1677 const Char* end, 1678 const Char** token_end) { 1679 while (start < end) { 1680 Char c = *start++; 1681 if ('\\' == c) { 1682 if (start == end) 1683 return false; 1684 c = *start++; 1685 // Make sure the escaped char is valid. 1686 switch (c) { 1687 case 'x': 1688 if (!ReadHexDigits(start, end, &start, 2)) 1689 return false; 1690 break; 1691 case 'u': 1692 if (!ReadHexDigits(start, end, &start, 4)) 1693 return false; 1694 break; 1695 case '\\': 1696 case '/': 1697 case 'b': 1698 case 'f': 1699 case 'n': 1700 case 'r': 1701 case 't': 1702 case 'v': 1703 case '"': 1704 break; 1705 default: 1706 return false; 1707 } 1708 } else if ('"' == c) { 1709 *token_end = start; 1710 return true; 1711 } 1712 } 1713 return false; 1714 } 1715 1716 static bool SkipComment(const Char* start, 1717 const Char* end, 1718 const Char** comment_end) { 1719 if (start == end) 1720 return false; 1721 1722 if (*start != '/' || start + 1 >= end) 1723 return false; 1724 ++start; 1725 1726 if (*start == '/') { 1727 // Single line comment, read to newline. 1728 for (++start; start < end; ++start) { 1729 if (*start == '\n' || *start == '\r') { 1730 *comment_end = start + 1; 1731 return true; 1732 } 1733 } 1734 *comment_end = end; 1735 // Comment reaches end-of-input, which is fine. 1736 return true; 1737 } 1738 1739 if (*start == '*') { 1740 Char previous = '\0'; 1741 // Block comment, read until end marker. 1742 for (++start; start < end; previous = *start++) { 1743 if (previous == '*' && *start == '/') { 1744 *comment_end = start + 1; 1745 return true; 1746 } 1747 } 1748 // Block comment must close before end-of-input. 1749 return false; 1750 } 1751 1752 return false; 1753 } 1754 1755 static bool IsSpaceOrNewLine(Char c) { 1756 // \v = vertial tab; \f = form feed page break. 1757 return c == ' ' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || 1758 c == '\t'; 1759 } 1760 1761 static void SkipWhitespaceAndComments(const Char* start, 1762 const Char* end, 1763 const Char** whitespace_end) { 1764 while (start < end) { 1765 if (IsSpaceOrNewLine(*start)) { 1766 ++start; 1767 } else if (*start == '/') { 1768 const Char* comment_end = nullptr; 1769 if (!SkipComment(start, end, &comment_end)) 1770 break; 1771 start = comment_end; 1772 } else { 1773 break; 1774 } 1775 } 1776 *whitespace_end = start; 1777 } 1778 1779 static Token ParseToken(const Char* start, 1780 const Char* end, 1781 const Char** tokenStart, 1782 const Char** token_end) { 1783 SkipWhitespaceAndComments(start, end, tokenStart); 1784 start = *tokenStart; 1785 1786 if (start == end) 1787 return NoInput; 1788 1789 switch (*start) { 1790 case 'n': 1791 if (ParseConstToken(start, end, token_end, kNullString)) 1792 return NullToken; 1793 break; 1794 case 't': 1795 if (ParseConstToken(start, end, token_end, kTrueString)) 1796 return BoolTrue; 1797 break; 1798 case 'f': 1799 if (ParseConstToken(start, end, token_end, kFalseString)) 1800 return BoolFalse; 1801 break; 1802 case '[': 1803 *token_end = start + 1; 1804 return ArrayBegin; 1805 case ']': 1806 *token_end = start + 1; 1807 return ArrayEnd; 1808 case ',': 1809 *token_end = start + 1; 1810 return ListSeparator; 1811 case '{': 1812 *token_end = start + 1; 1813 return ObjectBegin; 1814 case '}': 1815 *token_end = start + 1; 1816 return ObjectEnd; 1817 case ':': 1818 *token_end = start + 1; 1819 return ObjectPairSeparator; 1820 case '0': 1821 case '1': 1822 case '2': 1823 case '3': 1824 case '4': 1825 case '5': 1826 case '6': 1827 case '7': 1828 case '8': 1829 case '9': 1830 case '-': 1831 if (ParseNumberToken(start, end, token_end)) 1832 return Number; 1833 break; 1834 case '"': 1835 if (ParseStringToken(start + 1, end, token_end)) 1836 return StringLiteral; 1837 break; 1838 } 1839 return InvalidToken; 1840 } 1841 1842 static int HexToInt(Char c) { 1843 if ('0' <= c && c <= '9') 1844 return c - '0'; 1845 if ('A' <= c && c <= 'F') 1846 return c - 'A' + 10; 1847 if ('a' <= c && c <= 'f') 1848 return c - 'a' + 10; 1849 assert(false); // Unreachable. 1850 return 0; 1851 } 1852 1853 static bool DecodeString(const Char* start, 1854 const Char* end, 1855 std::vector<uint16_t>* output) { 1856 if (start == end) 1857 return true; 1858 if (start > end) 1859 return false; 1860 output->reserve(end - start); 1861 while (start < end) { 1862 uint16_t c = *start++; 1863 // If the |Char| we're dealing with is really a byte, then 1864 // we have utf8 here, and we need to check for multibyte characters 1865 // and transcode them to utf16 (either one or two utf16 chars). 1866 if (sizeof(Char) == sizeof(uint8_t) && c >= 0x7f) { 1867 // Inspect the leading byte to figure out how long the utf8 1868 // byte sequence is; while doing this initialize |codepoint| 1869 // with the first few bits. 1870 // See table in: https://en.wikipedia.org/wiki/UTF-8 1871 // byte one is 110x xxxx -> 2 byte utf8 sequence 1872 // byte one is 1110 xxxx -> 3 byte utf8 sequence 1873 // byte one is 1111 0xxx -> 4 byte utf8 sequence 1874 uint32_t codepoint; 1875 int num_bytes_left; 1876 if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence 1877 num_bytes_left = 1; 1878 codepoint = c & 0x1f; 1879 } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence 1880 num_bytes_left = 2; 1881 codepoint = c & 0x0f; 1882 } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence 1883 codepoint = c & 0x07; 1884 num_bytes_left = 3; 1885 } else { 1886 return false; // invalid leading byte 1887 } 1888 1889 // If we have enough bytes in our inpput, decode the remaining ones 1890 // belonging to this Unicode character into |codepoint|. 1891 if (start + num_bytes_left > end) 1892 return false; 1893 while (num_bytes_left > 0) { 1894 c = *start++; 1895 --num_bytes_left; 1896 // Check the next byte is a continuation byte, that is 10xx xxxx. 1897 if ((c & 0xc0) != 0x80) 1898 return false; 1899 codepoint = (codepoint << 6) | (c & 0x3f); 1900 } 1901 1902 // Disallow overlong encodings for ascii characters, as these 1903 // would include " and other characters significant to JSON 1904 // string termination / control. 1905 if (codepoint < 0x7f) 1906 return false; 1907 // Invalid in UTF8, and can't be represented in UTF16 anyway. 1908 if (codepoint > 0x10ffff) 1909 return false; 1910 1911 // So, now we transcode to UTF16, 1912 // using the math described at https://en.wikipedia.org/wiki/UTF-16, 1913 // for either one or two 16 bit characters. 1914 if (codepoint < 0xffff) { 1915 output->push_back(codepoint); 1916 continue; 1917 } 1918 codepoint -= 0x10000; 1919 output->push_back((codepoint >> 10) + 0xd800); // high surrogate 1920 output->push_back((codepoint & 0x3ff) + 0xdc00); // low surrogate 1921 continue; 1922 } 1923 if ('\\' != c) { 1924 output->push_back(c); 1925 continue; 1926 } 1927 if (start == end) 1928 return false; 1929 c = *start++; 1930 1931 if (c == 'x') { 1932 // \x is not supported. 1933 return false; 1934 } 1935 1936 switch (c) { 1937 case '"': 1938 case '/': 1939 case '\\': 1940 break; 1941 case 'b': 1942 c = '\b'; 1943 break; 1944 case 'f': 1945 c = '\f'; 1946 break; 1947 case 'n': 1948 c = '\n'; 1949 break; 1950 case 'r': 1951 c = '\r'; 1952 break; 1953 case 't': 1954 c = '\t'; 1955 break; 1956 case 'v': 1957 c = '\v'; 1958 break; 1959 case 'u': 1960 c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) + 1961 (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3)); 1962 start += 4; 1963 break; 1964 default: 1965 return false; 1966 } 1967 output->push_back(c); 1968 } 1969 return true; 1970 } 1971 1972 void ParseValue(const Char* start, 1973 const Char* end, 1974 const Char** value_token_end, 1975 int depth) { 1976 if (depth > kStackLimit) { 1977 HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start); 1978 return; 1979 } 1980 const Char* token_start = nullptr; 1981 const Char* token_end = nullptr; 1982 Token token = ParseToken(start, end, &token_start, &token_end); 1983 switch (token) { 1984 case NoInput: 1985 HandleError(Error::JSON_PARSER_NO_INPUT, token_start); 1986 return; 1987 case InvalidToken: 1988 HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start); 1989 return; 1990 case NullToken: 1991 handler_->HandleNull(); 1992 break; 1993 case BoolTrue: 1994 handler_->HandleBool(true); 1995 break; 1996 case BoolFalse: 1997 handler_->HandleBool(false); 1998 break; 1999 case Number: { 2000 double value; 2001 if (!CharsToDouble(token_start, token_end - token_start, &value)) { 2002 HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start); 2003 return; 2004 } 2005 if (value >= std::numeric_limits<int32_t>::min() && 2006 value <= std::numeric_limits<int32_t>::max() && 2007 static_cast<int32_t>(value) == value) 2008 handler_->HandleInt32(static_cast<int32_t>(value)); 2009 else 2010 handler_->HandleDouble(value); 2011 break; 2012 } 2013 case StringLiteral: { 2014 std::vector<uint16_t> value; 2015 bool ok = DecodeString(token_start + 1, token_end - 1, &value); 2016 if (!ok) { 2017 HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); 2018 return; 2019 } 2020 handler_->HandleString16(span<uint16_t>(value.data(), value.size())); 2021 break; 2022 } 2023 case ArrayBegin: { 2024 handler_->HandleArrayBegin(); 2025 start = token_end; 2026 token = ParseToken(start, end, &token_start, &token_end); 2027 while (token != ArrayEnd) { 2028 ParseValue(start, end, &token_end, depth + 1); 2029 if (error_) 2030 return; 2031 2032 // After a list value, we expect a comma or the end of the list. 2033 start = token_end; 2034 token = ParseToken(start, end, &token_start, &token_end); 2035 if (token == ListSeparator) { 2036 start = token_end; 2037 token = ParseToken(start, end, &token_start, &token_end); 2038 if (token == ArrayEnd) { 2039 HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start); 2040 return; 2041 } 2042 } else if (token != ArrayEnd) { 2043 // Unexpected value after list value. Bail out. 2044 HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED, 2045 token_start); 2046 return; 2047 } 2048 } 2049 handler_->HandleArrayEnd(); 2050 break; 2051 } 2052 case ObjectBegin: { 2053 handler_->HandleMapBegin(); 2054 start = token_end; 2055 token = ParseToken(start, end, &token_start, &token_end); 2056 while (token != ObjectEnd) { 2057 if (token != StringLiteral) { 2058 HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED, 2059 token_start); 2060 return; 2061 } 2062 std::vector<uint16_t> key; 2063 if (!DecodeString(token_start + 1, token_end - 1, &key)) { 2064 HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); 2065 return; 2066 } 2067 handler_->HandleString16(span<uint16_t>(key.data(), key.size())); 2068 start = token_end; 2069 2070 token = ParseToken(start, end, &token_start, &token_end); 2071 if (token != ObjectPairSeparator) { 2072 HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start); 2073 return; 2074 } 2075 start = token_end; 2076 2077 ParseValue(start, end, &token_end, depth + 1); 2078 if (error_) 2079 return; 2080 start = token_end; 2081 2082 // After a key/value pair, we expect a comma or the end of the 2083 // object. 2084 token = ParseToken(start, end, &token_start, &token_end); 2085 if (token == ListSeparator) { 2086 start = token_end; 2087 token = ParseToken(start, end, &token_start, &token_end); 2088 if (token == ObjectEnd) { 2089 HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start); 2090 return; 2091 } 2092 } else if (token != ObjectEnd) { 2093 // Unexpected value after last object value. Bail out. 2094 HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED, 2095 token_start); 2096 return; 2097 } 2098 } 2099 handler_->HandleMapEnd(); 2100 break; 2101 } 2102 2103 default: 2104 // We got a token that's not a value. 2105 HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start); 2106 return; 2107 } 2108 2109 SkipWhitespaceAndComments(token_end, end, value_token_end); 2110 } 2111 2112 void HandleError(Error error, const Char* pos) { 2113 assert(error != Error::OK); 2114 if (!error_) { 2115 handler_->HandleError( 2116 Status{error, static_cast<size_t>(pos - start_pos_)}); 2117 error_ = true; 2118 } 2119 } 2120 2121 const Char* start_pos_ = nullptr; 2122 bool error_ = false; 2123 const Platform* platform_; 2124 StreamingParserHandler* handler_; 2125}; 2126} // namespace 2127 2128void ParseJSON(const Platform& platform, 2129 span<uint8_t> chars, 2130 StreamingParserHandler* handler) { 2131 JsonParser<uint8_t> parser(&platform, handler); 2132 parser.Parse(chars.data(), chars.size()); 2133} 2134 2135void ParseJSON(const Platform& platform, 2136 span<uint16_t> chars, 2137 StreamingParserHandler* handler) { 2138 JsonParser<uint16_t> parser(&platform, handler); 2139 parser.Parse(chars.data(), chars.size()); 2140} 2141 2142// ============================================================================= 2143// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding 2144// ============================================================================= 2145template <typename C> 2146Status ConvertCBORToJSONTmpl(const Platform& platform, 2147 span<uint8_t> cbor, 2148 C* json) { 2149 Status status; 2150 std::unique_ptr<StreamingParserHandler> json_writer = 2151 NewJSONEncoder(&platform, json, &status); 2152 cbor::ParseCBOR(cbor, json_writer.get()); 2153 return status; 2154} 2155 2156Status ConvertCBORToJSON(const Platform& platform, 2157 span<uint8_t> cbor, 2158 std::vector<uint8_t>* json) { 2159 return ConvertCBORToJSONTmpl(platform, cbor, json); 2160} 2161Status ConvertCBORToJSON(const Platform& platform, 2162 span<uint8_t> cbor, 2163 std::string* json) { 2164 return ConvertCBORToJSONTmpl(platform, cbor, json); 2165} 2166 2167template <typename T, typename C> 2168Status ConvertJSONToCBORTmpl(const Platform& platform, span<T> json, C* cbor) { 2169 Status status; 2170 std::unique_ptr<StreamingParserHandler> encoder = 2171 cbor::NewCBOREncoder(cbor, &status); 2172 ParseJSON(platform, json, encoder.get()); 2173 return status; 2174} 2175Status ConvertJSONToCBOR(const Platform& platform, 2176 span<uint8_t> json, 2177 std::string* cbor) { 2178 return ConvertJSONToCBORTmpl(platform, json, cbor); 2179} 2180Status ConvertJSONToCBOR(const Platform& platform, 2181 span<uint16_t> json, 2182 std::string* cbor) { 2183 return ConvertJSONToCBORTmpl(platform, json, cbor); 2184} 2185Status ConvertJSONToCBOR(const Platform& platform, 2186 span<uint8_t> json, 2187 std::vector<uint8_t>* cbor) { 2188 return ConvertJSONToCBORTmpl(platform, json, cbor); 2189} 2190Status ConvertJSONToCBOR(const Platform& platform, 2191 span<uint16_t> json, 2192 std::vector<uint8_t>* cbor) { 2193 return ConvertJSONToCBORTmpl(platform, json, cbor); 2194} 2195} // namespace json 2196 2197{% for namespace in config.protocol.namespace %} 2198} // namespace {{namespace}} 2199{% endfor %} 2200