1{# This template is generated by gen_cbor_templates.py. #} 2// Generated by lib/encoding_h.template. 3 4// Copyright 2019 The Chromium Authors. All rights reserved. 5// Use of this source code is governed by a BSD-style license that can be 6// found in the LICENSE file. 7 8#ifndef {{"_".join(config.protocol.namespace)}}_encoding_h 9#define {{"_".join(config.protocol.namespace)}}_encoding_h 10 11#include <cstddef> 12#include <cstdint> 13#include <cstring> 14#include <limits> 15#include <memory> 16#include <string> 17#include <vector> 18 19{% for namespace in config.protocol.namespace %} 20namespace {{namespace}} { 21{% endfor %} 22 23// ===== encoding/encoding.h ===== 24 25 26// ============================================================================= 27// span - sequence of bytes 28// ============================================================================= 29 30// This template is similar to std::span, which will be included in C++20. 31template <typename T> 32class span { 33 public: 34 using index_type = size_t; 35 36 span() : data_(nullptr), size_(0) {} 37 span(const T* data, index_type size) : data_(data), size_(size) {} 38 39 const T* data() const { return data_; } 40 41 const T* begin() const { return data_; } 42 const T* end() const { return data_ + size_; } 43 44 const T& operator[](index_type idx) const { return data_[idx]; } 45 46 span<T> subspan(index_type offset, index_type count) const { 47 return span(data_ + offset, count); 48 } 49 50 span<T> subspan(index_type offset) const { 51 return span(data_ + offset, size_ - offset); 52 } 53 54 bool empty() const { return size_ == 0; } 55 56 index_type size() const { return size_; } 57 index_type size_bytes() const { return size_ * sizeof(T); } 58 59 private: 60 const T* data_; 61 index_type size_; 62}; 63 64template <typename T> 65span<T> SpanFrom(const std::vector<T>& v) { 66 return span<T>(v.data(), v.size()); 67} 68 69template <size_t N> 70span<uint8_t> SpanFrom(const char (&str)[N]) { 71 return 
span<uint8_t>(reinterpret_cast<const uint8_t*>(str), N - 1); 72} 73 74inline span<uint8_t> SpanFrom(const char* str) { 75 return str ? span<uint8_t>(reinterpret_cast<const uint8_t*>(str), strlen(str)) 76 : span<uint8_t>(); 77} 78 79inline span<uint8_t> SpanFrom(const std::string& v) { 80 return span<uint8_t>(reinterpret_cast<const uint8_t*>(v.data()), v.size()); 81} 82 83// ============================================================================= 84// Status and Error codes 85// ============================================================================= 86enum class Error { 87 OK = 0, 88 // JSON parsing errors - json_parser.{h,cc}. 89 JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01, 90 JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02, 91 JSON_PARSER_NO_INPUT = 0x03, 92 JSON_PARSER_INVALID_TOKEN = 0x04, 93 JSON_PARSER_INVALID_NUMBER = 0x05, 94 JSON_PARSER_INVALID_STRING = 0x06, 95 JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07, 96 JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08, 97 JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09, 98 JSON_PARSER_COLON_EXPECTED = 0x0a, 99 JSON_PARSER_UNEXPECTED_MAP_END = 0x0b, 100 JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c, 101 JSON_PARSER_VALUE_EXPECTED = 0x0d, 102 103 CBOR_INVALID_INT32 = 0x0e, 104 CBOR_INVALID_DOUBLE = 0x0f, 105 CBOR_INVALID_ENVELOPE = 0x10, 106 CBOR_INVALID_STRING8 = 0x11, 107 CBOR_INVALID_STRING16 = 0x12, 108 CBOR_INVALID_BINARY = 0x13, 109 CBOR_UNSUPPORTED_VALUE = 0x14, 110 CBOR_NO_INPUT = 0x15, 111 CBOR_INVALID_START_BYTE = 0x16, 112 CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17, 113 CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18, 114 CBOR_UNEXPECTED_EOF_IN_MAP = 0x19, 115 CBOR_INVALID_MAP_KEY = 0x1a, 116 CBOR_STACK_LIMIT_EXCEEDED = 0x1b, 117 CBOR_TRAILING_JUNK = 0x1c, 118 CBOR_MAP_START_EXPECTED = 0x1d, 119 CBOR_MAP_STOP_EXPECTED = 0x1e, 120 CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x1f, 121}; 122 123// A status value with position that can be copied. The default status 124// is OK. Usually, error status values should come with a valid position. 
125struct Status { 126 static constexpr size_t npos() { return std::numeric_limits<size_t>::max(); } 127 128 bool ok() const { return error == Error::OK; } 129 130 Error error = Error::OK; 131 size_t pos = npos(); 132 Status(Error error, size_t pos) : error(error), pos(pos) {} 133 Status() = default; 134 135 // Returns a 7 bit US-ASCII string, either "OK" or an error message 136 // that includes the position. 137 std::string ToASCIIString() const; 138 139 private: 140 std::string ToASCIIString(const char* msg) const; 141}; 142 143// Handler interface for parser events emitted by a streaming parser. 144// See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder, 145// json::ParseJSON. 146class StreamingParserHandler { 147 public: 148 virtual ~StreamingParserHandler() = default; 149 virtual void HandleMapBegin() = 0; 150 virtual void HandleMapEnd() = 0; 151 virtual void HandleArrayBegin() = 0; 152 virtual void HandleArrayEnd() = 0; 153 virtual void HandleString8(span<uint8_t> chars) = 0; 154 virtual void HandleString16(span<uint16_t> chars) = 0; 155 virtual void HandleBinary(span<uint8_t> bytes) = 0; 156 virtual void HandleDouble(double value) = 0; 157 virtual void HandleInt32(int32_t value) = 0; 158 virtual void HandleBool(bool value) = 0; 159 virtual void HandleNull() = 0; 160 161 // The parser may send one error even after other events have already 162 // been received. Client code is reponsible to then discard the 163 // already processed events. 164 // |error| must be an eror, as in, |error.is_ok()| can't be true. 165 virtual void HandleError(Status error) = 0; 166}; 167 168namespace cbor { 169// The binary encoding for the inspector protocol follows the CBOR specification 170// (RFC 7049). Additional constraints: 171// - Only indefinite length maps and arrays are supported. 172// - Maps and arrays are wrapped with an envelope, that is, a 173// CBOR tag with value 24 followed by a byte string specifying 174// the byte length of the enclosed map / array. 
The byte string 175// must use a 32 bit wide length. 176// - At the top level, a message must be an indefinite length map 177// wrapped by an envelope. 178// - Maximal size for messages is 2^32 (4 GB). 179// - For scalars, we support only the int32_t range, encoded as 180// UNSIGNED/NEGATIVE (major types 0 / 1). 181// - UTF16 strings, including with unbalanced surrogate pairs, are encoded 182// as CBOR BYTE_STRING (major type 2). For such strings, the number of 183// bytes encoded must be even. 184// - UTF8 strings (major type 3) are supported. 185// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never 186// as UTF16 strings. 187// - Arbitrary byte arrays, in the inspector protocol called 'binary', 188// are encoded as BYTE_STRING (major type 2), prefixed with a byte 189// indicating base64 when rendered as JSON. 190 191// ============================================================================= 192// Detecting CBOR content 193// ============================================================================= 194 195// The first byte for an envelope, which we use for wrapping dictionaries 196// and arrays; and the byte that indicates a byte string with 32 bit length. 197// These two bytes start an envelope, and thereby also any CBOR message 198// produced or consumed by this protocol. See also |EnvelopeEncoder| below. 199uint8_t InitialByteForEnvelope(); 200uint8_t InitialByteFor32BitLengthByteString(); 201 202// Checks whether |msg| is a cbor message. 203bool IsCBORMessage(span<uint8_t> msg); 204 205// ============================================================================= 206// Encoding individual CBOR items 207// ============================================================================= 208 209// Some constants for CBOR tokens that only take a single byte on the wire. 
210uint8_t EncodeTrue(); 211uint8_t EncodeFalse(); 212uint8_t EncodeNull(); 213uint8_t EncodeIndefiniteLengthArrayStart(); 214uint8_t EncodeIndefiniteLengthMapStart(); 215uint8_t EncodeStop(); 216 217// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE| 218// (major type 1) iff < 0. 219void EncodeInt32(int32_t value, std::vector<uint8_t>* out); 220void EncodeInt32(int32_t value, std::string* out); 221 222// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16 223// character in |in| is emitted with most significant byte first, 224// appending to |out|. 225void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out); 226void EncodeString16(span<uint16_t> in, std::string* out); 227 228// Encodes a UTF8 string |in| as STRING (major type 3). 229void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out); 230void EncodeString8(span<uint8_t> in, std::string* out); 231 232// Encodes the given |latin1| string as STRING8. 233// If any non-ASCII character is present, it will be represented 234// as a 2 byte UTF8 sequence. 235void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out); 236void EncodeFromLatin1(span<uint8_t> latin1, std::string* out); 237 238// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII. 239// Otherwise, encodes as STRING16. 240void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out); 241void EncodeFromUTF16(span<uint16_t> utf16, std::string* out); 242 243// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with 244// definitive length, prefixed with tag 22 indicating expected conversion to 245// base64 (see RFC 7049, Table 3 and Section 2.4.4.2). 246void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out); 247void EncodeBinary(span<uint8_t> in, std::string* out); 248 249// Encodes / decodes a double as Major type 7 (SIMPLE_VALUE), 250// with additional info = 27, followed by 8 bytes in big endian. 
void EncodeDouble(double value, std::vector<uint8_t>* out);
void EncodeDouble(double value, std::string* out);

// =============================================================================
// cbor::EnvelopeEncoder - for wrapping submessages
// =============================================================================

// An envelope indicates the byte length of a wrapped item.
// We use this for maps and arrays, which allows the decoder
// to skip such (nested) values wholesale.
// It's implemented as a CBOR tag (major type 6) with additional
// info = 24, followed by a byte string with a 32 bit length value;
// so the maximal structure that we can wrap is 2^32 bytes long.
// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1
class EnvelopeEncoder {
 public:
  // Emits the envelope start bytes and records the position for the
  // byte size in |byte_size_pos_|. Also emits empty bytes for the
  // byte size so that encoding can continue.
  void EncodeStart(std::vector<uint8_t>* out);
  void EncodeStart(std::string* out);
  // This records the current size in |out| at position byte_size_pos_.
  // Returns true iff successful.
  bool EncodeStop(std::vector<uint8_t>* out);
  bool EncodeStop(std::string* out);

 private:
  // Offset within |out| at which the byte size is written; set by
  // EncodeStart and consumed by EncodeStop (see the comments above).
  size_t byte_size_pos_ = 0;
};

// =============================================================================
// cbor::NewCBOREncoder - for encoding from a streaming parser
// =============================================================================

// This can be used to convert to CBOR, by passing the return value to a parser
// that drives it. The handler will encode into |out|, and iff an error occurs
// it will set |status| to an error and clear |out|. Otherwise, |status.ok()|
// will be |true|.
289std::unique_ptr<StreamingParserHandler> NewCBOREncoder( 290 std::vector<uint8_t>* out, 291 Status* status); 292std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out, 293 Status* status); 294 295// ============================================================================= 296// cbor::CBORTokenizer - for parsing individual CBOR items 297// ============================================================================= 298 299// Tags for the tokens within a CBOR message that CBORTokenizer understands. 300// Note that this is not the same terminology as the CBOR spec (RFC 7049), 301// but rather, our adaptation. For instance, we lump unsigned and signed 302// major type into INT32 here (and disallow values outside the int32_t range). 303enum class CBORTokenTag { 304 // Encountered an error in the structure of the message. Consult 305 // status() for details. 306 ERROR_VALUE, 307 // Booleans and NULL. 308 TRUE_VALUE, 309 FALSE_VALUE, 310 NULL_VALUE, 311 // An int32_t (signed 32 bit integer). 312 INT32, 313 // A double (64 bit floating point). 314 DOUBLE, 315 // A UTF8 string. 316 STRING8, 317 // A UTF16 string. 318 STRING16, 319 // A binary string. 320 BINARY, 321 // Starts an indefinite length map; after the map start we expect 322 // alternating keys and values, followed by STOP. 323 MAP_START, 324 // Starts an indefinite length array; after the array start we 325 // expect values, followed by STOP. 326 ARRAY_START, 327 // Ends a map or an array. 328 STOP, 329 // An envelope indicator, wrapping a map or array. 330 // Internally this carries the byte length of the wrapped 331 // map or array. While CBORTokenizer::Next() will read / skip the entire 332 // envelope, CBORTokenizer::EnterEnvelope() reads the tokens 333 // inside of it. 334 ENVELOPE, 335 // We've reached the end there is nothing else to read. 336 DONE, 337}; 338 339// The major types from RFC 7049 Section 2.1. 
340enum class MajorType { 341 UNSIGNED = 0, 342 NEGATIVE = 1, 343 BYTE_STRING = 2, 344 STRING = 3, 345 ARRAY = 4, 346 MAP = 5, 347 TAG = 6, 348 SIMPLE_VALUE = 7 349}; 350 351// CBORTokenizer segments a CBOR message, presenting the tokens therein as 352// numbers, strings, etc. This is not a complete CBOR parser, but makes it much 353// easier to implement one (e.g. ParseCBOR, above). It can also be used to parse 354// messages partially. 355class CBORTokenizer { 356 public: 357 explicit CBORTokenizer(span<uint8_t> bytes); 358 ~CBORTokenizer(); 359 360 // Identifies the current token that we're looking at, 361 // or ERROR_VALUE (in which ase ::Status() has details) 362 // or DONE (if we're past the last token). 363 CBORTokenTag TokenTag() const; 364 365 // Advances to the next token. 366 void Next(); 367 // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE. 368 // While Next() would skip past the entire envelope / what it's 369 // wrapping, EnterEnvelope positions the cursor inside of the envelope, 370 // letting the client explore the nested structure. 371 void EnterEnvelope(); 372 373 // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes 374 // the error more precisely; otherwise it'll be set to Error::OK. 375 // In either case, Status().pos is the current position. 376 struct Status Status() const; 377 378 // The following methods retrieve the token values. They can only 379 // be called if TokenTag() matches. 380 381 // To be called only if ::TokenTag() == CBORTokenTag::INT32. 382 int32_t GetInt32() const; 383 384 // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE. 385 double GetDouble() const; 386 387 // To be called only if ::TokenTag() == CBORTokenTag::STRING8. 388 span<uint8_t> GetString8() const; 389 390 // Wire representation for STRING16 is low byte first (little endian). 391 // To be called only if ::TokenTag() == CBORTokenTag::STRING16. 
392 span<uint8_t> GetString16WireRep() const; 393 394 // To be called only if ::TokenTag() == CBORTokenTag::BINARY. 395 span<uint8_t> GetBinary() const; 396 397 // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE. 398 span<uint8_t> GetEnvelopeContents() const; 399 400 private: 401 void ReadNextToken(bool enter_envelope); 402 void SetToken(CBORTokenTag token, size_t token_byte_length); 403 void SetError(Error error); 404 405 span<uint8_t> bytes_; 406 CBORTokenTag token_tag_; 407 struct Status status_; 408 size_t token_byte_length_; 409 MajorType token_start_type_; 410 uint64_t token_start_internal_value_; 411}; 412 413// ============================================================================= 414// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages 415// ============================================================================= 416 417// Parses a CBOR encoded message from |bytes|, sending events to 418// |out|. If an error occurs, sends |out->HandleError|, and parsing stops. 419// The client is responsible for discarding the already received information in 420// that case. 421void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out); 422 423// ============================================================================= 424// cbor::AppendString8EntryToMap - for limited in-place editing of messages 425// ============================================================================= 426 427// Modifies the |cbor| message by appending a new key/value entry at the end 428// of the map. Patches up the envelope size; Status.ok() iff successful. 429// If not successful, |cbor| may be corrupted after this call. 430Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 431 span<uint8_t> string8_value, 432 std::vector<uint8_t>* cbor); 433Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 434 span<uint8_t> string8_value, 435 std::string* cbor); 436 437namespace internals { // Exposed only for writing tests. 
438int8_t ReadTokenStart(span<uint8_t> bytes, 439 cbor::MajorType* type, 440 uint64_t* value); 441 442void WriteTokenStart(cbor::MajorType type, 443 uint64_t value, 444 std::vector<uint8_t>* encoded); 445void WriteTokenStart(cbor::MajorType type, 446 uint64_t value, 447 std::string* encoded); 448} // namespace internals 449} // namespace cbor 450 451namespace json { 452// Client code must provide an instance. Implementation should delegate 453// to whatever is appropriate. 454class Platform { 455 public: 456 virtual ~Platform() = default; 457 // Parses |str| into |result|. Returns false iff there are 458 // leftover characters or parsing errors. 459 virtual bool StrToD(const char* str, double* result) const = 0; 460 461 // Prints |value| in a format suitable for JSON. 462 virtual std::unique_ptr<char[]> DToStr(double value) const = 0; 463}; 464 465// ============================================================================= 466// json::NewJSONEncoder - for encoding streaming parser events as JSON 467// ============================================================================= 468 469// Returns a handler object which will write ascii characters to |out|. 470// |status->ok()| will be false iff the handler routine HandleError() is called. 471// In that case, we'll stop emitting output. 472// Except for calling the HandleError routine at any time, the client 473// code must call the Handle* methods in an order in which they'd occur 474// in valid JSON; otherwise we may crash (the code uses assert). 
475std::unique_ptr<StreamingParserHandler> NewJSONEncoder( 476 const Platform* platform, 477 std::vector<uint8_t>* out, 478 Status* status); 479std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform, 480 std::string* out, 481 Status* status); 482 483// ============================================================================= 484// json::ParseJSON - for receiving streaming parser events for JSON 485// ============================================================================= 486 487void ParseJSON(const Platform& platform, 488 span<uint8_t> chars, 489 StreamingParserHandler* handler); 490void ParseJSON(const Platform& platform, 491 span<uint16_t> chars, 492 StreamingParserHandler* handler); 493 494// ============================================================================= 495// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding 496// ============================================================================= 497Status ConvertCBORToJSON(const Platform& platform, 498 span<uint8_t> cbor, 499 std::string* json); 500Status ConvertCBORToJSON(const Platform& platform, 501 span<uint8_t> cbor, 502 std::vector<uint8_t>* json); 503Status ConvertJSONToCBOR(const Platform& platform, 504 span<uint8_t> json, 505 std::vector<uint8_t>* cbor); 506Status ConvertJSONToCBOR(const Platform& platform, 507 span<uint16_t> json, 508 std::vector<uint8_t>* cbor); 509Status ConvertJSONToCBOR(const Platform& platform, 510 span<uint8_t> json, 511 std::string* cbor); 512Status ConvertJSONToCBOR(const Platform& platform, 513 span<uint16_t> json, 514 std::string* cbor); 515} // namespace json 516 517{% for namespace in config.protocol.namespace %} 518} // namespace {{namespace}} 519{% endfor %} 520#endif // !defined({{"_".join(config.protocol.namespace)}}_encoding_h) 521