1{# This template is generated by gen_cbor_templates.py. #} 2// Generated by lib/encoding_h.template. 3 4// Copyright 2019 The Chromium Authors. All rights reserved. 5// Use of this source code is governed by a BSD-style license that can be 6// found in the LICENSE file. 7 8{% if config.encoding_lib.header == "" %} 9#ifndef {{"_".join(config.protocol.namespace)}}_encoding_h 10#define {{"_".join(config.protocol.namespace)}}_encoding_h 11 12#include <cstddef> 13#include <cstdint> 14#include <cstring> 15#include <limits> 16#include <memory> 17#include <string> 18#include <vector> 19 20{% for namespace in config.protocol.namespace %} 21namespace {{namespace}} { 22{% endfor %} 23 24// ===== encoding/encoding.h ===== 25 26 27// ============================================================================= 28// span - sequence of bytes 29// ============================================================================= 30 31// This template is similar to std::span, which will be included in C++20. 
// A non-owning, read-only view over |size| contiguous elements of type T
// starting at |data|. The span only stores a pointer and a length; it never
// copies or releases the elements it refers to.
template <typename T>
class span {
 public:
  using index_type = size_t;

  // Creates an empty span (null data pointer, size 0).
  span() : data_(nullptr), size_(0) {}
  // Creates a span viewing |size| elements starting at |data|.
  span(const T* data, index_type size) : data_(data), size_(size) {}

  // Pointer to the first element (nullptr for a default constructed span).
  const T* data() const { return data_; }

  // Iteration support, so that a span works with range-based for loops.
  const T* begin() const { return data_; }
  const T* end() const { return data_ + size_; }

  // Element access. No bounds check is performed.
  const T& operator[](index_type idx) const { return data_[idx]; }

  // Returns the view of |count| elements starting at |offset|.
  // No bounds check is performed.
  span<T> subspan(index_type offset, index_type count) const {
    return span(data_ + offset, count);
  }

  // Returns the view of all elements from |offset| to the end.
  // No bounds check is performed.
  span<T> subspan(index_type offset) const {
    return span(data_ + offset, size_ - offset);
  }

  bool empty() const { return size_ == 0; }

  // Number of elements, and number of bytes occupied by these elements.
  index_type size() const { return size_; }
  index_type size_bytes() const { return size_ * sizeof(T); }

 private:
  const T* data_;
  index_type size_;
};

// Views the elements of the vector |v|.
template <typename T>
span<T> SpanFrom(const std::vector<T>& v) {
  return span<T>(v.data(), v.size());
}

// Views the first N - 1 chars of the array |str| as bytes, that is,
// everything except for the last array element (the terminating NUL in
// the case of a string literal).
template <size_t N>
span<uint8_t> SpanFrom(const char (&str)[N]) {
  return span<uint8_t>(reinterpret_cast<const uint8_t*>(str), N - 1);
}

// Views the NUL terminated string |str| as bytes, without the terminator.
// A nullptr |str| yields an empty span.
inline span<uint8_t> SpanFrom(const char* str) {
  // std::strlen is what <cstring> guarantees; the unqualified global name
  // is only provided as an implementation courtesy.
  return str ? span<uint8_t>(reinterpret_cast<const uint8_t*>(str),
                             std::strlen(str))
             : span<uint8_t>();
}

// Views the characters of |v| as bytes.
inline span<uint8_t> SpanFrom(const std::string& v) {
  return span<uint8_t>(reinterpret_cast<const uint8_t*>(v.data()), v.size());
}

// =============================================================================
// Status and Error codes
// =============================================================================
enum class Error {
  OK = 0,
  // JSON parsing errors - json_parser.{h,cc}.
  JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01,
  JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02,
  JSON_PARSER_NO_INPUT = 0x03,
  JSON_PARSER_INVALID_TOKEN = 0x04,
  JSON_PARSER_INVALID_NUMBER = 0x05,
  JSON_PARSER_INVALID_STRING = 0x06,
  JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07,
  JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08,
  JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09,
  JSON_PARSER_COLON_EXPECTED = 0x0a,
  JSON_PARSER_UNEXPECTED_MAP_END = 0x0b,
  JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c,
  JSON_PARSER_VALUE_EXPECTED = 0x0d,

  // CBOR errors.
  CBOR_INVALID_INT32 = 0x0e,
  CBOR_INVALID_DOUBLE = 0x0f,
  CBOR_INVALID_ENVELOPE = 0x10,
  CBOR_INVALID_STRING8 = 0x11,
  CBOR_INVALID_STRING16 = 0x12,
  CBOR_INVALID_BINARY = 0x13,
  CBOR_UNSUPPORTED_VALUE = 0x14,
  CBOR_NO_INPUT = 0x15,
  CBOR_INVALID_START_BYTE = 0x16,
  CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17,
  CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18,
  CBOR_UNEXPECTED_EOF_IN_MAP = 0x19,
  CBOR_INVALID_MAP_KEY = 0x1a,
  CBOR_STACK_LIMIT_EXCEEDED = 0x1b,
  CBOR_TRAILING_JUNK = 0x1c,
  CBOR_MAP_START_EXPECTED = 0x1d,
  CBOR_MAP_STOP_EXPECTED = 0x1e,
  CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x1f,
};

// A status value with position that can be copied. The default status
// is OK. Usually, error status values should come with a valid position.
struct Status {
  // Sentinel for "no position": the largest value a size_t can hold.
  static constexpr size_t npos() { return std::numeric_limits<size_t>::max(); }

  // True iff this status carries no error.
  bool ok() const { return error == Error::OK; }

  Error error = Error::OK;
  size_t pos = npos();
  Status(Error error, size_t pos) : error(error), pos(pos) {}
  Status() = default;

  // Returns a 7 bit US-ASCII string, either "OK" or an error message
  // that includes the position.
  std::string ToASCIIString() const;

 private:
  std::string ToASCIIString(const char* msg) const;
};

// Handler interface for parser events emitted by a streaming parser.
145// See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder, 146// json::ParseJSON. 147class StreamingParserHandler { 148 public: 149 virtual ~StreamingParserHandler() = default; 150 virtual void HandleMapBegin() = 0; 151 virtual void HandleMapEnd() = 0; 152 virtual void HandleArrayBegin() = 0; 153 virtual void HandleArrayEnd() = 0; 154 virtual void HandleString8(span<uint8_t> chars) = 0; 155 virtual void HandleString16(span<uint16_t> chars) = 0; 156 virtual void HandleBinary(span<uint8_t> bytes) = 0; 157 virtual void HandleDouble(double value) = 0; 158 virtual void HandleInt32(int32_t value) = 0; 159 virtual void HandleBool(bool value) = 0; 160 virtual void HandleNull() = 0; 161 162 // The parser may send one error even after other events have already 163 // been received. Client code is reponsible to then discard the 164 // already processed events. 165 // |error| must be an eror, as in, |error.is_ok()| can't be true. 166 virtual void HandleError(Status error) = 0; 167}; 168 169namespace cbor { 170// The binary encoding for the inspector protocol follows the CBOR specification 171// (RFC 7049). Additional constraints: 172// - Only indefinite length maps and arrays are supported. 173// - Maps and arrays are wrapped with an envelope, that is, a 174// CBOR tag with value 24 followed by a byte string specifying 175// the byte length of the enclosed map / array. The byte string 176// must use a 32 bit wide length. 177// - At the top level, a message must be an indefinite length map 178// wrapped by an envelope. 179// - Maximal size for messages is 2^32 (4 GB). 180// - For scalars, we support only the int32_t range, encoded as 181// UNSIGNED/NEGATIVE (major types 0 / 1). 182// - UTF16 strings, including with unbalanced surrogate pairs, are encoded 183// as CBOR BYTE_STRING (major type 2). For such strings, the number of 184// bytes encoded must be even. 185// - UTF8 strings (major type 3) are supported. 
186// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never 187// as UTF16 strings. 188// - Arbitrary byte arrays, in the inspector protocol called 'binary', 189// are encoded as BYTE_STRING (major type 2), prefixed with a byte 190// indicating base64 when rendered as JSON. 191 192// ============================================================================= 193// Detecting CBOR content 194// ============================================================================= 195 196// The first byte for an envelope, which we use for wrapping dictionaries 197// and arrays; and the byte that indicates a byte string with 32 bit length. 198// These two bytes start an envelope, and thereby also any CBOR message 199// produced or consumed by this protocol. See also |EnvelopeEncoder| below. 200uint8_t InitialByteForEnvelope(); 201uint8_t InitialByteFor32BitLengthByteString(); 202 203// Checks whether |msg| is a cbor message. 204bool IsCBORMessage(span<uint8_t> msg); 205 206// ============================================================================= 207// Encoding individual CBOR items 208// ============================================================================= 209 210// Some constants for CBOR tokens that only take a single byte on the wire. 211uint8_t EncodeTrue(); 212uint8_t EncodeFalse(); 213uint8_t EncodeNull(); 214uint8_t EncodeIndefiniteLengthArrayStart(); 215uint8_t EncodeIndefiniteLengthMapStart(); 216uint8_t EncodeStop(); 217 218// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE| 219// (major type 1) iff < 0. 220void EncodeInt32(int32_t value, std::vector<uint8_t>* out); 221void EncodeInt32(int32_t value, std::string* out); 222 223// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16 224// character in |in| is emitted with most significant byte first, 225// appending to |out|. 
226void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out); 227void EncodeString16(span<uint16_t> in, std::string* out); 228 229// Encodes a UTF8 string |in| as STRING (major type 3). 230void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out); 231void EncodeString8(span<uint8_t> in, std::string* out); 232 233// Encodes the given |latin1| string as STRING8. 234// If any non-ASCII character is present, it will be represented 235// as a 2 byte UTF8 sequence. 236void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out); 237void EncodeFromLatin1(span<uint8_t> latin1, std::string* out); 238 239// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII. 240// Otherwise, encodes as STRING16. 241void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out); 242void EncodeFromUTF16(span<uint16_t> utf16, std::string* out); 243 244// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with 245// definitive length, prefixed with tag 22 indicating expected conversion to 246// base64 (see RFC 7049, Table 3 and Section 2.4.4.2). 247void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out); 248void EncodeBinary(span<uint8_t> in, std::string* out); 249 250// Encodes / decodes a double as Major type 7 (SIMPLE_VALUE), 251// with additional info = 27, followed by 8 bytes in big endian. 252void EncodeDouble(double value, std::vector<uint8_t>* out); 253void EncodeDouble(double value, std::string* out); 254 255// ============================================================================= 256// cbor::EnvelopeEncoder - for wrapping submessages 257// ============================================================================= 258 259// An envelope indicates the byte length of a wrapped item. 260// We use this for maps and array, which allows the decoder 261// to skip such (nested) values whole sale. 
262// It's implemented as a CBOR tag (major type 6) with additional 263// info = 24, followed by a byte string with a 32 bit length value; 264// so the maximal structure that we can wrap is 2^32 bits long. 265// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1 266class EnvelopeEncoder { 267 public: 268 // Emits the envelope start bytes and records the position for the 269 // byte size in |byte_size_pos_|. Also emits empty bytes for the 270 // byte sisze so that encoding can continue. 271 void EncodeStart(std::vector<uint8_t>* out); 272 void EncodeStart(std::string* out); 273 // This records the current size in |out| at position byte_size_pos_. 274 // Returns true iff successful. 275 bool EncodeStop(std::vector<uint8_t>* out); 276 bool EncodeStop(std::string* out); 277 278 private: 279 size_t byte_size_pos_ = 0; 280}; 281 282// ============================================================================= 283// cbor::NewCBOREncoder - for encoding from a streaming parser 284// ============================================================================= 285 286// This can be used to convert to CBOR, by passing the return value to a parser 287// that drives it. The handler will encode into |out|, and iff an error occurs 288// it will set |status| to an error and clear |out|. Otherwise, |status.ok()| 289// will be |true|. 290std::unique_ptr<StreamingParserHandler> NewCBOREncoder( 291 std::vector<uint8_t>* out, 292 Status* status); 293std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out, 294 Status* status); 295 296// ============================================================================= 297// cbor::CBORTokenizer - for parsing individual CBOR items 298// ============================================================================= 299 300// Tags for the tokens within a CBOR message that CBORTokenizer understands. 301// Note that this is not the same terminology as the CBOR spec (RFC 7049), 302// but rather, our adaptation. 
// For instance, we lump the UNSIGNED and NEGATIVE
// major types into INT32 here (and disallow values outside the int32_t range).
enum class CBORTokenTag {
  // Encountered an error in the structure of the message. Consult
  // status() for details.
  ERROR_VALUE,
  // Booleans and NULL.
  TRUE_VALUE,
  FALSE_VALUE,
  NULL_VALUE,
  // An int32_t (signed 32 bit integer).
  INT32,
  // A double (64 bit floating point).
  DOUBLE,
  // A UTF8 string.
  STRING8,
  // A UTF16 string.
  STRING16,
  // A binary string.
  BINARY,
  // Starts an indefinite length map; after the map start we expect
  // alternating keys and values, followed by STOP.
  MAP_START,
  // Starts an indefinite length array; after the array start we
  // expect values, followed by STOP.
  ARRAY_START,
  // Ends a map or an array.
  STOP,
  // An envelope indicator, wrapping a map or array.
  // Internally this carries the byte length of the wrapped
  // map or array. While CBORTokenizer::Next() will read / skip the entire
  // envelope, CBORTokenizer::EnterEnvelope() reads the tokens
  // inside of it.
  ENVELOPE,
  // We've reached the end; there is nothing else to read.
  DONE,
};

// The major types from RFC 7049 Section 2.1.
enum class MajorType {
  UNSIGNED = 0,
  NEGATIVE = 1,
  BYTE_STRING = 2,
  STRING = 3,
  ARRAY = 4,
  MAP = 5,
  TAG = 6,
  SIMPLE_VALUE = 7
};

// CBORTokenizer segments a CBOR message, presenting the tokens therein as
// numbers, strings, etc. This is not a complete CBOR parser, but makes it much
// easier to implement one (e.g. ParseCBOR, below). It can also be used to parse
// messages partially.
356class CBORTokenizer { 357 public: 358 explicit CBORTokenizer(span<uint8_t> bytes); 359 ~CBORTokenizer(); 360 361 // Identifies the current token that we're looking at, 362 // or ERROR_VALUE (in which ase ::Status() has details) 363 // or DONE (if we're past the last token). 364 CBORTokenTag TokenTag() const; 365 366 // Advances to the next token. 367 void Next(); 368 // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE. 369 // While Next() would skip past the entire envelope / what it's 370 // wrapping, EnterEnvelope positions the cursor inside of the envelope, 371 // letting the client explore the nested structure. 372 void EnterEnvelope(); 373 374 // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes 375 // the error more precisely; otherwise it'll be set to Error::OK. 376 // In either case, Status().pos is the current position. 377 struct Status Status() const; 378 379 // The following methods retrieve the token values. They can only 380 // be called if TokenTag() matches. 381 382 // To be called only if ::TokenTag() == CBORTokenTag::INT32. 383 int32_t GetInt32() const; 384 385 // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE. 386 double GetDouble() const; 387 388 // To be called only if ::TokenTag() == CBORTokenTag::STRING8. 389 span<uint8_t> GetString8() const; 390 391 // Wire representation for STRING16 is low byte first (little endian). 392 // To be called only if ::TokenTag() == CBORTokenTag::STRING16. 393 span<uint8_t> GetString16WireRep() const; 394 395 // To be called only if ::TokenTag() == CBORTokenTag::BINARY. 396 span<uint8_t> GetBinary() const; 397 398 // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE. 
399 span<uint8_t> GetEnvelopeContents() const; 400 401 private: 402 void ReadNextToken(bool enter_envelope); 403 void SetToken(CBORTokenTag token, size_t token_byte_length); 404 void SetError(Error error); 405 406 span<uint8_t> bytes_; 407 CBORTokenTag token_tag_; 408 struct Status status_; 409 size_t token_byte_length_; 410 MajorType token_start_type_; 411 uint64_t token_start_internal_value_; 412}; 413 414// ============================================================================= 415// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages 416// ============================================================================= 417 418// Parses a CBOR encoded message from |bytes|, sending events to 419// |out|. If an error occurs, sends |out->HandleError|, and parsing stops. 420// The client is responsible for discarding the already received information in 421// that case. 422void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out); 423 424// ============================================================================= 425// cbor::AppendString8EntryToMap - for limited in-place editing of messages 426// ============================================================================= 427 428// Modifies the |cbor| message by appending a new key/value entry at the end 429// of the map. Patches up the envelope size; Status.ok() iff successful. 430// If not successful, |cbor| may be corrupted after this call. 431Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 432 span<uint8_t> string8_value, 433 std::vector<uint8_t>* cbor); 434Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 435 span<uint8_t> string8_value, 436 std::string* cbor); 437 438namespace internals { // Exposed only for writing tests. 
439size_t ReadTokenStart(span<uint8_t> bytes, 440 cbor::MajorType* type, 441 uint64_t* value); 442 443void WriteTokenStart(cbor::MajorType type, 444 uint64_t value, 445 std::vector<uint8_t>* encoded); 446void WriteTokenStart(cbor::MajorType type, 447 uint64_t value, 448 std::string* encoded); 449} // namespace internals 450} // namespace cbor 451 452namespace json { 453// Client code must provide an instance. Implementation should delegate 454// to whatever is appropriate. 455class Platform { 456 public: 457 virtual ~Platform() = default; 458 // Parses |str| into |result|. Returns false iff there are 459 // leftover characters or parsing errors. 460 virtual bool StrToD(const char* str, double* result) const = 0; 461 462 // Prints |value| in a format suitable for JSON. 463 virtual std::unique_ptr<char[]> DToStr(double value) const = 0; 464}; 465 466// ============================================================================= 467// json::NewJSONEncoder - for encoding streaming parser events as JSON 468// ============================================================================= 469 470// Returns a handler object which will write ascii characters to |out|. 471// |status->ok()| will be false iff the handler routine HandleError() is called. 472// In that case, we'll stop emitting output. 473// Except for calling the HandleError routine at any time, the client 474// code must call the Handle* methods in an order in which they'd occur 475// in valid JSON; otherwise we may crash (the code uses assert). 
476std::unique_ptr<StreamingParserHandler> NewJSONEncoder( 477 const Platform* platform, 478 std::vector<uint8_t>* out, 479 Status* status); 480std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform, 481 std::string* out, 482 Status* status); 483 484// ============================================================================= 485// json::ParseJSON - for receiving streaming parser events for JSON 486// ============================================================================= 487 488void ParseJSON(const Platform& platform, 489 span<uint8_t> chars, 490 StreamingParserHandler* handler); 491void ParseJSON(const Platform& platform, 492 span<uint16_t> chars, 493 StreamingParserHandler* handler); 494 495// ============================================================================= 496// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding 497// ============================================================================= 498Status ConvertCBORToJSON(const Platform& platform, 499 span<uint8_t> cbor, 500 std::string* json); 501Status ConvertCBORToJSON(const Platform& platform, 502 span<uint8_t> cbor, 503 std::vector<uint8_t>* json); 504Status ConvertJSONToCBOR(const Platform& platform, 505 span<uint8_t> json, 506 std::vector<uint8_t>* cbor); 507Status ConvertJSONToCBOR(const Platform& platform, 508 span<uint16_t> json, 509 std::vector<uint8_t>* cbor); 510Status ConvertJSONToCBOR(const Platform& platform, 511 span<uint8_t> json, 512 std::string* cbor); 513Status ConvertJSONToCBOR(const Platform& platform, 514 span<uint16_t> json, 515 std::string* cbor); 516} // namespace json 517 518{% for namespace in config.protocol.namespace %} 519} // namespace {{namespace}} 520{% endfor %} 521#endif // !defined({{"_".join(config.protocol.namespace)}}_encoding_h) 522{% endif %} 523