// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_CRDTP_CBOR_H_
#define V8_CRDTP_CBOR_H_

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "export.h"
#include "parser_handler.h"
#include "span.h"

namespace v8_crdtp {
namespace cbor {
// The binary encoding for the inspector protocol follows the CBOR specification
// (RFC 7049). Additional constraints:
// - Only indefinite length maps and arrays are supported.
// - Maps and arrays are wrapped with an envelope, that is, a
//   CBOR tag with value 24 followed by a byte string specifying
//   the byte length of the enclosed map / array. The byte string
//   must use a 32 bit wide length.
// - At the top level, a message must be an indefinite length map
//   wrapped by an envelope.
// - Maximal size for messages is 2^32 (4 GB).
// - For scalars, we support only the int32_t range, encoded as
//   UNSIGNED/NEGATIVE (major types 0 / 1).
// - UTF16 strings, including with unbalanced surrogate pairs, are encoded
//   as CBOR BYTE_STRING (major type 2). For such strings, the number of
//   bytes encoded must be even.
// - UTF8 strings (major type 3) are supported.
// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never
//   as UTF16 strings.
// - Arbitrary byte arrays, in the inspector protocol called 'binary',
//   are encoded as BYTE_STRING (major type 2), prefixed with a byte
//   indicating base64 when rendered as JSON.

// =============================================================================
// Detecting CBOR content
// =============================================================================

// The first byte for an envelope, which we use for wrapping dictionaries
// and arrays; and the byte that indicates a byte string with 32 bit length.
// These two bytes start an envelope, and thereby also any CBOR message
// produced or consumed by this protocol. See also |EnvelopeEncoder| below.
uint8_t InitialByteForEnvelope();
uint8_t InitialByteFor32BitLengthByteString();

// Checks whether |msg| is a cbor message.
bool IsCBORMessage(span<uint8_t> msg);

// Performs a lightweight check of |msg|.
// Disallows:
// - Empty message
// - Not starting with the two bytes 0xd8, 0x5a
// - Empty envelope (all length bytes are 0)
// - Not starting with a map after the envelope stanza
// DevTools messages should pass this check.
Status CheckCBORMessage(span<uint8_t> msg);

// =============================================================================
// Encoding individual CBOR items
// =============================================================================

// Some constants for CBOR tokens that only take a single byte on the wire.
uint8_t EncodeTrue();
uint8_t EncodeFalse();
uint8_t EncodeNull();
uint8_t EncodeIndefiniteLengthArrayStart();
uint8_t EncodeIndefiniteLengthMapStart();
uint8_t EncodeStop();

// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE|
// (major type 1) iff < 0.
void EncodeInt32(int32_t value, std::vector<uint8_t>* out);

// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16
// character in |in| is emitted as two bytes, appending to |out|.
// NOTE(review): the byte order was documented here as "most significant byte
// first", but CBORTokenizer::GetString16WireRep() below documents the wire
// representation as low byte first (little endian). Confirm against the
// implementation and state the byte order in one place only.
void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out);

// Encodes a UTF8 string |in| as STRING (major type 3).
void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out);

// Encodes the given |latin1| string as STRING8.
// If any non-ASCII character is present, it will be represented
// as a 2 byte UTF8 sequence.
void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out);

// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII.
// Otherwise, encodes as STRING16.
void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out);

// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with
// definite length, prefixed with tag 22 indicating expected conversion to
// base64 (see RFC 7049, Table 3 and Section 2.4.4.2).
void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out);

// Encodes |value| as major type 7 (SIMPLE_VALUE), with additional
// info = 27, followed by 8 bytes in big endian.
void EncodeDouble(double value, std::vector<uint8_t>* out);

// =============================================================================
// cbor::EnvelopeEncoder - for wrapping submessages
// =============================================================================

// An envelope indicates the byte length of a wrapped item.
// We use this for maps and arrays, which allows the decoder
// to skip such (nested) values wholesale.
// It's implemented as a CBOR tag (major type 6) with additional
// info = 24, followed by a byte string with a 32 bit length value;
// so the wrapped structure must be shorter than 2^32 bytes (4 GB).
// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1
class EnvelopeEncoder {
 public:
  // Emits the envelope start bytes and records the position for the
  // byte size in |byte_size_pos_|. Also emits empty bytes for the
  // byte size so that encoding can continue.
  void EncodeStart(std::vector<uint8_t>* out);
  // This records the current size in |out| at position byte_size_pos_.
  // Returns true iff successful.
  bool EncodeStop(std::vector<uint8_t>* out);

 private:
  size_t byte_size_pos_ = 0;
};

// =============================================================================
// cbor::NewCBOREncoder - for encoding from a streaming parser
// =============================================================================

// This can be used to convert to CBOR, by passing the return value to a parser
// that drives it. The handler will encode into |out|, and iff an error occurs
// it will set |status| to an error and clear |out|. Otherwise, |status.ok()|
// will be |true|.
std::unique_ptr<ParserHandler> NewCBOREncoder(std::vector<uint8_t>* out,
                                              Status* status);

// =============================================================================
// cbor::CBORTokenizer - for parsing individual CBOR items
// =============================================================================

// Tags for the tokens within a CBOR message that CBORTokenizer understands.
// Note that this is not the same terminology as the CBOR spec (RFC 7049),
// but rather, our adaptation. For instance, we lump unsigned and signed
// major type into INT32 here (and disallow values outside the int32_t range).
enum class CBORTokenTag {
  // Encountered an error in the structure of the message. Consult
  // status() for details.
  ERROR_VALUE,
  // Booleans and NULL.
  TRUE_VALUE,
  FALSE_VALUE,
  NULL_VALUE,
  // An int32_t (signed 32 bit integer).
  INT32,
  // A double (64 bit floating point).
  DOUBLE,
  // A UTF8 string.
  STRING8,
  // A UTF16 string.
  STRING16,
  // A binary string.
  BINARY,
  // Starts an indefinite length map; after the map start we expect
  // alternating keys and values, followed by STOP.
  MAP_START,
  // Starts an indefinite length array; after the array start we
  // expect values, followed by STOP.
  ARRAY_START,
  // Ends a map or an array.
  STOP,
  // An envelope indicator, wrapping a map or array.
  // Internally this carries the byte length of the wrapped
  // map or array. While CBORTokenizer::Next() will read / skip the entire
  // envelope, CBORTokenizer::EnterEnvelope() reads the tokens
  // inside of it.
  ENVELOPE,
  // We've reached the end; there is nothing else to read.
  DONE,
};

// The major types from RFC 7049 Section 2.1.
enum class MajorType {
  UNSIGNED = 0,
  NEGATIVE = 1,
  BYTE_STRING = 2,
  STRING = 3,
  ARRAY = 4,
  MAP = 5,
  TAG = 6,
  SIMPLE_VALUE = 7
};

// CBORTokenizer segments a CBOR message, presenting the tokens therein as
// numbers, strings, etc. This is not a complete CBOR parser, but makes it much
// easier to implement one (e.g. ParseCBOR, below). It can also be used to parse
// messages partially.
class CBORTokenizer {
 public:
  explicit CBORTokenizer(span<uint8_t> bytes);
  ~CBORTokenizer();

  // Identifies the current token that we're looking at,
  // or ERROR_VALUE (in which case ::Status() has details)
  // or DONE (if we're past the last token).
  CBORTokenTag TokenTag() const;

  // Advances to the next token.
  void Next();
  // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE.
  // While Next() would skip past the entire envelope / what it's
  // wrapping, EnterEnvelope positions the cursor inside of the envelope,
  // letting the client explore the nested structure.
  void EnterEnvelope();

  // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes
  // the error more precisely; otherwise it'll be set to Error::OK.
  // In either case, Status().pos is the current position.
  // (The elaborated |struct Status| is needed because this member function
  // has the same name as the type.)
  struct Status Status() const;

  // The following methods retrieve the token values. They can only
  // be called if TokenTag() matches.

  // To be called only if ::TokenTag() == CBORTokenTag::INT32.
  int32_t GetInt32() const;

  // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE.
  double GetDouble() const;

  // To be called only if ::TokenTag() == CBORTokenTag::STRING8.
  span<uint8_t> GetString8() const;

  // Wire representation for STRING16 is low byte first (little endian).
  // To be called only if ::TokenTag() == CBORTokenTag::STRING16.
  span<uint8_t> GetString16WireRep() const;

  // To be called only if ::TokenTag() == CBORTokenTag::BINARY.
  span<uint8_t> GetBinary() const;

  // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE.
  // Returns the envelope including its payload, i.e., a message which
  // can be passed to the CBORTokenizer constructor, which will
  // then see the envelope token first (looking at it a second time,
  // basically).
  span<uint8_t> GetEnvelope() const;

  // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE.
  // Returns only the payload inside the envelope, e.g., a map
  // or an array. This is not a complete message by our
  // IsCBORMessage definition, since it doesn't include the
  // enclosing envelope (the header, basically).
  span<uint8_t> GetEnvelopeContents() const;

 private:
  void ReadNextToken(bool enter_envelope);
  void SetToken(CBORTokenTag token, size_t token_byte_length);
  void SetError(Error error);

  // NOTE(review): none of the members below has an in-class initializer;
  // presumably the constructor (defined outside this header) establishes
  // them before any accessor is used — confirm against the implementation.
  span<uint8_t> bytes_;
  CBORTokenTag token_tag_;
  struct Status status_;
  size_t token_byte_length_;
  MajorType token_start_type_;
  uint64_t token_start_internal_value_;
};

// =============================================================================
// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
// =============================================================================

// Parses a CBOR encoded message from |bytes|, sending events to
// |out|. If an error occurs, sends |out->HandleError|, and parsing stops.
// The client is responsible for discarding the already received information in
// that case.
void ParseCBOR(span<uint8_t> bytes, ParserHandler* out);

// =============================================================================
// cbor::AppendString8EntryToCBORMap - for limited in-place editing of messages
// =============================================================================

// Modifies the |cbor| message by appending a new key/value entry at the end
// of the map. Patches up the envelope size; Status.ok() iff successful.
// If not successful, |cbor| may be corrupted after this call.
Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
                                   span<uint8_t> string8_value,
                                   std::vector<uint8_t>* cbor);

namespace internals {  // Exposed only for writing tests.
size_t ReadTokenStart(span<uint8_t> bytes,
                      cbor::MajorType* type,
                      uint64_t* value);

void WriteTokenStart(cbor::MajorType type,
                     uint64_t value,
                     std::vector<uint8_t>* encoded);
}  // namespace internals
}  // namespace cbor
}  // namespace v8_crdtp

#endif  // V8_CRDTP_CBOR_H_