• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_CRDTP_CBOR_H_
6 #define V8_CRDTP_CBOR_H_
7 
8 #include <cstddef>
9 #include <cstdint>
10 #include <memory>
11 #include <string>
12 #include <vector>
13 
14 #include "export.h"
15 #include "parser_handler.h"
16 #include "span.h"
17 
18 namespace v8_crdtp {
19 namespace cbor {
20 // The binary encoding for the inspector protocol follows the CBOR specification
21 // (RFC 7049). Additional constraints:
22 // - Only indefinite length maps and arrays are supported.
23 // - Maps and arrays are wrapped with an envelope, that is, a
24 //   CBOR tag with value 24 followed by a byte string specifying
25 //   the byte length of the enclosed map / array. The byte string
26 //   must use a 32 bit wide length.
27 // - At the top level, a message must be an indefinite length map
28 //   wrapped by an envelope.
29 // - Maximal size for messages is 2^32 (4 GB).
30 // - For scalars, we support only the int32_t range, encoded as
31 //   UNSIGNED/NEGATIVE (major types 0 / 1).
32 // - UTF16 strings, including with unbalanced surrogate pairs, are encoded
33 //   as CBOR BYTE_STRING (major type 2). For such strings, the number of
34 //   bytes encoded must be even.
35 // - UTF8 strings (major type 3) are supported.
36 // - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never
37 //   as UTF16 strings.
38 // - Arbitrary byte arrays, in the inspector protocol called 'binary',
39 //   are encoded as BYTE_STRING (major type 2), prefixed with a byte
40 //   indicating base64 when rendered as JSON.
41 
42 // =============================================================================
43 // Detecting CBOR content
44 // =============================================================================
45 
46 // InitialByteForEnvelope() returns the first byte of an envelope (used for
47 // wrapping maps and arrays); InitialByteFor32BitLengthByteString() returns
48 // the byte that indicates a byte string with 32 bit length. In this order,
49 // they start every envelope and thus every CBOR message; see |EnvelopeEncoder|.
50 uint8_t InitialByteForEnvelope();
51 uint8_t InitialByteFor32BitLengthByteString();
52 
53 // Checks whether |msg| is a CBOR message (which requires the enclosing envelope).
54 bool IsCBORMessage(span<uint8_t> msg);
55 
56 // Performs a lightweight check of |msg|.
57 // Disallows:
58 // - Empty message
59 // - Not starting with the two bytes 0xd8, 0x5a (the envelope start)
60 // - Empty envelope (all length bytes are 0)
61 // - Not starting with a map after the envelope stanza
62 // DevTools messages should pass this check.
63 Status CheckCBORMessage(span<uint8_t> msg);
64 
65 // =============================================================================
66 // Encoding individual CBOR items
67 // =============================================================================
68 
69 // Constants (exposed as functions) for CBOR tokens that take only one byte on the wire.
70 uint8_t EncodeTrue();
71 uint8_t EncodeFalse();
72 uint8_t EncodeNull();
73 uint8_t EncodeIndefiniteLengthArrayStart();
74 uint8_t EncodeIndefiniteLengthMapStart();
75 uint8_t EncodeStop();
76 
77 // Encodes |value| into |out|: as UNSIGNED (major type 0) iff |value| >= 0,
78 // or as NEGATIVE (major type 1) iff |value| < 0.
79 void EncodeInt32(int32_t value, std::vector<uint8_t>* out);
80 
81 // Encodes a UTF16 string as a BYTE_STRING (major type 2), appending to |out|.
82 // Each utf16 character in |in| is emitted with most significant byte first.
83 // NOTE(review): GetString16WireRep() documents low byte first; please confirm.
84 void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out);
85 
86 // Encodes a UTF8 string |in| as STRING (major type 3), i.e. as STRING8.
87 void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out);
88 
89 // Encodes the given |latin1| string as STRING8 (UTF8, major type 3).
90 // If any non-ASCII character is present, it will be represented
91 // as a 2 byte UTF8 sequence.
92 void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out);
93 
94 // Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII.
95 // Otherwise, encodes as STRING16 (UTF16, carried as a BYTE_STRING).
96 void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out);
97 
98 // Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with
99 // definite length, prefixed with tag 22 indicating expected conversion to
100 // base64 (see RFC 7049, Table 3 and Section 2.4.4.2).
101 void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out);
102 
103 // Encodes a double as major type 7 (SIMPLE_VALUE),
104 // with additional info = 27, followed by 8 bytes in big endian.
105 void EncodeDouble(double value, std::vector<uint8_t>* out);
106 
107 // =============================================================================
108 // cbor::EnvelopeEncoder - for wrapping submessages
109 // =============================================================================
110 
111 // An envelope indicates the byte length of a wrapped item.
112 // We use this for maps and arrays, which allows the decoder
113 // to skip such (nested) values wholesale.
114 // It's implemented as a CBOR tag (major type 6) with additional
115 // info = 24, followed by a byte string with a 32 bit length value;
116 // so the maximal structure that we can wrap is 2^32 bytes long.
117 // See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1
118 class EnvelopeEncoder {
119  public:
120   // Emits the envelope start bytes and records the position for the
121   // byte size in |byte_size_pos_|. Also emits empty bytes for the
122   // byte size so that encoding can continue.
123   void EncodeStart(std::vector<uint8_t>* out);
124   // This records the current size in |out| at position byte_size_pos_.
125   // Returns true iff successful.
126   bool EncodeStop(std::vector<uint8_t>* out);
127 
128  private:
129   size_t byte_size_pos_ = 0;  // Position of the byte size, recorded by EncodeStart().
130 };
131 
132 // =============================================================================
133 // cbor::NewCBOREncoder - for encoding from a streaming parser
134 // =============================================================================
135 
136 // Returns a handler that converts to CBOR; pass it to a parser that drives
137 // it. The handler will encode into |out|, and iff an error occurs it will
138 // set |*status| to an error and clear |out|. Otherwise, |status->ok()|
139 // will be |true|.
140 std::unique_ptr<ParserHandler> NewCBOREncoder(std::vector<uint8_t>* out,
141                                               Status* status);
142 
143 // =============================================================================
144 // cbor::CBORTokenizer - for parsing individual CBOR items
145 // =============================================================================
146 
147 // Tags for the tokens within a CBOR message that CBORTokenizer understands.
148 // Note that this is not the same terminology as the CBOR spec (RFC 7049),
149 // but rather, our adaptation. For instance, we lump the unsigned and negative
150 // major types into INT32 here (and disallow values outside the int32_t range).
151 enum class CBORTokenTag {
152   // Encountered an error in the structure of the message. Consult
153   // CBORTokenizer::Status() for details.
154   ERROR_VALUE,
155   // Booleans and NULL.
156   TRUE_VALUE,
157   FALSE_VALUE,
158   NULL_VALUE,
159   // An int32_t (signed 32 bit integer).
160   INT32,
161   // A double (64 bit floating point).
162   DOUBLE,
163   // A UTF8 string.
164   STRING8,
165   // A UTF16 string.
166   STRING16,
167   // A binary string.
168   BINARY,
169   // Starts an indefinite length map; after the map start we expect
170   // alternating keys and values, followed by STOP.
171   MAP_START,
172   // Starts an indefinite length array; after the array start we
173   // expect values, followed by STOP.
174   ARRAY_START,
175   // Ends a map or an array.
176   STOP,
177   // An envelope indicator, wrapping a map or array.
178   // Internally this carries the byte length of the wrapped
179   // map or array. While CBORTokenizer::Next() will read / skip the entire
180   // envelope, CBORTokenizer::EnterEnvelope() reads the tokens
181   // inside of it.
182   ENVELOPE,
183   // We've reached the end; there is nothing else to read.
184   DONE,
185 };
186 
187 // The major types from RFC 7049 Section 2.1, with their numeric values.
188 enum class MajorType {
189   UNSIGNED = 0,      // Unsigned integer.
190   NEGATIVE = 1,      // Negative integer.
191   BYTE_STRING = 2,   // Byte string; here also used for UTF16 strings and binary.
192   STRING = 3,        // Text (UTF8) string.
193   ARRAY = 4,         // Array of data items.
194   MAP = 5,           // Map of pairs of data items.
195   TAG = 6,           // Semantic tag; here used for envelopes and binary.
196   SIMPLE_VALUE = 7   // Simple values and floating point; here used for doubles.
197 };
198 
199 // CBORTokenizer segments a CBOR message, presenting the tokens therein as
200 // numbers, strings, etc. This is not a complete CBOR parser, but makes it much
201 // easier to implement one (e.g. ParseCBOR, below). It can also be used to parse
202 // messages partially.
203 class CBORTokenizer {
204  public:
205   explicit CBORTokenizer(span<uint8_t> bytes);
206   ~CBORTokenizer();
207 
208   // Identifies the current token that we're looking at,
209   // or ERROR_VALUE (in which case ::Status() has details)
210   // or DONE (if we're past the last token).
211   CBORTokenTag TokenTag() const;
212 
213   // Advances to the next token.
214   void Next();
215   // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE.
216   // While Next() would skip past the entire envelope / what it's
217   // wrapping, EnterEnvelope positions the cursor inside of the envelope,
218   // letting the client explore the nested structure.
219   void EnterEnvelope();
220 
221   // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes
222   // the error more precisely; otherwise it'll be set to Error::OK.
223   // In either case, Status().pos is the current position.
224   struct Status Status() const;
225 
226   // The following methods retrieve the token values. They can only
227   // be called if TokenTag() matches.
228 
229   // To be called only if ::TokenTag() == CBORTokenTag::INT32.
230   int32_t GetInt32() const;
231 
232   // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE.
233   double GetDouble() const;
234 
235   // To be called only if ::TokenTag() == CBORTokenTag::STRING8.
236   span<uint8_t> GetString8() const;
237 
238   // Wire representation for STRING16 is low byte first (little endian).
239   // To be called only if ::TokenTag() == CBORTokenTag::STRING16.
240   span<uint8_t> GetString16WireRep() const;
241 
242   // To be called only if ::TokenTag() == CBORTokenTag::BINARY.
243   span<uint8_t> GetBinary() const;
244 
245   // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE.
246   // Returns the envelope including its payload, i.e. a message that
247   // can be passed to the CBORTokenizer constructor, which will
248   // then see the envelope token first (looking at it a second time,
249   // basically).
250   span<uint8_t> GetEnvelope() const;
251 
252   // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE.
253   // Returns only the payload inside the envelope, e.g., a map
254   // or an array. This is not a complete message by our
255   // IsCBORMessage definition, since it doesn't include the
256   // enclosing envelope (the header, basically).
257   span<uint8_t> GetEnvelopeContents() const;
258 
259  private:
260   void ReadNextToken(bool enter_envelope);
261   void SetToken(CBORTokenTag token, size_t token_byte_length);
262   void SetError(Error error);
263 
264   span<uint8_t> bytes_;
265   CBORTokenTag token_tag_;
266   struct Status status_;
267   size_t token_byte_length_;
268   MajorType token_start_type_;
269   uint64_t token_start_internal_value_;
270 };
271 
272 // =============================================================================
273 // cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
274 // =============================================================================
275 
276 // Parses a CBOR encoded message from |bytes|, sending events to
277 // |out|. If an error occurs, calls |out->HandleError|, and parsing stops.
278 // The client is responsible for discarding the information already received
279 // in that case.
280 void ParseCBOR(span<uint8_t> bytes, ParserHandler* out);
281 
282 // =============================================================================
283 // cbor::AppendString8EntryToCBORMap - for limited in-place editing of messages
284 // =============================================================================
285 
286 // Modifies the |cbor| message by appending a new |string8_key| / |string8_value|
287 // entry at the end of the map. Patches up the envelope size; the returned
288 // Status is ok() iff successful. If not, |cbor| may be corrupted after this call.
289 Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
290                                    span<uint8_t> string8_value,
291                                    std::vector<uint8_t>* cbor);
292 
293 namespace internals {  // Exposed only for writing tests.
294 size_t ReadTokenStart(span<uint8_t> bytes,
295                       cbor::MajorType* type,
296                       uint64_t* value);  // NOTE(review): undocumented; presumably returns the bytes read - confirm.
297 
298 void WriteTokenStart(cbor::MajorType type,
299                      uint64_t value,
300                      std::vector<uint8_t>* encoded);  // NOTE(review): undocumented; presumably appends to |encoded| - confirm.
301 }  // namespace internals
302 }  // namespace cbor
303 }  // namespace v8_crdtp
304 
305 #endif  // V8_CRDTP_CBOR_H_
306