// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
6 #define V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
7 
8 #include <cstddef>
9 #include <cstdint>
10 #include <cstring>
11 #include <limits>
12 #include <memory>
13 #include <string>
14 #include <vector>
15 
16 namespace v8_inspector_protocol_encoding {
17 
// =============================================================================
// span - sequence of bytes
// =============================================================================

// This template is similar to std::span, which will be included in C++20.
// It is a read-only view over |size| consecutive elements of type T starting
// at |data|; it stores only the pointer and the element count and never frees
// the memory. None of the accessors below perform bounds checks, so callers
// must keep indices, offsets and counts within [0, size()].
template <typename T>
class span {
 public:
  using index_type = size_t;

  // Creates an empty span: null data pointer and size 0.
  span() : data_(nullptr), size_(0) {}
  // Creates a span over the |size| elements starting at |data|.
  span(const T* data, index_type size) : data_(data), size_(size) {}

  // Pointer to the first element (nullptr for a default-constructed span).
  const T* data() const { return data_; }

  // Iteration support; end() points one past the last element.
  const T* begin() const { return data_; }
  const T* end() const { return data_ + size_; }

  // Unchecked element access.
  const T& operator[](index_type idx) const { return data_[idx]; }

  // Returns the |count| elements starting at |offset|. Unchecked: the caller
  // must ensure that |offset + count| does not exceed size().
  span<T> subspan(index_type offset, index_type count) const {
    return span(data_ + offset, count);
  }

  // Returns everything from |offset| to the end. Unchecked: if |offset|
  // exceeds size(), the unsigned subtraction wraps around.
  span<T> subspan(index_type offset) const {
    return span(data_ + offset, size_ - offset);
  }

  bool empty() const { return size_ == 0; }

  // Number of elements, and the number of bytes those elements occupy.
  index_type size() const { return size_; }
  index_type size_bytes() const { return size_ * sizeof(T); }

 private:
  const T* data_;
  index_type size_;
};
55 
56 template <typename T>
SpanFrom(const std::vector<T> & v)57 span<T> SpanFrom(const std::vector<T>& v) {
58   return span<T>(v.data(), v.size());
59 }
60 
61 template <size_t N>
SpanFrom(const char (& str)[N])62 span<uint8_t> SpanFrom(const char (&str)[N]) {
63   return span<uint8_t>(reinterpret_cast<const uint8_t*>(str), N - 1);
64 }
65 
SpanFrom(const char * str)66 inline span<uint8_t> SpanFrom(const char* str) {
67   return str ? span<uint8_t>(reinterpret_cast<const uint8_t*>(str), strlen(str))
68              : span<uint8_t>();
69 }
70 
SpanFrom(const std::string & v)71 inline span<uint8_t> SpanFrom(const std::string& v) {
72   return span<uint8_t>(reinterpret_cast<const uint8_t*>(v.data()), v.size());
73 }
74 
// =============================================================================
// Status and Error codes
// =============================================================================

// Error codes carried by Status. OK is 0; every other enumerator has an
// explicit, distinct value, with JSON parser errors numbered first and CBOR
// errors continuing directly after them.
enum class Error {
  OK = 0,
  // JSON parsing errors - json_parser.{h,cc}.
  JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01,
  JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02,
  JSON_PARSER_NO_INPUT = 0x03,
  JSON_PARSER_INVALID_TOKEN = 0x04,
  JSON_PARSER_INVALID_NUMBER = 0x05,
  JSON_PARSER_INVALID_STRING = 0x06,
  JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07,
  JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08,
  JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09,
  JSON_PARSER_COLON_EXPECTED = 0x0a,
  JSON_PARSER_UNEXPECTED_MAP_END = 0x0b,
  JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c,
  JSON_PARSER_VALUE_EXPECTED = 0x0d,

  // CBOR errors.
  CBOR_INVALID_INT32 = 0x0e,
  CBOR_INVALID_DOUBLE = 0x0f,
  CBOR_INVALID_ENVELOPE = 0x10,
  CBOR_INVALID_STRING8 = 0x11,
  CBOR_INVALID_STRING16 = 0x12,
  CBOR_INVALID_BINARY = 0x13,
  CBOR_UNSUPPORTED_VALUE = 0x14,
  CBOR_NO_INPUT = 0x15,
  CBOR_INVALID_START_BYTE = 0x16,
  CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17,
  CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18,
  CBOR_UNEXPECTED_EOF_IN_MAP = 0x19,
  CBOR_INVALID_MAP_KEY = 0x1a,
  CBOR_STACK_LIMIT_EXCEEDED = 0x1b,
  CBOR_TRAILING_JUNK = 0x1c,
  CBOR_MAP_START_EXPECTED = 0x1d,
  CBOR_MAP_STOP_EXPECTED = 0x1e,
  CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x1f,
};
114 
115 // A status value with position that can be copied. The default status
116 // is OK. Usually, error status values should come with a valid position.
117 struct Status {
nposStatus118   static constexpr size_t npos() { return std::numeric_limits<size_t>::max(); }
119 
okStatus120   bool ok() const { return error == Error::OK; }
121 
122   Error error = Error::OK;
123   size_t pos = npos();
StatusStatus124   Status(Error error, size_t pos) : error(error), pos(pos) {}
125   Status() = default;
126 
127   // Returns a 7 bit US-ASCII string, either "OK" or an error message
128   // that includes the position.
129   std::string ToASCIIString() const;
130 
131  private:
132   std::string ToASCIIString(const char* msg) const;
133 };
134 
135 // Handler interface for parser events emitted by a streaming parser.
136 // See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder,
137 // json::ParseJSON.
138 class StreamingParserHandler {
139  public:
140   virtual ~StreamingParserHandler() = default;
141   virtual void HandleMapBegin() = 0;
142   virtual void HandleMapEnd() = 0;
143   virtual void HandleArrayBegin() = 0;
144   virtual void HandleArrayEnd() = 0;
145   virtual void HandleString8(span<uint8_t> chars) = 0;
146   virtual void HandleString16(span<uint16_t> chars) = 0;
147   virtual void HandleBinary(span<uint8_t> bytes) = 0;
148   virtual void HandleDouble(double value) = 0;
149   virtual void HandleInt32(int32_t value) = 0;
150   virtual void HandleBool(bool value) = 0;
151   virtual void HandleNull() = 0;
152 
153   // The parser may send one error even after other events have already
154   // been received. Client code is reponsible to then discard the
155   // already processed events.
156   // |error| must be an eror, as in, |error.is_ok()| can't be true.
157   virtual void HandleError(Status error) = 0;
158 };
159 
160 namespace cbor {
// The binary encoding for the inspector protocol follows the CBOR specification
// (RFC 7049). Additional constraints:
// - Only indefinite length maps and arrays are supported.
// - Maps and arrays are wrapped with an envelope, that is, a
//   CBOR tag with value 24 followed by a byte string specifying
//   the byte length of the enclosed map / array. The byte string
//   must use a 32 bit wide length.
// - At the top level, a message must be an indefinite length map
//   wrapped by an envelope.
// - Maximal size for messages is 2^32 (4 GB).
// - For scalars, we support only the int32_t range, encoded as
//   UNSIGNED/NEGATIVE (major types 0 / 1).
// - UTF16 strings, including with unbalanced surrogate pairs, are encoded
//   as CBOR BYTE_STRING (major type 2). For such strings, the number of
//   bytes encoded must be even.
// - UTF8 strings (major type 3) are supported.
// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never
//   as UTF16 strings.
// - Arbitrary byte arrays, in the inspector protocol called 'binary',
//   are encoded as BYTE_STRING (major type 2), prefixed with a byte
//   indicating base64 when rendered as JSON.
182 
183 // =============================================================================
184 // Detecting CBOR content
185 // =============================================================================
186 
187 // The first byte for an envelope, which we use for wrapping dictionaries
188 // and arrays; and the byte that indicates a byte string with 32 bit length.
189 // These two bytes start an envelope, and thereby also any CBOR message
190 // produced or consumed by this protocol. See also |EnvelopeEncoder| below.
191 uint8_t InitialByteForEnvelope();
192 uint8_t InitialByteFor32BitLengthByteString();
193 
194 // Checks whether |msg| is a cbor message.
195 bool IsCBORMessage(span<uint8_t> msg);
196 
// =============================================================================
// Encoding individual CBOR items
// =============================================================================

// Some constants for CBOR tokens that only take a single byte on the wire.
// Each function returns that single encoded byte.
uint8_t EncodeTrue();
uint8_t EncodeFalse();
uint8_t EncodeNull();
uint8_t EncodeIndefiniteLengthArrayStart();
uint8_t EncodeIndefiniteLengthMapStart();
uint8_t EncodeStop();
208 
// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE|
// (major type 1) iff < 0. The two overloads differ only in the type of the
// output container |out|.
void EncodeInt32(int32_t value, std::vector<uint8_t>* out);
void EncodeInt32(int32_t value, std::string* out);
213 
214 // Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16
215 // character in |in| is emitted with most significant byte first,
216 // appending to |out|.
217 void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out);
218 void EncodeString16(span<uint16_t> in, std::string* out);
219 
220 // Encodes a UTF8 string |in| as STRING (major type 3).
221 void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out);
222 void EncodeString8(span<uint8_t> in, std::string* out);
223 
224 // Encodes the given |latin1| string as STRING8.
225 // If any non-ASCII character is present, it will be represented
226 // as a 2 byte UTF8 sequence.
227 void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out);
228 void EncodeFromLatin1(span<uint8_t> latin1, std::string* out);
229 
230 // Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII.
231 // Otherwise, encodes as STRING16.
232 void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out);
233 void EncodeFromUTF16(span<uint16_t> utf16, std::string* out);
234 
235 // Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with
236 // definitive length, prefixed with tag 22 indicating expected conversion to
237 // base64 (see RFC 7049, Table 3 and Section 2.4.4.2).
238 void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out);
239 void EncodeBinary(span<uint8_t> in, std::string* out);
240 
// Encodes a double as Major type 7 (SIMPLE_VALUE),
// with additional info = 27, followed by 8 bytes in big endian.
// The two overloads differ only in the type of the output container |out|.
void EncodeDouble(double value, std::vector<uint8_t>* out);
void EncodeDouble(double value, std::string* out);
245 
// =============================================================================
// cbor::EnvelopeEncoder - for wrapping submessages
// =============================================================================

// An envelope indicates the byte length of a wrapped item.
// We use this for maps and arrays, which allows the decoder
// to skip such (nested) values wholesale.
// It's implemented as a CBOR tag (major type 6) with additional
// info = 24, followed by a byte string with a 32 bit length value;
// so the maximal structure that we can wrap is 2^32 bytes long.
// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1
class EnvelopeEncoder {
 public:
  // Emits the envelope start bytes and records the position for the
  // byte size in |byte_size_pos_|. Also emits empty bytes for the
  // byte size so that encoding can continue.
  void EncodeStart(std::vector<uint8_t>* out);
  void EncodeStart(std::string* out);
  // This records the current size in |out| at position byte_size_pos_.
  // Returns true iff successful.
  bool EncodeStop(std::vector<uint8_t>* out);
  bool EncodeStop(std::string* out);

 private:
  // Offset within |out| of the size field to patch in EncodeStop; set by
  // EncodeStart.
  size_t byte_size_pos_ = 0;
};
272 
273 // =============================================================================
274 // cbor::NewCBOREncoder - for encoding from a streaming parser
275 // =============================================================================
276 
277 // This can be used to convert to CBOR, by passing the return value to a parser
278 // that drives it. The handler will encode into |out|, and iff an error occurs
279 // it will set |status| to an error and clear |out|. Otherwise, |status.ok()|
280 // will be |true|.
281 std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
282     std::vector<uint8_t>* out,
283     Status* status);
284 std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
285                                                        Status* status);
286 
// =============================================================================
// cbor::CBORTokenizer - for parsing individual CBOR items
// =============================================================================

// Tags for the tokens within a CBOR message that CBORTokenizer understands.
// Note that this is not the same terminology as the CBOR spec (RFC 7049),
// but rather, our adaptation. For instance, we lump unsigned and signed
// major type into INT32 here (and disallow values outside the int32_t range).
enum class CBORTokenTag {
  // Encountered an error in the structure of the message. Consult
  // status() for details.
  ERROR_VALUE,
  // Booleans and NULL.
  TRUE_VALUE,
  FALSE_VALUE,
  NULL_VALUE,
  // An int32_t (signed 32 bit integer).
  INT32,
  // A double (64 bit floating point).
  DOUBLE,
  // A UTF8 string.
  STRING8,
  // A UTF16 string.
  STRING16,
  // A binary string.
  BINARY,
  // Starts an indefinite length map; after the map start we expect
  // alternating keys and values, followed by STOP.
  MAP_START,
  // Starts an indefinite length array; after the array start we
  // expect values, followed by STOP.
  ARRAY_START,
  // Ends a map or an array.
  STOP,
  // An envelope indicator, wrapping a map or array.
  // Internally this carries the byte length of the wrapped
  // map or array. While CBORTokenizer::Next() will read / skip the entire
  // envelope, CBORTokenizer::EnterEnvelope() reads the tokens
  // inside of it.
  ENVELOPE,
  // We've reached the end; there is nothing else to read.
  DONE,
};
330 
// The major types from RFC 7049 Section 2.1. The enumerator values are the
// major type numbers 0 through 7.
enum class MajorType {
  UNSIGNED = 0,
  NEGATIVE = 1,
  BYTE_STRING = 2,
  STRING = 3,
  ARRAY = 4,
  MAP = 5,
  TAG = 6,
  SIMPLE_VALUE = 7
};
342 
343 // CBORTokenizer segments a CBOR message, presenting the tokens therein as
344 // numbers, strings, etc. This is not a complete CBOR parser, but makes it much
345 // easier to implement one (e.g. ParseCBOR, above). It can also be used to parse
346 // messages partially.
347 class CBORTokenizer {
348  public:
349   explicit CBORTokenizer(span<uint8_t> bytes);
350   ~CBORTokenizer();
351 
352   // Identifies the current token that we're looking at,
353   // or ERROR_VALUE (in which ase ::Status() has details)
354   // or DONE (if we're past the last token).
355   CBORTokenTag TokenTag() const;
356 
357   // Advances to the next token.
358   void Next();
359   // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE.
360   // While Next() would skip past the entire envelope / what it's
361   // wrapping, EnterEnvelope positions the cursor inside of the envelope,
362   // letting the client explore the nested structure.
363   void EnterEnvelope();
364 
365   // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes
366   // the error more precisely; otherwise it'll be set to Error::OK.
367   // In either case, Status().pos is the current position.
368   struct Status Status() const;
369 
370   // The following methods retrieve the token values. They can only
371   // be called if TokenTag() matches.
372 
373   // To be called only if ::TokenTag() == CBORTokenTag::INT32.
374   int32_t GetInt32() const;
375 
376   // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE.
377   double GetDouble() const;
378 
379   // To be called only if ::TokenTag() == CBORTokenTag::STRING8.
380   span<uint8_t> GetString8() const;
381 
382   // Wire representation for STRING16 is low byte first (little endian).
383   // To be called only if ::TokenTag() == CBORTokenTag::STRING16.
384   span<uint8_t> GetString16WireRep() const;
385 
386   // To be called only if ::TokenTag() == CBORTokenTag::BINARY.
387   span<uint8_t> GetBinary() const;
388 
389   // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE.
390   span<uint8_t> GetEnvelopeContents() const;
391 
392  private:
393   void ReadNextToken(bool enter_envelope);
394   void SetToken(CBORTokenTag token, size_t token_byte_length);
395   void SetError(Error error);
396 
397   span<uint8_t> bytes_;
398   CBORTokenTag token_tag_;
399   struct Status status_;
400   size_t token_byte_length_;
401   MajorType token_start_type_;
402   uint64_t token_start_internal_value_;
403 };
404 
405 // =============================================================================
406 // cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
407 // =============================================================================
408 
409 // Parses a CBOR encoded message from |bytes|, sending events to
410 // |out|. If an error occurs, sends |out->HandleError|, and parsing stops.
411 // The client is responsible for discarding the already received information in
412 // that case.
413 void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out);
414 
415 // =============================================================================
416 // cbor::AppendString8EntryToMap - for limited in-place editing of messages
417 // =============================================================================
418 
419 // Modifies the |cbor| message by appending a new key/value entry at the end
420 // of the map. Patches up the envelope size; Status.ok() iff successful.
421 // If not successful, |cbor| may be corrupted after this call.
422 Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
423                                    span<uint8_t> string8_value,
424                                    std::vector<uint8_t>* cbor);
425 Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
426                                    span<uint8_t> string8_value,
427                                    std::string* cbor);
428 
429 namespace internals {  // Exposed only for writing tests.
430 size_t ReadTokenStart(span<uint8_t> bytes,
431                       cbor::MajorType* type,
432                       uint64_t* value);
433 
434 void WriteTokenStart(cbor::MajorType type,
435                      uint64_t value,
436                      std::vector<uint8_t>* encoded);
437 void WriteTokenStart(cbor::MajorType type,
438                      uint64_t value,
439                      std::string* encoded);
440 }  // namespace internals
441 }  // namespace cbor
442 
443 namespace json {
// Client code must provide an instance. Implementation should delegate
// to whatever is appropriate.
class Platform {
 public:
  virtual ~Platform() = default;
  // Parses |str| into |result|. Returns false iff there are
  // leftover characters or parsing errors.
  virtual bool StrToD(const char* str, double* result) const = 0;

  // Prints |value| in a format suitable for JSON. The caller owns the
  // returned character buffer.
  virtual std::unique_ptr<char[]> DToStr(double value) const = 0;
};
456 
457 // =============================================================================
458 // json::NewJSONEncoder - for encoding streaming parser events as JSON
459 // =============================================================================
460 
461 // Returns a handler object which will write ascii characters to |out|.
462 // |status->ok()| will be false iff the handler routine HandleError() is called.
463 // In that case, we'll stop emitting output.
464 // Except for calling the HandleError routine at any time, the client
465 // code must call the Handle* methods in an order in which they'd occur
466 // in valid JSON; otherwise we may crash (the code uses assert).
467 std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
468     const Platform* platform,
469     std::vector<uint8_t>* out,
470     Status* status);
471 std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
472                                                        std::string* out,
473                                                        Status* status);
474 
475 // =============================================================================
476 // json::ParseJSON - for receiving streaming parser events for JSON
477 // =============================================================================
478 
479 void ParseJSON(const Platform& platform,
480                span<uint8_t> chars,
481                StreamingParserHandler* handler);
482 void ParseJSON(const Platform& platform,
483                span<uint16_t> chars,
484                StreamingParserHandler* handler);
485 
486 // =============================================================================
487 // json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
488 // =============================================================================
489 Status ConvertCBORToJSON(const Platform& platform,
490                          span<uint8_t> cbor,
491                          std::string* json);
492 Status ConvertCBORToJSON(const Platform& platform,
493                          span<uint8_t> cbor,
494                          std::vector<uint8_t>* json);
495 Status ConvertJSONToCBOR(const Platform& platform,
496                          span<uint8_t> json,
497                          std::vector<uint8_t>* cbor);
498 Status ConvertJSONToCBOR(const Platform& platform,
499                          span<uint16_t> json,
500                          std::vector<uint8_t>* cbor);
501 Status ConvertJSONToCBOR(const Platform& platform,
502                          span<uint8_t> json,
503                          std::string* cbor);
504 Status ConvertJSONToCBOR(const Platform& platform,
505                          span<uint16_t> json,
506                          std::string* cbor);
507 }  // namespace json
508 }  // namespace v8_inspector_protocol_encoding
509 
510 #endif  // V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
511