/*
 * libwebsockets - small server side websockets and web server implementation
 *
 * Copyright (C) 2010 - 2021 Andy Green <andy@warmcat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** \defgroup lecp CBOR parser
 * ##CBOR parsing related functions
 * \ingroup lwsapi
 *
 * LECP is an extremely lightweight CBOR stream parser included in lws.  It
 * is aligned in approach with the LEJP JSON stream parser, with some additional
 * things needed for CBOR.
 */
//@{

/*
 * Compile-time sizing of the parser context.  Each limit may be overridden by
 * defining it before this header is seen; the values below are the defaults.
 * They size the fixed arrays inside struct lecp_ctx.
 */

#ifndef LECP_MAX_PARSING_STACK_DEPTH
#define LECP_MAX_PARSING_STACK_DEPTH	5
#endif
#ifndef LECP_MAX_DEPTH
#define LECP_MAX_DEPTH			12
#endif
#ifndef LECP_MAX_INDEX_DEPTH
#define LECP_MAX_INDEX_DEPTH		8
#endif
#ifndef LECP_MAX_PATH
#define LECP_MAX_PATH			128
#endif
#ifndef LECP_STRING_CHUNK
/* must be >= 30 to assemble floats */
#define LECP_STRING_CHUNK		254
#endif

/*
 * ORed into those enum lecp_callbacks reasons below that are flagged as
 * delivering a value
 */
#define LECP_FLAG_CB_IS_VALUE		64

/*
 * CBOR initial byte 3 x MSB bits are these
 */

enum {
	LWS_CBOR_MAJTYP_UINT		= 0 << 5,
	LWS_CBOR_MAJTYP_INT_NEG		= 1 << 5,
	LWS_CBOR_MAJTYP_BSTR		= 2 << 5,
	LWS_CBOR_MAJTYP_TSTR		= 3 << 5,
	LWS_CBOR_MAJTYP_ARRAY		= 4 << 5,
	LWS_CBOR_MAJTYP_MAP		= 5 << 5,
	LWS_CBOR_MAJTYP_TAG		= 6 << 5,
	LWS_CBOR_MAJTYP_FLOAT		= 7 << 5,  /* also BREAK */

	LWS_CBOR_MAJTYP_MASK		= 7 << 5,

	/*
	 * For the low 5 bits of the opcode, 0-23 are literals, unless it's
	 * FLOAT.
	 *
	 * 24 = 1 byte; 25 = 2..., 26 = 4... and 27 = 8 bytes following literal.
	 */
	LWS_CBOR_1			= 24,
	LWS_CBOR_2			= 25,
	LWS_CBOR_4			= 26,
	LWS_CBOR_8			= 27,

	LWS_CBOR_RESERVED		= 28,

	LWS_CBOR_SUBMASK		= 0x1f,

	/*
	 * Major type 7 discriminators in low 5 bits
	 * 0 - 23 is SIMPLE implicit value (like, eg, LWS_CBOR_SWK_TRUE)
	 */
	LWS_CBOR_SWK_FALSE		= 20,
	LWS_CBOR_SWK_TRUE		= 21,
	LWS_CBOR_SWK_NULL		= 22,
	LWS_CBOR_SWK_UNDEFINED		= 23,

	LWS_CBOR_M7_SUBTYP_SIMPLE_X8	= 24, /* simple with additional byte */
	LWS_CBOR_M7_SUBTYP_FLOAT16	= 25,
	LWS_CBOR_M7_SUBTYP_FLOAT32	= 26,
	LWS_CBOR_M7_SUBTYP_FLOAT64	= 27,
	LWS_CBOR_M7_BREAK		= 31,

	/* 28, 29, 30 are illegal.
	 *
	 * 31 is illegal for UINT, INT_NEG, and TAG;
	 *               for BSTR, TSTR, ARRAY and MAP it means "indefinite
	 *               length", ie, it's made up of an endless amount of
	 *               determinate-length fragments terminated with a BREAK
	 *               (FLOAT | 31) instead of the next determinate-length
	 *               fragment.  The second framing level means no need for
	 *               escapes for BREAK in the data.
	 */

	LWS_CBOR_INDETERMINITE		= 31,

	/*
	 * Well-known tags
	 */

	LWS_CBOR_WKTAG_DATETIME_STD	= 0, /* text */
	LWS_CBOR_WKTAG_DATETIME_EPOCH	= 1, /* int or float */
	LWS_CBOR_WKTAG_BIGNUM_UNSIGNED	= 2, /* byte string */
	LWS_CBOR_WKTAG_BIGNUM_NEGATIVE	= 3, /* byte string */
	LWS_CBOR_WKTAG_DECIMAL_FRAC	= 4, /* array */
	LWS_CBOR_WKTAG_BIGFLOAT		= 5, /* array */

	LWS_CBOR_WKTAG_COSE_ENC0	= 16,
	LWS_CBOR_WKTAG_COSE_MAC0	= 17,
	LWS_CBOR_WKTAG_COSE_SIGN1	= 18,

	LWS_CBOR_WKTAG_TO_B64U		= 21, /* any */
	LWS_CBOR_WKTAG_TO_B64		= 22, /* any */
	LWS_CBOR_WKTAG_TO_B16		= 23, /* any */
	LWS_CBOR_WKTAG_CBOR		= 24, /* byte string */

	LWS_CBOR_WKTAG_URI		= 32, /* text string */
	LWS_CBOR_WKTAG_B64U		= 33, /* text string */
	LWS_CBOR_WKTAG_B64		= 34, /* text string */
	LWS_CBOR_WKTAG_MIME		= 36, /* text string */

	LWS_CBOR_WKTAG_COSE_ENC		= 96,
	LWS_CBOR_WKTAG_COSE_MAC		= 97,
	LWS_CBOR_WKTAG_COSE_SIGN	= 98,

	LWS_CBOR_WKTAG_SELFDESCCBOR	= 55799
};

/*
 * Reasons passed to the parser callback (see lecp_callback).  Reasons that
 * carry a value have LECP_FLAG_CB_IS_VALUE ORed in.  Note the numbering skips
 * 2, and that the _START reasons for strings and blobs deliberately do not
 * carry the value flag.
 */
enum lecp_callbacks {
	LECPCB_CONSTRUCTED		= 0,
	LECPCB_DESTRUCTED		= 1,

	LECPCB_COMPLETE			= 3,
	LECPCB_FAILED			= 4,

	LECPCB_PAIR_NAME		= 5,

	LECPCB_VAL_TRUE			= LECP_FLAG_CB_IS_VALUE | 6,
	LECPCB_VAL_FALSE		= LECP_FLAG_CB_IS_VALUE | 7,
	LECPCB_VAL_NULL			= LECP_FLAG_CB_IS_VALUE | 8,
	LECPCB_VAL_NUM_INT		= LECP_FLAG_CB_IS_VALUE | 9,
	LECPCB_VAL_RESERVED		= LECP_FLAG_CB_IS_VALUE | 10,
	LECPCB_VAL_STR_START		= 11, /* notice handle separately */
	LECPCB_VAL_STR_CHUNK		= LECP_FLAG_CB_IS_VALUE | 12,
	LECPCB_VAL_STR_END		= LECP_FLAG_CB_IS_VALUE | 13,

	LECPCB_ARRAY_START		= 14,
	LECPCB_ARRAY_END		= 15,

	LECPCB_OBJECT_START		= 16,
	LECPCB_OBJECT_END		= 17,

	LECPCB_TAG_START		= 18,
	LECPCB_TAG_END			= 19,

	LECPCB_VAL_NUM_UINT		= LECP_FLAG_CB_IS_VALUE | 20,
	LECPCB_VAL_UNDEFINED		= LECP_FLAG_CB_IS_VALUE | 21,
	LECPCB_VAL_FLOAT16		= LECP_FLAG_CB_IS_VALUE | 22,
	LECPCB_VAL_FLOAT32		= LECP_FLAG_CB_IS_VALUE | 23,
	LECPCB_VAL_FLOAT64		= LECP_FLAG_CB_IS_VALUE | 24,

	LECPCB_VAL_SIMPLE		= LECP_FLAG_CB_IS_VALUE | 25,

	LECPCB_VAL_BLOB_START		= 26, /* notice handle separately */
	LECPCB_VAL_BLOB_CHUNK		= LECP_FLAG_CB_IS_VALUE | 27,
	LECPCB_VAL_BLOB_END		= LECP_FLAG_CB_IS_VALUE | 28,

	LECPCB_ARRAY_ITEM_START		= 29,
	LECPCB_ARRAY_ITEM_END		= 30,

	LECPCB_LITERAL_CBOR		= 31,
};

/*
 * Negative result codes; lecp_parse() documents LECP_CONTINUE as "more input
 * needed" and the others as fatal errors.
 */
enum lecp_reasons {
	LECP_CONTINUE			= -1,
	LECP_REJECT_BAD_CODING		= -2,
	LECP_REJECT_UNKNOWN		= -3,
	LECP_REJECT_CALLBACK		= -4,
	LECP_STACK_OVERFLOW		= -5,
};


/*
 * One CBOR item: a union of the possible numeric representations, plus the
 * opcode byte.  Without LWS_WITH_CBOR_FLOAT, f and d are unsigned integer
 * members instead of float / double.
 */
struct lecp_item {
	union {
		uint64_t	u64;
		int64_t		i64;

		/* NOTE(review): named u32 but declared uint64_t -- confirm
		 * this is intended before relying on its width */
		uint64_t	u32;

		uint16_t	hf;
#if defined(LWS_WITH_CBOR_FLOAT)
		float		f;
		double		d;
#else
		uint32_t	f;
		uint64_t	d;
#endif
	} u;
	uint8_t			opcode;
};

struct lecp_ctx;

/*
 * Parser event callback type: receives the parsing context and a reason.
 * NOTE(review): reason is presumably one of enum lecp_callbacks -- confirm
 * against the implementation.
 */
typedef signed char (*lecp_callback)(struct lecp_ctx *ctx, char reason);

/* one entry of lecp_ctx.st[] */
struct _lecp_stack {
	char			s; /* lejp_state stack
				    * NOTE(review): comment mentions lejp,
				    * possibly inherited from the JSON
				    * parser -- confirm */
	uint8_t			p; /* path length */
	char			i; /* index array length */
	char			indet; /* indeterminate */
	char			intermediate; /* in middle of string */

	char			pop_iss;
	uint64_t		tag;
	uint64_t		collect_rem;
	uint32_t		ordinal;
	uint8_t			opcode;
	uint8_t			send_new_array_item;
	uint8_t			barrier;
};

/* one entry of lecp_ctx.pst[] */
struct _lecp_parsing_stack {
	void			*user;	/* private to the stack level */
	lecp_callback		cb;
	const char * const	*paths;
	uint8_t			count_paths;
	uint8_t			ppos;
	uint8_t			path_match;
};

/* the LECP parser context, prepared by lecp_construct() */
struct lecp_ctx {

	/* sorted by type for most compact alignment
	 *
	 * pointers
	 */
	void *user;
	uint8_t			*collect_tgt;

	/* arrays */

	struct _lecp_parsing_stack pst[LECP_MAX_PARSING_STACK_DEPTH];
	struct _lecp_stack	st[LECP_MAX_DEPTH];
	uint16_t		i[LECP_MAX_INDEX_DEPTH]; /* index array */
	uint16_t		wild[LECP_MAX_INDEX_DEPTH]; /* index array
					* NOTE(review): same comment as i[];
					* presumably wildcard-related, see
					* wildcount -- confirm */
	char			path[LECP_MAX_PATH];
	uint8_t			cbor[64]; /* literal cbor capture */

	struct lecp_item	item;


	/* size_t */

	size_t			path_stride; /* 0 means default ptr size, else
					      * stride... allows paths to be
					      * provided composed inside a
					      * larger user struct instead of a
					      * duplicated array */
	size_t			used_in;     /* bytes of input consumed */

	/* short */

	uint16_t		uni;

	/* char */

	uint8_t			npos;
	uint8_t			dcount;
	uint8_t			f;
	uint8_t			sp; /* stack head */
	uint8_t			ipos; /* index stack depth */
	uint8_t			count_paths;
	uint8_t			path_match;
	uint8_t			path_match_len;
	uint8_t			wildcount;
	uint8_t			pst_sp; /* parsing stack head */
	uint8_t			outer_array;
	uint8_t			cbor_pos; /* bytes of raw cbor held in cbor[],
					   * see lecp_parse_report_raw() */
	uint8_t			literal_cbor_report;
	char			present; /* temp for cb reason to use */

	uint8_t			be; /* big endian */

	/* at end so we can memset the rest of it */

	char buf[LECP_STRING_CHUNK + 1];
};

/* result of lws_lec_vsprintf() / lws_lec_printf() */
enum lws_lec_pctx_ret {
	LWS_LECPCTX_RET_FINISHED		= 0,
	LWS_LECPCTX_RET_AGAIN, /* call again to continue writing buffer */
	LWS_LECPCTX_RET_FAIL /* something broken, eg, format string */
};

/* values for lws_lec_pctx_t.state */
enum cbp_state {
	CBPS_IDLE,
	CBPS_PC1,
	CBPS_PC2,
	CBPS_PC3,

	CBPS_STRING_BODY,

	CBPS_NUM_LIT,

	CBPS_STRING_LIT,

	CBPS_CONTYPE,
};

/* the CBOR writing context used by the lws_lec_* apis */
typedef struct lws_lec_pctx {
	uint8_t			stack[16];
	uint8_t			vaa[16];
	uint8_t			indet[16];
	uint8_t			scratch[24];
	uint8_t			*start;	   /* the beginning of the out buf */
	uint8_t			*buf;	   /* cur pos in output buf */
	uint8_t			*end;	   /* the end of the output buf */

	const uint8_t		*ongoing_src;
	uint64_t		ongoing_len;
	uint64_t		ongoing_done;

	struct lecp_item	item;

	size_t			used;	   /* number of bytes valid from start */

	int			opaque[4]; /* ignored by lws, caller may use */

	enum cbp_state		state;
	unsigned int		fmt_pos;
	uint8_t			sp;
	uint8_t			scratch_len;
	uint8_t			escflag;
	uint8_t			_long;
	uint8_t			vaa_pos;
	uint8_t			dotstar;
} lws_lec_pctx_t;

/*
 * NOTE(review): undocumented in this header.  From the signature, takes a
 * writing context, an opcode, an indeterminate flag and a 64-bit number --
 * presumably emits that number under the given CBOR major type; confirm
 * against the implementation.
 */
LWS_VISIBLE LWS_EXTERN void
lws_lec_int(lws_lec_pctx_t *ctx, uint8_t opcode, uint8_t indet, uint64_t num);

/*
 * NOTE(review): undocumented in this header; presumably related to
 * ctx->scratch[] / ctx->scratch_len -- confirm meaning of the return value.
 */
LWS_VISIBLE LWS_EXTERN int
lws_lec_scratch(lws_lec_pctx_t *ctx);

/**
 * lws_lec_init() - prepare a cbor writing context
 *
 * \param ctx: the cbor writing context to prepare
 * \param buf: the output buffer start
 * \param len: the amount of the output buffer we can use
 *
 * Prepares a cbor writing context so that lws_lec_printf can be used to
 * write into it.
 */
LWS_VISIBLE LWS_EXTERN void
lws_lec_init(lws_lec_pctx_t *ctx, uint8_t *buf, size_t len);

/**
 * lws_lec_setbuf() - update the output buffer for an initialized cbor writing ctx
 *
 * \param ctx: the cbor writing context to prepare
 * \param buf: the output buffer start
 * \param len: the amount of the output buffer we can use
 *
 * Leaves the cbor writing context state as it is, but resets the output buffer
 * it writes into as given in \p buf and \p len
 */
LWS_VISIBLE LWS_EXTERN void
lws_lec_setbuf(lws_lec_pctx_t *ctx, uint8_t *buf, size_t len);

/**
 * lws_lec_vsprintf() - write into a cbor writing context
 *
 * \param ctx: the cbor writing context to prepare
 * \param format: a printf style argument map
 * \param args: the va args
 *
 * CBOR-aware vsprintf which pauses output when it fills the output buffer.  You
 * can call it again with the same args and same lws_lec_pctx_t to resume filling
 *
 * Returns either LWS_LECPCTX_RET_FINISHED if we have nothing left over that we
 * want to put in the buffer, or LWS_LECPCTX_RET_AGAIN if the function should
 * be called again with the same arguments (perhaps into a different output
 * buffer) to continue emitting output from where it left off.
 *
 * If LWS_LECPCTX_RET_AGAIN is returned, lws_lec_setbuf() must be used on the
 * context to reset or change the output buffer before calling again.
 *
 * The number of bytes placed in the output buffer is available in ctx->used.
 *
 * \p format is a printf-type format string that is specialized for CBOR
 * generation.  It understands the following specifiers
 *
 * |`123`||unsigned literal number|
 * |`-123`||signed literal number|
 * |`%u`|`unsigned int`|number|
 * |`%lu`|`unsigned long int`|number|
 * |`%llu`|`unsigned long long int`|number|
 * |`%d`|`signed int`|number|
 * |`%ld`|`signed long int`|number|
 * |`%lld`|`signed long long int`|number|
 * |`%f`|`double`|floating point number|
 * |`123(...)`||literal tag and scope|
 * |`%t(...)`|`unsigned int`|tag and scope|
 * |`%lt(...)`|`unsigned long int`|tag and scope|
 * |`%llt(...)`|`unsigned long long int`|tag and scope|
 * |`[...]`||Array (fixed len if `]` in same format string)|
 * |`{...}`||Map (fixed len if `}` in same format string)|
 * |`<t...>`||Container for indeterminate text string frags|
 * |`<b...>`||Container for indeterminate binary string frags|
 * |`'string'`||Literal text of known length|
 * |`%s`|`const char *`|NUL-terminated string|
 * |`%.*s`|`int`, `const char *`|length-specified string|
 * |`%.*b`|`int`, `const uint8_t *`|length-specified binary|
 * |`:`||separator between Map items (a:b)|
 * |`,`||separator between Map pairs or array items|
 *
 * See READMEs/README.cbor-lecp.md for more details.
 */
LWS_VISIBLE LWS_EXTERN enum lws_lec_pctx_ret
lws_lec_vsprintf(lws_lec_pctx_t *ctx, const char *format, va_list args);

/**
 * lws_lec_printf() - write into a cbor writing context
 *
 * \param ctx: the cbor writing context to prepare
 * \param format: a printf style argument map
 * \param ...: format args
 *
 * See lws_lec_vsprintf() for format details.  This is the most common way
 * to format the CBOR output.
 *
 * See READMEs/README.cbor-lecp.md for more details.
 */
LWS_VISIBLE LWS_EXTERN enum lws_lec_pctx_ret
lws_lec_printf(lws_lec_pctx_t *ctx, const char *format, ...);

/**
 * lecp_construct() - Construct an LECP parser context
 *
 * \param ctx: the parser context object to be initialized
 * \param cb: the user callback to receive the parsing events
 * \param user: an opaque user pointer available at \p cb
 * \param paths: an optional array of parsing paths
 * \param paths_count: how many paths in \p paths
 *
 * Prepares an LECP parser context for parsing.
 */
LWS_VISIBLE LWS_EXTERN void
lecp_construct(struct lecp_ctx *ctx, lecp_callback cb, void *user,
	       const char * const *paths, unsigned char paths_count);

/**
 * lecp_destruct() - Destroys an LECP parser context
 *
 * \param ctx: the parser context object to be destroyed
 */
LWS_VISIBLE LWS_EXTERN void
lecp_destruct(struct lecp_ctx *ctx);

/**
 * lecp_parse() - parses a chunk of input CBOR
 *
 * \param ctx: the parsing context
 * \param cbor: the start of the chunk of CBOR
 * \param len: the number of bytes of CBOR available at \p cbor
 *
 * Returns LECP_CONTINUE if more input needed, one of enum lecp_reasons for a
 * fatal error, else 0 for successful parsing completion.
 *
 * On success or _CONTINUE, ctx->used_in is set to the number of input bytes
 * consumed.
 */
LWS_VISIBLE LWS_EXTERN int
lecp_parse(struct lecp_ctx *ctx, const uint8_t *cbor, size_t len);

/*
 * NOTE(review): undocumented in this header; by its name and signature,
 * presumably replaces the callback used by \p ctx -- confirm.
 */
LWS_VISIBLE LWS_EXTERN void
lecp_change_callback(struct lecp_ctx *ctx, lecp_callback cb);

/*
 * NOTE(review): undocumented in this header; presumably maps an
 * enum lecp_reasons value to a human-readable string -- confirm accepted
 * range of \p e.
 */
LWS_VISIBLE LWS_EXTERN const char *
lecp_error_to_string(int e);

/**
 * lecp_parse_report_raw() - turn cbor raw reporting on and off
 *
 * \param ctx: the lecp context
 * \param on: 0 to disable (defaults disabled), 1 to enable
 *
 * For cose_sign, it needs access to raw cbor subtrees for the hash input.
 * This api causes LECPCB_LITERAL_CBOR parse callbacks when there are
 * ctx->cbor_pos bytes of raw cbor available in ctx->cbor[].  The callbacks
 * occur when the ctx->cbor[] buffer fills or if it holds anything when this
 * api is used to stop the reports.
 *
 * The same CBOR that is being captured continues to be passed for parsing.
 */
LWS_VISIBLE LWS_EXTERN void
lecp_parse_report_raw(struct lecp_ctx *ctx, int on);

/**
 * lecp_parse_map_is_key() - return nonzero if we're in a map and this is a key
 *
 * \param ctx: the lecp context
 *
 * Checks if the current value is a key in a map, ie, that you are on a "key" in
 * a list of "{key: value}" pairs.  Zero means you're either not in a map or not
 * on the key part, and nonzero means you are in a map and on a key part.
 */
LWS_VISIBLE LWS_EXTERN int
lecp_parse_map_is_key(struct lecp_ctx *ctx);

/*
 * NOTE(review): undocumented in this header; takes the same argument shape as
 * lecp_parse() -- confirm how its behavior and return value differ.
 */
LWS_VISIBLE LWS_EXTERN int
lecp_parse_subtree(struct lecp_ctx *ctx, const uint8_t *in, size_t len);

/*
 * Helpers for half-float
 *
 * NOTE(review): by name and signature, lws_singles2halfp() presumably converts
 * a 32-bit single-precision bit pattern \p x into a 16-bit half at \p hp, and
 * lws_halfp2singles() the reverse -- confirm against the implementation.
 */

LWS_VISIBLE LWS_EXTERN void
lws_singles2halfp(uint16_t *hp, uint32_t x);

LWS_VISIBLE LWS_EXTERN void
lws_halfp2singles(uint32_t *xp, uint16_t h);

//@}