1/* 2** upb::json::Parser (upb_json_parser) 3** 4** A parser that uses the Ragel State Machine Compiler to generate 5** the finite automata. 6** 7** Ragel only natively handles regular languages, but we can manually 8** program it a bit to handle context-free languages like JSON, by using 9** the "fcall" and "fret" constructs. 10** 11** This parser can handle the basics, but needs several things to be fleshed 12** out: 13** 14** - handling of unicode escape sequences (including high surrogate pairs). 15** - properly check and report errors for unknown fields, stack overflow, 16** improper array nesting (or lack of nesting). 17** - handling of base64 sequences with padding characters. 18** - handling of push-back (non-success returns from sink functions). 19** - handling of keys/escape-sequences/etc that span input buffers. 20*/ 21 22#include <ctype.h> 23#include <errno.h> 24#include <float.h> 25#include <math.h> 26#include <stdint.h> 27#include <stdio.h> 28#include <stdlib.h> 29#include <string.h> 30 31#include <time.h> 32 33#include "upb/json/parser.h" 34#include "upb/pb/encoder.h" 35 36#include "upb/port_def.inc" 37 38#define UPB_JSON_MAX_DEPTH 64 39 40/* Type of value message */ 41enum { 42 VALUE_NULLVALUE = 0, 43 VALUE_NUMBERVALUE = 1, 44 VALUE_STRINGVALUE = 2, 45 VALUE_BOOLVALUE = 3, 46 VALUE_STRUCTVALUE = 4, 47 VALUE_LISTVALUE = 5 48}; 49 50/* Forward declare */ 51static bool is_top_level(upb_json_parser *p); 52static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type); 53static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type); 54 55static bool is_number_wrapper_object(upb_json_parser *p); 56static bool does_number_wrapper_start(upb_json_parser *p); 57static bool does_number_wrapper_end(upb_json_parser *p); 58 59static bool is_string_wrapper_object(upb_json_parser *p); 60static bool does_string_wrapper_start(upb_json_parser *p); 61static bool does_string_wrapper_end(upb_json_parser *p); 62 63static bool does_fieldmask_start(upb_json_parser *p); 64static bool does_fieldmask_end(upb_json_parser *p); 65static void start_fieldmask_object(upb_json_parser *p); 66static void end_fieldmask_object(upb_json_parser *p); 67 68static void start_wrapper_object(upb_json_parser *p); 69static void end_wrapper_object(upb_json_parser *p); 70 71static void start_value_object(upb_json_parser *p, int value_type); 72static void end_value_object(upb_json_parser *p); 73 74static void start_listvalue_object(upb_json_parser *p); 75static void end_listvalue_object(upb_json_parser *p); 76 77static void start_structvalue_object(upb_json_parser *p); 78static void end_structvalue_object(upb_json_parser *p); 79 80static void start_object(upb_json_parser *p); 81static void end_object(upb_json_parser *p); 82 83static void start_any_object(upb_json_parser *p, const char *ptr); 84static bool end_any_object(upb_json_parser *p, const char *ptr); 85 86static bool start_subobject(upb_json_parser *p); 87static void end_subobject(upb_json_parser *p); 88 89static void start_member(upb_json_parser *p); 90static void end_member(upb_json_parser *p); 91static bool end_membername(upb_json_parser *p); 92 93static void start_any_member(upb_json_parser *p, const char *ptr); 94static void end_any_member(upb_json_parser *p, const char *ptr); 95static bool end_any_membername(upb_json_parser *p); 96 97size_t parse(void *closure, const void *hd, const char *buf, size_t size, 98 const upb_bufhandle *handle); 99static bool end(void *closure, const void *hd); 100 101static const char eof_ch = 'e'; 102 103/* stringsink */ 104typedef struct { 105 upb_byteshandler handler; 106 upb_bytessink sink; 107 char *ptr; 108 size_t len, size; 109} upb_stringsink; 110 111 112static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { 113 upb_stringsink *sink = _sink; 114 sink->len = 0; 115 UPB_UNUSED(hd); 116 UPB_UNUSED(size_hint); 117 return sink; 118} 119 120static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, 121 size_t len, const upb_bufhandle *handle) { 122 upb_stringsink *sink = _sink; 123 size_t new_size = sink->size; 124 125 UPB_UNUSED(hd); 126 UPB_UNUSED(handle); 127 128 while (sink->len + len > new_size) { 129 new_size *= 2; 130 } 131 132 if (new_size != sink->size) { 133 sink->ptr = realloc(sink->ptr, new_size); 134 sink->size = new_size; 135 } 136 137 memcpy(sink->ptr + sink->len, ptr, len); 138 sink->len += len; 139 140 return len; 141} 142 143void upb_stringsink_init(upb_stringsink *sink) { 144 upb_byteshandler_init(&sink->handler); 145 upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); 146 upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); 147 148 upb_bytessink_reset(&sink->sink, &sink->handler, sink); 149 150 sink->size = 32; 151 sink->ptr = malloc(sink->size); 152 sink->len = 0; 153} 154 155void upb_stringsink_uninit(upb_stringsink *sink) { free(sink->ptr); } 156 157typedef struct { 158 /* For encoding Any value field in binary format. */ 159 upb_handlercache *encoder_handlercache; 160 upb_stringsink stringsink; 161 162 /* For decoding Any value field in json format. */ 163 upb_json_codecache *parser_codecache; 164 upb_sink sink; 165 upb_json_parser *parser; 166 167 /* Mark the range of uninterpreted values in json input before type url. */ 168 const char *before_type_url_start; 169 const char *before_type_url_end; 170 171 /* Mark the range of uninterpreted values in json input after type url. */ 172 const char *after_type_url_start; 173} upb_jsonparser_any_frame; 174 175typedef struct { 176 upb_sink sink; 177 178 /* The current message in which we're parsing, and the field whose value we're 179 * expecting next. */ 180 const upb_msgdef *m; 181 const upb_fielddef *f; 182 183 /* The table mapping json name to fielddef for this message. */ 184 const upb_strtable *name_table; 185 186 /* We are in a repeated-field context. We need this flag to decide whether to 187 * handle the array as a normal repeated field or a 188 * google.protobuf.ListValue/google.protobuf.Value. */ 189 bool is_repeated; 190 191 /* We are in a repeated-field context, ready to emit mapentries as 192 * submessages. This flag alters the start-of-object (open-brace) behavior to 193 * begin a sequence of mapentry messages rather than a single submessage. */ 194 bool is_map; 195 196 /* We are in a map-entry message context. This flag is set when parsing the 197 * value field of a single map entry and indicates to all value-field parsers 198 * (subobjects, strings, numbers, and bools) that the map-entry submessage 199 * should end as soon as the value is parsed. */ 200 bool is_mapentry; 201 202 /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent 203 * message's map field that we're currently parsing. This differs from |f| 204 * because |f| is the field in the *current* message (i.e., the map-entry 205 * message itself), not the parent's field that leads to this map. */ 206 const upb_fielddef *mapfield; 207 208 /* We are in an Any message context. This flag is set when parsing the Any 209 * message and indicates to all field parsers (subobjects, strings, numbers, 210 * and bools) that the parsed field should be serialized as binary data or 211 * cached (type url not found yet). */ 212 bool is_any; 213 214 /* The type of packed message in Any. */ 215 upb_jsonparser_any_frame *any_frame; 216 217 /* True if the field to be parsed is unknown. */ 218 bool is_unknown_field; 219} upb_jsonparser_frame; 220 221static void init_frame(upb_jsonparser_frame* frame) { 222 frame->m = NULL; 223 frame->f = NULL; 224 frame->name_table = NULL; 225 frame->is_repeated = false; 226 frame->is_map = false; 227 frame->is_mapentry = false; 228 frame->mapfield = NULL; 229 frame->is_any = false; 230 frame->any_frame = NULL; 231 frame->is_unknown_field = false; 232} 233 234struct upb_json_parser { 235 upb_arena *arena; 236 const upb_json_parsermethod *method; 237 upb_bytessink input_; 238 239 /* Stack to track the JSON scopes we are in. */ 240 upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; 241 upb_jsonparser_frame *top; 242 upb_jsonparser_frame *limit; 243 244 upb_status *status; 245 246 /* Ragel's internal parsing stack for the parsing state machine. */ 247 int current_state; 248 int parser_stack[UPB_JSON_MAX_DEPTH]; 249 int parser_top; 250 251 /* The handle for the current buffer. */ 252 const upb_bufhandle *handle; 253 254 /* Accumulate buffer. See details in parser.rl. */ 255 const char *accumulated; 256 size_t accumulated_len; 257 char *accumulate_buf; 258 size_t accumulate_buf_size; 259 260 /* Multi-part text data. See details in parser.rl. */ 261 int multipart_state; 262 upb_selector_t string_selector; 263 264 /* Input capture. See details in parser.rl. */ 265 const char *capture; 266 267 /* Intermediate result of parsing a unicode escape sequence. */ 268 uint32_t digit; 269 270 /* For resolve type url in Any. */ 271 const upb_symtab *symtab; 272 273 /* Whether to proceed if unknown field is met. */ 274 bool ignore_json_unknown; 275 276 /* Cache for parsing timestamp due to base and zone are handled in different 277 * handlers. */ 278 struct tm tm; 279}; 280 281static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) { 282 upb_jsonparser_frame *inner; 283 inner = p->top + 1; 284 init_frame(inner); 285 return inner; 286} 287 288struct upb_json_codecache { 289 upb_arena *arena; 290 upb_inttable methods; /* upb_msgdef* -> upb_json_parsermethod* */ 291}; 292 293struct upb_json_parsermethod { 294 const upb_json_codecache *cache; 295 upb_byteshandler input_handler_; 296 297 /* Maps json_name -> fielddef */ 298 upb_strtable name_table; 299}; 300 301#define PARSER_CHECK_RETURN(x) if (!(x)) return false 302 303static upb_jsonparser_any_frame *json_parser_any_frame_new( 304 upb_json_parser *p) { 305 upb_jsonparser_any_frame *frame; 306 307 frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame)); 308 309 frame->encoder_handlercache = upb_pb_encoder_newcache(); 310 frame->parser_codecache = upb_json_codecache_new(); 311 frame->parser = NULL; 312 frame->before_type_url_start = NULL; 313 frame->before_type_url_end = NULL; 314 frame->after_type_url_start = NULL; 315 316 upb_stringsink_init(&frame->stringsink); 317 318 return frame; 319} 320 321static void json_parser_any_frame_set_payload_type( 322 upb_json_parser *p, 323 upb_jsonparser_any_frame *frame, 324 const upb_msgdef *payload_type) { 325 const upb_handlers *h; 326 const upb_json_parsermethod *parser_method; 327 upb_pb_encoder *encoder; 328 329 /* Initialize encoder. */ 330 h = upb_handlercache_get(frame->encoder_handlercache, payload_type); 331 encoder = upb_pb_encoder_create(p->arena, h, frame->stringsink.sink); 332 333 /* Initialize parser. */ 334 parser_method = upb_json_codecache_get(frame->parser_codecache, payload_type); 335 upb_sink_reset(&frame->sink, h, encoder); 336 frame->parser = 337 upb_json_parser_create(p->arena, parser_method, p->symtab, frame->sink, 338 p->status, p->ignore_json_unknown); 339} 340 341static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) { 342 upb_handlercache_free(frame->encoder_handlercache); 343 upb_json_codecache_free(frame->parser_codecache); 344 upb_stringsink_uninit(&frame->stringsink); 345} 346 347static bool json_parser_any_frame_has_type_url( 348 upb_jsonparser_any_frame *frame) { 349 return frame->parser != NULL; 350} 351 352static bool json_parser_any_frame_has_value_before_type_url( 353 upb_jsonparser_any_frame *frame) { 354 return frame->before_type_url_start != frame->before_type_url_end; 355} 356 357static bool json_parser_any_frame_has_value_after_type_url( 358 upb_jsonparser_any_frame *frame) { 359 return frame->after_type_url_start != NULL; 360} 361 362static bool json_parser_any_frame_has_value( 363 upb_jsonparser_any_frame *frame) { 364 return json_parser_any_frame_has_value_before_type_url(frame) || 365 json_parser_any_frame_has_value_after_type_url(frame); 366} 367 368static void json_parser_any_frame_set_before_type_url_end( 369 upb_jsonparser_any_frame *frame, 370 const char *ptr) { 371 if (frame->parser == NULL) { 372 frame->before_type_url_end = ptr; 373 } 374} 375 376static void json_parser_any_frame_set_after_type_url_start_once( 377 upb_jsonparser_any_frame *frame, 378 const char *ptr) { 379 if (json_parser_any_frame_has_type_url(frame) && 380 frame->after_type_url_start == NULL) { 381 frame->after_type_url_start = ptr; 382 } 383} 384 385/* Used to signal that a capture has been suspended. */ 386static char suspend_capture; 387 388static upb_selector_t getsel_for_handlertype(upb_json_parser *p, 389 upb_handlertype_t type) { 390 upb_selector_t sel; 391 bool ok = upb_handlers_getselector(p->top->f, type, &sel); 392 UPB_ASSUME(ok); 393 return sel; 394} 395 396static upb_selector_t parser_getsel(upb_json_parser *p) { 397 return getsel_for_handlertype( 398 p, upb_handlers_getprimitivehandlertype(p->top->f)); 399} 400 401static bool check_stack(upb_json_parser *p) { 402 if ((p->top + 1) == p->limit) { 403 upb_status_seterrmsg(p->status, "Nesting too deep"); 404 return false; 405 } 406 407 return true; 408} 409 410static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) { 411 upb_value v; 412 const upb_json_codecache *cache = p->method->cache; 413 bool ok; 414 const upb_json_parsermethod *method; 415 416 ok = upb_inttable_lookupptr(&cache->methods, frame->m, &v); 417 UPB_ASSUME(ok); 418 method = upb_value_getconstptr(v); 419 420 frame->name_table = &method->name_table; 421} 422 423/* There are GCC/Clang built-ins for overflow checking which we could start 424 * using if there was any performance benefit to it. */ 425 426static bool checked_add(size_t a, size_t b, size_t *c) { 427 if (SIZE_MAX - a < b) return false; 428 *c = a + b; 429 return true; 430} 431 432static size_t saturating_multiply(size_t a, size_t b) { 433 /* size_t is unsigned, so this is defined behavior even on overflow. */ 434 size_t ret = a * b; 435 if (b != 0 && ret / b != a) { 436 ret = SIZE_MAX; 437 } 438 return ret; 439} 440 441 442/* Base64 decoding ************************************************************/ 443 444/* TODO(haberman): make this streaming. */ 445 446static const signed char b64table[] = { 447 -1, -1, -1, -1, -1, -1, -1, -1, 448 -1, -1, -1, -1, -1, -1, -1, -1, 449 -1, -1, -1, -1, -1, -1, -1, -1, 450 -1, -1, -1, -1, -1, -1, -1, -1, 451 -1, -1, -1, -1, -1, -1, -1, -1, 452 -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, 453 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, 454 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, 455 -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, 456 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, 457 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, 458 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, 459 -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, 460 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, 461 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, 462 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, 463 -1, -1, -1, -1, -1, -1, -1, -1, 464 -1, -1, -1, -1, -1, -1, -1, -1, 465 -1, -1, -1, -1, -1, -1, -1, -1, 466 -1, -1, -1, -1, -1, -1, -1, -1, 467 -1, -1, -1, -1, -1, -1, -1, -1, 468 -1, -1, -1, -1, -1, -1, -1, -1, 469 -1, -1, -1, -1, -1, -1, -1, -1, 470 -1, -1, -1, -1, -1, -1, -1, -1, 471 -1, -1, -1, -1, -1, -1, -1, -1, 472 -1, -1, -1, -1, -1, -1, -1, -1, 473 -1, -1, -1, -1, -1, -1, -1, -1, 474 -1, -1, -1, -1, -1, -1, -1, -1, 475 -1, -1, -1, -1, -1, -1, -1, -1, 476 -1, -1, -1, -1, -1, -1, -1, -1, 477 -1, -1, -1, -1, -1, -1, -1, -1, 478 -1, -1, -1, -1, -1, -1, -1, -1 479}; 480 481/* Returns the table value sign-extended to 32 bits. Knowing that the upper 482 * bits will be 1 for unrecognized characters makes it easier to check for 483 * this error condition later (see below). */ 484int32_t b64lookup(unsigned char ch) { return b64table[ch]; } 485 486/* Returns true if the given character is not a valid base64 character or 487 * padding. */ 488bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; } 489 490static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr, 491 size_t len) { 492 const char *limit = ptr + len; 493 for (; ptr < limit; ptr += 4) { 494 uint32_t val; 495 char output[3]; 496 497 if (limit - ptr < 4) { 498 upb_status_seterrf(p->status, 499 "Base64 input for bytes field not a multiple of 4: %s", 500 upb_fielddef_name(p->top->f)); 501 return false; 502 } 503 504 val = b64lookup(ptr[0]) << 18 | 505 b64lookup(ptr[1]) << 12 | 506 b64lookup(ptr[2]) << 6 | 507 b64lookup(ptr[3]); 508 509 /* Test the upper bit; returns true if any of the characters returned -1. */ 510 if (val & 0x80000000) { 511 goto otherchar; 512 } 513 514 output[0] = val >> 16; 515 output[1] = (val >> 8) & 0xff; 516 output[2] = val & 0xff; 517 upb_sink_putstring(p->top->sink, sel, output, 3, NULL); 518 } 519 return true; 520 521otherchar: 522 if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) || 523 nonbase64(ptr[3]) ) { 524 upb_status_seterrf(p->status, 525 "Non-base64 characters in bytes field: %s", 526 upb_fielddef_name(p->top->f)); 527 return false; 528 } if (ptr[2] == '=') { 529 uint32_t val; 530 char output; 531 532 /* Last group contains only two input bytes, one output byte. */ 533 if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') { 534 goto badpadding; 535 } 536 537 val = b64lookup(ptr[0]) << 18 | 538 b64lookup(ptr[1]) << 12; 539 540 UPB_ASSERT(!(val & 0x80000000)); 541 output = val >> 16; 542 upb_sink_putstring(p->top->sink, sel, &output, 1, NULL); 543 return true; 544 } else { 545 uint32_t val; 546 char output[2]; 547 548 /* Last group contains only three input bytes, two output bytes. */ 549 if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') { 550 goto badpadding; 551 } 552 553 val = b64lookup(ptr[0]) << 18 | 554 b64lookup(ptr[1]) << 12 | 555 b64lookup(ptr[2]) << 6; 556 557 output[0] = val >> 16; 558 output[1] = (val >> 8) & 0xff; 559 upb_sink_putstring(p->top->sink, sel, output, 2, NULL); 560 return true; 561 } 562 563badpadding: 564 upb_status_seterrf(p->status, 565 "Incorrect base64 padding for field: %s (%.*s)", 566 upb_fielddef_name(p->top->f), 567 4, ptr); 568 return false; 569} 570 571 572/* Accumulate buffer **********************************************************/ 573 574/* Functionality for accumulating a buffer. 575 * 576 * Some parts of the parser need an entire value as a contiguous string. For 577 * example, to look up a member name in a hash table, or to turn a string into 578 * a number, the relevant library routines need the input string to be in 579 * contiguous memory, even if the value spanned two or more buffers in the 580 * input. These routines handle that. 581 * 582 * In the common case we can just point to the input buffer to get this 583 * contiguous string and avoid any actual copy. So we optimistically begin 584 * this way. But there are a few cases where we must instead copy into a 585 * separate buffer: 586 * 587 * 1. The string was not contiguous in the input (it spanned buffers). 588 * 589 * 2. The string included escape sequences that need to be interpreted to get 590 * the true value in a contiguous buffer. */ 591 592static void assert_accumulate_empty(upb_json_parser *p) { 593 UPB_ASSERT(p->accumulated == NULL); 594 UPB_ASSERT(p->accumulated_len == 0); 595} 596 597static void accumulate_clear(upb_json_parser *p) { 598 p->accumulated = NULL; 599 p->accumulated_len = 0; 600} 601 602/* Used internally by accumulate_append(). */ 603static bool accumulate_realloc(upb_json_parser *p, size_t need) { 604 void *mem; 605 size_t old_size = p->accumulate_buf_size; 606 size_t new_size = UPB_MAX(old_size, 128); 607 while (new_size < need) { 608 new_size = saturating_multiply(new_size, 2); 609 } 610 611 mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size); 612 if (!mem) { 613 upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); 614 return false; 615 } 616 617 p->accumulate_buf = mem; 618 p->accumulate_buf_size = new_size; 619 return true; 620} 621 622/* Logically appends the given data to the append buffer. 623 * If "can_alias" is true, we will try to avoid actually copying, but the buffer 624 * must be valid until the next accumulate_append() call (if any). */ 625static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, 626 bool can_alias) { 627 size_t need; 628 629 if (!p->accumulated && can_alias) { 630 p->accumulated = buf; 631 p->accumulated_len = len; 632 return true; 633 } 634 635 if (!checked_add(p->accumulated_len, len, &need)) { 636 upb_status_seterrmsg(p->status, "Integer overflow."); 637 return false; 638 } 639 640 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { 641 return false; 642 } 643 644 if (p->accumulated != p->accumulate_buf) { 645 if (p->accumulated_len) { 646 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); 647 } 648 p->accumulated = p->accumulate_buf; 649 } 650 651 memcpy(p->accumulate_buf + p->accumulated_len, buf, len); 652 p->accumulated_len += len; 653 return true; 654} 655 656/* Returns a pointer to the data accumulated since the last accumulate_clear() 657 * call, and writes the length to *len. This with point either to the input 658 * buffer or a temporary accumulate buffer. */ 659static const char *accumulate_getptr(upb_json_parser *p, size_t *len) { 660 UPB_ASSERT(p->accumulated); 661 *len = p->accumulated_len; 662 return p->accumulated; 663} 664 665 666/* Mult-part text data ********************************************************/ 667 668/* When we have text data in the input, it can often come in multiple segments. 669 * For example, there may be some raw string data followed by an escape 670 * sequence. The two segments are processed with different logic. Also buffer 671 * seams in the input can cause multiple segments. 672 * 673 * As we see segments, there are two main cases for how we want to process them: 674 * 675 * 1. we want to push the captured input directly to string handlers. 676 * 677 * 2. we need to accumulate all the parts into a contiguous buffer for further 678 * processing (field name lookup, string->number conversion, etc). */ 679 680/* This is the set of states for p->multipart_state. */ 681enum { 682 /* We are not currently processing multipart data. */ 683 MULTIPART_INACTIVE = 0, 684 685 /* We are processing multipart data by accumulating it into a contiguous 686 * buffer. */ 687 MULTIPART_ACCUMULATE = 1, 688 689 /* We are processing multipart data by pushing each part directly to the 690 * current string handlers. */ 691 MULTIPART_PUSHEAGERLY = 2 692}; 693 694/* Start a multi-part text value where we accumulate the data for processing at 695 * the end. */ 696static void multipart_startaccum(upb_json_parser *p) { 697 assert_accumulate_empty(p); 698 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); 699 p->multipart_state = MULTIPART_ACCUMULATE; 700} 701 702/* Start a multi-part text value where we immediately push text data to a string 703 * value with the given selector. */ 704static void multipart_start(upb_json_parser *p, upb_selector_t sel) { 705 assert_accumulate_empty(p); 706 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); 707 p->multipart_state = MULTIPART_PUSHEAGERLY; 708 p->string_selector = sel; 709} 710 711static bool multipart_text(upb_json_parser *p, const char *buf, size_t len, 712 bool can_alias) { 713 switch (p->multipart_state) { 714 case MULTIPART_INACTIVE: 715 upb_status_seterrmsg( 716 p->status, "Internal error: unexpected state MULTIPART_INACTIVE"); 717 return false; 718 719 case MULTIPART_ACCUMULATE: 720 if (!accumulate_append(p, buf, len, can_alias)) { 721 return false; 722 } 723 break; 724 725 case MULTIPART_PUSHEAGERLY: { 726 const upb_bufhandle *handle = can_alias ? p->handle : NULL; 727 upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle); 728 break; 729 } 730 } 731 732 return true; 733} 734 735/* Note: this invalidates the accumulate buffer! Call only after reading its 736 * contents. */ 737static void multipart_end(upb_json_parser *p) { 738 /* This is false sometimes. Probably a bug of some sort, but this code is 739 * intended for deletion soon. */ 740 /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */ 741 p->multipart_state = MULTIPART_INACTIVE; 742 accumulate_clear(p); 743} 744 745 746/* Input capture **************************************************************/ 747 748/* Functionality for capturing a region of the input as text. Gracefully 749 * handles the case where a buffer seam occurs in the middle of the captured 750 * region. */ 751 752static void capture_begin(upb_json_parser *p, const char *ptr) { 753 UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); 754 UPB_ASSERT(p->capture == NULL); 755 p->capture = ptr; 756} 757 758static bool capture_end(upb_json_parser *p, const char *ptr) { 759 UPB_ASSERT(p->capture); 760 if (multipart_text(p, p->capture, ptr - p->capture, true)) { 761 p->capture = NULL; 762 return true; 763 } else { 764 return false; 765 } 766} 767 768/* This is called at the end of each input buffer (ie. when we have hit a 769 * buffer seam). If we are in the middle of capturing the input, this 770 * processes the unprocessed capture region. */ 771static void capture_suspend(upb_json_parser *p, const char **ptr) { 772 if (!p->capture) return; 773 774 if (multipart_text(p, p->capture, *ptr - p->capture, false)) { 775 /* We use this as a signal that we were in the middle of capturing, and 776 * that capturing should resume at the beginning of the next buffer. 777 * 778 * We can't use *ptr here, because we have no guarantee that this pointer 779 * will be valid when we resume (if the underlying memory is freed, then 780 * using the pointer at all, even to compare to NULL, is likely undefined 781 * behavior). */ 782 p->capture = &suspend_capture; 783 } else { 784 /* Need to back up the pointer to the beginning of the capture, since 785 * we were not able to actually preserve it. */ 786 *ptr = p->capture; 787 } 788} 789 790static void capture_resume(upb_json_parser *p, const char *ptr) { 791 if (p->capture) { 792 UPB_ASSERT(p->capture == &suspend_capture); 793 p->capture = ptr; 794 } 795} 796 797 798/* Callbacks from the parser **************************************************/ 799 800/* These are the functions called directly from the parser itself. 801 * We define these in the same order as their declarations in the parser. */ 802 803static char escape_char(char in) { 804 switch (in) { 805 case 'r': return '\r'; 806 case 't': return '\t'; 807 case 'n': return '\n'; 808 case 'f': return '\f'; 809 case 'b': return '\b'; 810 case '/': return '/'; 811 case '"': return '"'; 812 case '\\': return '\\'; 813 default: 814 UPB_ASSERT(0); 815 return 'x'; 816 } 817} 818 819static bool escape(upb_json_parser *p, const char *ptr) { 820 char ch = escape_char(*ptr); 821 return multipart_text(p, &ch, 1, false); 822} 823 824static void start_hex(upb_json_parser *p) { 825 p->digit = 0; 826} 827 828static void hexdigit(upb_json_parser *p, const char *ptr) { 829 char ch = *ptr; 830 831 p->digit <<= 4; 832 833 if (ch >= '0' && ch <= '9') { 834 p->digit += (ch - '0'); 835 } else if (ch >= 'a' && ch <= 'f') { 836 p->digit += ((ch - 'a') + 10); 837 } else { 838 UPB_ASSERT(ch >= 'A' && ch <= 'F'); 839 p->digit += ((ch - 'A') + 10); 840 } 841} 842 843static bool end_hex(upb_json_parser *p) { 844 uint32_t codepoint = p->digit; 845 846 /* emit the codepoint as UTF-8. */ 847 char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */ 848 int length = 0; 849 if (codepoint <= 0x7F) { 850 utf8[0] = codepoint; 851 length = 1; 852 } else if (codepoint <= 0x07FF) { 853 utf8[1] = (codepoint & 0x3F) | 0x80; 854 codepoint >>= 6; 855 utf8[0] = (codepoint & 0x1F) | 0xC0; 856 length = 2; 857 } else /* codepoint <= 0xFFFF */ { 858 utf8[2] = (codepoint & 0x3F) | 0x80; 859 codepoint >>= 6; 860 utf8[1] = (codepoint & 0x3F) | 0x80; 861 codepoint >>= 6; 862 utf8[0] = (codepoint & 0x0F) | 0xE0; 863 length = 3; 864 } 865 /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate 866 * we have to wait for the next escape to get the full code point). */ 867 868 return multipart_text(p, utf8, length, false); 869} 870 871static void start_text(upb_json_parser *p, const char *ptr) { 872 capture_begin(p, ptr); 873} 874 875static bool end_text(upb_json_parser *p, const char *ptr) { 876 return capture_end(p, ptr); 877} 878 879static bool start_number(upb_json_parser *p, const char *ptr) { 880 if (is_top_level(p)) { 881 if (is_number_wrapper_object(p)) { 882 start_wrapper_object(p); 883 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 884 start_value_object(p, VALUE_NUMBERVALUE); 885 } else { 886 return false; 887 } 888 } else if (does_number_wrapper_start(p)) { 889 if (!start_subobject(p)) { 890 return false; 891 } 892 start_wrapper_object(p); 893 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 894 if (!start_subobject(p)) { 895 return false; 896 } 897 start_value_object(p, VALUE_NUMBERVALUE); 898 } 899 900 multipart_startaccum(p); 901 capture_begin(p, ptr); 902 return true; 903} 904 905static bool parse_number(upb_json_parser *p, bool is_quoted); 906 907static bool end_number_nontop(upb_json_parser *p, const char *ptr) { 908 if (!capture_end(p, ptr)) { 909 return false; 910 } 911 912 if (p->top->f == NULL) { 913 multipart_end(p); 914 return true; 915 } 916 917 return parse_number(p, false); 918} 919 920static bool end_number(upb_json_parser *p, const char *ptr) { 921 if (!end_number_nontop(p, ptr)) { 922 return false; 923 } 924 925 if (does_number_wrapper_end(p)) { 926 end_wrapper_object(p); 927 if (!is_top_level(p)) { 928 end_subobject(p); 929 } 930 return true; 931 } 932 933 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 934 end_value_object(p); 935 if (!is_top_level(p)) { 936 end_subobject(p); 937 } 938 return true; 939 } 940 941 return true; 942} 943 944/* |buf| is NULL-terminated. |buf| itself will never include quotes; 945 * |is_quoted| tells us whether this text originally appeared inside quotes. */ 946static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, 947 bool is_quoted) { 948 size_t len = strlen(buf); 949 const char *bufend = buf + len; 950 char *end; 951 upb_fieldtype_t type = upb_fielddef_type(p->top->f); 952 double val; 953 double dummy; 954 double inf = INFINITY; 955 956 errno = 0; 957 958 if (len == 0 || buf[0] == ' ') { 959 return false; 960 } 961 962 /* For integer types, first try parsing with integer-specific routines. 963 * If these succeed, they will be more accurate for int64/uint64 than 964 * strtod(). 965 */ 966 switch (type) { 967 case UPB_TYPE_ENUM: 968 case UPB_TYPE_INT32: { 969 long val = strtol(buf, &end, 0); 970 if (errno == ERANGE || end != bufend) { 971 break; 972 } else if (val > INT32_MAX || val < INT32_MIN) { 973 return false; 974 } else { 975 upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val); 976 return true; 977 } 978 UPB_UNREACHABLE(); 979 } 980 case UPB_TYPE_UINT32: { 981 unsigned long val = strtoul(buf, &end, 0); 982 if (end != bufend) { 983 break; 984 } else if (val > UINT32_MAX || errno == ERANGE) { 985 return false; 986 } else { 987 upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val); 988 return true; 989 } 990 UPB_UNREACHABLE(); 991 } 992 /* XXX: We can't handle [u]int64 properly on 32-bit machines because 993 * strto[u]ll isn't in C89. */ 994 case UPB_TYPE_INT64: { 995 long val = strtol(buf, &end, 0); 996 if (errno == ERANGE || end != bufend) { 997 break; 998 } else { 999 upb_sink_putint64(p->top->sink, parser_getsel(p), val); 1000 return true; 1001 } 1002 UPB_UNREACHABLE(); 1003 } 1004 case UPB_TYPE_UINT64: { 1005 unsigned long val = strtoul(p->accumulated, &end, 0); 1006 if (end != bufend) { 1007 break; 1008 } else if (errno == ERANGE) { 1009 return false; 1010 } else { 1011 upb_sink_putuint64(p->top->sink, parser_getsel(p), val); 1012 return true; 1013 } 1014 UPB_UNREACHABLE(); 1015 } 1016 default: 1017 break; 1018 } 1019 1020 if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) { 1021 /* Quoted numbers for integer types are not allowed to be in double form. */ 1022 return false; 1023 } 1024 1025 if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) { 1026 /* C89 does not have an INFINITY macro. */ 1027 val = inf; 1028 } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) { 1029 val = -inf; 1030 } else { 1031 val = strtod(buf, &end); 1032 if (errno == ERANGE || end != bufend) { 1033 return false; 1034 } 1035 } 1036 1037 switch (type) { 1038#define CASE(capitaltype, smalltype, ctype, min, max) \ 1039 case UPB_TYPE_ ## capitaltype: { \ 1040 if (modf(val, &dummy) != 0 || val > max || val < min) { \ 1041 return false; \ 1042 } else { \ 1043 upb_sink_put ## smalltype(p->top->sink, parser_getsel(p), \ 1044 (ctype)val); \ 1045 return true; \ 1046 } \ 1047 break; \ 1048 } 1049 case UPB_TYPE_ENUM: 1050 CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX); 1051 CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX); 1052 CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX); 1053 CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX); 1054#undef CASE 1055 1056 case UPB_TYPE_DOUBLE: 1057 upb_sink_putdouble(p->top->sink, parser_getsel(p), val); 1058 return true; 1059 case UPB_TYPE_FLOAT: 1060 if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) { 1061 return false; 1062 } else { 1063 upb_sink_putfloat(p->top->sink, parser_getsel(p), val); 1064 return true; 1065 } 1066 default: 1067 return false; 1068 } 1069} 1070 1071static bool parse_number(upb_json_parser *p, bool is_quoted) { 1072 size_t len; 1073 const char *buf; 1074 1075 /* strtol() and friends unfortunately do not support specifying the length of 1076 * the input string, so we need to force a copy into a NULL-terminated buffer. */ 1077 if (!multipart_text(p, "\0", 1, false)) { 1078 return false; 1079 } 1080 1081 buf = accumulate_getptr(p, &len); 1082 1083 if (parse_number_from_buffer(p, buf, is_quoted)) { 1084 multipart_end(p); 1085 return true; 1086 } else { 1087 upb_status_seterrf(p->status, "error parsing number: %s", buf); 1088 multipart_end(p); 1089 return false; 1090 } 1091} 1092 1093static bool parser_putbool(upb_json_parser *p, bool val) { 1094 bool ok; 1095 1096 if (p->top->f == NULL) { 1097 return true; 1098 } 1099 1100 if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { 1101 upb_status_seterrf(p->status, 1102 "Boolean value specified for non-bool field: %s", 1103 upb_fielddef_name(p->top->f)); 1104 return false; 1105 } 1106 1107 ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val); 1108 UPB_ASSERT(ok); 1109 1110 return true; 1111} 1112 1113static bool end_bool(upb_json_parser *p, bool val) { 1114 if (is_top_level(p)) { 1115 if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { 1116 start_wrapper_object(p); 1117 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 1118 start_value_object(p, VALUE_BOOLVALUE); 1119 } else { 1120 return false; 1121 } 1122 } else if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) { 1123 if (!start_subobject(p)) { 1124 return false; 1125 } 1126 start_wrapper_object(p); 1127 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 1128 if (!start_subobject(p)) { 1129 return false; 1130 } 1131 start_value_object(p, VALUE_BOOLVALUE); 1132 } 1133 1134 if (p->top->is_unknown_field) { 1135 return true; 1136 } 1137 1138 if (!parser_putbool(p, val)) { 1139 return false; 1140 } 1141 1142 if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { 1143 end_wrapper_object(p); 1144 if (!is_top_level(p)) { 1145 end_subobject(p); 1146 } 1147 return true; 1148 } 1149 1150 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 1151 end_value_object(p); 1152 if (!is_top_level(p)) { 1153 end_subobject(p); 1154 } 1155 return true; 1156 } 1157 1158 return true; 1159} 1160 1161static bool end_null(upb_json_parser *p) { 1162 const char *zero_ptr = "0"; 1163 1164 if (is_top_level(p)) { 1165 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 1166 start_value_object(p, VALUE_NULLVALUE); 1167 } else { 1168 return true; 1169 } 1170 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 1171 if (!start_subobject(p)) { 1172 return false; 1173 } 1174 start_value_object(p, VALUE_NULLVALUE); 1175 } else { 1176 return true; 1177 } 1178 1179 /* Fill null_value field. */ 1180 multipart_startaccum(p); 1181 capture_begin(p, zero_ptr); 1182 capture_end(p, zero_ptr + 1); 1183 parse_number(p, false); 1184 1185 end_value_object(p); 1186 if (!is_top_level(p)) { 1187 end_subobject(p); 1188 } 1189 1190 return true; 1191} 1192 1193static bool start_any_stringval(upb_json_parser *p) { 1194 multipart_startaccum(p); 1195 return true; 1196} 1197 1198static bool start_stringval(upb_json_parser *p) { 1199 if (is_top_level(p)) { 1200 if (is_string_wrapper_object(p) || 1201 is_number_wrapper_object(p)) { 1202 start_wrapper_object(p); 1203 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { 1204 start_fieldmask_object(p); 1205 return true; 1206 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || 1207 is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { 1208 start_object(p); 1209 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 1210 start_value_object(p, VALUE_STRINGVALUE); 1211 } else { 1212 return false; 1213 } 1214 } else if (does_string_wrapper_start(p) || 1215 does_number_wrapper_start(p)) { 1216 if (!start_subobject(p)) { 1217 return false; 1218 } 1219 start_wrapper_object(p); 1220 } else if (does_fieldmask_start(p)) { 1221 if (!start_subobject(p)) { 1222 return false; 1223 } 1224 start_fieldmask_object(p); 1225 return true; 1226 } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) || 1227 is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) { 1228 if (!start_subobject(p)) { 1229 return false; 1230 } 1231 start_object(p); 1232 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 1233 if (!start_subobject(p)) { 1234 return false; 1235 } 1236 start_value_object(p, VALUE_STRINGVALUE); 1237 } 1238 1239 if (p->top->f == NULL) { 1240 multipart_startaccum(p); 1241 return true; 1242 } 1243 1244 if (p->top->is_any) { 1245 return start_any_stringval(p); 1246 } 1247 1248 if (upb_fielddef_isstring(p->top->f)) { 1249 upb_jsonparser_frame *inner; 1250 upb_selector_t sel; 1251 1252 if (!check_stack(p)) return false; 1253 1254 /* Start a new parser frame: parser frames correspond one-to-one with 1255 * handler frames, and string events occur in a sub-frame. */ 1256 inner = start_jsonparser_frame(p); 1257 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 1258 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); 1259 inner->m = p->top->m; 1260 inner->f = p->top->f; 1261 p->top = inner; 1262 1263 if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { 1264 /* For STRING fields we push data directly to the handlers as it is 1265 * parsed. We don't do this yet for BYTES fields, because our base64 1266 * decoder is not streaming. 1267 * 1268 * TODO(haberman): make base64 decoding streaming also. */ 1269 multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING)); 1270 return true; 1271 } else { 1272 multipart_startaccum(p); 1273 return true; 1274 } 1275 } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL && 1276 upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) { 1277 /* No need to push a frame -- numeric values in quotes remain in the 1278 * current parser frame. These values must accmulate so we can convert 1279 * them all at once at the end. */ 1280 multipart_startaccum(p); 1281 return true; 1282 } else { 1283 upb_status_seterrf(p->status, 1284 "String specified for bool or submessage field: %s", 1285 upb_fielddef_name(p->top->f)); 1286 return false; 1287 } 1288} 1289 1290static bool end_any_stringval(upb_json_parser *p) { 1291 size_t len; 1292 const char *buf = accumulate_getptr(p, &len); 1293 1294 /* Set type_url */ 1295 upb_selector_t sel; 1296 upb_jsonparser_frame *inner; 1297 if (!check_stack(p)) return false; 1298 inner = p->top + 1; 1299 1300 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 1301 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); 1302 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); 1303 upb_sink_putstring(inner->sink, sel, buf, len, NULL); 1304 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 1305 upb_sink_endstr(inner->sink, sel); 1306 1307 multipart_end(p); 1308 1309 /* Resolve type url */ 1310 if (strncmp(buf, "type.googleapis.com/", 20) == 0 && len > 20) { 1311 const upb_msgdef *payload_type = NULL; 1312 buf += 20; 1313 len -= 20; 1314 1315 payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len); 1316 if (payload_type == NULL) { 1317 upb_status_seterrf( 1318 p->status, "Cannot find packed type: %.*s\n", (int)len, buf); 1319 return false; 1320 } 1321 1322 json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type); 1323 1324 return true; 1325 } else { 1326 upb_status_seterrf( 1327 p->status, "Invalid type url: %.*s\n", (int)len, buf); 1328 return false; 1329 } 1330} 1331 1332static bool end_stringval_nontop(upb_json_parser *p) { 1333 bool ok = true; 1334 1335 if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || 1336 is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { 1337 multipart_end(p); 1338 return true; 1339 } 1340 1341 if (p->top->f == NULL) { 1342 multipart_end(p); 1343 return true; 1344 } 1345 1346 if (p->top->is_any) { 1347 return end_any_stringval(p); 1348 } 1349 1350 switch (upb_fielddef_type(p->top->f)) { 1351 case UPB_TYPE_BYTES: 1352 if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), 1353 p->accumulated, p->accumulated_len)) { 1354 return false; 1355 } 1356 /* Fall through. */ 1357 1358 case UPB_TYPE_STRING: { 1359 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 1360 upb_sink_endstr(p->top->sink, sel); 1361 p->top--; 1362 break; 1363 } 1364 1365 case UPB_TYPE_ENUM: { 1366 /* Resolve enum symbolic name to integer value. */ 1367 const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f); 1368 1369 size_t len; 1370 const char *buf = accumulate_getptr(p, &len); 1371 1372 int32_t int_val = 0; 1373 ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val); 1374 1375 if (ok) { 1376 upb_selector_t sel = parser_getsel(p); 1377 upb_sink_putint32(p->top->sink, sel, int_val); 1378 } else { 1379 if (p->ignore_json_unknown) { 1380 ok = true; 1381 /* TODO(teboring): Should also clean this field. */ 1382 } else { 1383 upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", len, buf); 1384 } 1385 } 1386 1387 break; 1388 } 1389 1390 case UPB_TYPE_INT32: 1391 case UPB_TYPE_INT64: 1392 case UPB_TYPE_UINT32: 1393 case UPB_TYPE_UINT64: 1394 case UPB_TYPE_DOUBLE: 1395 case UPB_TYPE_FLOAT: 1396 ok = parse_number(p, true); 1397 break; 1398 1399 default: 1400 UPB_ASSERT(false); 1401 upb_status_seterrmsg(p->status, "Internal error in JSON decoder"); 1402 ok = false; 1403 break; 1404 } 1405 1406 multipart_end(p); 1407 1408 return ok; 1409} 1410 1411static bool end_stringval(upb_json_parser *p) { 1412 /* FieldMask's stringvals have been ended when handling them. Only need to 1413 * close FieldMask here.*/ 1414 if (does_fieldmask_end(p)) { 1415 end_fieldmask_object(p); 1416 if (!is_top_level(p)) { 1417 end_subobject(p); 1418 } 1419 return true; 1420 } 1421 1422 if (!end_stringval_nontop(p)) { 1423 return false; 1424 } 1425 1426 if (does_string_wrapper_end(p) || 1427 does_number_wrapper_end(p)) { 1428 end_wrapper_object(p); 1429 if (!is_top_level(p)) { 1430 end_subobject(p); 1431 } 1432 return true; 1433 } 1434 1435 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 1436 end_value_object(p); 1437 if (!is_top_level(p)) { 1438 end_subobject(p); 1439 } 1440 return true; 1441 } 1442 1443 if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || 1444 is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) || 1445 is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { 1446 end_object(p); 1447 if (!is_top_level(p)) { 1448 end_subobject(p); 1449 } 1450 return true; 1451 } 1452 1453 return true; 1454} 1455 1456static void start_duration_base(upb_json_parser *p, const char *ptr) { 1457 capture_begin(p, ptr); 1458} 1459 1460static bool end_duration_base(upb_json_parser *p, const char *ptr) { 1461 size_t len; 1462 const char *buf; 1463 char seconds_buf[14]; 1464 char nanos_buf[12]; 1465 char *end; 1466 int64_t seconds = 0; 1467 int32_t nanos = 0; 1468 double val = 0.0; 1469 const char *seconds_membername = "seconds"; 1470 const char *nanos_membername = "nanos"; 1471 size_t fraction_start; 1472 1473 if (!capture_end(p, ptr)) { 1474 return false; 1475 } 1476 1477 buf = accumulate_getptr(p, &len); 1478 1479 memset(seconds_buf, 0, 14); 1480 memset(nanos_buf, 0, 12); 1481 1482 /* Find out base end. The maximus duration is 315576000000, which cannot be 1483 * represented by double without losing precision. Thus, we need to handle 1484 * fraction and base separately. */ 1485 for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.'; 1486 fraction_start++); 1487 1488 /* Parse base */ 1489 memcpy(seconds_buf, buf, fraction_start); 1490 seconds = strtol(seconds_buf, &end, 10); 1491 if (errno == ERANGE || end != seconds_buf + fraction_start) { 1492 upb_status_seterrf(p->status, "error parsing duration: %s", 1493 seconds_buf); 1494 return false; 1495 } 1496 1497 if (seconds > 315576000000) { 1498 upb_status_seterrf(p->status, "error parsing duration: " 1499 "maximum acceptable value is " 1500 "315576000000"); 1501 return false; 1502 } 1503 1504 if (seconds < -315576000000) { 1505 upb_status_seterrf(p->status, "error parsing duration: " 1506 "minimum acceptable value is " 1507 "-315576000000"); 1508 return false; 1509 } 1510 1511 /* Parse fraction */ 1512 nanos_buf[0] = '0'; 1513 memcpy(nanos_buf + 1, buf + fraction_start, len - fraction_start); 1514 val = strtod(nanos_buf, &end); 1515 if (errno == ERANGE || end != nanos_buf + len - fraction_start + 1) { 1516 upb_status_seterrf(p->status, "error parsing duration: %s", 1517 nanos_buf); 1518 return false; 1519 } 1520 1521 nanos = val * 1000000000; 1522 if (seconds < 0) nanos = -nanos; 1523 1524 /* Clean up buffer */ 1525 multipart_end(p); 1526 1527 /* Set seconds */ 1528 start_member(p); 1529 capture_begin(p, seconds_membername); 1530 capture_end(p, seconds_membername + 7); 1531 end_membername(p); 1532 upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); 1533 end_member(p); 1534 1535 /* Set nanos */ 1536 start_member(p); 1537 capture_begin(p, nanos_membername); 1538 capture_end(p, nanos_membername + 5); 1539 end_membername(p); 1540 upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); 1541 end_member(p); 1542 1543 /* Continue previous arena */ 1544 multipart_startaccum(p); 1545 1546 return true; 1547} 1548 1549static int parse_timestamp_number(upb_json_parser *p) { 1550 size_t len; 1551 const char *buf; 1552 int val; 1553 1554 /* atoi() and friends unfortunately do not support specifying the length of 1555 * the input string, so we need to force a copy into a NULL-terminated buffer. */ 1556 multipart_text(p, "\0", 1, false); 1557 1558 buf = accumulate_getptr(p, &len); 1559 val = atoi(buf); 1560 multipart_end(p); 1561 multipart_startaccum(p); 1562 1563 return val; 1564} 1565 1566static void start_year(upb_json_parser *p, const char *ptr) { 1567 capture_begin(p, ptr); 1568} 1569 1570static bool end_year(upb_json_parser *p, const char *ptr) { 1571 if (!capture_end(p, ptr)) { 1572 return false; 1573 } 1574 p->tm.tm_year = parse_timestamp_number(p) - 1900; 1575 return true; 1576} 1577 1578static void start_month(upb_json_parser *p, const char *ptr) { 1579 capture_begin(p, ptr); 1580} 1581 1582static bool end_month(upb_json_parser *p, const char *ptr) { 1583 if (!capture_end(p, ptr)) { 1584 return false; 1585 } 1586 p->tm.tm_mon = parse_timestamp_number(p) - 1; 1587 return true; 1588} 1589 1590static void start_day(upb_json_parser *p, const char *ptr) { 1591 capture_begin(p, ptr); 1592} 1593 1594static bool end_day(upb_json_parser *p, const char *ptr) { 1595 if (!capture_end(p, ptr)) { 1596 return false; 1597 } 1598 p->tm.tm_mday = parse_timestamp_number(p); 1599 return true; 1600} 1601 1602static void start_hour(upb_json_parser *p, const char *ptr) { 1603 capture_begin(p, ptr); 1604} 1605 1606static bool end_hour(upb_json_parser *p, const char *ptr) { 1607 if (!capture_end(p, ptr)) { 1608 return false; 1609 } 1610 p->tm.tm_hour = parse_timestamp_number(p); 1611 return true; 1612} 1613 1614static void start_minute(upb_json_parser *p, const char *ptr) { 1615 capture_begin(p, ptr); 1616} 1617 1618static bool end_minute(upb_json_parser *p, const char *ptr) { 1619 if (!capture_end(p, ptr)) { 1620 return false; 1621 } 1622 p->tm.tm_min = parse_timestamp_number(p); 1623 return true; 1624} 1625 1626static void start_second(upb_json_parser *p, const char *ptr) { 1627 capture_begin(p, ptr); 1628} 1629 1630static bool end_second(upb_json_parser *p, const char *ptr) { 1631 if (!capture_end(p, ptr)) { 1632 return false; 1633 } 1634 p->tm.tm_sec = parse_timestamp_number(p); 1635 return true; 1636} 1637 1638static void start_timestamp_base(upb_json_parser *p) { 1639 memset(&p->tm, 0, sizeof(struct tm)); 1640} 1641 1642static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) { 1643 capture_begin(p, ptr); 1644} 1645 1646static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) { 1647 size_t len; 1648 const char *buf; 1649 char nanos_buf[12]; 1650 char *end; 1651 double val = 0.0; 1652 int32_t nanos; 1653 const char *nanos_membername = "nanos"; 1654 1655 memset(nanos_buf, 0, 12); 1656 1657 if (!capture_end(p, ptr)) { 1658 return false; 1659 } 1660 1661 buf = accumulate_getptr(p, &len); 1662 1663 if (len > 10) { 1664 upb_status_seterrf(p->status, 1665 "error parsing timestamp: at most 9-digit fraction."); 1666 return false; 1667 } 1668 1669 /* Parse nanos */ 1670 nanos_buf[0] = '0'; 1671 memcpy(nanos_buf + 1, buf, len); 1672 val = strtod(nanos_buf, &end); 1673 1674 if (errno == ERANGE || end != nanos_buf + len + 1) { 1675 upb_status_seterrf(p->status, "error parsing timestamp nanos: %s", 1676 nanos_buf); 1677 return false; 1678 } 1679 1680 nanos = val * 1000000000; 1681 1682 /* Clean up previous environment */ 1683 multipart_end(p); 1684 1685 /* Set nanos */ 1686 start_member(p); 1687 capture_begin(p, nanos_membername); 1688 capture_end(p, nanos_membername + 5); 1689 end_membername(p); 1690 upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); 1691 end_member(p); 1692 1693 /* Continue previous environment */ 1694 multipart_startaccum(p); 1695 1696 return true; 1697} 1698 1699static void start_timestamp_zone(upb_json_parser *p, const char *ptr) { 1700 capture_begin(p, ptr); 1701} 1702 1703/* epoch_days(1970, 1, 1) == 1970-01-01 == 0. */ 1704static int epoch_days(int year, int month, int day) { 1705 static const uint16_t month_yday[12] = {0, 31, 59, 90, 120, 151, 1706 181, 212, 243, 273, 304, 334}; 1707 uint32_t year_adj = year + 4800; /* Ensure positive year, multiple of 400. */ 1708 uint32_t febs = year_adj - (month <= 2 ? 1 : 0); /* Februaries since base. */ 1709 uint32_t leap_days = 1 + (febs / 4) - (febs / 100) + (febs / 400); 1710 uint32_t days = 365 * year_adj + leap_days + month_yday[month - 1] + day - 1; 1711 return days - 2472692; /* Adjust to Unix epoch. */ 1712} 1713 1714static int64_t upb_timegm(const struct tm *tp) { 1715 int64_t ret = epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday); 1716 ret = (ret * 24) + tp->tm_hour; 1717 ret = (ret * 60) + tp->tm_min; 1718 ret = (ret * 60) + tp->tm_sec; 1719 return ret; 1720} 1721 1722static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) { 1723 size_t len; 1724 const char *buf; 1725 int hours; 1726 int64_t seconds; 1727 const char *seconds_membername = "seconds"; 1728 1729 if (!capture_end(p, ptr)) { 1730 return false; 1731 } 1732 1733 buf = accumulate_getptr(p, &len); 1734 1735 if (buf[0] != 'Z') { 1736 if (sscanf(buf + 1, "%2d:00", &hours) != 1) { 1737 upb_status_seterrf(p->status, "error parsing timestamp offset"); 1738 return false; 1739 } 1740 1741 if (buf[0] == '+') { 1742 hours = -hours; 1743 } 1744 1745 p->tm.tm_hour += hours; 1746 } 1747 1748 /* Normalize tm */ 1749 seconds = upb_timegm(&p->tm); 1750 1751 /* Check timestamp boundary */ 1752 if (seconds < -62135596800) { 1753 upb_status_seterrf(p->status, "error parsing timestamp: " 1754 "minimum acceptable value is " 1755 "0001-01-01T00:00:00Z"); 1756 return false; 1757 } 1758 1759 /* Clean up previous environment */ 1760 multipart_end(p); 1761 1762 /* Set seconds */ 1763 start_member(p); 1764 capture_begin(p, seconds_membername); 1765 capture_end(p, seconds_membername + 7); 1766 end_membername(p); 1767 upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); 1768 end_member(p); 1769 1770 /* Continue previous environment */ 1771 multipart_startaccum(p); 1772 1773 return true; 1774} 1775 1776static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) { 1777 capture_begin(p, ptr); 1778} 1779 1780static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) { 1781 return capture_end(p, ptr); 1782} 1783 1784static bool start_fieldmask_path(upb_json_parser *p) { 1785 upb_jsonparser_frame *inner; 1786 upb_selector_t sel; 1787 1788 if (!check_stack(p)) return false; 1789 1790 /* Start a new parser frame: parser frames correspond one-to-one with 1791 * handler frames, and string events occur in a sub-frame. */ 1792 inner = start_jsonparser_frame(p); 1793 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 1794 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); 1795 inner->m = p->top->m; 1796 inner->f = p->top->f; 1797 p->top = inner; 1798 1799 multipart_startaccum(p); 1800 return true; 1801} 1802 1803static bool lower_camel_push( 1804 upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) { 1805 const char *limit = ptr + len; 1806 bool first = true; 1807 for (;ptr < limit; ptr++) { 1808 if (*ptr >= 'A' && *ptr <= 'Z' && !first) { 1809 char lower = tolower(*ptr); 1810 upb_sink_putstring(p->top->sink, sel, "_", 1, NULL); 1811 upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL); 1812 } else { 1813 upb_sink_putstring(p->top->sink, sel, ptr, 1, NULL); 1814 } 1815 first = false; 1816 } 1817 return true; 1818} 1819 1820static bool end_fieldmask_path(upb_json_parser *p) { 1821 upb_selector_t sel; 1822 1823 if (!lower_camel_push( 1824 p, getsel_for_handlertype(p, UPB_HANDLER_STRING), 1825 p->accumulated, p->accumulated_len)) { 1826 return false; 1827 } 1828 1829 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 1830 upb_sink_endstr(p->top->sink, sel); 1831 p->top--; 1832 1833 multipart_end(p); 1834 return true; 1835} 1836 1837static void start_member(upb_json_parser *p) { 1838 UPB_ASSERT(!p->top->f); 1839 multipart_startaccum(p); 1840} 1841 1842/* Helper: invoked during parse_mapentry() to emit the mapentry message's key 1843 * field based on the current contents of the accumulate buffer. */ 1844static bool parse_mapentry_key(upb_json_parser *p) { 1845 1846 size_t len; 1847 const char *buf = accumulate_getptr(p, &len); 1848 1849 /* Emit the key field. We do a bit of ad-hoc parsing here because the 1850 * parser state machine has already decided that this is a string field 1851 * name, and we are reinterpreting it as some arbitrary key type. In 1852 * particular, integer and bool keys are quoted, so we need to parse the 1853 * quoted string contents here. */ 1854 1855 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); 1856 if (p->top->f == NULL) { 1857 upb_status_seterrmsg(p->status, "mapentry message has no key"); 1858 return false; 1859 } 1860 switch (upb_fielddef_type(p->top->f)) { 1861 case UPB_TYPE_INT32: 1862 case UPB_TYPE_INT64: 1863 case UPB_TYPE_UINT32: 1864 case UPB_TYPE_UINT64: 1865 /* Invoke end_number. The accum buffer has the number's text already. */ 1866 if (!parse_number(p, true)) { 1867 return false; 1868 } 1869 break; 1870 case UPB_TYPE_BOOL: 1871 if (len == 4 && !strncmp(buf, "true", 4)) { 1872 if (!parser_putbool(p, true)) { 1873 return false; 1874 } 1875 } else if (len == 5 && !strncmp(buf, "false", 5)) { 1876 if (!parser_putbool(p, false)) { 1877 return false; 1878 } 1879 } else { 1880 upb_status_seterrmsg(p->status, 1881 "Map bool key not 'true' or 'false'"); 1882 return false; 1883 } 1884 multipart_end(p); 1885 break; 1886 case UPB_TYPE_STRING: 1887 case UPB_TYPE_BYTES: { 1888 upb_sink subsink; 1889 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 1890 upb_sink_startstr(p->top->sink, sel, len, &subsink); 1891 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); 1892 upb_sink_putstring(subsink, sel, buf, len, NULL); 1893 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 1894 upb_sink_endstr(subsink, sel); 1895 multipart_end(p); 1896 break; 1897 } 1898 default: 1899 upb_status_seterrmsg(p->status, "Invalid field type for map key"); 1900 return false; 1901 } 1902 1903 return true; 1904} 1905 1906/* Helper: emit one map entry (as a submessage in the map field sequence). This 1907 * is invoked from end_membername(), at the end of the map entry's key string, 1908 * with the map key in the accumulate buffer. It parses the key from that 1909 * buffer, emits the handler calls to start the mapentry submessage (setting up 1910 * its subframe in the process), and sets up state in the subframe so that the 1911 * value parser (invoked next) will emit the mapentry's value field and then 1912 * end the mapentry message. */ 1913 1914static bool handle_mapentry(upb_json_parser *p) { 1915 const upb_fielddef *mapfield; 1916 const upb_msgdef *mapentrymsg; 1917 upb_jsonparser_frame *inner; 1918 upb_selector_t sel; 1919 1920 /* Map entry: p->top->sink is the seq frame, so we need to start a frame 1921 * for the mapentry itself, and then set |f| in that frame so that the map 1922 * value field is parsed, and also set a flag to end the frame after the 1923 * map-entry value is parsed. */ 1924 if (!check_stack(p)) return false; 1925 1926 mapfield = p->top->mapfield; 1927 mapentrymsg = upb_fielddef_msgsubdef(mapfield); 1928 1929 inner = start_jsonparser_frame(p); 1930 p->top->f = mapfield; 1931 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); 1932 upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); 1933 inner->m = mapentrymsg; 1934 inner->mapfield = mapfield; 1935 1936 /* Don't set this to true *yet* -- we reuse parsing handlers below to push 1937 * the key field value to the sink, and these handlers will pop the frame 1938 * if they see is_mapentry (when invoked by the parser state machine, they 1939 * would have just seen the map-entry value, not key). */ 1940 inner->is_mapentry = false; 1941 p->top = inner; 1942 1943 /* send STARTMSG in submsg frame. */ 1944 upb_sink_startmsg(p->top->sink); 1945 1946 parse_mapentry_key(p); 1947 1948 /* Set up the value field to receive the map-entry value. */ 1949 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); 1950 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */ 1951 p->top->mapfield = mapfield; 1952 if (p->top->f == NULL) { 1953 upb_status_seterrmsg(p->status, "mapentry message has no value"); 1954 return false; 1955 } 1956 1957 return true; 1958} 1959 1960static bool end_membername(upb_json_parser *p) { 1961 UPB_ASSERT(!p->top->f); 1962 1963 if (!p->top->m) { 1964 p->top->is_unknown_field = true; 1965 multipart_end(p); 1966 return true; 1967 } 1968 1969 if (p->top->is_any) { 1970 return end_any_membername(p); 1971 } else if (p->top->is_map) { 1972 return handle_mapentry(p); 1973 } else { 1974 size_t len; 1975 const char *buf = accumulate_getptr(p, &len); 1976 upb_value v; 1977 1978 if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) { 1979 p->top->f = upb_value_getconstptr(v); 1980 multipart_end(p); 1981 1982 return true; 1983 } else if (p->ignore_json_unknown) { 1984 p->top->is_unknown_field = true; 1985 multipart_end(p); 1986 return true; 1987 } else { 1988 upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); 1989 return false; 1990 } 1991 } 1992} 1993 1994static bool end_any_membername(upb_json_parser *p) { 1995 size_t len; 1996 const char *buf = accumulate_getptr(p, &len); 1997 upb_value v; 1998 1999 if (len == 5 && strncmp(buf, "@type", len) == 0) { 2000 upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v); 2001 p->top->f = upb_value_getconstptr(v); 2002 multipart_end(p); 2003 return true; 2004 } else { 2005 p->top->is_unknown_field = true; 2006 multipart_end(p); 2007 return true; 2008 } 2009} 2010 2011static void end_member(upb_json_parser *p) { 2012 /* If we just parsed a map-entry value, end that frame too. */ 2013 if (p->top->is_mapentry) { 2014 upb_selector_t sel; 2015 bool ok; 2016 const upb_fielddef *mapfield; 2017 2018 UPB_ASSERT(p->top > p->stack); 2019 /* send ENDMSG on submsg. */ 2020 upb_sink_endmsg(p->top->sink, p->status); 2021 mapfield = p->top->mapfield; 2022 2023 /* send ENDSUBMSG in repeated-field-of-mapentries frame. */ 2024 p->top--; 2025 ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel); 2026 UPB_ASSUME(ok); 2027 upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); 2028 } 2029 2030 p->top->f = NULL; 2031 p->top->is_unknown_field = false; 2032} 2033 2034static void start_any_member(upb_json_parser *p, const char *ptr) { 2035 start_member(p); 2036 json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr); 2037} 2038 2039static void end_any_member(upb_json_parser *p, const char *ptr) { 2040 json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr); 2041 end_member(p); 2042} 2043 2044static bool start_subobject(upb_json_parser *p) { 2045 if (p->top->is_unknown_field) { 2046 if (!check_stack(p)) return false; 2047 2048 p->top = start_jsonparser_frame(p); 2049 return true; 2050 } 2051 2052 if (upb_fielddef_ismap(p->top->f)) { 2053 upb_jsonparser_frame *inner; 2054 upb_selector_t sel; 2055 2056 /* Beginning of a map. Start a new parser frame in a repeated-field 2057 * context. */ 2058 if (!check_stack(p)) return false; 2059 2060 inner = start_jsonparser_frame(p); 2061 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); 2062 upb_sink_startseq(p->top->sink, sel, &inner->sink); 2063 inner->m = upb_fielddef_msgsubdef(p->top->f); 2064 inner->mapfield = p->top->f; 2065 inner->is_map = true; 2066 p->top = inner; 2067 2068 return true; 2069 } else if (upb_fielddef_issubmsg(p->top->f)) { 2070 upb_jsonparser_frame *inner; 2071 upb_selector_t sel; 2072 2073 /* Beginning of a subobject. Start a new parser frame in the submsg 2074 * context. */ 2075 if (!check_stack(p)) return false; 2076 2077 inner = start_jsonparser_frame(p); 2078 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); 2079 upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); 2080 inner->m = upb_fielddef_msgsubdef(p->top->f); 2081 set_name_table(p, inner); 2082 p->top = inner; 2083 2084 if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { 2085 p->top->is_any = true; 2086 p->top->any_frame = json_parser_any_frame_new(p); 2087 } else { 2088 p->top->is_any = false; 2089 p->top->any_frame = NULL; 2090 } 2091 2092 return true; 2093 } else { 2094 upb_status_seterrf(p->status, 2095 "Object specified for non-message/group field: %s", 2096 upb_fielddef_name(p->top->f)); 2097 return false; 2098 } 2099} 2100 2101static bool start_subobject_full(upb_json_parser *p) { 2102 if (is_top_level(p)) { 2103 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 2104 start_value_object(p, VALUE_STRUCTVALUE); 2105 if (!start_subobject(p)) return false; 2106 start_structvalue_object(p); 2107 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { 2108 start_structvalue_object(p); 2109 } else { 2110 return true; 2111 } 2112 } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) { 2113 if (!start_subobject(p)) return false; 2114 start_structvalue_object(p); 2115 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 2116 if (!start_subobject(p)) return false; 2117 start_value_object(p, VALUE_STRUCTVALUE); 2118 if (!start_subobject(p)) return false; 2119 start_structvalue_object(p); 2120 } 2121 2122 return start_subobject(p); 2123} 2124 2125static void end_subobject(upb_json_parser *p) { 2126 if (is_top_level(p)) { 2127 return; 2128 } 2129 2130 if (p->top->is_map) { 2131 upb_selector_t sel; 2132 p->top--; 2133 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); 2134 upb_sink_endseq(p->top->sink, sel); 2135 } else { 2136 upb_selector_t sel; 2137 bool is_unknown = p->top->m == NULL; 2138 p->top--; 2139 if (!is_unknown) { 2140 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); 2141 upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); 2142 } 2143 } 2144} 2145 2146static void end_subobject_full(upb_json_parser *p) { 2147 end_subobject(p); 2148 2149 if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { 2150 end_structvalue_object(p); 2151 if (!is_top_level(p)) { 2152 end_subobject(p); 2153 } 2154 } 2155 2156 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 2157 end_value_object(p); 2158 if (!is_top_level(p)) { 2159 end_subobject(p); 2160 } 2161 } 2162} 2163 2164static bool start_array(upb_json_parser *p) { 2165 upb_jsonparser_frame *inner; 2166 upb_selector_t sel; 2167 2168 if (is_top_level(p)) { 2169 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 2170 start_value_object(p, VALUE_LISTVALUE); 2171 if (!start_subobject(p)) return false; 2172 start_listvalue_object(p); 2173 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { 2174 start_listvalue_object(p); 2175 } else { 2176 return false; 2177 } 2178 } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) && 2179 (!upb_fielddef_isseq(p->top->f) || 2180 p->top->is_repeated)) { 2181 if (!start_subobject(p)) return false; 2182 start_listvalue_object(p); 2183 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) && 2184 (!upb_fielddef_isseq(p->top->f) || 2185 p->top->is_repeated)) { 2186 if (!start_subobject(p)) return false; 2187 start_value_object(p, VALUE_LISTVALUE); 2188 if (!start_subobject(p)) return false; 2189 start_listvalue_object(p); 2190 } 2191 2192 if (p->top->is_unknown_field) { 2193 inner = start_jsonparser_frame(p); 2194 inner->is_unknown_field = true; 2195 p->top = inner; 2196 2197 return true; 2198 } 2199 2200 if (!upb_fielddef_isseq(p->top->f)) { 2201 upb_status_seterrf(p->status, 2202 "Array specified for non-repeated field: %s", 2203 upb_fielddef_name(p->top->f)); 2204 return false; 2205 } 2206 2207 if (!check_stack(p)) return false; 2208 2209 inner = start_jsonparser_frame(p); 2210 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); 2211 upb_sink_startseq(p->top->sink, sel, &inner->sink); 2212 inner->m = p->top->m; 2213 inner->f = p->top->f; 2214 inner->is_repeated = true; 2215 p->top = inner; 2216 2217 return true; 2218} 2219 2220static void end_array(upb_json_parser *p) { 2221 upb_selector_t sel; 2222 2223 UPB_ASSERT(p->top > p->stack); 2224 2225 p->top--; 2226 2227 if (p->top->is_unknown_field) { 2228 return; 2229 } 2230 2231 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); 2232 upb_sink_endseq(p->top->sink, sel); 2233 2234 if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { 2235 end_listvalue_object(p); 2236 if (!is_top_level(p)) { 2237 end_subobject(p); 2238 } 2239 } 2240 2241 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 2242 end_value_object(p); 2243 if (!is_top_level(p)) { 2244 end_subobject(p); 2245 } 2246 } 2247} 2248 2249static void start_object(upb_json_parser *p) { 2250 if (!p->top->is_map && p->top->m != NULL) { 2251 upb_sink_startmsg(p->top->sink); 2252 } 2253} 2254 2255static void end_object(upb_json_parser *p) { 2256 if (!p->top->is_map && p->top->m != NULL) { 2257 upb_sink_endmsg(p->top->sink, p->status); 2258 } 2259} 2260 2261static void start_any_object(upb_json_parser *p, const char *ptr) { 2262 start_object(p); 2263 p->top->any_frame->before_type_url_start = ptr; 2264 p->top->any_frame->before_type_url_end = ptr; 2265} 2266 2267static bool end_any_object(upb_json_parser *p, const char *ptr) { 2268 const char *value_membername = "value"; 2269 bool is_well_known_packed = false; 2270 const char *packed_end = ptr + 1; 2271 upb_selector_t sel; 2272 upb_jsonparser_frame *inner; 2273 2274 if (json_parser_any_frame_has_value(p->top->any_frame) && 2275 !json_parser_any_frame_has_type_url(p->top->any_frame)) { 2276 upb_status_seterrmsg(p->status, "No valid type url"); 2277 return false; 2278 } 2279 2280 /* Well known types data is represented as value field. */ 2281 if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) != 2282 UPB_WELLKNOWN_UNSPECIFIED) { 2283 is_well_known_packed = true; 2284 2285 if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { 2286 p->top->any_frame->before_type_url_start = 2287 memchr(p->top->any_frame->before_type_url_start, ':', 2288 p->top->any_frame->before_type_url_end - 2289 p->top->any_frame->before_type_url_start); 2290 if (p->top->any_frame->before_type_url_start == NULL) { 2291 upb_status_seterrmsg(p->status, "invalid data for well known type."); 2292 return false; 2293 } 2294 p->top->any_frame->before_type_url_start++; 2295 } 2296 2297 if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { 2298 p->top->any_frame->after_type_url_start = 2299 memchr(p->top->any_frame->after_type_url_start, ':', 2300 (ptr + 1) - 2301 p->top->any_frame->after_type_url_start); 2302 if (p->top->any_frame->after_type_url_start == NULL) { 2303 upb_status_seterrmsg(p->status, "Invalid data for well known type."); 2304 return false; 2305 } 2306 p->top->any_frame->after_type_url_start++; 2307 packed_end = ptr; 2308 } 2309 } 2310 2311 if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { 2312 if (!parse(p->top->any_frame->parser, NULL, 2313 p->top->any_frame->before_type_url_start, 2314 p->top->any_frame->before_type_url_end - 2315 p->top->any_frame->before_type_url_start, NULL)) { 2316 return false; 2317 } 2318 } else { 2319 if (!is_well_known_packed) { 2320 if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) { 2321 return false; 2322 } 2323 } 2324 } 2325 2326 if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) && 2327 json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { 2328 if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) { 2329 return false; 2330 } 2331 } 2332 2333 if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { 2334 if (!parse(p->top->any_frame->parser, NULL, 2335 p->top->any_frame->after_type_url_start, 2336 packed_end - p->top->any_frame->after_type_url_start, NULL)) { 2337 return false; 2338 } 2339 } else { 2340 if (!is_well_known_packed) { 2341 if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) { 2342 return false; 2343 } 2344 } 2345 } 2346 2347 if (!end(p->top->any_frame->parser, NULL)) { 2348 return false; 2349 } 2350 2351 p->top->is_any = false; 2352 2353 /* Set value */ 2354 start_member(p); 2355 capture_begin(p, value_membername); 2356 capture_end(p, value_membername + 5); 2357 end_membername(p); 2358 2359 if (!check_stack(p)) return false; 2360 inner = p->top + 1; 2361 2362 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 2363 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); 2364 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); 2365 upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr, 2366 p->top->any_frame->stringsink.len, NULL); 2367 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 2368 upb_sink_endstr(inner->sink, sel); 2369 2370 end_member(p); 2371 2372 end_object(p); 2373 2374 /* Deallocate any parse frame. */ 2375 json_parser_any_frame_free(p->top->any_frame); 2376 2377 return true; 2378} 2379 2380static bool is_string_wrapper(const upb_msgdef *m) { 2381 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); 2382 return type == UPB_WELLKNOWN_STRINGVALUE || 2383 type == UPB_WELLKNOWN_BYTESVALUE; 2384} 2385 2386static bool is_fieldmask(const upb_msgdef *m) { 2387 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); 2388 return type == UPB_WELLKNOWN_FIELDMASK; 2389} 2390 2391static void start_fieldmask_object(upb_json_parser *p) { 2392 const char *membername = "paths"; 2393 2394 start_object(p); 2395 2396 /* Set up context for parsing value */ 2397 start_member(p); 2398 capture_begin(p, membername); 2399 capture_end(p, membername + 5); 2400 end_membername(p); 2401 2402 start_array(p); 2403} 2404 2405static void end_fieldmask_object(upb_json_parser *p) { 2406 end_array(p); 2407 end_member(p); 2408 end_object(p); 2409} 2410 2411static void start_wrapper_object(upb_json_parser *p) { 2412 const char *membername = "value"; 2413 2414 start_object(p); 2415 2416 /* Set up context for parsing value */ 2417 start_member(p); 2418 capture_begin(p, membername); 2419 capture_end(p, membername + 5); 2420 end_membername(p); 2421} 2422 2423static void end_wrapper_object(upb_json_parser *p) { 2424 end_member(p); 2425 end_object(p); 2426} 2427 2428static void start_value_object(upb_json_parser *p, int value_type) { 2429 const char *nullmember = "null_value"; 2430 const char *numbermember = "number_value"; 2431 const char *stringmember = "string_value"; 2432 const char *boolmember = "bool_value"; 2433 const char *structmember = "struct_value"; 2434 const char *listmember = "list_value"; 2435 const char *membername = ""; 2436 2437 switch (value_type) { 2438 case VALUE_NULLVALUE: 2439 membername = nullmember; 2440 break; 2441 case VALUE_NUMBERVALUE: 2442 membername = numbermember; 2443 break; 2444 case VALUE_STRINGVALUE: 2445 membername = stringmember; 2446 break; 2447 case VALUE_BOOLVALUE: 2448 membername = boolmember; 2449 break; 2450 case VALUE_STRUCTVALUE: 2451 membername = structmember; 2452 break; 2453 case VALUE_LISTVALUE: 2454 membername = listmember; 2455 break; 2456 } 2457 2458 start_object(p); 2459 2460 /* Set up context for parsing value */ 2461 start_member(p); 2462 capture_begin(p, membername); 2463 capture_end(p, membername + strlen(membername)); 2464 end_membername(p); 2465} 2466 2467static void end_value_object(upb_json_parser *p) { 2468 end_member(p); 2469 end_object(p); 2470} 2471 2472static void start_listvalue_object(upb_json_parser *p) { 2473 const char *membername = "values"; 2474 2475 start_object(p); 2476 2477 /* Set up context for parsing value */ 2478 start_member(p); 2479 capture_begin(p, membername); 2480 capture_end(p, membername + strlen(membername)); 2481 end_membername(p); 2482} 2483 2484static void end_listvalue_object(upb_json_parser *p) { 2485 end_member(p); 2486 end_object(p); 2487} 2488 2489static void start_structvalue_object(upb_json_parser *p) { 2490 const char *membername = "fields"; 2491 2492 start_object(p); 2493 2494 /* Set up context for parsing value */ 2495 start_member(p); 2496 capture_begin(p, membername); 2497 capture_end(p, membername + strlen(membername)); 2498 end_membername(p); 2499} 2500 2501static void end_structvalue_object(upb_json_parser *p) { 2502 end_member(p); 2503 end_object(p); 2504} 2505 2506static bool is_top_level(upb_json_parser *p) { 2507 return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field; 2508} 2509 2510static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) { 2511 return p->top->m != NULL && upb_msgdef_wellknowntype(p->top->m) == type; 2512} 2513 2514static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) { 2515 return p->top->f != NULL && 2516 upb_fielddef_issubmsg(p->top->f) && 2517 (upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(p->top->f)) 2518 == type); 2519} 2520 2521static bool does_number_wrapper_start(upb_json_parser *p) { 2522 return p->top->f != NULL && 2523 upb_fielddef_issubmsg(p->top->f) && 2524 upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(p->top->f)); 2525} 2526 2527static bool does_number_wrapper_end(upb_json_parser *p) { 2528 return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); 2529} 2530 2531static bool is_number_wrapper_object(upb_json_parser *p) { 2532 return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); 2533} 2534 2535static bool does_string_wrapper_start(upb_json_parser *p) { 2536 return p->top->f != NULL && 2537 upb_fielddef_issubmsg(p->top->f) && 2538 is_string_wrapper(upb_fielddef_msgsubdef(p->top->f)); 2539} 2540 2541static bool does_string_wrapper_end(upb_json_parser *p) { 2542 return p->top->m != NULL && is_string_wrapper(p->top->m); 2543} 2544 2545static bool is_string_wrapper_object(upb_json_parser *p) { 2546 return p->top->m != NULL && is_string_wrapper(p->top->m); 2547} 2548 2549static bool does_fieldmask_start(upb_json_parser *p) { 2550 return p->top->f != NULL && 2551 upb_fielddef_issubmsg(p->top->f) && 2552 is_fieldmask(upb_fielddef_msgsubdef(p->top->f)); 2553} 2554 2555static bool does_fieldmask_end(upb_json_parser *p) { 2556 return p->top->m != NULL && is_fieldmask(p->top->m); 2557} 2558 2559#define CHECK_RETURN_TOP(x) if (!(x)) goto error 2560 2561 2562/* The actual parser **********************************************************/ 2563 2564/* What follows is the Ragel parser itself. The language is specified in Ragel 2565 * and the actions call our C functions above. 2566 * 2567 * Ragel has an extensive set of functionality, and we use only a small part of 2568 * it. There are many action types but we only use a few: 2569 * 2570 * ">" -- transition into a machine 2571 * "%" -- transition out of a machine 2572 * "@" -- transition into a final state of a machine. 2573 * 2574 * "@" transitions are tricky because a machine can transition into a final 2575 * state repeatedly. But in some cases we know this can't happen, for example 2576 * a string which is delimited by a final '"' can only transition into its 2577 * final state once, when the closing '"' is seen. */ 2578 2579%%{ 2580 machine json; 2581 2582 ws = space*; 2583 2584 integer = "0" | /[1-9]/ /[0-9]/*; 2585 decimal = "." /[0-9]/+; 2586 exponent = /[eE]/ /[+\-]/? /[0-9]/+; 2587 2588 number_machine := 2589 ("-"? integer decimal? exponent?) 2590 %/{ fhold; fret; } 2591 <: any 2592 >{ fhold; fret; } 2593 ; 2594 number = /[0-9\-]/ >{ fhold; fcall number_machine; }; 2595 2596 text = 2597 /[^\\"]/+ 2598 >{ start_text(parser, p); } 2599 %{ CHECK_RETURN_TOP(end_text(parser, p)); } 2600 ; 2601 2602 unicode_char = 2603 "\\u" 2604 /[0-9A-Fa-f]/{4} 2605 >{ start_hex(parser); } 2606 ${ hexdigit(parser, p); } 2607 %{ CHECK_RETURN_TOP(end_hex(parser)); } 2608 ; 2609 2610 escape_char = 2611 "\\" 2612 /[rtbfn"\/\\]/ 2613 >{ CHECK_RETURN_TOP(escape(parser, p)); } 2614 ; 2615 2616 string_machine := 2617 (text | unicode_char | escape_char)** 2618 '"' 2619 @{ fhold; fret; } 2620 ; 2621 2622 year = 2623 (digit digit digit digit) 2624 >{ start_year(parser, p); } 2625 %{ CHECK_RETURN_TOP(end_year(parser, p)); } 2626 ; 2627 month = 2628 (digit digit) 2629 >{ start_month(parser, p); } 2630 %{ CHECK_RETURN_TOP(end_month(parser, p)); } 2631 ; 2632 day = 2633 (digit digit) 2634 >{ start_day(parser, p); } 2635 %{ CHECK_RETURN_TOP(end_day(parser, p)); } 2636 ; 2637 hour = 2638 (digit digit) 2639 >{ start_hour(parser, p); } 2640 %{ CHECK_RETURN_TOP(end_hour(parser, p)); } 2641 ; 2642 minute = 2643 (digit digit) 2644 >{ start_minute(parser, p); } 2645 %{ CHECK_RETURN_TOP(end_minute(parser, p)); } 2646 ; 2647 second = 2648 (digit digit) 2649 >{ start_second(parser, p); } 2650 %{ CHECK_RETURN_TOP(end_second(parser, p)); } 2651 ; 2652 2653 duration_machine := 2654 ("-"? integer decimal?) 2655 >{ start_duration_base(parser, p); } 2656 %{ CHECK_RETURN_TOP(end_duration_base(parser, p)); } 2657 's"' 2658 @{ fhold; fret; } 2659 ; 2660 2661 timestamp_machine := 2662 (year "-" month "-" day "T" hour ":" minute ":" second) 2663 >{ start_timestamp_base(parser); } 2664 ("." digit+)? 2665 >{ start_timestamp_fraction(parser, p); } 2666 %{ CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); } 2667 ([+\-] digit digit ":00" | "Z") 2668 >{ start_timestamp_zone(parser, p); } 2669 %{ CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); } 2670 '"' 2671 @{ fhold; fret; } 2672 ; 2673 2674 fieldmask_path_text = 2675 /[^",]/+ 2676 >{ start_fieldmask_path_text(parser, p); } 2677 %{ end_fieldmask_path_text(parser, p); } 2678 ; 2679 2680 fieldmask_path = 2681 fieldmask_path_text 2682 >{ start_fieldmask_path(parser); } 2683 %{ end_fieldmask_path(parser); } 2684 ; 2685 2686 fieldmask_machine := 2687 (fieldmask_path ("," fieldmask_path)*)? 2688 '"' 2689 @{ fhold; fret; } 2690 ; 2691 2692 string = 2693 '"' 2694 @{ 2695 if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) { 2696 fcall timestamp_machine; 2697 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) { 2698 fcall duration_machine; 2699 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) { 2700 fcall fieldmask_machine; 2701 } else { 2702 fcall string_machine; 2703 } 2704 } 2705 '"'; 2706 2707 value2 = ^(space | "]" | "}") >{ fhold; fcall value_machine; } ; 2708 2709 member = 2710 ws 2711 string 2712 >{ 2713 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { 2714 start_any_member(parser, p); 2715 } else { 2716 start_member(parser); 2717 } 2718 } 2719 @{ CHECK_RETURN_TOP(end_membername(parser)); } 2720 ws ":" ws 2721 value2 2722 %{ 2723 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { 2724 end_any_member(parser, p); 2725 } else { 2726 end_member(parser); 2727 } 2728 } 2729 ws; 2730 2731 object = 2732 ("{" ws) 2733 >{ 2734 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { 2735 start_any_object(parser, p); 2736 } else { 2737 start_object(parser); 2738 } 2739 } 2740 (member ("," member)*)? 2741 "}" 2742 >{ 2743 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { 2744 CHECK_RETURN_TOP(end_any_object(parser, p)); 2745 } else { 2746 end_object(parser); 2747 } 2748 } 2749 ; 2750 2751 element = ws value2 ws; 2752 array = 2753 "[" 2754 >{ CHECK_RETURN_TOP(start_array(parser)); } 2755 ws 2756 (element ("," element)*)? 2757 "]" 2758 >{ end_array(parser); } 2759 ; 2760 2761 value = 2762 number 2763 >{ CHECK_RETURN_TOP(start_number(parser, p)); } 2764 %{ CHECK_RETURN_TOP(end_number(parser, p)); } 2765 | string 2766 >{ CHECK_RETURN_TOP(start_stringval(parser)); } 2767 @{ CHECK_RETURN_TOP(end_stringval(parser)); } 2768 | "true" 2769 %{ CHECK_RETURN_TOP(end_bool(parser, true)); } 2770 | "false" 2771 %{ CHECK_RETURN_TOP(end_bool(parser, false)); } 2772 | "null" 2773 %{ CHECK_RETURN_TOP(end_null(parser)); } 2774 | object 2775 >{ CHECK_RETURN_TOP(start_subobject_full(parser)); } 2776 %{ end_subobject_full(parser); } 2777 | array; 2778 2779 value_machine := 2780 value 2781 <: any >{ fhold; fret; } ; 2782 2783 main := ws value ws; 2784}%% 2785 2786%% write data noerror nofinal; 2787 2788size_t parse(void *closure, const void *hd, const char *buf, size_t size, 2789 const upb_bufhandle *handle) { 2790 upb_json_parser *parser = closure; 2791 2792 /* Variables used by Ragel's generated code. */ 2793 int cs = parser->current_state; 2794 int *stack = parser->parser_stack; 2795 int top = parser->parser_top; 2796 2797 const char *p = buf; 2798 const char *pe = buf + size; 2799 const char *eof = &eof_ch; 2800 2801 parser->handle = handle; 2802 2803 UPB_UNUSED(hd); 2804 UPB_UNUSED(handle); 2805 2806 capture_resume(parser, buf); 2807 2808 %% write exec; 2809 2810 if (p != pe) { 2811 upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p); 2812 } else { 2813 capture_suspend(parser, &p); 2814 } 2815 2816error: 2817 /* Save parsing state back to parser. */ 2818 parser->current_state = cs; 2819 parser->parser_top = top; 2820 2821 return p - buf; 2822} 2823 2824static bool end(void *closure, const void *hd) { 2825 upb_json_parser *parser = closure; 2826 2827 /* Prevent compile warning on unused static constants. */ 2828 UPB_UNUSED(json_start); 2829 UPB_UNUSED(json_en_duration_machine); 2830 UPB_UNUSED(json_en_fieldmask_machine); 2831 UPB_UNUSED(json_en_number_machine); 2832 UPB_UNUSED(json_en_string_machine); 2833 UPB_UNUSED(json_en_timestamp_machine); 2834 UPB_UNUSED(json_en_value_machine); 2835 UPB_UNUSED(json_en_main); 2836 2837 parse(parser, hd, &eof_ch, 0, NULL); 2838 2839 return parser->current_state >= %%{ write first_final; }%%; 2840} 2841 2842static void json_parser_reset(upb_json_parser *p) { 2843 int cs; 2844 int top; 2845 2846 p->top = p->stack; 2847 init_frame(p->top); 2848 2849 /* Emit Ragel initialization of the parser. */ 2850 %% write init; 2851 p->current_state = cs; 2852 p->parser_top = top; 2853 accumulate_clear(p); 2854 p->multipart_state = MULTIPART_INACTIVE; 2855 p->capture = NULL; 2856 p->accumulated = NULL; 2857} 2858 2859static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, 2860 const upb_msgdef *md) { 2861 int i, n; 2862 upb_alloc *alloc = upb_arena_alloc(c->arena); 2863 2864 upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m)); 2865 2866 m->cache = c; 2867 2868 upb_byteshandler_init(&m->input_handler_); 2869 upb_byteshandler_setstring(&m->input_handler_, parse, m); 2870 upb_byteshandler_setendstr(&m->input_handler_, end, m); 2871 2872 upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc); 2873 2874 /* Build name_table */ 2875 2876 n = upb_msgdef_fieldcount(md); 2877 for(i = 0; i < n; i++) { 2878 const upb_fielddef *f = upb_msgdef_field(md, i); 2879 upb_value v = upb_value_constptr(f); 2880 const char *name; 2881 2882 /* Add an entry for the JSON name. */ 2883 name = upb_fielddef_jsonname(f); 2884 upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); 2885 2886 if (strcmp(name, upb_fielddef_name(f)) != 0) { 2887 /* Since the JSON name is different from the regular field name, add an 2888 * entry for the raw name (compliant proto3 JSON parsers must accept 2889 * both). */ 2890 const char *name = upb_fielddef_name(f); 2891 upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); 2892 } 2893 } 2894 2895 return m; 2896} 2897 2898/* Public API *****************************************************************/ 2899 2900upb_json_parser *upb_json_parser_create(upb_arena *arena, 2901 const upb_json_parsermethod *method, 2902 const upb_symtab* symtab, 2903 upb_sink output, 2904 upb_status *status, 2905 bool ignore_json_unknown) { 2906 upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser)); 2907 if (!p) return false; 2908 2909 p->arena = arena; 2910 p->method = method; 2911 p->status = status; 2912 p->limit = p->stack + UPB_JSON_MAX_DEPTH; 2913 p->accumulate_buf = NULL; 2914 p->accumulate_buf_size = 0; 2915 upb_bytessink_reset(&p->input_, &method->input_handler_, p); 2916 2917 json_parser_reset(p); 2918 p->top->sink = output; 2919 p->top->m = upb_handlers_msgdef(output.handlers); 2920 if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { 2921 p->top->is_any = true; 2922 p->top->any_frame = json_parser_any_frame_new(p); 2923 } else { 2924 p->top->is_any = false; 2925 p->top->any_frame = NULL; 2926 } 2927 set_name_table(p, p->top); 2928 p->symtab = symtab; 2929 2930 p->ignore_json_unknown = ignore_json_unknown; 2931 2932 return p; 2933} 2934 2935upb_bytessink upb_json_parser_input(upb_json_parser *p) { 2936 return p->input_; 2937} 2938 2939const upb_byteshandler *upb_json_parsermethod_inputhandler( 2940 const upb_json_parsermethod *m) { 2941 return &m->input_handler_; 2942} 2943 2944upb_json_codecache *upb_json_codecache_new(void) { 2945 upb_alloc *alloc; 2946 upb_json_codecache *c; 2947 2948 c = upb_gmalloc(sizeof(*c)); 2949 2950 c->arena = upb_arena_new(); 2951 alloc = upb_arena_alloc(c->arena); 2952 2953 upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc); 2954 2955 return c; 2956} 2957 2958void upb_json_codecache_free(upb_json_codecache *c) { 2959 upb_arena_free(c->arena); 2960 upb_gfree(c); 2961} 2962 2963const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c, 2964 const upb_msgdef *md) { 2965 upb_json_parsermethod *m; 2966 upb_value v; 2967 int i, n; 2968 upb_alloc *alloc = upb_arena_alloc(c->arena); 2969 2970 if (upb_inttable_lookupptr(&c->methods, md, &v)) { 2971 return upb_value_getconstptr(v); 2972 } 2973 2974 m = parsermethod_new(c, md); 2975 v = upb_value_constptr(m); 2976 2977 if (!m) return NULL; 2978 if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL; 2979 2980 /* Populate parser methods for all submessages, so the name tables will 2981 * be available during parsing. */ 2982 n = upb_msgdef_fieldcount(md); 2983 for(i = 0; i < n; i++) { 2984 const upb_fielddef *f = upb_msgdef_field(md, i); 2985 2986 if (upb_fielddef_issubmsg(f)) { 2987 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); 2988 const upb_json_parsermethod *sub_method = 2989 upb_json_codecache_get(c, subdef); 2990 2991 if (!sub_method) return NULL; 2992 } 2993 } 2994 2995 return m; 2996} 2997