1
2 #line 1 "upb/json/parser.rl"
3 /*
4 ** upb::json::Parser (upb_json_parser)
5 **
6 ** A parser that uses the Ragel State Machine Compiler to generate
7 ** the finite automata.
8 **
9 ** Ragel only natively handles regular languages, but we can manually
10 ** program it a bit to handle context-free languages like JSON, by using
11 ** the "fcall" and "fret" constructs.
12 **
13 ** This parser can handle the basics, but needs several things to be fleshed
14 ** out:
15 **
16 ** - handling of unicode escape sequences (including high surrogate pairs).
17 ** - properly check and report errors for unknown fields, stack overflow,
18 ** improper array nesting (or lack of nesting).
19 ** - handling of base64 sequences with padding characters.
20 ** - handling of push-back (non-success returns from sink functions).
21 ** - handling of keys/escape-sequences/etc that span input buffers.
22 */
23
24 #include <ctype.h>
25 #include <errno.h>
26 #include <float.h>
27 #include <math.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include <time.h>
34
35 #include "upb/json/parser.h"
36 #include "upb/pb/encoder.h"
37
38 #include "upb/port_def.inc"
39
40 #define UPB_JSON_MAX_DEPTH 64
41
/* Type of value message.
 *
 * One of these constants is passed as |value_type| to start_value_object()
 * to say which kind of JSON value is being wrapped (for example,
 * start_number() passes VALUE_NUMBERVALUE). */
enum {
  VALUE_NULLVALUE = 0,
  VALUE_NUMBERVALUE = 1,
  VALUE_STRINGVALUE = 2,
  VALUE_BOOLVALUE = 3,
  VALUE_STRUCTVALUE = 4,
  VALUE_LISTVALUE = 5
};
51
52 /* Forward declare */
53 static bool is_top_level(upb_json_parser *p);
54 static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type);
55 static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type);
56
57 static bool is_number_wrapper_object(upb_json_parser *p);
58 static bool does_number_wrapper_start(upb_json_parser *p);
59 static bool does_number_wrapper_end(upb_json_parser *p);
60
61 static bool is_string_wrapper_object(upb_json_parser *p);
62 static bool does_string_wrapper_start(upb_json_parser *p);
63 static bool does_string_wrapper_end(upb_json_parser *p);
64
65 static bool does_fieldmask_start(upb_json_parser *p);
66 static bool does_fieldmask_end(upb_json_parser *p);
67 static void start_fieldmask_object(upb_json_parser *p);
68 static void end_fieldmask_object(upb_json_parser *p);
69
70 static void start_wrapper_object(upb_json_parser *p);
71 static void end_wrapper_object(upb_json_parser *p);
72
73 static void start_value_object(upb_json_parser *p, int value_type);
74 static void end_value_object(upb_json_parser *p);
75
76 static void start_listvalue_object(upb_json_parser *p);
77 static void end_listvalue_object(upb_json_parser *p);
78
79 static void start_structvalue_object(upb_json_parser *p);
80 static void end_structvalue_object(upb_json_parser *p);
81
82 static void start_object(upb_json_parser *p);
83 static void end_object(upb_json_parser *p);
84
85 static void start_any_object(upb_json_parser *p, const char *ptr);
86 static bool end_any_object(upb_json_parser *p, const char *ptr);
87
88 static bool start_subobject(upb_json_parser *p);
89 static void end_subobject(upb_json_parser *p);
90
91 static void start_member(upb_json_parser *p);
92 static void end_member(upb_json_parser *p);
93 static bool end_membername(upb_json_parser *p);
94
95 static void start_any_member(upb_json_parser *p, const char *ptr);
96 static void end_any_member(upb_json_parser *p, const char *ptr);
97 static bool end_any_membername(upb_json_parser *p);
98
99 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
100 const upb_bufhandle *handle);
101 static bool end(void *closure, const void *hd);
102
103 static const char eof_ch = 'e';
104
/* stringsink
 *
 * A growable heap buffer exposed through a upb_bytessink.  Set up by
 * upb_stringsink_init() and torn down by upb_stringsink_uninit(). */
typedef struct {
  upb_byteshandler handler;  /* Callbacks installed by upb_stringsink_init(). */
  upb_bytessink sink;        /* Bound to |handler| with this struct as closure. */
  char *ptr;                 /* Heap buffer holding the collected bytes. */
  size_t len, size;          /* Bytes in use / bytes allocated for |ptr|. */
} upb_stringsink;
112
113
stringsink_start(void * _sink,const void * hd,size_t size_hint)114 static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
115 upb_stringsink *sink = _sink;
116 sink->len = 0;
117 UPB_UNUSED(hd);
118 UPB_UNUSED(size_hint);
119 return sink;
120 }
121
stringsink_string(void * _sink,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)122 static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
123 size_t len, const upb_bufhandle *handle) {
124 upb_stringsink *sink = _sink;
125 size_t new_size = sink->size;
126
127 UPB_UNUSED(hd);
128 UPB_UNUSED(handle);
129
130 while (sink->len + len > new_size) {
131 new_size *= 2;
132 }
133
134 if (new_size != sink->size) {
135 sink->ptr = realloc(sink->ptr, new_size);
136 sink->size = new_size;
137 }
138
139 memcpy(sink->ptr + sink->len, ptr, len);
140 sink->len += len;
141
142 return len;
143 }
144
/* Prepares |sink| for use: allocates an empty 32-byte buffer and wires the
 * start-string / string callbacks into a bytes sink whose closure is |sink|
 * itself.  Pair with upb_stringsink_uninit().
 *
 * NOTE(review): the malloc() result is stored without being checked. */
void upb_stringsink_init(upb_stringsink *sink) {
  /* Backing storage. */
  sink->len = 0;
  sink->size = 32;
  sink->ptr = malloc(sink->size);

  /* Callback table and the sink that dispatches through it. */
  upb_byteshandler_init(&sink->handler);
  upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
  upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
  upb_bytessink_reset(&sink->sink, &sink->handler, sink);
}
156
/* Releases the heap buffer owned by |sink|.  The struct itself is not freed
 * and its fields are left as they were. */
void upb_stringsink_uninit(upb_stringsink *sink) {
  free(sink->ptr);
}
158
/* State kept while parsing one Any message: the encoder/parser machinery for
 * the packed payload plus pointers into the JSON input that delimit the
 * members seen before and after the type url.  Created by
 * json_parser_any_frame_new() and released by json_parser_any_frame_free(). */
typedef struct {
  /* For encoding Any value field in binary format. */
  upb_handlercache *encoder_handlercache;
  upb_stringsink stringsink;

  /* For decoding Any value field in json format. */
  upb_json_codecache *parser_codecache;
  upb_sink sink;
  /* NULL until json_parser_any_frame_set_payload_type() creates it; a non-NULL
   * value is what json_parser_any_frame_has_type_url() reports as "has type
   * url". */
  upb_json_parser *parser;

  /* Mark the range of uninterpreted values in json input before type url. */
  const char *before_type_url_start;
  const char *before_type_url_end;

  /* Mark the range of uninterpreted values in json input after type url. */
  const char *after_type_url_start;
} upb_jsonparser_any_frame;
176
/* One entry of the parser's scope stack (upb_json_parser.stack).  init_frame()
 * resets every member to its default except |sink|, which it does not touch. */
typedef struct {
  /* Sink that values parsed in this frame are pushed to. */
  upb_sink sink;

  /* The current message in which we're parsing, and the field whose value we're
   * expecting next. */
  const upb_msgdef *m;
  const upb_fielddef *f;

  /* The table mapping json name to fielddef for this message. */
  const upb_strtable *name_table;

  /* We are in a repeated-field context. We need this flag to decide whether to
   * handle the array as a normal repeated field or a
   * google.protobuf.ListValue/google.protobuf.Value. */
  bool is_repeated;

  /* We are in a repeated-field context, ready to emit mapentries as
   * submessages. This flag alters the start-of-object (open-brace) behavior to
   * begin a sequence of mapentry messages rather than a single submessage. */
  bool is_map;

  /* We are in a map-entry message context. This flag is set when parsing the
   * value field of a single map entry and indicates to all value-field parsers
   * (subobjects, strings, numbers, and bools) that the map-entry submessage
   * should end as soon as the value is parsed. */
  bool is_mapentry;

  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   * message's map field that we're currently parsing. This differs from |f|
   * because |f| is the field in the *current* message (i.e., the map-entry
   * message itself), not the parent's field that leads to this map. */
  const upb_fielddef *mapfield;

  /* We are in an Any message context. This flag is set when parsing the Any
   * message and indicates to all field parsers (subobjects, strings, numbers,
   * and bools) that the parsed field should be serialized as binary data or
   * cached (type url not found yet). */
  bool is_any;

  /* The type of packed message in Any. */
  upb_jsonparser_any_frame *any_frame;

  /* True if the field to be parsed is unknown. */
  bool is_unknown_field;
} upb_jsonparser_frame;
222
/* Puts |frame| into its default state: all pointers NULL and all flags false.
 * The frame's |sink| member is deliberately left as it is. */
static void init_frame(upb_jsonparser_frame* frame) {
  /* Pointers. */
  frame->m = NULL;
  frame->f = NULL;
  frame->name_table = NULL;
  frame->mapfield = NULL;
  frame->any_frame = NULL;

  /* Context flags. */
  frame->is_repeated = false;
  frame->is_map = false;
  frame->is_mapentry = false;
  frame->is_any = false;
  frame->is_unknown_field = false;
}
235
/* Complete state of one JSON parser instance. */
struct upb_json_parser {
  /* Arena used for this parser's allocations (e.g. the accumulate buffer and
   * Any frames). */
  upb_arena *arena;
  const upb_json_parsermethod *method;
  upb_bytessink input_;

  /* Stack to track the JSON scopes we are in. */
  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
  /* Current frame.  check_stack() reports "Nesting too deep" when
   * |top| + 1 reaches |limit|. */
  upb_jsonparser_frame *top;
  upb_jsonparser_frame *limit;

  /* Where error messages are recorded. */
  upb_status *status;

  /* Ragel's internal parsing stack for the parsing state machine. */
  int current_state;
  int parser_stack[UPB_JSON_MAX_DEPTH];
  int parser_top;

  /* The handle for the current buffer. */
  const upb_bufhandle *handle;

  /* Accumulate buffer.  See details in parser.rl. */
  const char *accumulated;
  size_t accumulated_len;
  char *accumulate_buf;
  size_t accumulate_buf_size;

  /* Multi-part text data.  See details in parser.rl. */
  int multipart_state;
  upb_selector_t string_selector;

  /* Input capture.  See details in parser.rl. */
  const char *capture;

  /* Intermediate result of parsing a unicode escape sequence. */
  uint32_t digit;

  /* For resolve type url in Any. */
  const upb_symtab *symtab;

  /* Whether to proceed if unknown field is met. */
  bool ignore_json_unknown;

  /* Cache for parsing timestamp due to base and zone are handled in different
   * handlers. */
  struct tm tm;
};
282
start_jsonparser_frame(upb_json_parser * p)283 static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) {
284 upb_jsonparser_frame *inner;
285 inner = p->top + 1;
286 init_frame(inner);
287 return inner;
288 }
289
/* Cache of parser methods keyed by message definition.  set_name_table()
 * looks entries up in |methods| by upb_msgdef pointer. */
struct upb_json_codecache {
  upb_arena *arena;
  upb_inttable methods;   /* upb_msgdef* -> upb_json_parsermethod* */
};
294
/* Per-message-type parsing data stored in a upb_json_codecache. */
struct upb_json_parsermethod {
  /* The cache this method belongs to. */
  const upb_json_codecache *cache;
  upb_byteshandler input_handler_;

  /* Maps json_name -> fielddef */
  upb_strtable name_table;
};
302
/* Returns false from the enclosing function when |x| evaluates to false.
 * NOTE(review): this expands to a bare `if`, so an `else` written right after
 * a use would bind to the macro's `if`. */
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
304
json_parser_any_frame_new(upb_json_parser * p)305 static upb_jsonparser_any_frame *json_parser_any_frame_new(
306 upb_json_parser *p) {
307 upb_jsonparser_any_frame *frame;
308
309 frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame));
310
311 frame->encoder_handlercache = upb_pb_encoder_newcache();
312 frame->parser_codecache = upb_json_codecache_new();
313 frame->parser = NULL;
314 frame->before_type_url_start = NULL;
315 frame->before_type_url_end = NULL;
316 frame->after_type_url_start = NULL;
317
318 upb_stringsink_init(&frame->stringsink);
319
320 return frame;
321 }
322
/* Binds |frame| to the message type packed inside the Any.  Builds a binary
 * encoder for |payload_type| that writes into the frame's string sink, then
 * creates a nested JSON parser whose output feeds that encoder.  Both objects
 * are created on the outer parser's arena; the nested parser shares the outer
 * parser's symtab, status and unknown-field policy. */
static void json_parser_any_frame_set_payload_type(
    upb_json_parser *p,
    upb_jsonparser_any_frame *frame,
    const upb_msgdef *payload_type) {
  /* Encoder side. */
  const upb_handlers *encoder_handlers =
      upb_handlercache_get(frame->encoder_handlercache, payload_type);
  upb_pb_encoder *enc =
      upb_pb_encoder_create(p->arena, encoder_handlers, frame->stringsink.sink);

  /* Parser side. */
  const upb_json_parsermethod *method =
      upb_json_codecache_get(frame->parser_codecache, payload_type);

  upb_sink_reset(&frame->sink, encoder_handlers, enc);
  frame->parser = upb_json_parser_create(p->arena, method, p->symtab,
                                         frame->sink, p->status,
                                         p->ignore_json_unknown);
}
342
/* Releases the resources an Any frame owns outside the arena: the string
 * sink's buffer and the two caches.  The frame struct itself is not freed
 * here. */
static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) {
  upb_stringsink_uninit(&frame->stringsink);
  upb_json_codecache_free(frame->parser_codecache);
  upb_handlercache_free(frame->encoder_handlercache);
}
348
/* True once the payload parser exists, i.e. after
 * json_parser_any_frame_set_payload_type() has run for |frame|. */
static bool json_parser_any_frame_has_type_url(
    upb_jsonparser_any_frame *frame) {
  if (frame->parser == NULL) {
    return false;
  }
  return true;
}
353
/* True when the marked "before type url" input range is non-empty, i.e. its
 * start and end pointers differ. */
static bool json_parser_any_frame_has_value_before_type_url(
    upb_jsonparser_any_frame *frame) {
  const char *range_begin = frame->before_type_url_start;
  const char *range_end = frame->before_type_url_end;
  return !(range_begin == range_end);
}
358
/* True when a start position for input following the type url has been
 * recorded. */
static bool json_parser_any_frame_has_value_after_type_url(
    upb_jsonparser_any_frame *frame) {
  if (frame->after_type_url_start == NULL) {
    return false;
  }
  return true;
}
363
/* True when uninterpreted input was marked on either side of the type url. */
static bool json_parser_any_frame_has_value(
    upb_jsonparser_any_frame *frame) {
  if (json_parser_any_frame_has_value_before_type_url(frame)) {
    return true;
  }
  return json_parser_any_frame_has_value_after_type_url(frame);
}
369
/* Moves the end of the "before type url" range to |ptr|.  Has no effect once
 * the type url has been seen (the payload parser exists). */
static void json_parser_any_frame_set_before_type_url_end(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (json_parser_any_frame_has_type_url(frame)) {
    return;
  }
  frame->before_type_url_end = ptr;
}
377
/* Records |ptr| as the start of the input following the type url.  Only the
 * first call made after the type url is known takes effect; calls before the
 * type url is known, or after a start was already recorded, do nothing. */
static void json_parser_any_frame_set_after_type_url_start_once(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (!json_parser_any_frame_has_type_url(frame)) {
    return;
  }
  if (frame->after_type_url_start != NULL) {
    return;
  }
  frame->after_type_url_start = ptr;
}
386
387 /* Used to signal that a capture has been suspended. */
388 static char suspend_capture;
389
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)390 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
391 upb_handlertype_t type) {
392 upb_selector_t sel;
393 bool ok = upb_handlers_getselector(p->top->f, type, &sel);
394 UPB_ASSUME(ok);
395 return sel;
396 }
397
parser_getsel(upb_json_parser * p)398 static upb_selector_t parser_getsel(upb_json_parser *p) {
399 return getsel_for_handlertype(
400 p, upb_handlers_getprimitivehandlertype(p->top->f));
401 }
402
/* Checks that one more frame can be pushed.  Returns true if so; otherwise
 * records "Nesting too deep" in the parser status and returns false. */
static bool check_stack(upb_json_parser *p) {
  if (p->top + 1 != p->limit) {
    return true;
  }

  upb_status_seterrmsg(p->status, "Nesting too deep");
  return false;
}
411
/* Points frame->name_table at the json-name table of the parser method cached
 * for frame->m.  The cache lookup is assumed to succeed (UPB_ASSUME). */
static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
  const upb_json_codecache *codecache = p->method->cache;
  const upb_json_parsermethod *cached_method;
  upb_value entry;
  bool found = upb_inttable_lookupptr(&codecache->methods, frame->m, &entry);

  UPB_ASSUME(found);
  cached_method = upb_value_getconstptr(entry);
  frame->name_table = &cached_method->name_table;
}
424
425 /* There are GCC/Clang built-ins for overflow checking which we could start
426 * using if there was any performance benefit to it. */
427
/* Adds |a| and |b|.  On success stores the sum in *c and returns true.  If the
 * sum does not fit in size_t, returns false and leaves *c untouched. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  /* Unsigned addition wraps, and a wrapped sum is always smaller than either
   * operand. */
  size_t sum = a + b;
  if (sum < a) {
    return false;
  }
  *c = sum;
  return true;
}
433
/* Returns a * b, clamped to SIZE_MAX when the product does not fit in
 * size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  if (b == 0) {
    return 0;
  }
  /* The product overflows exactly when |a| exceeds SIZE_MAX / b. */
  return (a > SIZE_MAX / b) ? SIZE_MAX : a * b;
}
442
443
444 /* Base64 decoding ************************************************************/
445
446 /* TODO(haberman): make this streaming. */
447
/* Decode table for the standard base64 alphabet ('A'-'Z', 'a'-'z', '0'-'9',
 * '+', '/'), indexed by unsigned character value (256 entries).  An entry
 * holds the character's 6-bit value, or -1 when the character is not part of
 * the alphabet.  The padding character '=' is also -1 here; nonbase64()
 * treats it separately. */
static const signed char b64table[] = {
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
  -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
  07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
  -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};
482
483 /* Returns the table value sign-extended to 32 bits. Knowing that the upper
484 * bits will be 1 for unrecognized characters makes it easier to check for
485 * this error condition later (see below). */
b64lookup(unsigned char ch)486 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
487
/* Returns true when |ch| is neither a base64 alphabet character nor the
 * padding character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
491
/* Decodes the base64 text [ptr, ptr + len) and pushes the decoded bytes to the
 * current frame's sink using selector |sel|.
 *
 * The input must be a multiple of four characters long.  '=' padding is
 * accepted only in the final four-character group.
 *
 * Returns true on success.  On malformed input, records an error naming the
 * current field in p->status and returns false; bytes decoded from earlier
 * groups have already been pushed by then. */
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
                        size_t len) {
  const char *limit = ptr + len;
  for (; ptr < limit; ptr += 4) {
    uint32_t val;
    char output[3];

    if (limit - ptr < 4) {
      upb_status_seterrf(p->status,
                         "Base64 input for bytes field not a multiple of 4: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }

    /* Convert each lookup to unsigned before shifting: left-shifting the -1
     * returned for an unrecognized character as a signed value is undefined
     * behavior.  As uint32_t, -1 becomes all ones, so the top bit of |val| is
     * still set whenever any of the four characters was not recognized. */
    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6  |
          (uint32_t)b64lookup(ptr[3]);

    /* Test the upper bit; returns true if any of the characters returned -1. */
    if (val & 0x80000000) {
      goto otherchar;
    }

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    output[2] = val & 0xff;
    upb_sink_putstring(p->top->sink, sel, output, 3, NULL);
  }
  return true;

otherchar:
  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
      nonbase64(ptr[3]) ) {
    upb_status_seterrf(p->status,
                       "Non-base64 characters in bytes field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  /* Every character is valid, so at least one of them is '='.  Padding may
   * only appear in the last group; accepting it earlier would silently drop
   * everything that follows. */
  if (limit - ptr != 4) {
    goto badpadding;
  }

  if (ptr[2] == '=') {
    uint32_t val;
    char output;

    /* Last group contains only two input bytes, one output byte. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12;

    UPB_ASSERT(!(val & 0x80000000));
    output = val >> 16;
    upb_sink_putstring(p->top->sink, sel, &output, 1, NULL);
    return true;
  } else {
    uint32_t val;
    char output[2];

    /* Last group contains only three input bytes, two output bytes. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6;

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    upb_sink_putstring(p->top->sink, sel, output, 2, NULL);
    return true;
  }

badpadding:
  upb_status_seterrf(p->status,
                     "Incorrect base64 padding for field: %s (%.*s)",
                     upb_fielddef_name(p->top->f),
                     4, ptr);
  return false;
}
572
573
574 /* Accumulate buffer **********************************************************/
575
576 /* Functionality for accumulating a buffer.
577 *
578 * Some parts of the parser need an entire value as a contiguous string. For
579 * example, to look up a member name in a hash table, or to turn a string into
580 * a number, the relevant library routines need the input string to be in
581 * contiguous memory, even if the value spanned two or more buffers in the
582 * input. These routines handle that.
583 *
584 * In the common case we can just point to the input buffer to get this
585 * contiguous string and avoid any actual copy. So we optimistically begin
586 * this way. But there are a few cases where we must instead copy into a
587 * separate buffer:
588 *
589 * 1. The string was not contiguous in the input (it spanned buffers).
590 *
591 * 2. The string included escape sequences that need to be interpreted to get
592 * the true value in a contiguous buffer. */
593
/* Asserts that nothing is currently accumulated: zero length and no data
 * pointer. */
static void assert_accumulate_empty(upb_json_parser *p) {
  UPB_ASSERT(p->accumulated_len == 0);
  UPB_ASSERT(p->accumulated == NULL);
}
598
/* Forgets the accumulated data.  The accumulate buffer allocation itself is
 * kept. */
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
603
604 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)605 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
606 void *mem;
607 size_t old_size = p->accumulate_buf_size;
608 size_t new_size = UPB_MAX(old_size, 128);
609 while (new_size < need) {
610 new_size = saturating_multiply(new_size, 2);
611 }
612
613 mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size);
614 if (!mem) {
615 upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
616 return false;
617 }
618
619 p->accumulate_buf = mem;
620 p->accumulate_buf_size = new_size;
621 return true;
622 }
623
624 /* Logically appends the given data to the append buffer.
625 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
626 * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)627 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
628 bool can_alias) {
629 size_t need;
630
631 if (!p->accumulated && can_alias) {
632 p->accumulated = buf;
633 p->accumulated_len = len;
634 return true;
635 }
636
637 if (!checked_add(p->accumulated_len, len, &need)) {
638 upb_status_seterrmsg(p->status, "Integer overflow.");
639 return false;
640 }
641
642 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
643 return false;
644 }
645
646 if (p->accumulated != p->accumulate_buf) {
647 if (p->accumulated_len) {
648 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
649 }
650 p->accumulated = p->accumulate_buf;
651 }
652
653 memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
654 p->accumulated_len += len;
655 return true;
656 }
657
658 /* Returns a pointer to the data accumulated since the last accumulate_clear()
659 * call, and writes the length to *len. This with point either to the input
660 * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)661 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
662 UPB_ASSERT(p->accumulated);
663 *len = p->accumulated_len;
664 return p->accumulated;
665 }
666
667
/* Multi-part text data *******************************************************/
669
670 /* When we have text data in the input, it can often come in multiple segments.
671 * For example, there may be some raw string data followed by an escape
672 * sequence. The two segments are processed with different logic. Also buffer
673 * seams in the input can cause multiple segments.
674 *
675 * As we see segments, there are two main cases for how we want to process them:
676 *
677 * 1. we want to push the captured input directly to string handlers.
678 *
679 * 2. we need to accumulate all the parts into a contiguous buffer for further
680 * processing (field name lookup, string->number conversion, etc). */
681
/* This is the set of states for p->multipart_state.  multipart_text()
 * dispatches on this value for every text segment it receives. */
enum {
  /* We are not currently processing multipart data. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
695
696 /* Start a multi-part text value where we accumulate the data for processing at
697 * the end. */
multipart_startaccum(upb_json_parser * p)698 static void multipart_startaccum(upb_json_parser *p) {
699 assert_accumulate_empty(p);
700 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
701 p->multipart_state = MULTIPART_ACCUMULATE;
702 }
703
704 /* Start a multi-part text value where we immediately push text data to a string
705 * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)706 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
707 assert_accumulate_empty(p);
708 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
709 p->multipart_state = MULTIPART_PUSHEAGERLY;
710 p->string_selector = sel;
711 }
712
/* Handles one segment of the current multi-part text value according to
 * p->multipart_state: appends it to the accumulate buffer, or pushes it
 * straight to the current sink.  |can_alias| says whether |buf| may be
 * referenced without copying; in push mode it decides whether the current
 * buffer handle is passed along.
 *
 * Returns false if no multi-part value is active (an internal error is
 * recorded) or if accumulation fails. */
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  const int state = p->multipart_state;

  if (state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    return false;
  }

  if (state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (state == MULTIPART_PUSHEAGERLY) {
    const upb_bufhandle *handle = NULL;
    if (can_alias) {
      handle = p->handle;
    }
    upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle);
  }

  return true;
}
736
737 /* Note: this invalidates the accumulate buffer! Call only after reading its
738 * contents. */
multipart_end(upb_json_parser * p)739 static void multipart_end(upb_json_parser *p) {
740 /* This is false sometimes. Probably a bug of some sort, but this code is
741 * intended for deletion soon. */
742 /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */
743 p->multipart_state = MULTIPART_INACTIVE;
744 accumulate_clear(p);
745 }
746
747
748 /* Input capture **************************************************************/
749
750 /* Functionality for capturing a region of the input as text. Gracefully
751 * handles the case where a buffer seam occurs in the middle of the captured
752 * region. */
753
/* Marks |ptr| as the start of a captured input region.  A multi-part value
 * must be active and no capture may already be in progress (both asserted). */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
  UPB_ASSERT(NULL == p->capture);

  p->capture = ptr;
}
759
/* Finishes the current capture at |ptr| and hands the captured bytes to
 * multipart_text() (aliasing allowed).  On success the capture is cleared and
 * true is returned; on failure the capture is left in place and false is
 * returned. */
static bool capture_end(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->capture);

  if (!multipart_text(p, p->capture, ptr - p->capture, true)) {
    return false;
  }

  p->capture = NULL;
  return true;
}
769
770 /* This is called at the end of each input buffer (ie. when we have hit a
771 * buffer seam). If we are in the middle of capturing the input, this
772 * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)773 static void capture_suspend(upb_json_parser *p, const char **ptr) {
774 if (!p->capture) return;
775
776 if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
777 /* We use this as a signal that we were in the middle of capturing, and
778 * that capturing should resume at the beginning of the next buffer.
779 *
780 * We can't use *ptr here, because we have no guarantee that this pointer
781 * will be valid when we resume (if the underlying memory is freed, then
782 * using the pointer at all, even to compare to NULL, is likely undefined
783 * behavior). */
784 p->capture = &suspend_capture;
785 } else {
786 /* Need to back up the pointer to the beginning of the capture, since
787 * we were not able to actually preserve it. */
788 *ptr = p->capture;
789 }
790 }
791
/* Resumes a capture suspended by capture_suspend(), restarting it at |ptr|.
 * Does nothing when no capture is pending. */
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (p->capture == NULL) {
    return;
  }

  UPB_ASSERT(p->capture == &suspend_capture);
  p->capture = ptr;
}
798
799
800 /* Callbacks from the parser **************************************************/
801
802 /* These are the functions called directly from the parser itself.
803 * We define these in the same order as their declarations in the parser. */
804
/* Maps the character following a backslash in a JSON string to the character
 * it denotes (e.g. 'n' -> newline).  Any other input trips an assertion and
 * yields 'x'. */
static char escape_char(char in) {
  /* Parallel tables: escape letter -> resulting character. */
  static const char letters[] = { 'r', 't', 'n', 'f', 'b', '/', '"', '\\' };
  static const char results[] = { '\r', '\t', '\n', '\f', '\b', '/', '"', '\\' };
  size_t i;

  for (i = 0; i < sizeof(letters); i++) {
    if (in == letters[i]) {
      return results[i];
    }
  }

  UPB_ASSERT(0);
  return 'x';
}
820
/* Translates the escape letter at *ptr and feeds the resulting single
 * character to the current multi-part text value.  The character lives in a
 * local, so it is passed as non-aliasable. */
static bool escape(upb_json_parser *p, const char *ptr) {
  char unescaped = escape_char(ptr[0]);
  return multipart_text(p, &unescaped, 1, false);
}
825
/* Begins a unicode escape sequence by clearing the code point accumulator. */
static void start_hex(upb_json_parser *p) {
  p->digit = 0u;
}
829
/* Shifts the hex digit at *ptr into the low four bits of the code point being
 * accumulated in p->digit.  Accepts '0'-'9' and 'a'-'f'; anything else is
 * asserted to be 'A'-'F'. */
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char ch = *ptr;
  uint32_t nibble;

  if (ch >= '0' && ch <= '9') {
    nibble = ch - '0';
  } else if (ch >= 'a' && ch <= 'f') {
    nibble = (ch - 'a') + 10;
  } else {
    UPB_ASSERT(ch >= 'A' && ch <= 'F');
    nibble = (ch - 'A') + 10;
  }

  p->digit = (p->digit << 4) + nibble;
}
844
/* Finishes a unicode escape sequence: encodes the code point accumulated in
 * p->digit as UTF-8 and feeds the bytes to the current multi-part text value.
 * The encoding uses one, two or three bytes; code points above 0x07FF always
 * take the three-byte form.
 *
 * TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
 * we have to wait for the next escape to get the full code point). */
static bool end_hex(upb_json_parser *p) {
  const uint32_t cp = p->digit;
  char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
  int length;

  if (cp <= 0x7F) {
    /* 0xxxxxxx */
    utf8[0] = cp;
    length = 1;
  } else if (cp <= 0x07FF) {
    /* 110xxxxx 10xxxxxx */
    utf8[0] = ((cp >> 6) & 0x1F) | 0xC0;
    utf8[1] = (cp & 0x3F) | 0x80;
    length = 2;
  } else /* cp <= 0xFFFF */ {
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    utf8[0] = ((cp >> 12) & 0x0F) | 0xE0;
    utf8[1] = ((cp >> 6) & 0x3F) | 0x80;
    utf8[2] = (cp & 0x3F) | 0x80;
    length = 3;
  }

  return multipart_text(p, utf8, length, false);
}
872
/* Parser callback: a run of plain text starts at |ptr|; begin capturing it. */
static void start_text(upb_json_parser *p, const char *ptr) {
  const char *text_begin = ptr;
  capture_begin(p, text_begin);
}
876
/* Parser callback: the run of plain text ends at |ptr|; finish the capture
 * and report whether the captured text was processed successfully. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  bool ok = capture_end(p, ptr);
  return ok;
}
880
/* Parser callback: a number literal starts at |ptr|.
 *
 * At the top level a bare number is accepted only when the message being
 * parsed is a number wrapper or a Value; anything else makes this return
 * false.  Below the top level, a field that is a number wrapper or a Value
 * gets a sub-object opened for it first (returning false if that fails); other
 * fields need no preparation.
 *
 * On success the digits are accumulated, starting at |ptr|, for conversion
 * when the number ends. */
static bool start_number(upb_json_parser *p, const char *ptr) {
  if (!is_top_level(p)) {
    if (does_number_wrapper_start(p)) {
      if (!start_subobject(p)) {
        return false;
      }
      start_wrapper_object(p);
    } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      if (!start_subobject(p)) {
        return false;
      }
      start_value_object(p, VALUE_NUMBERVALUE);
    }
  } else if (is_number_wrapper_object(p)) {
    start_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    start_value_object(p, VALUE_NUMBERVALUE);
  } else {
    return false;
  }

  multipart_startaccum(p);
  capture_begin(p, ptr);
  return true;
}
906
907 static bool parse_number(upb_json_parser *p, bool is_quoted);
908
end_number_nontop(upb_json_parser * p,const char * ptr)909 static bool end_number_nontop(upb_json_parser *p, const char *ptr) {
910 if (!capture_end(p, ptr)) {
911 return false;
912 }
913
914 if (p->top->f == NULL) {
915 multipart_end(p);
916 return true;
917 }
918
919 return parse_number(p, false);
920 }
921
/* Finishes a JSON number: parses it via end_number_nontop() and then closes
 * the number wrapper or Value object (plus its enclosing sub-object when not
 * at the top level) if one is being ended here. */
static bool end_number(upb_json_parser *p, const char *ptr) {
  if (!end_number_nontop(p, ptr)) {
    return false;
  }

  if (does_number_wrapper_end(p)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    /* Plain numeric field: nothing to close. */
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
945
946 /* |buf| is NULL-terminated. |buf| itself will never include quotes;
947 * |is_quoted| tells us whether this text originally appeared inside quotes. */
parse_number_from_buffer(upb_json_parser * p,const char * buf,bool is_quoted)948 static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
949 bool is_quoted) {
950 size_t len = strlen(buf);
951 const char *bufend = buf + len;
952 char *end;
953 upb_fieldtype_t type = upb_fielddef_type(p->top->f);
954 double val;
955 double dummy;
956 double inf = INFINITY;
957
958 errno = 0;
959
960 if (len == 0 || buf[0] == ' ') {
961 return false;
962 }
963
964 /* For integer types, first try parsing with integer-specific routines.
965 * If these succeed, they will be more accurate for int64/uint64 than
966 * strtod().
967 */
968 switch (type) {
969 case UPB_TYPE_ENUM:
970 case UPB_TYPE_INT32: {
971 long val = strtol(buf, &end, 0);
972 if (errno == ERANGE || end != bufend) {
973 break;
974 } else if (val > INT32_MAX || val < INT32_MIN) {
975 return false;
976 } else {
977 upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val);
978 return true;
979 }
980 UPB_UNREACHABLE();
981 }
982 case UPB_TYPE_UINT32: {
983 unsigned long val = strtoul(buf, &end, 0);
984 if (end != bufend) {
985 break;
986 } else if (val > UINT32_MAX || errno == ERANGE) {
987 return false;
988 } else {
989 upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val);
990 return true;
991 }
992 UPB_UNREACHABLE();
993 }
994 /* XXX: We can't handle [u]int64 properly on 32-bit machines because
995 * strto[u]ll isn't in C89. */
996 case UPB_TYPE_INT64: {
997 long val = strtol(buf, &end, 0);
998 if (errno == ERANGE || end != bufend) {
999 break;
1000 } else {
1001 upb_sink_putint64(p->top->sink, parser_getsel(p), val);
1002 return true;
1003 }
1004 UPB_UNREACHABLE();
1005 }
1006 case UPB_TYPE_UINT64: {
1007 unsigned long val = strtoul(p->accumulated, &end, 0);
1008 if (end != bufend) {
1009 break;
1010 } else if (errno == ERANGE) {
1011 return false;
1012 } else {
1013 upb_sink_putuint64(p->top->sink, parser_getsel(p), val);
1014 return true;
1015 }
1016 UPB_UNREACHABLE();
1017 }
1018 default:
1019 break;
1020 }
1021
1022 if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
1023 /* Quoted numbers for integer types are not allowed to be in double form. */
1024 return false;
1025 }
1026
1027 if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
1028 /* C89 does not have an INFINITY macro. */
1029 val = inf;
1030 } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
1031 val = -inf;
1032 } else {
1033 val = strtod(buf, &end);
1034 if (errno == ERANGE || end != bufend) {
1035 return false;
1036 }
1037 }
1038
1039 switch (type) {
1040 #define CASE(capitaltype, smalltype, ctype, min, max) \
1041 case UPB_TYPE_ ## capitaltype: { \
1042 if (modf(val, &dummy) != 0 || val > max || val < min) { \
1043 return false; \
1044 } else { \
1045 upb_sink_put ## smalltype(p->top->sink, parser_getsel(p), \
1046 (ctype)val); \
1047 return true; \
1048 } \
1049 break; \
1050 }
1051 case UPB_TYPE_ENUM:
1052 CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
1053 CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
1054 CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
1055 CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
1056 #undef CASE
1057
1058 case UPB_TYPE_DOUBLE:
1059 upb_sink_putdouble(p->top->sink, parser_getsel(p), val);
1060 return true;
1061 case UPB_TYPE_FLOAT:
1062 if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
1063 return false;
1064 } else {
1065 upb_sink_putfloat(p->top->sink, parser_getsel(p), val);
1066 return true;
1067 }
1068 default:
1069 return false;
1070 }
1071 }
1072
/* Parses the accumulated text as a number for the current field.
 *
 * A NUL byte is appended to the accumulated text first, because strtol() and
 * friends cannot be given an explicit input length.  The accumulation is
 * ended on both the success and the failure path; on failure an error naming
 * the offending text is recorded in p->status. */
static bool parse_number(upb_json_parser *p, bool is_quoted) {
  size_t len;
  const char *buf;
  bool ok;

  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);
  ok = parse_number_from_buffer(p, buf, is_quoted);

  if (!ok) {
    /* Report before multipart_end() so |buf| is still the live text. */
    upb_status_seterrf(p->status, "error parsing number: %s", buf);
  }

  multipart_end(p);
  return ok;
}
1094
/* Pushes the boolean |val| to the current sink for the current field.
 *
 * A frame without a target field is a no-op that succeeds.  A target field
 * that is not of bool type is an error, recorded in p->status. */
static bool parser_putbool(upb_json_parser *p, bool val) {
  bool ok;

  if (p->top->f != NULL) {
    if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
      upb_status_seterrf(p->status,
                         "Boolean value specified for non-bool field: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }

    ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val);
    UPB_ASSERT(ok);
  }

  return true;
}
1114
/* Handles a complete JSON `true`/`false` literal with value |val|.
 *
 * If the literal stands in for a BoolValue wrapper or a
 * google.protobuf.Value, the corresponding object is opened (inside a new
 * sub-object when not at the top level), the bool is emitted, and the object
 * is closed again.  A bare bool at the top level that matches neither is
 * rejected.  Values of unknown fields are skipped. */
static bool end_bool(upb_json_parser *p, bool val) {
  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
      start_wrapper_object(p);
    } else if (!is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      return false;
    } else {
      start_value_object(p, VALUE_BOOLVALUE);
    }
  } else {
    const bool as_wrapper = is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE);

    if (as_wrapper || is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      if (!start_subobject(p)) {
        return false;
      }
      if (as_wrapper) {
        start_wrapper_object(p);
      } else {
        start_value_object(p, VALUE_BOOLVALUE);
      }
    }
  }

  if (p->top->is_unknown_field) {
    return true;
  }

  if (!parser_putbool(p, val)) {
    return false;
  }

  if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    /* Ordinary bool field: nothing to close. */
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
1162
/* Handles a complete JSON `null` literal.
 *
 * `null` only produces output when it stands in for a
 * google.protobuf.Value (either as the top-level message or as a field of
 * that type); in every other position it is accepted and ignored.
 *
 * For a Value, the null_value member is filled by feeding the text "0"
 * through the regular number path.  Failures from that path are propagated
 * instead of being silently dropped. */
static bool end_null(upb_json_parser *p) {
  const char *zero_ptr = "0";

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_NULLVALUE);
    } else {
      return true;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_NULLVALUE);
  } else {
    return true;
  }

  /* Fill null_value field. */
  multipart_startaccum(p);
  capture_begin(p, zero_ptr);
  if (!capture_end(p, zero_ptr + 1)) {
    return false;
  }
  if (!parse_number(p, false)) {
    return false;
  }

  end_value_object(p);
  if (!is_top_level(p)) {
    end_subobject(p);
  }

  return true;
}
1194
/* Begins a string value inside an Any frame: the text is accumulated so it
 * can be examined as a whole by end_any_stringval().  Always succeeds. */
static bool start_any_stringval(upb_json_parser *p) {
  multipart_startaccum(p);
  return true;
}
1199
/* Called at the opening quote of a JSON string value.
 *
 * First decides whether the string stands in for a well-known message
 * (string/number wrapper, FieldMask, Timestamp, Duration, Value) and opens
 * the corresponding object, inside a new sub-object when not at the top
 * level.  A bare string at the top level that matches none of them is
 * rejected.  FieldMask strings return immediately after their object is
 * opened.
 *
 * Then sets up how the string's text will be consumed, based on the current
 * field: accumulated (no field, Any, bytes, quoted numerics/enums) or
 * streamed to the sink (string fields).  Strings for bool or submessage
 * fields are an error. */
static bool start_stringval(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (is_top_level(p)) {
    if (is_string_wrapper_object(p) ||
        is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      start_fieldmask_object(p);
      return true;
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
      start_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_STRINGVALUE);
    } else {
      return false;
    }
  } else if (does_string_wrapper_start(p) ||
             does_number_wrapper_start(p)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_wrapper_object(p);
  } else if (does_fieldmask_start(p)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_fieldmask_object(p);
    return true;
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) ||
             is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_STRINGVALUE);
  }

  /* No target field: just accumulate the text. */
  if (p->top->f == NULL) {
    multipart_startaccum(p);
    return true;
  }

  if (p->top->is_any) {
    return start_any_stringval(p);
  }

  if (!upb_fielddef_isstring(p->top->f)) {
    if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL ||
        upb_fielddef_type(p->top->f) == UPB_TYPE_MESSAGE) {
      upb_status_seterrf(p->status,
                         "String specified for bool or submessage field: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }

    /* No need to push a frame -- numeric values in quotes remain in the
     * current parser frame.  These values must accumulate so we can convert
     * them all at once at the end. */
    multipart_startaccum(p);
    return true;
  }

  if (!check_stack(p)) return false;

  /* Start a new parser frame: parser frames correspond one-to-one with
   * handler frames, and string events occur in a sub-frame. */
  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  p->top = inner;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
    /* For STRING fields we push data directly to the handlers as it is
     * parsed.  We don't do this yet for BYTES fields, because our base64
     * decoder is not streaming.
     *
     * TODO(haberman): make base64 decoding streaming also. */
    multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
  } else {
    multipart_startaccum(p);
  }
  return true;
}
1291
/* Finishes a string value inside an Any frame (the "@type" value).
 *
 * The accumulated text is emitted as the type_url string, then resolved to a
 * message definition: it must be "type.googleapis.com/" followed by a
 * non-empty message name known to p->symtab.  On success the payload type is
 * stored on the Any frame; otherwise an error is recorded in p->status. */
static bool end_any_stringval(upb_json_parser *p) {
  static const char url_prefix[] = "type.googleapis.com/";
  const size_t url_prefix_len = sizeof(url_prefix) - 1;
  size_t len;
  const char *buf = accumulate_getptr(p, &len);
  const upb_msgdef *payload_type = NULL;

  /* Set type_url */
  upb_selector_t sel;
  upb_jsonparser_frame *inner;
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(inner->sink, sel, buf, len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(inner->sink, sel);

  multipart_end(p);

  /* Resolve type url.  The length is checked *before* comparing so that we
   * never read past the end of a short, non-NUL-terminated |buf|. */
  if (len <= url_prefix_len ||
      strncmp(buf, url_prefix, url_prefix_len) != 0) {
    upb_status_seterrf(
        p->status, "Invalid type url: %.*s\n", (int)len, buf);
    return false;
  }

  buf += url_prefix_len;
  len -= url_prefix_len;

  payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len);
  if (payload_type == NULL) {
    upb_status_seterrf(
        p->status, "Cannot find packed type: %.*s\n", (int)len, buf);
    return false;
  }

  json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type);

  return true;
}
1333
/* Finishes a string value for the current field, without closing any
 * enclosing wrapper/well-known object (end_stringval() does that).
 *
 * - Timestamp/Duration messages and frames without a target field only end
 *   the accumulation.
 * - Any frames are handled by end_any_stringval().
 * - bytes: the accumulated text is pushed through base64_push(), then the
 *   string sub-frame is closed as for string fields.
 * - string: ENDSTR is sent and the string sub-frame is popped.
 * - enum: the symbolic name is resolved to its number; an unknown name is an
 *   error unless p->ignore_json_unknown is set.
 * - numeric types: the text is parsed as a quoted number.
 *
 * Returns false on failure. */
static bool end_stringval_nontop(upb_json_parser *p) {
  bool ok = true;

  if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
      is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
    multipart_end(p);
    return true;
  }

  if (p->top->f == NULL) {
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_stringval(p);
  }

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      /* Fall through. */

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      /* Resolve enum symbolic name to integer value. */
      const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(p->top->sink, sel, int_val);
      } else {
        if (p->ignore_json_unknown) {
          ok = true;
          /* TODO(teboring): Should also clean this field. */
        } else {
          /* "%.*s" takes an int precision; passing a size_t is undefined. */
          upb_status_seterrf(p->status, "Enum value unknown: '%.*s'",
                             (int)len, buf);
        }
      }

      break;
    }

    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      ok = parse_number(p, true);
      break;

    default:
      UPB_ASSERT(false);
      upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
      ok = false;
      break;
  }

  multipart_end(p);

  return ok;
}
1412
/* Called at the closing quote of a JSON string value.
 *
 * Finishes the string itself (except for FieldMask, whose path strings have
 * already been ended individually) and then closes whichever well-known
 * object the string stood in for, plus its enclosing sub-object when not at
 * the top level. */
static bool end_stringval(upb_json_parser *p) {
  if (does_fieldmask_end(p)) {
    /* FieldMask's stringvals have been ended when handling them.  Only need
     * to close FieldMask here. */
    end_fieldmask_object(p);
  } else {
    if (!end_stringval_nontop(p)) {
      return false;
    }

    if (does_string_wrapper_end(p) ||
        does_number_wrapper_end(p)) {
      end_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      end_value_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      end_object(p);
    } else {
      /* Ordinary field: no enclosing object to close. */
      return true;
    }
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
1457
/* Starts capturing the numeric part of a Duration string at |ptr|. */
static void start_duration_base(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1461
/* Finishes the numeric part of a Duration string ("[-]<seconds>[.<frac>]")
 * and emits it as the "seconds" and "nanos" members of the current message.
 *
 * The integer and fractional parts are parsed separately because the maximum
 * duration (315576000000 seconds) combined with nanosecond precision cannot
 * be represented by a double without losing precision.
 *
 * Both parts are copied into fixed-size NUL-terminated buffers for
 * strtol()/strtod(); their lengths are validated first so over-long input is
 * rejected instead of overflowing the buffers.
 *
 * Returns false (with an error recorded in p->status) if the text cannot be
 * parsed or is outside +/-315576000000 seconds. */
static bool end_duration_base(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char seconds_buf[14];  /* "-315576000000" plus NUL. */
  char nanos_buf[12];    /* "0" + "." + 9 digits plus NUL. */
  char *end;
  int64_t seconds = 0;
  int32_t nanos = 0;
  double val = 0.0;
  const char *seconds_membername = "seconds";
  const char *nanos_membername = "nanos";
  size_t fraction_start;
  size_t fraction_len;

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  memset(seconds_buf, 0, sizeof(seconds_buf));
  memset(nanos_buf, 0, sizeof(nanos_buf));

  /* Find out base end. The maximum duration is 315576000000, which cannot be
   * represented by double without losing precision. Thus, we need to handle
   * fraction and base separately. */
  for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.';
       fraction_start++);
  fraction_len = len - fraction_start;

  /* Anything longer than the longest legal seconds text cannot be in range,
   * and would not fit in seconds_buf (one byte is reserved for the NUL). */
  if (fraction_start > sizeof(seconds_buf) - 1) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                  "seconds value has too many digits");
    return false;
  }

  /* nanos_buf holds a leading '0', the fraction text and a NUL. */
  if (fraction_len > sizeof(nanos_buf) - 2) {
    upb_status_seterrf(p->status,
                       "error parsing duration: at most 9-digit fraction.");
    return false;
  }

  /* Parse base */
  memcpy(seconds_buf, buf, fraction_start);
  errno = 0;  /* Do not be fooled by a stale ERANGE. */
  seconds = strtol(seconds_buf, &end, 10);
  if (errno == ERANGE || end != seconds_buf + fraction_start) {
    upb_status_seterrf(p->status, "error parsing duration: %s",
                       seconds_buf);
    return false;
  }

  if (seconds > 315576000000) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                  "maximum acceptable value is "
                                  "315576000000");
    return false;
  }

  if (seconds < -315576000000) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                  "minimum acceptable value is "
                                  "-315576000000");
    return false;
  }

  /* Parse fraction */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf + fraction_start, fraction_len);
  errno = 0;
  val = strtod(nanos_buf, &end);
  if (errno == ERANGE || end != nanos_buf + fraction_len + 1) {
    upb_status_seterrf(p->status, "error parsing duration: %s",
                       nanos_buf);
    return false;
  }

  /* |val| is in [0, 1).  Round to nearest so that binary floating-point
   * error cannot turn e.g. 0.29 into 289999999 nanos. */
  nanos = (int32_t)(val * 1000000000 + 0.5);

  /* Take the sign from the text rather than from |seconds|, so that
   * "-0.5" yields negative nanos even though its seconds part is 0. */
  if (len > 0 && buf[0] == '-') nanos = -nanos;

  /* Clean up buffer */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous accumulation */
  multipart_startaccum(p);

  return true;
}
1550
/* Converts the accumulated text (one numeric component of a timestamp) to an
 * int with atoi(), then restarts accumulation for the next component.
 *
 * A NUL byte is appended to the accumulated text first, because atoi()
 * cannot be given an explicit input length. */
static int parse_timestamp_number(upb_json_parser *p) {
  size_t len;
  int val;

  multipart_text(p, "\0", 1, false);

  val = atoi(accumulate_getptr(p, &len));

  multipart_end(p);
  multipart_startaccum(p);

  return val;
}
1567
/* Starts capturing the year component of a timestamp at |ptr|. */
static void start_year(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1571
/* Finishes the year component and stores it in p->tm.tm_year, which counts
 * years since 1900.  Returns false if the capture could not be ended. */
static bool end_year(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_year = parse_timestamp_number(p) - 1900;
  }
  return captured;
}
1579
/* Starts capturing the month component of a timestamp at |ptr|. */
static void start_month(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1583
/* Finishes the month component and stores it in p->tm.tm_mon, which is
 * zero-based.  Returns false if the capture could not be ended. */
static bool end_month(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_mon = parse_timestamp_number(p) - 1;
  }
  return captured;
}
1591
/* Starts capturing the day-of-month component of a timestamp at |ptr|. */
static void start_day(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1595
/* Finishes the day-of-month component and stores it in p->tm.tm_mday.
 * Returns false if the capture could not be ended. */
static bool end_day(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_mday = parse_timestamp_number(p);
  }
  return captured;
}
1603
/* Starts capturing the hour component of a timestamp at |ptr|. */
static void start_hour(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1607
/* Finishes the hour component and stores it in p->tm.tm_hour.
 * Returns false if the capture could not be ended. */
static bool end_hour(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_hour = parse_timestamp_number(p);
  }
  return captured;
}
1615
/* Starts capturing the minute component of a timestamp at |ptr|. */
static void start_minute(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1619
/* Finishes the minute component and stores it in p->tm.tm_min.
 * Returns false if the capture could not be ended. */
static bool end_minute(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_min = parse_timestamp_number(p);
  }
  return captured;
}
1627
/* Starts capturing the second component of a timestamp at |ptr|. */
static void start_second(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1631
/* Finishes the second component and stores it in p->tm.tm_sec.
 * Returns false if the capture could not be ended. */
static bool end_second(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);

  if (captured) {
    p->tm.tm_sec = parse_timestamp_number(p);
  }
  return captured;
}
1639
/* Resets the broken-down time in p->tm to all zeroes before the components
 * of a new timestamp are parsed into it. */
static void start_timestamp_base(upb_json_parser *p) {
  memset(&p->tm, 0, sizeof(p->tm));
}
1643
/* Starts capturing the fractional-seconds part of a timestamp at |ptr|. */
static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1647
/* Finishes the fractional-seconds part of a timestamp and emits it as the
 * "nanos" member of the current message.
 *
 * The captured text (at most 10 bytes) is copied behind a leading '0' into a
 * NUL-terminated buffer and parsed with strtod().
 *
 * Returns false (with an error recorded in p->status) if the capture cannot
 * be ended, the fraction is too long, or it does not parse completely. */
static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char nanos_buf[12];
  char *end;
  double val = 0.0;
  int32_t nanos;
  const char *nanos_membername = "nanos";

  memset(nanos_buf, 0, sizeof(nanos_buf));

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  if (len > 10) {
    upb_status_seterrf(p->status,
        "error parsing timestamp: at most 9-digit fraction.");
    return false;
  }

  /* Parse nanos */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf, len);
  errno = 0;  /* Do not be fooled by a stale ERANGE from an earlier call. */
  val = strtod(nanos_buf, &end);

  if (errno == ERANGE || end != nanos_buf + len + 1) {
    upb_status_seterrf(p->status, "error parsing timestamp nanos: %s",
                       nanos_buf);
    return false;
  }

  /* |val| is in [0, 1).  Round to nearest so that binary floating-point
   * error cannot make the result one nanosecond too small. */
  nanos = (int32_t)(val * 1000000000 + 0.5);

  /* Clean up previous environment */
  multipart_end(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
1700
/* Starts capturing the time-zone designator of a timestamp at |ptr|. */
static void start_timestamp_zone(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1704
/* Returns the number of days between 1970-01-01 and the given proleptic
 * Gregorian date; epoch_days(1970, 1, 1) == 1970-01-01 == 0.
 *
 * |month| is normally 1..12.  Values outside that range are carried into
 * |year| (month 13 is January of the following year, month 0 is December of
 * the previous one), so the month table is never indexed out of bounds.
 * |day| is not validated; it is applied as a plain day offset. */
static int epoch_days(int year, int month, int day) {
  static const uint16_t month_yday[12] = {0,   31,  59,  90,  120, 151,
                                          181, 212, 243, 273, 304, 334};
  int month0 = month - 1; /* Zero-based month, normalized below. */
  uint32_t year_adj;
  uint32_t febs;
  uint32_t leap_days;
  uint32_t days;

  year += month0 / 12;
  month0 %= 12;
  if (month0 < 0) {
    month0 += 12;
    year -= 1;
  }

  year_adj = year + 4800; /* Ensure positive year, multiple of 400. */
  febs = year_adj - (month0 < 2 ? 1 : 0); /* Februaries since base. */
  leap_days = 1 + (febs / 4) - (febs / 100) + (febs / 400);
  days = 365 * year_adj + leap_days + month_yday[month0] + day - 1;

  /* Subtract as int so that dates before the epoch give a well-defined
   * negative result. */
  return (int)days - 2472692; /* Adjust to Unix epoch. */
}
1715
/* Converts the broken-down UTC time |tp| to seconds since the Unix epoch.
 *
 * Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec are read.  The
 * hour, minute and second fields are combined arithmetically, so values
 * outside their usual ranges simply carry over. */
static int64_t upb_timegm(const struct tm *tp) {
  const int64_t days =
      epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday);
  const int64_t hours = (days * 24) + tp->tm_hour;
  const int64_t minutes = (hours * 60) + tp->tm_min;

  return (minutes * 60) + tp->tm_sec;
}
1723
/* Finishes the time-zone designator of a timestamp, converts the collected
 * broken-down time to seconds since the Unix epoch, and emits it as the
 * "seconds" member of the current message.
 *
 * The designator is either "Z" or a numeric offset of the form "+hh:mm" /
 * "-hh:mm".  The offset is decoded by hand, bounded by the captured length,
 * because the captured text is not guaranteed to be NUL-terminated and so
 * must not be handed to sscanf().  Both hours and minutes of the offset are
 * applied.
 *
 * Returns false (with an error recorded in p->status) for a malformed offset
 * or a result outside 0001-01-01T00:00:00Z .. 9999-12-31T23:59:59Z. */
static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  int hours;
  int minutes;
  int64_t seconds;
  const char *seconds_membername = "seconds";

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  if (len == 0) {
    upb_status_seterrf(p->status, "error parsing timestamp offset");
    return false;
  }

  if (buf[0] != 'Z') {
    if (len != 6 ||
        (buf[0] != '+' && buf[0] != '-') ||
        buf[1] < '0' || buf[1] > '9' ||
        buf[2] < '0' || buf[2] > '9' ||
        buf[3] != ':' ||
        buf[4] < '0' || buf[4] > '9' ||
        buf[5] < '0' || buf[5] > '9') {
      upb_status_seterrf(p->status, "error parsing timestamp offset");
      return false;
    }

    hours = (buf[1] - '0') * 10 + (buf[2] - '0');
    minutes = (buf[4] - '0') * 10 + (buf[5] - '0');

    /* A local time east of UTC ("+") is later than UTC, so converting to
     * UTC subtracts the offset. */
    if (buf[0] == '+') {
      hours = -hours;
      minutes = -minutes;
    }

    /* upb_timegm() combines the fields arithmetically, so out-of-range
     * hour/minute values carry correctly. */
    p->tm.tm_hour += hours;
    p->tm.tm_min += minutes;
  }

  /* Normalize tm */
  seconds = upb_timegm(&p->tm);

  /* Check timestamp boundary */
  if (seconds < -62135596800) {
    upb_status_seterrf(p->status, "error parsing timestamp: "
                                  "minimum acceptable value is "
                                  "0001-01-01T00:00:00Z");
    return false;
  }

  if (seconds > 253402300799) {
    upb_status_seterrf(p->status, "error parsing timestamp: "
                                  "maximum acceptable value is "
                                  "9999-12-31T23:59:59Z");
    return false;
  }

  /* Clean up previous environment */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
1777
/* Starts capturing the text of one FieldMask path at |ptr|. */
static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1781
/* Finishes capturing the text of one FieldMask path at |ptr|.  Returns
 * whatever capture_end() reports. */
static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  return captured;
}
1785
/* Begins one path of a FieldMask: opens a string sub-frame for the current
 * field and starts accumulating the path's text.
 *
 * Returns false if check_stack() fails. */
static bool start_fieldmask_path(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (!check_stack(p)) {
    return false;
  }

  /* Start a new parser frame: parser frames correspond one-to-one with
   * handler frames, and string events occur in a sub-frame. */
  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);

  /* The sub-frame refers to the same message and field as its parent. */
  inner->m = p->top->m;
  inner->f = p->top->f;
  p->top = inner;

  multipart_startaccum(p);
  return true;
}
1804
/* Pushes the |len| bytes at |ptr| to the current sink one byte at a time,
 * rewriting every ASCII upper-case letter other than the very first byte as
 * an underscore followed by its lower-case form (e.g. "fooBar" is emitted as
 * "foo_bar").  Always returns true. */
static bool lower_camel_push(
    upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) {
  size_t i;

  for (i = 0; i < len; i++) {
    const char ch = ptr[i];

    if (i != 0 && ch >= 'A' && ch <= 'Z') {
      char lower = tolower(ch);
      upb_sink_putstring(p->top->sink, sel, "_", 1, NULL);
      upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL);
    } else {
      upb_sink_putstring(p->top->sink, sel, ptr + i, 1, NULL);
    }
  }

  return true;
}
1821
/* Finishes one path of a FieldMask: pushes the accumulated path text through
 * lower_camel_push(), sends ENDSTR, pops the string sub-frame opened by
 * start_fieldmask_path(), and ends the accumulation. */
static bool end_fieldmask_path(upb_json_parser *p) {
  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);

  if (!lower_camel_push(p, sel, p->accumulated, p->accumulated_len)) {
    return false;
  }

  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(p->top->sink, sel);
  p->top--;

  multipart_end(p);
  return true;
}
1838
/* Begins an object member: starts accumulating the member name.  The current
 * frame must not have a field selected yet. */
static void start_member(upb_json_parser *p) {
  UPB_ASSERT(!p->top->f);
  multipart_startaccum(p);
}
1843
1844 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
1845 * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)1846 static bool parse_mapentry_key(upb_json_parser *p) {
1847
1848 size_t len;
1849 const char *buf = accumulate_getptr(p, &len);
1850
1851 /* Emit the key field. We do a bit of ad-hoc parsing here because the
1852 * parser state machine has already decided that this is a string field
1853 * name, and we are reinterpreting it as some arbitrary key type. In
1854 * particular, integer and bool keys are quoted, so we need to parse the
1855 * quoted string contents here. */
1856
1857 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
1858 if (p->top->f == NULL) {
1859 upb_status_seterrmsg(p->status, "mapentry message has no key");
1860 return false;
1861 }
1862 switch (upb_fielddef_type(p->top->f)) {
1863 case UPB_TYPE_INT32:
1864 case UPB_TYPE_INT64:
1865 case UPB_TYPE_UINT32:
1866 case UPB_TYPE_UINT64:
1867 /* Invoke end_number. The accum buffer has the number's text already. */
1868 if (!parse_number(p, true)) {
1869 return false;
1870 }
1871 break;
1872 case UPB_TYPE_BOOL:
1873 if (len == 4 && !strncmp(buf, "true", 4)) {
1874 if (!parser_putbool(p, true)) {
1875 return false;
1876 }
1877 } else if (len == 5 && !strncmp(buf, "false", 5)) {
1878 if (!parser_putbool(p, false)) {
1879 return false;
1880 }
1881 } else {
1882 upb_status_seterrmsg(p->status,
1883 "Map bool key not 'true' or 'false'");
1884 return false;
1885 }
1886 multipart_end(p);
1887 break;
1888 case UPB_TYPE_STRING:
1889 case UPB_TYPE_BYTES: {
1890 upb_sink subsink;
1891 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
1892 upb_sink_startstr(p->top->sink, sel, len, &subsink);
1893 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
1894 upb_sink_putstring(subsink, sel, buf, len, NULL);
1895 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
1896 upb_sink_endstr(subsink, sel);
1897 multipart_end(p);
1898 break;
1899 }
1900 default:
1901 upb_status_seterrmsg(p->status, "Invalid field type for map key");
1902 return false;
1903 }
1904
1905 return true;
1906 }
1907
1908 /* Helper: emit one map entry (as a submessage in the map field sequence). This
1909 * is invoked from end_membername(), at the end of the map entry's key string,
1910 * with the map key in the accumulate buffer. It parses the key from that
1911 * buffer, emits the handler calls to start the mapentry submessage (setting up
1912 * its subframe in the process), and sets up state in the subframe so that the
1913 * value parser (invoked next) will emit the mapentry's value field and then
1914 * end the mapentry message. */
1915
/* Opens a map-entry submessage for the key text that has just been
 * accumulated.
 *
 * On entry p->top is the frame of the map sequence.  A fresh frame is pushed
 * for the entry message, STARTSUBMSG and STARTMSG are sent, the key is pushed
 * via parse_mapentry_key(), and finally the new frame is aimed at the entry's
 * value field so that the value parser which runs next fills it in.
 *
 * Returns false when the frame stack is exhausted (check_stack) or when the
 * entry message has no value field; returns true otherwise. */
static bool handle_mapentry(upb_json_parser *p) {
  const upb_fielddef *map_f;
  const upb_msgdef *entry_m;
  upb_jsonparser_frame *entry;
  upb_selector_t startsubmsg_sel;

  /* One more frame is about to be pushed for the entry itself. */
  if (!check_stack(p)) {
    return false;
  }

  map_f = p->top->mapfield;
  entry_m = upb_fielddef_msgsubdef(map_f);

  entry = start_jsonparser_frame(p);

  /* The sequence frame's field is set to the map field before the selector
   * is looked up. */
  p->top->f = map_f;
  startsubmsg_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
  upb_sink_startsubmsg(p->top->sink, startsubmsg_sel, &entry->sink);

  entry->m = entry_m;
  entry->mapfield = map_f;

  /* Deliberately not marked as a map entry yet: the key is emitted through
   * the ordinary value handlers, and those pop the frame when they see
   * is_mapentry (they normally run after the entry's value, not its key). */
  entry->is_mapentry = false;
  p->top = entry;

  /* STARTMSG for the entry submessage. */
  upb_sink_startmsg(entry->sink);

  /* NOTE(review): any result of parse_mapentry_key() is not inspected here,
   * exactly as in the original -- confirm whether it can fail. */
  parse_mapentry_key(p);

  /* Aim the frame at the value field and arm the pop-after-value flag. */
  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
  p->top->is_mapentry = true;
  p->top->mapfield = map_f;

  if (p->top->f != NULL) {
    return true;
  }

  upb_status_seterrmsg(p->status, "mapentry message has no value");
  return false;
}
1961
/* Called when the name of an object member has been fully accumulated.
 *
 * Resolves the name for the current frame:
 *   - frame without a message def: the member is marked as unknown;
 *   - Any frame: delegated to end_any_membername();
 *   - map frame: delegated to handle_mapentry();
 *   - otherwise the name is looked up in the frame's name table and, when
 *     found, becomes the frame's current field.
 *
 * Returns false (with an error set on p->status) when the name is not found
 * and unknown fields are not being ignored; the delegated cases return
 * whatever the delegate returns. */
static bool end_membername(upb_json_parser *p) {
  const char *name;
  size_t name_len;
  upb_value entry;

  UPB_ASSERT(!p->top->f);

  if (!p->top->m) {
    /* No message def for this frame: every member is unknown. */
    p->top->is_unknown_field = true;
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_membername(p);
  }

  if (p->top->is_map) {
    return handle_mapentry(p);
  }

  name = accumulate_getptr(p, &name_len);

  if (upb_strtable_lookup2(p->top->name_table, name, name_len, &entry)) {
    p->top->f = upb_value_getconstptr(entry);
  } else if (p->ignore_json_unknown) {
    p->top->is_unknown_field = true;
  } else {
    upb_status_seterrf(p->status, "No such field: %.*s\n", (int)name_len, name);
    return false;
  }

  /* Both successful outcomes end the multipart text. */
  multipart_end(p);
  return true;
}
1995
/* Resolves a member name inside a google.protobuf.Any object.
 *
 * Only the special member "@type" maps onto a field of the Any message
 * itself (its "type_url" field); every other member is flagged as an unknown
 * field of this frame.
 *
 * Returns true on success.  Returns false, with an error set on p->status,
 * when the frame's name table has no "type_url" entry; previously the lookup
 * result was ignored and |v| was read without having been filled in. */
static bool end_any_membername(upb_json_parser *p) {
  size_t len;
  const char *buf = accumulate_getptr(p, &len);
  upb_value v;

  if (len == 5 && strncmp(buf, "@type", len) == 0) {
    if (!upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v)) {
      upb_status_seterrmsg(p->status, "Any message has no type_url field");
      return false;
    }
    p->top->f = upb_value_getconstptr(v);
  } else {
    p->top->is_unknown_field = true;
  }

  multipart_end(p);
  return true;
}
2012
/* Finishes the member currently being parsed.
 *
 * When the top frame is a map-entry frame (its value has just been parsed),
 * ENDMSG is sent on it, the frame is popped, and ENDSUBMSG is sent on the
 * parent frame using the map field's selector.  In every case the (possibly
 * new) top frame's current field and unknown-field flag are cleared. */
static void end_member(upb_json_parser *p) {
  if (p->top->is_mapentry) {
    upb_jsonparser_frame *entry = p->top;
    const upb_fielddef *map_f;
    upb_selector_t endsubmsg_sel;
    bool found;

    UPB_ASSERT(p->top > p->stack);

    /* ENDMSG on the entry submessage. */
    upb_sink_endmsg(entry->sink, p->status);
    map_f = entry->mapfield;

    /* Pop, then ENDSUBMSG on the frame that holds the sequence of entries. */
    p->top--;
    found = upb_handlers_getselector(map_f, UPB_HANDLER_ENDSUBMSG,
                                     &endsubmsg_sel);
    UPB_ASSUME(found);
    upb_sink_endsubmsg(p->top->sink, entry->sink, endsubmsg_sel);
  }

  p->top->f = NULL;
  p->top->is_unknown_field = false;
}
2035
/* Starts a member of an Any object.  After the regular member start, |ptr| is
 * handed to the Any frame through
 * json_parser_any_frame_set_after_type_url_start_once(). */
static void start_any_member(upb_json_parser *p, const char *ptr) {
  start_member(p);

  /* p->top is read only after start_member() has run, as in the original. */
  json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr);
}
2040
/* Ends a member of an Any object.  |ptr| is first handed to the Any frame
 * through json_parser_any_frame_set_before_type_url_end(), then the member is
 * ended the regular way. */
static void end_any_member(upb_json_parser *p, const char *ptr) {
  /* Must happen before end_member(), which may change the top frame. */
  json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr);

  end_member(p);
}
2045
/* Pushes a frame for a JSON object that is the value of the current member.
 *
 * Three situations are handled:
 *   - unknown field: a plain frame is pushed and no handler is called;
 *   - map field: STARTSEQ is sent and the new frame is marked is_map;
 *   - submessage field: STARTSUBMSG is sent, the new frame gets the
 *     submessage's def and name table, and Any messages additionally get an
 *     Any frame.
 *
 * Returns false when the frame stack is full (check_stack) or when the
 * current field is neither a map nor a submessage (error set on p->status). */
static bool start_subobject(upb_json_parser *p) {
  upb_jsonparser_frame *child;
  upb_selector_t sel;

  if (p->top->is_unknown_field) {
    if (!check_stack(p)) {
      return false;
    }
    p->top = start_jsonparser_frame(p);
    return true;
  }

  if (upb_fielddef_ismap(p->top->f)) {
    /* Start of a map: the new frame lives in a repeated-field context. */
    if (!check_stack(p)) {
      return false;
    }

    child = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(p->top->sink, sel, &child->sink);
    child->m = upb_fielddef_msgsubdef(p->top->f);
    child->mapfield = p->top->f;
    child->is_map = true;
    p->top = child;
    return true;
  }

  if (!upb_fielddef_issubmsg(p->top->f)) {
    upb_status_seterrf(p->status,
                       "Object specified for non-message/group field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  /* Start of a submessage: the new frame lives in the submsg context. */
  if (!check_stack(p)) {
    return false;
  }

  child = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
  upb_sink_startsubmsg(p->top->sink, sel, &child->sink);
  child->m = upb_fielddef_msgsubdef(p->top->f);
  set_name_table(p, child);
  p->top = child;

  /* Any messages need an extra frame that tracks the type URL and payload. */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
    p->top->is_any = true;
    p->top->any_frame = json_parser_any_frame_new(p);
  } else {
    p->top->is_any = false;
    p->top->any_frame = NULL;
  }

  return true;
}
2102
/* Starts a JSON object, inserting the implicit wrapper levels needed when the
 * target is a google.protobuf.Struct or google.protobuf.Value.
 *
 * At top level a Value message gets a "struct_value" member plus the Struct's
 * "fields" member, a Struct message gets the "fields" member, and any other
 * message needs nothing (returns true immediately).  For a field of Struct or
 * Value type the field's own submessage is entered first and then the same
 * wrappers are applied.  In all remaining cases, and after the wrappers, the
 * function ends with a regular start_subobject().
 *
 * Returns false as soon as any start_subobject() call fails. */
static bool start_subobject_full(upb_json_parser *p) {
  bool enter_field = false; /* enter the field's submessage first */
  bool as_value = false;    /* wrap in Value.struct_value */
  bool as_struct = false;   /* target is a Struct: open "fields" */

  /* All predicates are evaluated up front, before any frame is touched. */
  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      as_value = true;
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
      as_struct = true;
    } else {
      return true;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) {
    enter_field = true;
    as_struct = true;
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    enter_field = true;
    as_value = true;
  }

  if (enter_field && !start_subobject(p)) {
    return false;
  }

  if (as_value) {
    start_value_object(p, VALUE_STRUCTVALUE);
    if (!start_subobject(p)) {
      return false;
    }
  }

  if (as_value || as_struct) {
    start_structvalue_object(p);
  }

  return start_subobject(p);
}
2126
/* Pops the frame of the object that has just ended and notifies the parent.
 *
 * Nothing happens at top level.  A map frame is popped with ENDSEQ; any other
 * frame is popped with ENDSUBMSG, except that frames without a message def
 * (pushed for unknown fields) are popped silently. */
static void end_subobject(upb_json_parser *p) {
  upb_selector_t sel;
  bool has_msgdef;

  if (is_top_level(p)) {
    return;
  }

  if (p->top->is_map) {
    p->top--;
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(p->top->sink, sel);
    return;
  }

  /* Sample the popped frame's def before moving the stack pointer. */
  has_msgdef = (p->top->m != NULL);
  p->top--;

  if (has_msgdef) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
    upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel);
  }
}
2147
/* Ends a JSON object and unwinds the implicit wrapper levels that
 * start_subobject_full() may have opened: after the regular pop, a Struct
 * frame has its wrapper closed, then a Value frame has its wrapper closed.
 * Each wrapper close is followed by another pop unless the parser is back at
 * top level. */
static void end_subobject_full(upb_json_parser *p) {
  end_subobject(p);

  /* Close the Struct wrapper, if the frame we returned to is a Struct. */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
    end_structvalue_object(p);
    if (!is_top_level(p)) {
      end_subobject(p);
    }
  }

  /* Close the Value wrapper, if the frame we are in now is a Value. */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
    if (!is_top_level(p)) {
      end_subobject(p);
    }
  }
}
2165
/* Starts a JSON array.
 *
 * Implicit wrappers are opened first when the target is a
 * google.protobuf.ListValue or google.protobuf.Value (either as the top-level
 * message or as the type of the current field).  A top-level array for any
 * other message type is rejected.
 *
 * After that, an array belonging to an unknown field just pushes a frame
 * marked is_unknown_field; otherwise the current field must be a sequence,
 * STARTSEQ is sent and a frame marked is_repeated is pushed.
 *
 * Returns false when the frame stack is full, when a wrapper could not be
 * opened, when a top-level array is not allowed, or when the field is not
 * repeated (error set on p->status in that last case). */
static bool start_array(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_LISTVALUE);
      if (!start_subobject(p)) return false;
      start_listvalue_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      start_listvalue_object(p);
    } else {
      /* NOTE(review): no message is set on p->status on this path. */
      return false;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_LISTVALUE);
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  }

  if (p->top->is_unknown_field) {
    /* Every other place that pushes a frame checks the stack limit first;
     * without this check nested arrays inside an unknown field could push
     * frames past the end of the frame stack. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    inner->is_unknown_field = true;
    p->top = inner;

    return true;
  }

  if (!upb_fielddef_isseq(p->top->f)) {
    upb_status_seterrf(p->status,
                       "Array specified for non-repeated field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  if (!check_stack(p)) return false;

  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
  upb_sink_startseq(p->top->sink, sel, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  inner->is_repeated = true;
  p->top = inner;

  return true;
}
2221
/* Ends a JSON array: pops the array frame and, unless the parent is handling
 * an unknown field, sends ENDSEQ and then closes any implicit ListValue /
 * Value wrappers (each close is followed by a pop unless the parser is back
 * at top level). */
static void end_array(upb_json_parser *p) {
  upb_selector_t endseq_sel;

  UPB_ASSERT(p->top > p->stack);

  p->top--;

  if (!p->top->is_unknown_field) {
    endseq_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(p->top->sink, endseq_sel);

    /* Close the ListValue wrapper if that is the frame we returned to. */
    if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      end_listvalue_object(p);
      if (!is_top_level(p)) {
        end_subobject(p);
      }
    }

    /* Close the Value wrapper if that is the frame we are in now. */
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      end_value_object(p);
      if (!is_top_level(p)) {
        end_subobject(p);
      }
    }
  }
}
2250
/* Sends STARTMSG on the top frame's sink, unless the frame is a map frame or
 * has no message def. */
static void start_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) {
    return;
  }

  upb_sink_startmsg(p->top->sink);
}
2256
/* Sends ENDMSG (with the parser's status) on the top frame's sink, unless the
 * frame is a map frame or has no message def. */
static void end_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) {
    return;
  }

  upb_sink_endmsg(p->top->sink, p->status);
}
2262
/* Starts an Any object: performs the regular object start and then resets
 * both "before type url" markers of the Any frame to |ptr|, i.e. to an empty
 * range beginning at the current input position. */
static void start_any_object(upb_json_parser *p, const char *ptr) {
  start_object(p);

  p->top->any_frame->before_type_url_end = ptr;
  p->top->any_frame->before_type_url_start = ptr;
}
2268
/* Finishes a google.protobuf.Any object; |ptr| is the input position at which
 * the object ends.
 *
 * The member text recorded before and after "@type" is replayed into the
 * nested parser held by the Any frame (any_frame->parser).  For payloads that
 * are not well-known types the replayed text is wrapped in "{" ... "}" and the
 * two ranges are joined with ","; for well-known types only the text after
 * the first ':' of each range is replayed.  The contents of the Any frame's
 * string sink are then written to the Any message's "value" field as a
 * string, the object is ended and the Any frame is freed.
 *
 * Returns false when members are present without a type URL, when a
 * well-known payload has no ':' in its recorded range, when the nested parser
 * fails, when the "value" field cannot be resolved, or when the frame stack
 * is full.
 *
 * NOTE(review): the early-return paths do not call
 * json_parser_any_frame_free(); confirm who owns the frame in that case.
 * NOTE(review): any_frame->parser is dereferenced without a NULL check;
 * confirm it is always set when this point is reached (e.g. for "{}"). */
static bool end_any_object(upb_json_parser *p, const char *ptr) {
  const char *value_membername = "value";
  bool is_well_known_packed = false;
  const char *packed_end = ptr + 1;
  upb_selector_t sel;
  upb_jsonparser_frame *inner;

  if (json_parser_any_frame_has_value(p->top->any_frame) &&
      !json_parser_any_frame_has_type_url(p->top->any_frame)) {
    upb_status_seterrmsg(p->status, "No valid type url");
    return false;
  }

  /* Well known types data is represented as value field. */
  if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) !=
          UPB_WELLKNOWN_UNSPECIFIED) {
    is_well_known_packed = true;

    /* Skip past the member name: replay starts right after the first ':'.
     * The has_value_* predicates are deliberately re-evaluated further down,
     * after these pointers have been moved. */
    if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
      p->top->any_frame->before_type_url_start =
          memchr(p->top->any_frame->before_type_url_start, ':',
                 p->top->any_frame->before_type_url_end -
                 p->top->any_frame->before_type_url_start);
      if (p->top->any_frame->before_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "invalid data for well known type.");
        return false;
      }
      p->top->any_frame->before_type_url_start++;
    }

    if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
      p->top->any_frame->after_type_url_start =
          memchr(p->top->any_frame->after_type_url_start, ':',
                 (ptr + 1) -
                 p->top->any_frame->after_type_url_start);
      if (p->top->any_frame->after_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "Invalid data for well known type.");
        return false;
      }
      p->top->any_frame->after_type_url_start++;
      /* For well-known payloads the replayed range stops at |ptr| itself
       * rather than one past it. */
      packed_end = ptr;
    }
  }

  /* Replay the text recorded before "@type", or open the object. */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->before_type_url_start,
               p->top->any_frame->before_type_url_end -
               p->top->any_frame->before_type_url_start, NULL)) {
      return false;
    }
  } else {
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) {
        return false;
      }
    }
  }

  /* Join the two ranges when both are present. */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) &&
      json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) {
      return false;
    }
  }

  /* Replay the text recorded after "@type", or close the object. */
  if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->after_type_url_start,
               packed_end - p->top->any_frame->after_type_url_start, NULL)) {
      return false;
    }
  } else {
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) {
        return false;
      }
    }
  }

  if (!end(p->top->any_frame->parser, NULL)) {
    return false;
  }

  /* From here on member names resolve against the Any message itself. */
  p->top->is_any = false;

  /* Set value */
  start_member(p);
  capture_begin(p, value_membername);
  capture_end(p, value_membername + 5);
  /* The result used to be ignored; a failed lookup must not fall through to
   * the selector lookups below. */
  if (!end_membername(p)) return false;

  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr,
                     p->top->any_frame->stringsink.len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(inner->sink, sel);

  end_member(p);

  end_object(p);

  /* Deallocate any parse frame. */
  json_parser_any_frame_free(p->top->any_frame);

  return true;
}
2381
is_string_wrapper(const upb_msgdef * m)2382 static bool is_string_wrapper(const upb_msgdef *m) {
2383 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
2384 return type == UPB_WELLKNOWN_STRINGVALUE ||
2385 type == UPB_WELLKNOWN_BYTESVALUE;
2386 }
2387
is_fieldmask(const upb_msgdef * m)2388 static bool is_fieldmask(const upb_msgdef *m) {
2389 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
2390 return type == UPB_WELLKNOWN_FIELDMASK;
2391 }
2392
/* Opens the implicit object for a field mask: starts the object, selects its
 * "paths" member as if that name had been read from the input, and starts the
 * array that will receive the individual paths.
 *
 * NOTE(review): the results of end_membername() and start_array() are not
 * checked here, exactly as in the original. */
static void start_fieldmask_object(upb_json_parser *p) {
  static const char membername[] = "paths";

  start_object(p);

  /* Feed the member name through the normal capture path so that the frame's
   * current field is resolved the usual way. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + (sizeof(membername) - 1));
  end_membername(p);

  start_array(p);
}
2406
/* Closes what start_fieldmask_object() opened, in reverse order: the array,
 * then the member, then the object. */
static void end_fieldmask_object(upb_json_parser *p) {
  end_array(p);
  end_member(p);
  end_object(p);
}
2412
/* Opens the implicit object for a wrapper message: starts the object and
 * selects its "value" member as if that name had been read from the input, so
 * that the value parsed next lands in that field.
 *
 * NOTE(review): the result of end_membername() is not checked here, exactly
 * as in the original. */
static void start_wrapper_object(upb_json_parser *p) {
  static const char membername[] = "value";

  start_object(p);

  /* Feed the member name through the normal capture path. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + (sizeof(membername) - 1));
  end_membername(p);
}
2424
/* Closes what start_wrapper_object() opened: the member, then the object. */
static void end_wrapper_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
2429
/* Opens the implicit object for a google.protobuf.Value and selects the member
 * that corresponds to |value_type| (one of the VALUE_* constants), as if that
 * member name had been read from the input.  A |value_type| outside the known
 * constants selects the empty name.
 *
 * NOTE(review): the result of end_membername() is not checked here, exactly
 * as in the original. */
static void start_value_object(upb_json_parser *p, int value_type) {
  /* Indexed by the VALUE_* constants, which run from VALUE_NULLVALUE (0) to
   * VALUE_LISTVALUE (5). */
  static const char *const member_names[] = {
    "null_value",   /* VALUE_NULLVALUE */
    "number_value", /* VALUE_NUMBERVALUE */
    "string_value", /* VALUE_STRINGVALUE */
    "bool_value",   /* VALUE_BOOLVALUE */
    "struct_value", /* VALUE_STRUCTVALUE */
    "list_value"    /* VALUE_LISTVALUE */
  };
  const char *membername = "";

  if (value_type >= VALUE_NULLVALUE && value_type <= VALUE_LISTVALUE) {
    membername = member_names[value_type];
  }

  start_object(p);

  /* Feed the member name through the normal capture path. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + strlen(membername));
  end_membername(p);
}
2468
/* Closes what start_value_object() opened: the member, then the object. */
static void end_value_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
2473
/* Opens the implicit object for a google.protobuf.ListValue: starts the
 * object and selects its "values" member as if that name had been read from
 * the input.
 *
 * NOTE(review): the result of end_membername() is not checked here, exactly
 * as in the original. */
static void start_listvalue_object(upb_json_parser *p) {
  static const char membername[] = "values";

  start_object(p);

  /* Feed the member name through the normal capture path. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + (sizeof(membername) - 1));
  end_membername(p);
}
2485
/* Closes what start_listvalue_object() opened: the member, then the object. */
static void end_listvalue_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
2490
/* Opens the implicit object for a google.protobuf.Struct: starts the object
 * and selects its "fields" member as if that name had been read from the
 * input.
 *
 * NOTE(review): the result of end_membername() is not checked here, exactly
 * as in the original. */
static void start_structvalue_object(upb_json_parser *p) {
  static const char membername[] = "fields";

  start_object(p);

  /* Feed the member name through the normal capture path. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + (sizeof(membername) - 1));
  end_membername(p);
}
2502
/* Closes what start_structvalue_object() opened: the member, then the
 * object. */
static void end_structvalue_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
2507
/* Returns true when the parser sits on the bottom frame of its stack with no
 * current field and without being inside an unknown field. */
static bool is_top_level(upb_json_parser *p) {
  if (p->top != p->stack) {
    return false;
  }

  return p->top->f == NULL && !p->top->is_unknown_field;
}
2511
/* Returns true when the top frame has a message def whose well-known type is
 * |type|. */
static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
  if (p->top->m == NULL) {
    return false;
  }

  return upb_msgdef_wellknowntype(p->top->m) == type;
}
2515
/* Returns true when the top frame's current field is a submessage field whose
 * message type has well-known type |type|. */
static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) {
  const upb_fielddef *field = p->top->f;

  if (field == NULL || !upb_fielddef_issubmsg(field)) {
    return false;
  }

  return upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(field)) == type;
}
2522
/* Returns true when the top frame's current field is a submessage field whose
 * message type is a number wrapper (per upb_msgdef_isnumberwrapper). */
static bool does_number_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *field = p->top->f;

  if (field == NULL || !upb_fielddef_issubmsg(field)) {
    return false;
  }

  return upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(field));
}
2528
/* Returns true when the top frame has a message def that is a number wrapper
 * (per upb_msgdef_isnumberwrapper). */
static bool does_number_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *msg = p->top->m;

  if (msg == NULL) {
    return false;
  }

  return upb_msgdef_isnumberwrapper(msg);
}
2532
/* Returns true when the top frame has a message def that is a number wrapper
 * (per upb_msgdef_isnumberwrapper).  Same test as does_number_wrapper_end(). */
static bool is_number_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *msg = p->top->m;

  if (msg == NULL) {
    return false;
  }

  return upb_msgdef_isnumberwrapper(msg);
}
2536
/* Returns true when the top frame's current field is a submessage field whose
 * message type is a string wrapper (per is_string_wrapper). */
static bool does_string_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *field = p->top->f;

  if (field == NULL || !upb_fielddef_issubmsg(field)) {
    return false;
  }

  return is_string_wrapper(upb_fielddef_msgsubdef(field));
}
2542
/* Returns true when the top frame has a message def that is a string wrapper
 * (per is_string_wrapper). */
static bool does_string_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *msg = p->top->m;

  if (msg == NULL) {
    return false;
  }

  return is_string_wrapper(msg);
}
2546
/* Returns true when the top frame has a message def that is a string wrapper
 * (per is_string_wrapper).  Same test as does_string_wrapper_end(). */
static bool is_string_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *msg = p->top->m;

  if (msg == NULL) {
    return false;
  }

  return is_string_wrapper(msg);
}
2550
/* Returns true when the top frame's current field is a submessage field whose
 * message type is a field mask (per is_fieldmask). */
static bool does_fieldmask_start(upb_json_parser *p) {
  const upb_fielddef *field = p->top->f;

  if (field == NULL || !upb_fielddef_issubmsg(field)) {
    return false;
  }

  return is_fieldmask(upb_fielddef_msgsubdef(field));
}
2556
/* Returns true when the top frame has a message def that is a field mask (per
 * is_fieldmask). */
static bool does_fieldmask_end(upb_json_parser *p) {
  const upb_msgdef *msg = p->top->m;

  if (msg == NULL) {
    return false;
  }

  return is_fieldmask(msg);
}
2560
/* Jumps to the enclosing function's `error` label when |x| evaluates to
 * false.  Wrapped in do { } while (0) so that the macro behaves as a single
 * statement: a bare `if` would capture an `else` that follows the macro call.
 * Must be used as a statement, followed by a semicolon. */
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
2562
2563
2564 /* The actual parser **********************************************************/
2565
2566 /* What follows is the Ragel parser itself. The language is specified in Ragel
2567 * and the actions call our C functions above.
2568 *
2569 * Ragel has an extensive set of functionality, and we use only a small part of
2570 * it. There are many action types but we only use a few:
2571 *
2572 * ">" -- transition into a machine
2573 * "%" -- transition out of a machine
2574 * "@" -- transition into a final state of a machine.
2575 *
2576 * "@" transitions are tricky because a machine can transition into a final
2577 * state repeatedly. But in some cases we know this can't happen, for example
2578 * a string which is delimited by a final '"' can only transition into its
2579 * final state once, when the closing '"' is seen. */
2580
2581
2582 #line 2784 "upb/json/parser.rl"
2583
2584
2585
2586 #line 2587 "upb/json/parser.c"
/* Ragel-generated action lists; regenerate from parser.rl rather than editing
 * by hand.  The parser indexes this array with a value taken from
 * _json_trans_actions; the entry at that offset is a count, followed by that
 * many action ids which are dispatched through the parser's switch. */
static const char _json_actions[] = {
  0, 1, 0, 1, 1, 1, 3, 1,
  4, 1, 6, 1, 7, 1, 8, 1,
  9, 1, 11, 1, 12, 1, 13, 1,
  14, 1, 15, 1, 16, 1, 17, 1,
  18, 1, 19, 1, 20, 1, 22, 1,
  23, 1, 24, 1, 35, 1, 37, 1,
  39, 1, 40, 1, 42, 1, 43, 1,
  44, 1, 46, 1, 48, 1, 49, 1,
  50, 1, 51, 1, 53, 1, 54, 2,
  4, 9, 2, 5, 6, 2, 7, 3,
  2, 7, 9, 2, 21, 26, 2, 25,
  10, 2, 27, 28, 2, 29, 30, 2,
  32, 34, 2, 33, 31, 2, 38, 36,
  2, 40, 42, 2, 45, 2, 2, 46,
  54, 2, 47, 36, 2, 49, 54, 2,
  50, 54, 2, 51, 54, 2, 52, 41,
  2, 53, 54, 3, 32, 34, 35, 4,
  21, 26, 27, 28
};
2607
/* Ragel-generated.  Indexed by the current state; gives the offset in
 * _json_trans_keys at which that state's keys begin. */
static const short _json_key_offsets[] = {
  0, 0, 12, 13, 18, 23, 28, 29,
  30, 31, 32, 33, 34, 35, 36, 37,
  38, 43, 44, 48, 53, 58, 63, 67,
  71, 74, 77, 79, 83, 87, 89, 91,
  96, 98, 100, 109, 115, 121, 127, 133,
  135, 139, 142, 144, 146, 149, 150, 154,
  156, 158, 160, 162, 163, 165, 167, 168,
  170, 172, 173, 175, 177, 178, 180, 182,
  183, 185, 187, 191, 193, 195, 196, 197,
  198, 199, 201, 206, 208, 210, 212, 221,
  222, 222, 222, 227, 232, 237, 238, 239,
  240, 241, 241, 242, 243, 244, 244, 245,
  246, 247, 247, 252, 253, 257, 262, 267,
  272, 276, 276, 279, 282, 285, 288, 291,
  294, 294, 294, 294, 294, 294
};
2625
/* Ragel-generated.  Input characters each state reacts to.  A state's slice
 * starts at _json_key_offsets[state] and holds _json_single_lengths[state]
 * single keys (binary-searched by the parser) followed by
 * _json_range_lengths[state] (low, high) pairs. */
static const char _json_trans_keys[] = {
  32, 34, 45, 91, 102, 110, 116, 123,
  9, 13, 48, 57, 34, 32, 93, 125,
  9, 13, 32, 44, 93, 9, 13, 32,
  93, 125, 9, 13, 97, 108, 115, 101,
  117, 108, 108, 114, 117, 101, 32, 34,
  125, 9, 13, 34, 32, 58, 9, 13,
  32, 93, 125, 9, 13, 32, 44, 125,
  9, 13, 32, 44, 125, 9, 13, 32,
  34, 9, 13, 45, 48, 49, 57, 48,
  49, 57, 46, 69, 101, 48, 57, 69,
  101, 48, 57, 43, 45, 48, 57, 48,
  57, 48, 57, 46, 69, 101, 48, 57,
  34, 92, 34, 92, 34, 47, 92, 98,
  102, 110, 114, 116, 117, 48, 57, 65,
  70, 97, 102, 48, 57, 65, 70, 97,
  102, 48, 57, 65, 70, 97, 102, 48,
  57, 65, 70, 97, 102, 34, 92, 45,
  48, 49, 57, 48, 49, 57, 46, 115,
  48, 57, 115, 48, 57, 34, 46, 115,
  48, 57, 48, 57, 48, 57, 48, 57,
  48, 57, 45, 48, 57, 48, 57, 45,
  48, 57, 48, 57, 84, 48, 57, 48,
  57, 58, 48, 57, 48, 57, 58, 48,
  57, 48, 57, 43, 45, 46, 90, 48,
  57, 48, 57, 58, 48, 48, 34, 48,
  57, 43, 45, 90, 48, 57, 34, 44,
  34, 44, 34, 44, 34, 45, 91, 102,
  110, 116, 123, 48, 57, 34, 32, 93,
  125, 9, 13, 32, 44, 93, 9, 13,
  32, 93, 125, 9, 13, 97, 108, 115,
  101, 117, 108, 108, 114, 117, 101, 32,
  34, 125, 9, 13, 34, 32, 58, 9,
  13, 32, 93, 125, 9, 13, 32, 44,
  125, 9, 13, 32, 44, 125, 9, 13,
  32, 34, 9, 13, 32, 9, 13, 32,
  9, 13, 32, 9, 13, 32, 9, 13,
  32, 9, 13, 32, 9, 13, 0
};
2665
/* Ragel-generated.  Indexed by the current state; number of single-character
 * keys that state has in _json_trans_keys. */
static const char _json_single_lengths[] = {
  0, 8, 1, 3, 3, 3, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  3, 1, 2, 3, 3, 3, 2, 2,
  1, 3, 0, 2, 2, 0, 0, 3,
  2, 2, 9, 0, 0, 0, 0, 2,
  2, 1, 2, 0, 1, 1, 2, 0,
  0, 0, 0, 1, 0, 0, 1, 0,
  0, 1, 0, 0, 1, 0, 0, 1,
  0, 0, 4, 0, 0, 1, 1, 1,
  1, 0, 3, 2, 2, 2, 7, 1,
  0, 0, 3, 3, 3, 1, 1, 1,
  1, 0, 1, 1, 1, 0, 1, 1,
  1, 0, 3, 1, 2, 3, 3, 3,
  2, 0, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0
};
2683
/* Ragel-generated.  Indexed by the current state; number of (low, high) key
 * ranges that state has in _json_trans_keys, stored after its single keys. */
static const char _json_range_lengths[] = {
  0, 2, 0, 1, 1, 1, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 0, 1, 1, 1, 1, 1, 1,
  1, 0, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 3, 3, 3, 3, 0,
  1, 1, 0, 1, 1, 0, 1, 1,
  1, 1, 1, 0, 1, 1, 0, 1,
  1, 0, 1, 1, 0, 1, 1, 0,
  1, 1, 0, 1, 1, 0, 0, 0,
  0, 1, 1, 0, 0, 0, 1, 0,
  0, 0, 1, 1, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 1, 0, 1, 1, 1, 1,
  1, 0, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0
};
2701
/* Ragel-generated.  Indexed by the current state; gives the base position in
 * _json_indicies for that state.  The parser adds the index of the matching
 * key (or the total key count when nothing matches) to this base. */
static const short _json_index_offsets[] = {
  0, 0, 11, 13, 18, 23, 28, 30,
  32, 34, 36, 38, 40, 42, 44, 46,
  48, 53, 55, 59, 64, 69, 74, 78,
  82, 85, 89, 91, 95, 99, 101, 103,
  108, 111, 114, 124, 128, 132, 136, 140,
  143, 147, 150, 153, 155, 158, 160, 164,
  166, 168, 170, 172, 174, 176, 178, 180,
  182, 184, 186, 188, 190, 192, 194, 196,
  198, 200, 202, 207, 209, 211, 213, 215,
  217, 219, 221, 226, 229, 232, 235, 244,
  246, 247, 248, 253, 258, 263, 265, 267,
  269, 271, 272, 274, 276, 278, 279, 281,
  283, 285, 286, 291, 293, 297, 302, 307,
  312, 316, 317, 320, 323, 326, 329, 332,
  335, 336, 337, 338, 339, 340
};
2719
/* Ragel-generated.  Maps the position computed from _json_index_offsets and
 * the matched key to a transition number, which in turn indexes
 * _json_trans_targs and _json_trans_actions. */
static const unsigned char _json_indicies[] = {
  0, 2, 3, 4, 5, 6, 7, 8,
  0, 3, 1, 9, 1, 11, 12, 1,
  11, 10, 13, 14, 12, 13, 1, 14,
  1, 1, 14, 10, 15, 1, 16, 1,
  17, 1, 18, 1, 19, 1, 20, 1,
  21, 1, 22, 1, 23, 1, 24, 1,
  25, 26, 27, 25, 1, 28, 1, 29,
  30, 29, 1, 30, 1, 1, 30, 31,
  32, 33, 34, 32, 1, 35, 36, 27,
  35, 1, 36, 26, 36, 1, 37, 38,
  39, 1, 38, 39, 1, 41, 42, 42,
  40, 43, 1, 42, 42, 43, 40, 44,
  44, 45, 1, 45, 1, 45, 40, 41,
  42, 42, 39, 40, 47, 48, 46, 50,
  51, 49, 52, 52, 52, 52, 52, 52,
  52, 52, 53, 1, 54, 54, 54, 1,
  55, 55, 55, 1, 56, 56, 56, 1,
  57, 57, 57, 1, 59, 60, 58, 61,
  62, 63, 1, 64, 65, 1, 66, 67,
  1, 68, 1, 67, 68, 1, 69, 1,
  66, 67, 65, 1, 70, 1, 71, 1,
  72, 1, 73, 1, 74, 1, 75, 1,
  76, 1, 77, 1, 78, 1, 79, 1,
  80, 1, 81, 1, 82, 1, 83, 1,
  84, 1, 85, 1, 86, 1, 87, 1,
  88, 1, 89, 89, 90, 91, 1, 92,
  1, 93, 1, 94, 1, 95, 1, 96,
  1, 97, 1, 98, 1, 99, 99, 100,
  98, 1, 102, 1, 101, 104, 105, 103,
  1, 1, 101, 106, 107, 108, 109, 110,
  111, 112, 107, 1, 113, 1, 114, 115,
  117, 118, 1, 117, 116, 119, 120, 118,
  119, 1, 120, 1, 1, 120, 116, 121,
  1, 122, 1, 123, 1, 124, 1, 125,
  126, 1, 127, 1, 128, 1, 129, 130,
  1, 131, 1, 132, 1, 133, 134, 135,
  136, 134, 1, 137, 1, 138, 139, 138,
  1, 139, 1, 1, 139, 140, 141, 142,
  143, 141, 1, 144, 145, 136, 144, 1,
  145, 135, 145, 1, 146, 147, 147, 1,
  148, 148, 1, 149, 149, 1, 150, 150,
  1, 151, 151, 1, 152, 152, 1, 1,
  1, 1, 1, 1, 1, 0
};
2765
/* Ragel-generated.  Indexed by transition number (from _json_indicies); gives
 * the state the machine moves to when that transition is taken. */
static const char _json_trans_targs[] = {
  1, 0, 2, 107, 3, 6, 10, 13,
  16, 106, 4, 3, 106, 4, 5, 7,
  8, 9, 108, 11, 12, 109, 14, 15,
  110, 16, 17, 111, 18, 18, 19, 20,
  21, 22, 111, 21, 22, 24, 25, 31,
  112, 26, 28, 27, 29, 30, 33, 113,
  34, 33, 113, 34, 32, 35, 36, 37,
  38, 39, 33, 113, 34, 41, 42, 46,
  42, 46, 43, 45, 44, 114, 48, 49,
  50, 51, 52, 53, 54, 55, 56, 57,
  58, 59, 60, 61, 62, 63, 64, 65,
  66, 67, 73, 72, 68, 69, 70, 71,
  72, 115, 74, 67, 72, 76, 116, 76,
  116, 77, 79, 81, 82, 85, 90, 94,
  98, 80, 117, 117, 83, 82, 80, 83,
  84, 86, 87, 88, 89, 117, 91, 92,
  93, 117, 95, 96, 97, 117, 98, 99,
  105, 100, 100, 101, 102, 103, 104, 105,
  103, 104, 117, 106, 106, 106, 106, 106,
  106
};
2788
/* Ragel-generated.  Indexed by transition number (from _json_indicies); gives
 * the offset into _json_actions of the action list to run for that
 * transition.  0 means the transition has no actions. */
static const unsigned char _json_trans_actions[] = {
  0, 0, 113, 107, 53, 0, 0, 0,
  125, 59, 45, 0, 55, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 101, 51, 47, 0, 0, 45,
  49, 49, 104, 0, 0, 0, 0, 0,
  3, 0, 0, 0, 0, 0, 5, 15,
  0, 0, 71, 7, 13, 0, 74, 9,
  9, 9, 77, 80, 11, 37, 37, 37,
  0, 0, 0, 39, 0, 41, 86, 0,
  0, 0, 17, 19, 0, 21, 23, 0,
  25, 27, 0, 29, 31, 0, 33, 35,
  0, 135, 83, 135, 0, 0, 0, 0,
  0, 92, 0, 89, 89, 98, 43, 0,
  131, 95, 113, 107, 53, 0, 0, 0,
  125, 59, 69, 110, 45, 0, 55, 0,
  0, 0, 0, 0, 0, 119, 0, 0,
  0, 122, 0, 0, 0, 116, 0, 101,
  51, 47, 0, 0, 45, 49, 49, 104,
  0, 0, 128, 0, 57, 63, 65, 61,
  67
};
2811
/* Ragel-generated.  NOTE(review): presumably indexed by state and holding the
 * offset into _json_actions of the actions to run at end of input -- the code
 * that reads this table is outside the part of the file reviewed here. */
static const unsigned char _json_eof_actions[] = {
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 1, 0, 1, 0, 0, 1, 1,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 57, 63, 65, 61, 67,
  0, 0, 0, 0, 0, 0
};
2829
/* Ragel-generated state numbers.  NOTE(review): json_start is presumably the
 * machine's initial state; its use is outside the part of the file reviewed
 * here. */
static const int json_start = 1;

/* Entry states of the individual sub-machines.  The parser's generated
 * actions jump to these numbers directly (e.g. the action that enters the
 * number machine sets cs to 23). */
static const int json_en_number_machine = 23;
static const int json_en_string_machine = 32;
static const int json_en_duration_machine = 40;
static const int json_en_timestamp_machine = 47;
static const int json_en_fieldmask_machine = 75;
static const int json_en_value_machine = 78;
static const int json_en_main = 1;
2839
2840
2841 #line 2787 "upb/json/parser.rl"
2842
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)2843 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
2844 const upb_bufhandle *handle) {
2845 upb_json_parser *parser = closure;
2846
2847 /* Variables used by Ragel's generated code. */
2848 int cs = parser->current_state;
2849 int *stack = parser->parser_stack;
2850 int top = parser->parser_top;
2851
2852 const char *p = buf;
2853 const char *pe = buf + size;
2854 const char *eof = &eof_ch;
2855
2856 parser->handle = handle;
2857
2858 UPB_UNUSED(hd);
2859 UPB_UNUSED(handle);
2860
2861 capture_resume(parser, buf);
2862
2863
2864 #line 2865 "upb/json/parser.c"
2865 {
2866 int _klen;
2867 unsigned int _trans;
2868 const char *_acts;
2869 unsigned int _nacts;
2870 const char *_keys;
2871
2872 if ( p == pe )
2873 goto _test_eof;
2874 if ( cs == 0 )
2875 goto _out;
2876 _resume:
2877 _keys = _json_trans_keys + _json_key_offsets[cs];
2878 _trans = _json_index_offsets[cs];
2879
2880 _klen = _json_single_lengths[cs];
2881 if ( _klen > 0 ) {
2882 const char *_lower = _keys;
2883 const char *_mid;
2884 const char *_upper = _keys + _klen - 1;
2885 while (1) {
2886 if ( _upper < _lower )
2887 break;
2888
2889 _mid = _lower + ((_upper-_lower) >> 1);
2890 if ( (*p) < *_mid )
2891 _upper = _mid - 1;
2892 else if ( (*p) > *_mid )
2893 _lower = _mid + 1;
2894 else {
2895 _trans += (unsigned int)(_mid - _keys);
2896 goto _match;
2897 }
2898 }
2899 _keys += _klen;
2900 _trans += _klen;
2901 }
2902
2903 _klen = _json_range_lengths[cs];
2904 if ( _klen > 0 ) {
2905 const char *_lower = _keys;
2906 const char *_mid;
2907 const char *_upper = _keys + (_klen<<1) - 2;
2908 while (1) {
2909 if ( _upper < _lower )
2910 break;
2911
2912 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
2913 if ( (*p) < _mid[0] )
2914 _upper = _mid - 2;
2915 else if ( (*p) > _mid[1] )
2916 _lower = _mid + 2;
2917 else {
2918 _trans += (unsigned int)((_mid - _keys)>>1);
2919 goto _match;
2920 }
2921 }
2922 _trans += _klen;
2923 }
2924
2925 _match:
2926 _trans = _json_indicies[_trans];
2927 cs = _json_trans_targs[_trans];
2928
2929 if ( _json_trans_actions[_trans] == 0 )
2930 goto _again;
2931
2932 _acts = _json_actions + _json_trans_actions[_trans];
2933 _nacts = (unsigned int) *_acts++;
2934 while ( _nacts-- > 0 )
2935 {
2936 switch ( *_acts++ )
2937 {
2938 case 1:
2939 #line 2592 "upb/json/parser.rl"
2940 { p--; {cs = stack[--top]; goto _again;} }
2941 break;
2942 case 2:
2943 #line 2594 "upb/json/parser.rl"
2944 { p--; {stack[top++] = cs; cs = 23;goto _again;} }
2945 break;
2946 case 3:
2947 #line 2598 "upb/json/parser.rl"
2948 { start_text(parser, p); }
2949 break;
2950 case 4:
2951 #line 2599 "upb/json/parser.rl"
2952 { CHECK_RETURN_TOP(end_text(parser, p)); }
2953 break;
2954 case 5:
2955 #line 2605 "upb/json/parser.rl"
2956 { start_hex(parser); }
2957 break;
2958 case 6:
2959 #line 2606 "upb/json/parser.rl"
2960 { hexdigit(parser, p); }
2961 break;
2962 case 7:
2963 #line 2607 "upb/json/parser.rl"
2964 { CHECK_RETURN_TOP(end_hex(parser)); }
2965 break;
2966 case 8:
2967 #line 2613 "upb/json/parser.rl"
2968 { CHECK_RETURN_TOP(escape(parser, p)); }
2969 break;
2970 case 9:
2971 #line 2619 "upb/json/parser.rl"
2972 { p--; {cs = stack[--top]; goto _again;} }
2973 break;
2974 case 10:
2975 #line 2624 "upb/json/parser.rl"
2976 { start_year(parser, p); }
2977 break;
2978 case 11:
2979 #line 2625 "upb/json/parser.rl"
2980 { CHECK_RETURN_TOP(end_year(parser, p)); }
2981 break;
2982 case 12:
2983 #line 2629 "upb/json/parser.rl"
2984 { start_month(parser, p); }
2985 break;
2986 case 13:
2987 #line 2630 "upb/json/parser.rl"
2988 { CHECK_RETURN_TOP(end_month(parser, p)); }
2989 break;
2990 case 14:
2991 #line 2634 "upb/json/parser.rl"
2992 { start_day(parser, p); }
2993 break;
2994 case 15:
2995 #line 2635 "upb/json/parser.rl"
2996 { CHECK_RETURN_TOP(end_day(parser, p)); }
2997 break;
2998 case 16:
2999 #line 2639 "upb/json/parser.rl"
3000 { start_hour(parser, p); }
3001 break;
3002 case 17:
3003 #line 2640 "upb/json/parser.rl"
3004 { CHECK_RETURN_TOP(end_hour(parser, p)); }
3005 break;
3006 case 18:
3007 #line 2644 "upb/json/parser.rl"
3008 { start_minute(parser, p); }
3009 break;
3010 case 19:
3011 #line 2645 "upb/json/parser.rl"
3012 { CHECK_RETURN_TOP(end_minute(parser, p)); }
3013 break;
3014 case 20:
3015 #line 2649 "upb/json/parser.rl"
3016 { start_second(parser, p); }
3017 break;
3018 case 21:
3019 #line 2650 "upb/json/parser.rl"
3020 { CHECK_RETURN_TOP(end_second(parser, p)); }
3021 break;
3022 case 22:
3023 #line 2655 "upb/json/parser.rl"
3024 { start_duration_base(parser, p); }
3025 break;
3026 case 23:
3027 #line 2656 "upb/json/parser.rl"
3028 { CHECK_RETURN_TOP(end_duration_base(parser, p)); }
3029 break;
3030 case 24:
3031 #line 2658 "upb/json/parser.rl"
3032 { p--; {cs = stack[--top]; goto _again;} }
3033 break;
3034 case 25:
3035 #line 2663 "upb/json/parser.rl"
3036 { start_timestamp_base(parser); }
3037 break;
3038 case 26:
3039 #line 2665 "upb/json/parser.rl"
3040 { start_timestamp_fraction(parser, p); }
3041 break;
3042 case 27:
3043 #line 2666 "upb/json/parser.rl"
3044 { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); }
3045 break;
3046 case 28:
3047 #line 2668 "upb/json/parser.rl"
3048 { start_timestamp_zone(parser, p); }
3049 break;
3050 case 29:
3051 #line 2669 "upb/json/parser.rl"
3052 { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); }
3053 break;
3054 case 30:
3055 #line 2671 "upb/json/parser.rl"
3056 { p--; {cs = stack[--top]; goto _again;} }
3057 break;
3058 case 31:
3059 #line 2676 "upb/json/parser.rl"
3060 { start_fieldmask_path_text(parser, p); }
3061 break;
3062 case 32:
3063 #line 2677 "upb/json/parser.rl"
3064 { end_fieldmask_path_text(parser, p); }
3065 break;
3066 case 33:
3067 #line 2682 "upb/json/parser.rl"
3068 { start_fieldmask_path(parser); }
3069 break;
3070 case 34:
3071 #line 2683 "upb/json/parser.rl"
3072 { end_fieldmask_path(parser); }
3073 break;
3074 case 35:
3075 #line 2689 "upb/json/parser.rl"
3076 { p--; {cs = stack[--top]; goto _again;} }
3077 break;
3078 case 36:
3079 #line 2694 "upb/json/parser.rl"
3080 {
3081 if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) {
3082 {stack[top++] = cs; cs = 47;goto _again;}
3083 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) {
3084 {stack[top++] = cs; cs = 40;goto _again;}
3085 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) {
3086 {stack[top++] = cs; cs = 75;goto _again;}
3087 } else {
3088 {stack[top++] = cs; cs = 32;goto _again;}
3089 }
3090 }
3091 break;
3092 case 37:
3093 #line 2707 "upb/json/parser.rl"
3094 { p--; {stack[top++] = cs; cs = 78;goto _again;} }
3095 break;
3096 case 38:
3097 #line 2712 "upb/json/parser.rl"
3098 {
3099 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
3100 start_any_member(parser, p);
3101 } else {
3102 start_member(parser);
3103 }
3104 }
3105 break;
3106 case 39:
3107 #line 2719 "upb/json/parser.rl"
3108 { CHECK_RETURN_TOP(end_membername(parser)); }
3109 break;
3110 case 40:
3111 #line 2722 "upb/json/parser.rl"
3112 {
3113 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
3114 end_any_member(parser, p);
3115 } else {
3116 end_member(parser);
3117 }
3118 }
3119 break;
3120 case 41:
3121 #line 2733 "upb/json/parser.rl"
3122 {
3123 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
3124 start_any_object(parser, p);
3125 } else {
3126 start_object(parser);
3127 }
3128 }
3129 break;
3130 case 42:
3131 #line 2742 "upb/json/parser.rl"
3132 {
3133 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
3134 CHECK_RETURN_TOP(end_any_object(parser, p));
3135 } else {
3136 end_object(parser);
3137 }
3138 }
3139 break;
3140 case 43:
3141 #line 2754 "upb/json/parser.rl"
3142 { CHECK_RETURN_TOP(start_array(parser)); }
3143 break;
3144 case 44:
3145 #line 2758 "upb/json/parser.rl"
3146 { end_array(parser); }
3147 break;
3148 case 45:
3149 #line 2763 "upb/json/parser.rl"
3150 { CHECK_RETURN_TOP(start_number(parser, p)); }
3151 break;
3152 case 46:
3153 #line 2764 "upb/json/parser.rl"
3154 { CHECK_RETURN_TOP(end_number(parser, p)); }
3155 break;
3156 case 47:
3157 #line 2766 "upb/json/parser.rl"
3158 { CHECK_RETURN_TOP(start_stringval(parser)); }
3159 break;
3160 case 48:
3161 #line 2767 "upb/json/parser.rl"
3162 { CHECK_RETURN_TOP(end_stringval(parser)); }
3163 break;
3164 case 49:
3165 #line 2769 "upb/json/parser.rl"
3166 { CHECK_RETURN_TOP(end_bool(parser, true)); }
3167 break;
3168 case 50:
3169 #line 2771 "upb/json/parser.rl"
3170 { CHECK_RETURN_TOP(end_bool(parser, false)); }
3171 break;
3172 case 51:
3173 #line 2773 "upb/json/parser.rl"
3174 { CHECK_RETURN_TOP(end_null(parser)); }
3175 break;
3176 case 52:
3177 #line 2775 "upb/json/parser.rl"
3178 { CHECK_RETURN_TOP(start_subobject_full(parser)); }
3179 break;
3180 case 53:
3181 #line 2776 "upb/json/parser.rl"
3182 { end_subobject_full(parser); }
3183 break;
3184 case 54:
3185 #line 2781 "upb/json/parser.rl"
3186 { p--; {cs = stack[--top]; goto _again;} }
3187 break;
3188 #line 3189 "upb/json/parser.c"
3189 }
3190 }
3191
3192 _again:
3193 if ( cs == 0 )
3194 goto _out;
3195 if ( ++p != pe )
3196 goto _resume;
3197 _test_eof: {}
3198 if ( p == eof )
3199 {
3200 const char *__acts = _json_actions + _json_eof_actions[cs];
3201 unsigned int __nacts = (unsigned int) *__acts++;
3202 while ( __nacts-- > 0 ) {
3203 switch ( *__acts++ ) {
3204 case 0:
3205 #line 2590 "upb/json/parser.rl"
3206 { p--; {cs = stack[--top]; if ( p == pe )
3207 goto _test_eof;
3208 goto _again;} }
3209 break;
3210 case 46:
3211 #line 2764 "upb/json/parser.rl"
3212 { CHECK_RETURN_TOP(end_number(parser, p)); }
3213 break;
3214 case 49:
3215 #line 2769 "upb/json/parser.rl"
3216 { CHECK_RETURN_TOP(end_bool(parser, true)); }
3217 break;
3218 case 50:
3219 #line 2771 "upb/json/parser.rl"
3220 { CHECK_RETURN_TOP(end_bool(parser, false)); }
3221 break;
3222 case 51:
3223 #line 2773 "upb/json/parser.rl"
3224 { CHECK_RETURN_TOP(end_null(parser)); }
3225 break;
3226 case 53:
3227 #line 2776 "upb/json/parser.rl"
3228 { end_subobject_full(parser); }
3229 break;
3230 #line 3231 "upb/json/parser.c"
3231 }
3232 }
3233 }
3234
3235 _out: {}
3236 }
3237
3238 #line 2809 "upb/json/parser.rl"
3239
3240 if (p != pe) {
3241 upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p);
3242 } else {
3243 capture_suspend(parser, &p);
3244 }
3245
3246 error:
3247 /* Save parsing state back to parser. */
3248 parser->current_state = cs;
3249 parser->parser_top = top;
3250
3251 return p - buf;
3252 }
3253
end(void * closure,const void * hd)3254 static bool end(void *closure, const void *hd) {
3255 upb_json_parser *parser = closure;
3256
3257 /* Prevent compile warning on unused static constants. */
3258 UPB_UNUSED(json_start);
3259 UPB_UNUSED(json_en_duration_machine);
3260 UPB_UNUSED(json_en_fieldmask_machine);
3261 UPB_UNUSED(json_en_number_machine);
3262 UPB_UNUSED(json_en_string_machine);
3263 UPB_UNUSED(json_en_timestamp_machine);
3264 UPB_UNUSED(json_en_value_machine);
3265 UPB_UNUSED(json_en_main);
3266
3267 parse(parser, hd, &eof_ch, 0, NULL);
3268
3269 return parser->current_state >= 106;
3270 }
3271
/* Puts `p` back into its initial parsing state.
 *
 * The frame stack is rewound to its first frame (which is re-initialized), the
 * Ragel machine is set to its start state with an empty call stack, and the
 * accumulate/capture/multipart bookkeeping is cleared. */
static void json_parser_reset(upb_json_parser *p) {
  /* Rewind to the bottom frame and give it a clean slate. */
  p->top = p->stack;
  init_frame(p->top);

  /* Ragel initialization of the parser: start state, empty call stack. */

#line 3282 "upb/json/parser.c"
  p->current_state = json_start;
  p->parser_top = 0;

#line 2851 "upb/json/parser.rl"
  accumulate_clear(p);
  p->multipart_state = MULTIPART_INACTIVE;
  p->capture = NULL;
  p->accumulated = NULL;
}
3295
parsermethod_new(upb_json_codecache * c,const upb_msgdef * md)3296 static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
3297 const upb_msgdef *md) {
3298 int i, n;
3299 upb_alloc *alloc = upb_arena_alloc(c->arena);
3300
3301 upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m));
3302
3303 m->cache = c;
3304
3305 upb_byteshandler_init(&m->input_handler_);
3306 upb_byteshandler_setstring(&m->input_handler_, parse, m);
3307 upb_byteshandler_setendstr(&m->input_handler_, end, m);
3308
3309 upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
3310
3311 /* Build name_table */
3312
3313 n = upb_msgdef_fieldcount(md);
3314 for(i = 0; i < n; i++) {
3315 const upb_fielddef *f = upb_msgdef_field(md, i);
3316 upb_value v = upb_value_constptr(f);
3317 const char *name;
3318
3319 /* Add an entry for the JSON name. */
3320 name = upb_fielddef_jsonname(f);
3321 upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
3322
3323 if (strcmp(name, upb_fielddef_name(f)) != 0) {
3324 /* Since the JSON name is different from the regular field name, add an
3325 * entry for the raw name (compliant proto3 JSON parsers must accept
3326 * both). */
3327 const char *name = upb_fielddef_name(f);
3328 upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
3329 }
3330 }
3331
3332 return m;
3333 }
3334
3335 /* Public API *****************************************************************/
3336
upb_json_parser_create(upb_arena * arena,const upb_json_parsermethod * method,const upb_symtab * symtab,upb_sink output,upb_status * status,bool ignore_json_unknown)3337 upb_json_parser *upb_json_parser_create(upb_arena *arena,
3338 const upb_json_parsermethod *method,
3339 const upb_symtab* symtab,
3340 upb_sink output,
3341 upb_status *status,
3342 bool ignore_json_unknown) {
3343 upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser));
3344 if (!p) return false;
3345
3346 p->arena = arena;
3347 p->method = method;
3348 p->status = status;
3349 p->limit = p->stack + UPB_JSON_MAX_DEPTH;
3350 p->accumulate_buf = NULL;
3351 p->accumulate_buf_size = 0;
3352 upb_bytessink_reset(&p->input_, &method->input_handler_, p);
3353
3354 json_parser_reset(p);
3355 p->top->sink = output;
3356 p->top->m = upb_handlers_msgdef(output.handlers);
3357 if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
3358 p->top->is_any = true;
3359 p->top->any_frame = json_parser_any_frame_new(p);
3360 } else {
3361 p->top->is_any = false;
3362 p->top->any_frame = NULL;
3363 }
3364 set_name_table(p, p->top);
3365 p->symtab = symtab;
3366
3367 p->ignore_json_unknown = ignore_json_unknown;
3368
3369 return p;
3370 }
3371
upb_json_parser_input(upb_json_parser * p)3372 upb_bytessink upb_json_parser_input(upb_json_parser *p) {
3373 return p->input_;
3374 }
3375
upb_json_parsermethod_inputhandler(const upb_json_parsermethod * m)3376 const upb_byteshandler *upb_json_parsermethod_inputhandler(
3377 const upb_json_parsermethod *m) {
3378 return &m->input_handler_;
3379 }
3380
upb_json_codecache_new(void)3381 upb_json_codecache *upb_json_codecache_new(void) {
3382 upb_alloc *alloc;
3383 upb_json_codecache *c;
3384
3385 c = upb_gmalloc(sizeof(*c));
3386
3387 c->arena = upb_arena_new();
3388 alloc = upb_arena_alloc(c->arena);
3389
3390 upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc);
3391
3392 return c;
3393 }
3394
/* Destroys cache `c`: frees its arena, then the cache struct itself.
 * `c` must not be used afterwards. */
void upb_json_codecache_free(upb_json_codecache *c) {
  /* The arena pointer lives inside `c`, so the arena has to go first. */
  upb_arena_free(c->arena);
  upb_gfree(c);
}
3399
upb_json_codecache_get(upb_json_codecache * c,const upb_msgdef * md)3400 const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c,
3401 const upb_msgdef *md) {
3402 upb_json_parsermethod *m;
3403 upb_value v;
3404 int i, n;
3405 upb_alloc *alloc = upb_arena_alloc(c->arena);
3406
3407 if (upb_inttable_lookupptr(&c->methods, md, &v)) {
3408 return upb_value_getconstptr(v);
3409 }
3410
3411 m = parsermethod_new(c, md);
3412 v = upb_value_constptr(m);
3413
3414 if (!m) return NULL;
3415 if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL;
3416
3417 /* Populate parser methods for all submessages, so the name tables will
3418 * be available during parsing. */
3419 n = upb_msgdef_fieldcount(md);
3420 for(i = 0; i < n; i++) {
3421 const upb_fielddef *f = upb_msgdef_field(md, i);
3422
3423 if (upb_fielddef_issubmsg(f)) {
3424 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
3425 const upb_json_parsermethod *sub_method =
3426 upb_json_codecache_get(c, subdef);
3427
3428 if (!sub_method) return NULL;
3429 }
3430 }
3431
3432 return m;
3433 }
3434