1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "upb/json/decode.h"
9
10 #include <errno.h>
11 #include <float.h>
12 #include <inttypes.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <setjmp.h>
16 #include <stdarg.h>
17 #include <stddef.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "upb/base/descriptor_constants.h"
23 #include "upb/base/status.h"
24 #include "upb/base/string_view.h"
25 #include "upb/lex/atoi.h"
26 #include "upb/lex/unicode.h"
27 #include "upb/mem/arena.h"
28 #include "upb/message/array.h"
29 #include "upb/message/map.h"
30 #include "upb/message/message.h"
31 #include "upb/mini_table/message.h"
32 #include "upb/reflection/def.h"
33 #include "upb/reflection/message.h"
34 #include "upb/wire/encode.h"
35
36 // Must be last.
37 #include "upb/port/def.inc"
38
39 typedef struct {
40 const char *ptr, *end;
41 upb_Arena* arena; /* TODO: should we have a tmp arena for tmp data? */
42 const upb_DefPool* symtab;
43 int depth;
44 int result;
45 upb_Status* status;
46 jmp_buf err;
47 int line;
48 const char* line_begin;
49 bool is_first;
50 int options;
51 const upb_FieldDef* debug_field;
52 } jsondec;
53
54 typedef struct {
55 upb_MessageValue value;
56 bool ignore;
57 } upb_JsonMessageValue;
58
/* Token classes returned by jsondec_rawpeek() and jsondec_peek(). */
enum {
  JD_OBJECT, /* '{' */
  JD_ARRAY,  /* '[' */
  JD_STRING, /* '"' */
  JD_NUMBER, /* '-' or a decimal digit */
  JD_TRUE,   /* 't' */
  JD_FALSE,  /* 'f' */
  JD_NULL    /* 'n' */
};
60
61 /* Forward declarations of mutually-recursive functions. */
62 static void jsondec_wellknown(jsondec* d, upb_Message* msg,
63 const upb_MessageDef* m);
64 static upb_JsonMessageValue jsondec_value(jsondec* d, const upb_FieldDef* f);
65 static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
66 const upb_MessageDef* m);
67 static void jsondec_object(jsondec* d, upb_Message* msg,
68 const upb_MessageDef* m);
69
/* Returns true iff `str` holds exactly the bytes of the C string `lit`. */
static bool jsondec_streql(upb_StringView str, const char* lit) {
  if (str.size != strlen(lit)) return false;
  return memcmp(str.data, lit, str.size) == 0;
}
73
jsondec_isnullvalue(const upb_FieldDef * f)74 static bool jsondec_isnullvalue(const upb_FieldDef* f) {
75 return upb_FieldDef_CType(f) == kUpb_CType_Enum &&
76 strcmp(upb_EnumDef_FullName(upb_FieldDef_EnumSubDef(f)),
77 "google.protobuf.NullValue") == 0;
78 }
79
jsondec_isvalue(const upb_FieldDef * f)80 static bool jsondec_isvalue(const upb_FieldDef* f) {
81 return (upb_FieldDef_CType(f) == kUpb_CType_Message &&
82 upb_MessageDef_WellKnownType(upb_FieldDef_MessageSubDef(f)) ==
83 kUpb_WellKnown_Value) ||
84 jsondec_isnullvalue(f);
85 }
86
/* Stores `msg` in d->status, prefixed with the current line and the offset of
 * d->ptr from d->line_begin. Does not abort parsing. */
static void jsondec_seterrmsg(jsondec* d, const char* msg) {
  const int column = (int)(d->ptr - d->line_begin);
  upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: %s", d->line,
                            column, msg);
}
91
jsondec_err(jsondec * d,const char * msg)92 UPB_NORETURN static void jsondec_err(jsondec* d, const char* msg) {
93 jsondec_seterrmsg(d, msg);
94 UPB_LONGJMP(d->err, 1);
95 }
96
97 UPB_PRINTF(2, 3)
jsondec_errf(jsondec * d,const char * fmt,...)98 UPB_NORETURN static void jsondec_errf(jsondec* d, const char* fmt, ...) {
99 va_list argp;
100 upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: ", d->line,
101 (int)(d->ptr - d->line_begin));
102 va_start(argp, fmt);
103 upb_Status_VAppendErrorFormat(d->status, fmt, argp);
104 va_end(argp);
105 UPB_LONGJMP(d->err, 1);
106 }
107
108 // Advances d->ptr until the next non-whitespace character or to the end of
109 // the buffer.
jsondec_consumews(jsondec * d)110 static void jsondec_consumews(jsondec* d) {
111 while (d->ptr != d->end) {
112 switch (*d->ptr) {
113 case '\n':
114 d->line++;
115 d->line_begin = d->ptr;
116 /* Fallthrough. */
117 case '\r':
118 case '\t':
119 case ' ':
120 d->ptr++;
121 break;
122 default:
123 return;
124 }
125 }
126 }
127
128 // Advances d->ptr until the next non-whitespace character. Postcondition that
129 // d->ptr is pointing at a valid non-whitespace character (will err if end of
130 // buffer is reached).
jsondec_skipws(jsondec * d)131 static void jsondec_skipws(jsondec* d) {
132 jsondec_consumews(d);
133 if (d->ptr == d->end) {
134 jsondec_err(d, "Unexpected EOF");
135 }
136 }
137
/* Consumes one character and returns true if the next input character is
 * `ch`; otherwise leaves d->ptr untouched and returns false. */
static bool jsondec_tryparsech(jsondec* d, char ch) {
  if (d->ptr != d->end && *d->ptr == ch) {
    d->ptr++;
    return true;
  }
  return false;
}
143
/* Requires the input at d->ptr to begin with the literal `lit` and advances
 * past it; raises a parse error otherwise. */
static void jsondec_parselit(jsondec* d, const char* lit) {
  const size_t remaining = d->end - d->ptr;
  const size_t lit_len = strlen(lit);
  const bool matches =
      remaining >= lit_len && memcmp(d->ptr, lit, lit_len) == 0;

  if (!matches) {
    jsondec_errf(d, "Expected: '%s'", lit);
  }
  d->ptr += lit_len;
}
152
/* Skips whitespace, then requires and consumes the single character `ch`. */
static void jsondec_wsch(jsondec* d, char ch) {
  jsondec_skipws(d);
  if (jsondec_tryparsech(d, ch)) return;
  jsondec_errf(d, "Expected: '%c'", ch);
}
159
/* Consumes the literal `true` at d->ptr or raises a parse error. */
static void jsondec_true(jsondec* d) {
  jsondec_parselit(d, "true");
}
/* Consumes the literal `false` at d->ptr or raises a parse error. */
static void jsondec_false(jsondec* d) {
  jsondec_parselit(d, "false");
}
/* Consumes the literal `null` at d->ptr or raises a parse error. */
static void jsondec_null(jsondec* d) {
  jsondec_parselit(d, "null");
}
163
/* Consumes the ':' that separates an object key from its value, allowing
 * whitespace before it. */
static void jsondec_entrysep(jsondec* d) {
  jsondec_skipws(d);
  jsondec_parselit(d, ":");
}
168
/* Classifies the character at d->ptr as one of the JD_* token kinds without
 * consuming it. Does not skip whitespace. Raises a parse error at end of
 * input or on a character that cannot start a JSON value. */
static int jsondec_rawpeek(jsondec* d) {
  if (d->ptr == d->end) {
    jsondec_err(d, "Unexpected EOF");
  }

  const char ch = *d->ptr;

  if (ch == '{') return JD_OBJECT;
  if (ch == '[') return JD_ARRAY;
  if (ch == '"') return JD_STRING;
  if (ch == '-' || (ch >= '0' && ch <= '9')) return JD_NUMBER;
  if (ch == 't') return JD_TRUE;
  if (ch == 'f') return JD_FALSE;
  if (ch == 'n') return JD_NULL;

  jsondec_errf(d, "Unexpected character: '%c'", ch);
}
203
204 /* JSON object/array **********************************************************/
205
206 /* These are used like so:
207 *
208 * jsondec_objstart(d);
209 * while (jsondec_objnext(d)) {
210 * ...
211 * }
212 * jsondec_objend(d) */
213
/* Skips leading whitespace and reports the JD_* kind of the next token
 * without consuming it. */
static int jsondec_peek(jsondec* d) {
  jsondec_skipws(d);
  const int kind = jsondec_rawpeek(d);
  return kind;
}
218
/* Enters one nesting level: spends one unit of d->depth (raising an error
 * once it goes negative) and marks the new sequence as not yet started. */
static void jsondec_push(jsondec* d) {
  d->depth--;
  if (d->depth < 0) {
    jsondec_err(d, "Recursion limit exceeded");
  }
  d->is_first = true;
}
225
/* Advances to the next element of an array or object. Returns false (without
 * consuming anything but whitespace) when the closing `end_ch` is next.
 * Otherwise consumes the separating ',' for every element but the first and
 * returns true. */
static bool jsondec_seqnext(jsondec* d, char end_ch) {
  const bool was_first = d->is_first;
  d->is_first = false;

  /* jsondec_skipws() errors at EOF, so the dereference below is safe. */
  jsondec_skipws(d);
  if (*d->ptr != end_ch) {
    if (!was_first) jsondec_parselit(d, ",");
    return true;
  }
  return false;
}
234
/* Begins an array: accounts for one nesting level, then consumes '['. */
static void jsondec_arrstart(jsondec* d) {
  jsondec_push(d);
  jsondec_wsch(d, '[');
}
239
/* Ends an array: returns the nesting level taken by jsondec_arrstart(), then
 * consumes ']'. */
static void jsondec_arrend(jsondec* d) {
  d->depth += 1;
  jsondec_wsch(d, ']');
}
244
/* Returns true while the current array has another element to read. */
static bool jsondec_arrnext(jsondec* d) {
  return jsondec_seqnext(d, ']');
}
246
/* Begins an object: accounts for one nesting level, then consumes '{'. */
static void jsondec_objstart(jsondec* d) {
  jsondec_push(d);
  jsondec_wsch(d, '{');
}
251
/* Ends an object: returns the nesting level taken by jsondec_objstart(), then
 * consumes '}'. */
static void jsondec_objend(jsondec* d) {
  d->depth += 1;
  jsondec_wsch(d, '}');
}
256
/* Returns true while the current object has another member to read, and
 * verifies that the member begins with a string key. */
static bool jsondec_objnext(jsondec* d) {
  const bool has_more = jsondec_seqnext(d, '}');
  if (has_more && jsondec_peek(d) != JD_STRING) {
    jsondec_err(d, "Object must start with string");
  }
  return has_more;
}
264
265 /* JSON number ****************************************************************/
266
/* Consumes a (possibly empty) run of ASCII decimal digits. Returns true iff
 * at least one digit was consumed. */
static bool jsondec_tryskipdigits(jsondec* d) {
  const char* const run_begin = d->ptr;

  while (d->ptr < d->end && *d->ptr >= '0' && *d->ptr <= '9') {
    d->ptr++;
  }

  return d->ptr != run_begin;
}
279
/* Consumes a run of one or more decimal digits; raises a parse error if the
 * next character is not a digit. */
static void jsondec_skipdigits(jsondec* d) {
  if (jsondec_tryskipdigits(d)) return;
  jsondec_err(d, "Expected one or more digits");
}
285
/* Parses a JSON number starting at d->ptr and returns it as a double.
 *
 * The text is first validated against JSON's number grammar (optional '-',
 * integer part without leading zeros, optional fraction, optional exponent)
 * and only then handed to strtod(). Raises a parse error on malformed syntax,
 * on numbers longer than 63 characters, and on results outside the finite
 * double range. */
static double jsondec_number(jsondec* d) {
  const char* const num_begin = d->ptr;

  UPB_ASSERT(jsondec_rawpeek(d) == JD_NUMBER);

  /* Optional sign. */
  if (*d->ptr == '-') d->ptr++;

  /* Integer part: either a lone '0' or a run of digits. */
  if (!jsondec_tryparsech(d, '0')) {
    jsondec_skipdigits(d);
  } else if (jsondec_tryskipdigits(d)) {
    jsondec_err(d, "number cannot have leading zero");
  }

  /* Optional fraction: '.' must be followed by at least one digit. */
  if (d->ptr != d->end && jsondec_tryparsech(d, '.')) {
    jsondec_skipdigits(d);
  }

  /* Optional exponent: 'e'/'E', optional sign, at least one digit. */
  if (d->ptr != d->end && (*d->ptr == 'e' || *d->ptr == 'E')) {
    d->ptr++;
    if (d->ptr == d->end) {
      jsondec_err(d, "Unexpected EOF in number");
    }
    if (*d->ptr == '+' || *d->ptr == '-') {
      d->ptr++;
    }
    jsondec_skipdigits(d);
  }

  /* The syntax is now known to be a valid JSON number, which is a subset of
   * what strtod() accepts. */
  errno = 0;

  /* strtod() needs a NUL-terminated string, so copy the number into a
   * bounded scratch buffer first. */
  char scratch[64];
  const ptrdiff_t num_len = d->ptr - num_begin;
  if (num_len > (ptrdiff_t)(sizeof(scratch) - 1)) {
    jsondec_err(d, "excessively long number");
  }
  memcpy(scratch, num_begin, num_len);
  scratch[num_len] = '\0';

  char* parse_end;
  const double val = strtod(scratch, &parse_end);
  UPB_ASSERT(parse_end - scratch == num_len);

  /* errno == ERANGE is deliberately not treated as an error here: the
   * min/max-val conformance tests fail if it is. Whether the conformance
   * tests or strtod() are at fault still needs investigation. */

  if (val > DBL_MAX || val < -DBL_MAX) {
    jsondec_err(d, "Number out of range");
  }

  return val;
}
354
355 /* JSON string ****************************************************************/
356
/* Consumes the character following a backslash and returns the character it
 * denotes. The \uXXXX form is not handled here. The caller must ensure that
 * d->ptr is not at the end of the input. */
static char jsondec_escape(jsondec* d) {
  const char ch = *d->ptr++;

  switch (ch) {
    /* These three escape to themselves. */
    case '"':
    case '\\':
    case '/':
      return ch;
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    default:
      jsondec_err(d, "Invalid escape char");
  }
}
379
/* Reads exactly four hexadecimal digits at d->ptr and returns their value.
 * Raises a parse error if fewer than four bytes remain or if any of them is
 * not a hex digit. */
static uint32_t jsondec_codepoint(jsondec* d) {
  if (d->end - d->ptr < 4) {
    jsondec_err(d, "EOF inside string");
  }

  uint32_t cp = 0;
  for (int i = 0; i < 4; i++) {
    const char ch = *d->ptr++;
    uint32_t nibble;

    if (ch >= '0' && ch <= '9') {
      nibble = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      nibble = ch - 'a' + 10;
    } else if (ch >= 'A' && ch <= 'F') {
      nibble = ch - 'A' + 10;
    } else {
      jsondec_err(d, "Invalid hex digit");
    }

    cp = (cp << 4) | nibble;
  }

  return cp;
}
405
406 /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
jsondec_unicode(jsondec * d,char * out)407 static size_t jsondec_unicode(jsondec* d, char* out) {
408 uint32_t cp = jsondec_codepoint(d);
409 if (upb_Unicode_IsHigh(cp)) {
410 /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
411 jsondec_parselit(d, "\\u");
412 uint32_t low = jsondec_codepoint(d);
413 if (!upb_Unicode_IsLow(low)) jsondec_err(d, "Invalid low surrogate");
414 cp = upb_Unicode_FromPair(cp, low);
415 } else if (upb_Unicode_IsLow(cp)) {
416 jsondec_err(d, "Unpaired low surrogate");
417 }
418
419 /* Write to UTF-8 */
420 int bytes = upb_Unicode_ToUTF8(cp, out);
421 if (bytes == 0) jsondec_err(d, "Invalid codepoint");
422 return bytes;
423 }
424
/* Grows the output buffer described by (*buf, *end, *buf_end) to twice its
 * capacity (minimum 8 bytes) using the decoder's arena, and rebases all three
 * pointers onto the new allocation. `*end` marks the end of the bytes in use.
 * Raises "Out of memory" if the arena cannot satisfy the request. */
static void jsondec_resize(jsondec* d, char** buf, char** end, char** buf_end) {
  const size_t used = *end - *buf;
  const size_t old_cap = *buf_end - *buf;
  const size_t new_cap = UPB_MAX(8, 2 * old_cap);

  *buf = upb_Arena_Realloc(d->arena, *buf, used, new_cap);
  if (*buf == NULL) {
    jsondec_err(d, "Out of memory");
  }

  *buf_end = *buf + new_cap;
  *end = *buf + used;
}
436
jsondec_string(jsondec * d)437 static upb_StringView jsondec_string(jsondec* d) {
438 char* buf = NULL;
439 char* end = NULL;
440 char* buf_end = NULL;
441
442 jsondec_skipws(d);
443
444 if (*d->ptr++ != '"') {
445 jsondec_err(d, "Expected string");
446 }
447
448 while (d->ptr < d->end) {
449 char ch = *d->ptr++;
450
451 if (end == buf_end) {
452 jsondec_resize(d, &buf, &end, &buf_end);
453 }
454
455 switch (ch) {
456 case '"': {
457 upb_StringView ret;
458 ret.data = buf;
459 ret.size = end - buf;
460 *end = '\0'; /* Needed for possible strtod(). */
461 return ret;
462 }
463 case '\\':
464 if (d->ptr == d->end) goto eof;
465 if (*d->ptr == 'u') {
466 d->ptr++;
467 if (buf_end - end < 4) {
468 /* Allow space for maximum-sized codepoint (4 bytes). */
469 jsondec_resize(d, &buf, &end, &buf_end);
470 }
471 end += jsondec_unicode(d, end);
472 } else {
473 *end++ = jsondec_escape(d);
474 }
475 break;
476 default:
477 if ((unsigned char)ch < 0x20) {
478 jsondec_err(d, "Invalid char in JSON string");
479 }
480 *end++ = ch;
481 break;
482 }
483 }
484
485 eof:
486 jsondec_err(d, "EOF inside string");
487 }
488
/* Parses and discards one complete JSON value of any kind, recursing into
 * arrays and objects. Syntax errors are still reported. */
static void jsondec_skipval(jsondec* d) {
  switch (jsondec_peek(d)) {
    case JD_NULL:
      jsondec_null(d);
      break;
    case JD_TRUE:
      jsondec_true(d);
      break;
    case JD_FALSE:
      jsondec_false(d);
      break;
    case JD_NUMBER:
      jsondec_number(d);
      break;
    case JD_STRING:
      jsondec_string(d);
      break;
    case JD_ARRAY:
      jsondec_arrstart(d);
      while (jsondec_arrnext(d)) {
        jsondec_skipval(d);
      }
      jsondec_arrend(d);
      break;
    case JD_OBJECT:
      jsondec_objstart(d);
      while (jsondec_objnext(d)) {
        /* Each member is: key string, ':', value. */
        jsondec_string(d);
        jsondec_entrysep(d);
        jsondec_skipval(d);
      }
      jsondec_objend(d);
      break;
  }
}
524
525 /* Base64 decoding for bytes fields. ******************************************/
526
/* Maps one base64 character to its 6-bit value.
 *
 * Both the standard alphabet ('+', '/') and the URL-safe alphabet ('-', '_')
 * are accepted. Any other byte, including '=' and bytes >= 0x80, yields
 * (unsigned int)-1, i.e. all bits set, so callers that OR shifted results
 * together can detect bad input by testing the sign of the combined value. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Table includes the normal base64 chars plus the URL-safe variant. */
  static const signed char table[256] = {
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       62 /*+*/, -1,       62 /*-*/, -1,       63 /*/ */, 52 /*0*/,
      53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/,  59 /*7*/,
      60 /*8*/, 61 /*9*/, -1,       -1,       -1,       -1,        -1,
      -1,       -1,       0 /*A*/,  1 /*B*/,  2 /*C*/,  3 /*D*/,   4 /*E*/,
      5 /*F*/,  6 /*G*/,  7 /*H*/,  8 /*I*/,  9 /*J*/,  10 /*K*/,  11 /*L*/,
      12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, 16 /*Q*/, 17 /*R*/,  18 /*S*/,
      19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/,  25 /*Z*/,
      -1,       -1,       -1,       -1,       63 /*_*/, -1,        26 /*a*/,
      27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/,  33 /*h*/,
      34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/,  40 /*o*/,
      41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/,  47 /*v*/,
      48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1,       -1,       -1,        -1,
      -1,       -1,       -1,       -1};

  /* Index through unsigned char: where plain char is signed, converting a
   * byte >= 0x80 straight to unsigned would sign-extend it into an index far
   * outside the 256-entry table. */
  const unsigned char idx = (unsigned char)ch;

  /* Sign-extend return value so high bit will be set on any unexpected char. */
  return table[idx];
}
571
/* Decodes a trailing, unpadded base64 group of 2 or 3 characters in
 * [ptr, end) into 1 or 2 bytes at `out`, returning the advanced output
 * pointer. Any other group length, or any invalid character, raises
 * "Corrupt base64". */
static char* jsondec_partialbase64(jsondec* d, const char* ptr, const char* end,
                                   char* out) {
  const ptrdiff_t remaining = end - ptr;
  int32_t val = -1; /* Stays negative unless a supported length is decoded. */

  if (remaining == 2) {
    val = jsondec_base64_tablelookup(ptr[0]) << 18 |
          jsondec_base64_tablelookup(ptr[1]) << 12;
    out[0] = val >> 16;
    out += 1;
  } else if (remaining == 3) {
    val = jsondec_base64_tablelookup(ptr[0]) << 18 |
          jsondec_base64_tablelookup(ptr[1]) << 12 |
          jsondec_base64_tablelookup(ptr[2]) << 6;
    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out += 2;
  }

  if (val < 0) {
    jsondec_err(d, "Corrupt base64");
  }

  return out;
}
599
/* Base64-decodes `str` in place and returns the decoded length in bytes.
 * Trailing '=' padding is accepted but not required. Invalid input raises
 * "Corrupt base64" (via jsondec_partialbase64). */
static size_t jsondec_base64(jsondec* d, upb_StringView str) {
  /* We decode in place. This is safe because this is a new buffer (not
   * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */
  char* out = (char*)str.data;
  const char* in = str.data;
  const char* in_end = in + str.size;
  /* Round down to multiple of 4. */
  const char* const quads_end = in + (str.size & -4);

  while (in < quads_end) {
    int val = jsondec_base64_tablelookup(in[0]) << 18 |
              jsondec_base64_tablelookup(in[1]) << 12 |
              jsondec_base64_tablelookup(in[2]) << 6 |
              jsondec_base64_tablelookup(in[3]) << 0;

    if (val < 0) {
      /* Junk chars or padding. Remove trailing padding, if any. */
      if (in_end - in == 4 && in[3] == '=') {
        in_end -= (in[2] == '=') ? 2 : 1;
      }
      break;
    }

    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out[2] = val & 0xff;

    in += 4;
    out += 3;
  }

  if (in < in_end) {
    /* Process remaining chars. We do not require padding. */
    out = jsondec_partialbase64(d, in, in_end, out);
  }

  return out - str.data;
}
638
639 /* Low-level integer parsing **************************************************/
640
/* Wrapper around upb_BufToUint64() that converts a NULL result into an
 * "Integer overflow" parse error. Returns the pointer upb_BufToUint64()
 * returned. */
static const char* jsondec_buftouint64(jsondec* d, const char* ptr,
                                       const char* end, uint64_t* val) {
  const char* stop = upb_BufToUint64(ptr, end, val);
  if (stop == NULL) {
    jsondec_err(d, "Integer overflow");
  }
  return stop;
}
647
/* Wrapper around upb_BufToInt64() that converts a NULL result into an
 * "Integer overflow" parse error. Returns the pointer upb_BufToInt64()
 * returned. */
static const char* jsondec_buftoint64(jsondec* d, const char* ptr,
                                      const char* end, int64_t* val,
                                      bool* is_neg) {
  const char* stop = upb_BufToInt64(ptr, end, val, is_neg);
  if (stop == NULL) {
    jsondec_err(d, "Integer overflow");
  }
  return stop;
}
655
/* Converts the whole of `str` to an unsigned 64-bit integer. Raises a parse
 * error on overflow or if the conversion stops before the end of `str`. */
static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {
  const char* const str_end = str.data + str.size;
  uint64_t parsed;

  const char* stop = jsondec_buftouint64(d, str.data, str_end, &parsed);
  if (stop != str_end) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
664
/* Converts the whole of `str` to a signed 64-bit integer. Raises a parse
 * error on overflow or if the conversion stops before the end of `str`. */
static int64_t jsondec_strtoint64(jsondec* d, upb_StringView str) {
  const char* const str_end = str.data + str.size;
  int64_t parsed;

  const char* stop = jsondec_buftoint64(d, str.data, str_end, &parsed, NULL);
  if (stop != str_end) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
673
/* If `str` is empty, downgrades the decode result to
 * kUpb_JsonDecodeResult_OkWithEmptyStringNumerics and stores a warning naming
 * field `f` in d->status. Parsing continues. Non-empty strings are left
 * alone. */
static void jsondec_checkempty(jsondec* d, upb_StringView str,
                               const upb_FieldDef* f) {
  if (str.size == 0) {
    d->result = kUpb_JsonDecodeResult_OkWithEmptyStringNumerics;
    upb_Status_SetErrorFormat(d->status,
                              "Empty string is not a valid number (field: %s). "
                              "This will be an error in a future version.",
                              upb_FieldDef_FullName(f));
  }
}
683
684 /* Primitive value types ******************************************************/
685
686 /* Parse INT32 or INT64 value. */
jsondec_int(jsondec * d,const upb_FieldDef * f)687 static upb_MessageValue jsondec_int(jsondec* d, const upb_FieldDef* f) {
688 upb_MessageValue val;
689
690 switch (jsondec_peek(d)) {
691 case JD_NUMBER: {
692 double dbl = jsondec_number(d);
693 if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
694 jsondec_err(d, "JSON number is out of range.");
695 }
696 val.int64_val = dbl; /* must be guarded, overflow here is UB */
697 if (val.int64_val != dbl) {
698 jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
699 val.int64_val);
700 }
701 break;
702 }
703 case JD_STRING: {
704 upb_StringView str = jsondec_string(d);
705 jsondec_checkempty(d, str, f);
706 val.int64_val = jsondec_strtoint64(d, str);
707 break;
708 }
709 default:
710 jsondec_err(d, "Expected number or string");
711 }
712
713 if (upb_FieldDef_CType(f) == kUpb_CType_Int32 ||
714 upb_FieldDef_CType(f) == kUpb_CType_Enum) {
715 if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
716 jsondec_err(d, "Integer out of range.");
717 }
718 val.int32_val = (int32_t)val.int64_val;
719 }
720
721 return val;
722 }
723
724 /* Parse UINT32 or UINT64 value. */
jsondec_uint(jsondec * d,const upb_FieldDef * f)725 static upb_MessageValue jsondec_uint(jsondec* d, const upb_FieldDef* f) {
726 upb_MessageValue val;
727
728 switch (jsondec_peek(d)) {
729 case JD_NUMBER: {
730 double dbl = jsondec_number(d);
731 if (dbl > 18446744073709549568.0 || dbl < 0) {
732 jsondec_err(d, "JSON number is out of range.");
733 }
734 val.uint64_val = dbl; /* must be guarded, overflow here is UB */
735 if (val.uint64_val != dbl) {
736 jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
737 val.uint64_val);
738 }
739 break;
740 }
741 case JD_STRING: {
742 upb_StringView str = jsondec_string(d);
743 jsondec_checkempty(d, str, f);
744 val.uint64_val = jsondec_strtouint64(d, str);
745 break;
746 }
747 default:
748 jsondec_err(d, "Expected number or string");
749 }
750
751 if (upb_FieldDef_CType(f) == kUpb_CType_UInt32) {
752 if (val.uint64_val > UINT32_MAX) {
753 jsondec_err(d, "Integer out of range.");
754 }
755 val.uint32_val = (uint32_t)val.uint64_val;
756 }
757
758 return val;
759 }
760
761 /* Parse DOUBLE or FLOAT value. */
jsondec_double(jsondec * d,const upb_FieldDef * f)762 static upb_MessageValue jsondec_double(jsondec* d, const upb_FieldDef* f) {
763 upb_StringView str;
764 upb_MessageValue val;
765
766 switch (jsondec_peek(d)) {
767 case JD_NUMBER:
768 val.double_val = jsondec_number(d);
769 break;
770 case JD_STRING:
771 str = jsondec_string(d);
772 if (str.size == 0) {
773 jsondec_checkempty(d, str, f);
774 val.double_val = 0.0;
775 } else if (jsondec_streql(str, "NaN")) {
776 val.double_val = NAN;
777 } else if (jsondec_streql(str, "Infinity")) {
778 val.double_val = INFINITY;
779 } else if (jsondec_streql(str, "-Infinity")) {
780 val.double_val = -INFINITY;
781 } else {
782 char* end;
783 val.double_val = strtod(str.data, &end);
784 if (end != str.data + str.size) {
785 d->result = kUpb_JsonDecodeResult_OkWithEmptyStringNumerics;
786 upb_Status_SetErrorFormat(
787 d->status,
788 "Non-number characters in quoted number (field: %s). "
789 "This will be an error in a future version.",
790 upb_FieldDef_FullName(f));
791 }
792 }
793 break;
794 default:
795 jsondec_err(d, "Expected number or string");
796 }
797
798 if (upb_FieldDef_CType(f) == kUpb_CType_Float) {
799 float f = val.double_val;
800 if (val.double_val != INFINITY && val.double_val != -INFINITY) {
801 if (f == INFINITY || f == -INFINITY) jsondec_err(d, "Float out of range");
802 }
803 val.float_val = f;
804 }
805
806 return val;
807 }
808
809 /* Parse STRING or BYTES value. */
jsondec_strfield(jsondec * d,const upb_FieldDef * f)810 static upb_MessageValue jsondec_strfield(jsondec* d, const upb_FieldDef* f) {
811 upb_MessageValue val;
812 val.str_val = jsondec_string(d);
813 if (upb_FieldDef_CType(f) == kUpb_CType_Bytes) {
814 val.str_val.size = jsondec_base64(d, val.str_val);
815 }
816 return val;
817 }
818
jsondec_enum(jsondec * d,const upb_FieldDef * f)819 static upb_JsonMessageValue jsondec_enum(jsondec* d, const upb_FieldDef* f) {
820 switch (jsondec_peek(d)) {
821 case JD_STRING: {
822 upb_StringView str = jsondec_string(d);
823 const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
824 const upb_EnumValueDef* ev =
825 upb_EnumDef_FindValueByNameWithSize(e, str.data, str.size);
826 upb_JsonMessageValue val = {.ignore = false};
827 if (ev) {
828 val.value.int32_val = upb_EnumValueDef_Number(ev);
829 } else {
830 if (d->options & upb_JsonDecode_IgnoreUnknown) {
831 val.ignore = true;
832 } else {
833 jsondec_errf(d, "Unknown enumerator: '" UPB_STRINGVIEW_FORMAT "'",
834 UPB_STRINGVIEW_ARGS(str));
835 }
836 }
837 return val;
838 }
839 case JD_NULL: {
840 if (jsondec_isnullvalue(f)) {
841 upb_JsonMessageValue val = {.ignore = false};
842 jsondec_null(d);
843 val.value.int32_val = 0;
844 return val;
845 }
846 }
847 /* Fallthrough. */
848 default:
849 return (upb_JsonMessageValue){.value = jsondec_int(d, f),
850 .ignore = false};
851 }
852 }
853
jsondec_bool(jsondec * d,const upb_FieldDef * f)854 static upb_MessageValue jsondec_bool(jsondec* d, const upb_FieldDef* f) {
855 bool is_map_key = upb_FieldDef_Number(f) == 1 &&
856 upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f));
857 upb_MessageValue val;
858
859 if (is_map_key) {
860 upb_StringView str = jsondec_string(d);
861 if (jsondec_streql(str, "true")) {
862 val.bool_val = true;
863 } else if (jsondec_streql(str, "false")) {
864 val.bool_val = false;
865 } else {
866 jsondec_err(d, "Invalid boolean map key");
867 }
868 } else {
869 switch (jsondec_peek(d)) {
870 case JD_TRUE:
871 val.bool_val = true;
872 jsondec_true(d);
873 break;
874 case JD_FALSE:
875 val.bool_val = false;
876 jsondec_false(d);
877 break;
878 default:
879 jsondec_err(d, "Expected true or false");
880 }
881 }
882
883 return val;
884 }
885
886 /* Composite types (array/message/map) ****************************************/
887
/* Parses a JSON array into the repeated field `f` of `msg`, appending each
 * decoded element. Elements flagged `ignore` by jsondec_value() are skipped.
 * Raises "Out of memory" if the array cannot be obtained or grown, instead of
 * silently dropping elements or dereferencing a NULL array. */
static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  upb_Array* arr = upb_Message_Mutable(msg, f, d->arena).array;
  if (!arr) jsondec_err(d, "Out of memory");

  jsondec_arrstart(d);
  while (jsondec_arrnext(d)) {
    upb_JsonMessageValue elem = jsondec_value(d, f);
    if (elem.ignore) continue;
    if (!upb_Array_Append(arr, elem.value, d->arena)) {
      jsondec_err(d, "Out of memory");
    }
  }
  jsondec_arrend(d);
}
901
/* Parses a JSON object into the map field `f` of `msg`. Keys are decoded with
 * the entry message's field 1 and values with its field 2. Entries whose
 * value is flagged `ignore` are skipped. Raises "Out of memory" if the map
 * cannot be obtained or an entry cannot be inserted, instead of silently
 * dropping it. */
static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  upb_Map* map = upb_Message_Mutable(msg, f, d->arena).map;
  const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1);
  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2);

  if (!map) jsondec_err(d, "Out of memory");

  jsondec_objstart(d);
  while (jsondec_objnext(d)) {
    upb_JsonMessageValue key, val;
    key = jsondec_value(d, key_f);
    UPB_ASSUME(!key.ignore);  // Map key cannot be enum.
    jsondec_entrysep(d);
    val = jsondec_value(d, val_f);
    if (val.ignore) continue;
    if (!upb_Map_Set(map, key.value, val.value, d->arena)) {
      jsondec_err(d, "Out of memory");
    }
  }
  jsondec_objend(d);
}
922
/* Decodes the next JSON value into `msg` of type `m`. Well-known types go
 * through jsondec_wellknown(); every other message type is read as a plain
 * JSON object. */
static void jsondec_tomsg(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  if (upb_MessageDef_WellKnownType(m) != kUpb_WellKnown_Unspecified) {
    jsondec_wellknown(d, msg, m);
    return;
  }
  jsondec_object(d, msg, m);
}
932
jsondec_msg(jsondec * d,const upb_FieldDef * f)933 static upb_MessageValue jsondec_msg(jsondec* d, const upb_FieldDef* f) {
934 const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
935 const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
936 upb_Message* msg = upb_Message_New(layout, d->arena);
937 upb_MessageValue val;
938
939 jsondec_tomsg(d, msg, m);
940 val.msg_val = msg;
941 return val;
942 }
943
/* Parses one `"name": value` member of a JSON object into `msg`.
 *
 *  - A name of the form "[full.extension.name]" is resolved as an extension
 *    through d->symtab; it is an error if that extension extends a message
 *    type other than `m`. Other names are resolved as JSON field names of
 *    `m`.
 *  - An unknown name is an error unless upb_JsonDecode_IgnoreUnknown is set,
 *    in which case the value is parsed and discarded.
 *  - `null` leaves the field untouched, except for Value/NullValue fields.
 *  - Setting a second member of the same real oneof is an error.
 *  - Allocation failures while storing the value raise "Out of memory"
 *    instead of silently losing the field. */
static void jsondec_field(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  upb_StringView name;
  const upb_FieldDef* f;
  const upb_FieldDef* preserved;

  name = jsondec_string(d);
  jsondec_entrysep(d);

  if (name.size >= 2 && name.data[0] == '[' &&
      name.data[name.size - 1] == ']') {
    /* Strip the surrounding brackets before the lookup. */
    f = upb_DefPool_FindExtensionByNameWithSize(d->symtab, name.data + 1,
                                                name.size - 2);
    if (f && upb_FieldDef_ContainingType(f) != m) {
      jsondec_errf(
          d, "Extension %s extends message %s, but was seen in message %s",
          upb_FieldDef_FullName(f),
          upb_MessageDef_FullName(upb_FieldDef_ContainingType(f)),
          upb_MessageDef_FullName(m));
    }
  } else {
    f = upb_MessageDef_FindByJsonNameWithSize(m, name.data, name.size);
  }

  if (!f) {
    if ((d->options & upb_JsonDecode_IgnoreUnknown) == 0) {
      jsondec_errf(d, "No such field: " UPB_STRINGVIEW_FORMAT,
                   UPB_STRINGVIEW_ARGS(name));
    }
    jsondec_skipval(d);
    return;
  }

  if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
    /* JSON "null" indicates a default value, so no need to set anything. */
    jsondec_null(d);
    return;
  }

  if (upb_FieldDef_RealContainingOneof(f) &&
      upb_Message_WhichOneofByDef(msg, upb_FieldDef_ContainingOneof(f))) {
    jsondec_err(d, "More than one field for this oneof.");
  }

  /* Track the field being decoded; restored once the value is parsed. */
  preserved = d->debug_field;
  d->debug_field = f;

  if (upb_FieldDef_IsMap(f)) {
    jsondec_map(d, msg, f);
  } else if (upb_FieldDef_IsRepeated(f)) {
    jsondec_array(d, msg, f);
  } else if (upb_FieldDef_IsSubMessage(f)) {
    upb_Message* submsg = upb_Message_Mutable(msg, f, d->arena).msg;
    const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f);
    if (!submsg) jsondec_err(d, "Out of memory");
    jsondec_tomsg(d, submsg, subm);
  } else {
    upb_JsonMessageValue val = jsondec_value(d, f);
    if (!val.ignore &&
        !upb_Message_SetFieldByDef(msg, f, val.value, d->arena)) {
      jsondec_err(d, "Out of memory");
    }
  }

  d->debug_field = preserved;
}
1009
/* Parses a JSON object into `msg` of type `m`, handing each member to
 * jsondec_field(). */
static void jsondec_object(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));

  jsondec_objstart(d);
  for (;;) {
    if (!jsondec_objnext(d)) break;
    jsondec_field(d, msg, m);
  }
  jsondec_objend(d);
}
1019
jsondec_nonenum(jsondec * d,const upb_FieldDef * f)1020 static upb_MessageValue jsondec_nonenum(jsondec* d, const upb_FieldDef* f) {
1021 switch (upb_FieldDef_CType(f)) {
1022 case kUpb_CType_Bool:
1023 return jsondec_bool(d, f);
1024 case kUpb_CType_Float:
1025 case kUpb_CType_Double:
1026 return jsondec_double(d, f);
1027 case kUpb_CType_UInt32:
1028 case kUpb_CType_UInt64:
1029 return jsondec_uint(d, f);
1030 case kUpb_CType_Int32:
1031 case kUpb_CType_Int64:
1032 return jsondec_int(d, f);
1033 case kUpb_CType_String:
1034 case kUpb_CType_Bytes:
1035 return jsondec_strfield(d, f);
1036 case kUpb_CType_Message:
1037 return jsondec_msg(d, f);
1038 case kUpb_CType_Enum:
1039 default:
1040 UPB_UNREACHABLE();
1041 }
1042 }
1043
jsondec_value(jsondec * d,const upb_FieldDef * f)1044 static upb_JsonMessageValue jsondec_value(jsondec* d, const upb_FieldDef* f) {
1045 if (upb_FieldDef_CType(f) == kUpb_CType_Enum) {
1046 return jsondec_enum(d, f);
1047 } else {
1048 return (upb_JsonMessageValue){.value = jsondec_nonenum(d, f),
1049 .ignore = false};
1050 }
1051 }
1052
1053 /* Well-known types ***********************************************************/
1054
/* Reads a fixed-width decimal field of a timestamp.
 *
 * Parses the `digits` characters starting at `*ptr` with
 * jsondec_buftouint64(), which must consume exactly that many characters. If
 * `after` is non-NULL and non-empty, the characters that follow must equal it.
 * On success `*ptr` is advanced past the digits and the separator and the
 * parsed value is returned. Any mismatch is reported through jsondec_err(),
 * which is relied upon not to return.
 *
 * The caller must guarantee that `digits + strlen(after)` bytes are readable
 * at `*ptr`. */
static int jsondec_tsdigits(jsondec* d, const char** ptr, size_t digits,
                            const char* after) {
  const char* const begin = *ptr;
  const char* const digits_end = begin + digits;
  const size_t sep_len = (after != NULL) ? strlen(after) : 0;
  uint64_t parsed;

  UPB_ASSERT(digits <= 9); /* int can't overflow. */

  bool ok = jsondec_buftouint64(d, begin, digits_end, &parsed) == digits_end;
  if (ok && sep_len != 0) {
    ok = memcmp(digits_end, after, sep_len) == 0;
  }
  if (!ok) {
    jsondec_err(d, "Malformed timestamp");
  }

  UPB_ASSERT(parsed < INT_MAX);

  *ptr = digits_end + sep_len;
  return (int)parsed;
}
1074
/* Parses an optional fractional-seconds suffix (".123") into nanoseconds.
 *
 * If `*ptr` does not point at a '.', nothing is consumed and 0 is returned.
 * Otherwise the digits after the '.' are parsed, scaled up to nine decimal
 * places, and `*ptr` is advanced past them. More than nine digits is reported
 * through jsondec_err(). */
static int jsondec_nanos(jsondec* d, const char** ptr, const char* end) {
  const char* const start = *ptr;

  if (start == end || *start != '.') {
    return 0; /* No fractional part present. */
  }

  uint64_t frac = 0;
  const char* const first_digit = start + 1;
  const char* const frac_end = jsondec_buftouint64(d, first_digit, end, &frac);
  const int ndigits = (int)(frac_end - first_digit);

  if (ndigits > 9) {
    jsondec_err(d, "Too many digits for partial seconds");
  }

  /* Pad on the right with zeros: ".5" means 500000000ns. */
  for (int i = ndigits; i < 9; i++) {
    frac *= 10;
  }

  *ptr = frac_end;

  UPB_ASSERT(frac < INT_MAX);
  return (int)frac;
}
1094
1095 /* jsondec_epochdays(1970, 1, 1) == 1970-01-01 == 0. */
/* Returns the number of days between 1970-01-01 and the civil date y-m-d
 * (Gregorian leap-year rule), negative for earlier dates.
 *
 * The computation is done in unsigned 32-bit arithmetic using a year that
 * starts in March, so that the leap day is the last day of the shifted year. */
int jsondec_epochdays(int y, int m, int d) {
  const uint32_t base_year = 4800; /* Before min year, multiple of 400. */
  const uint32_t epoch_shift = 2472632; /* Makes 1970-01-01 come out as 0. */
  uint32_t month = m - 3;               /* March-based month. */
  uint32_t year = y + base_year;

  /* January and February wrap around; they belong to the previous
   * March-based year. */
  if (month > (uint32_t)m) {
    month += 12;
    year -= 1;
  }

  const uint32_t days_before_month = (month * 62719 + 769) / 2048;
  const uint32_t leap_days = year / 4 - year / 100 + year / 400;

  return year * 365 + leap_days + days_before_month + (d - 1) - epoch_shift;
}
1106
/* Converts a civil date and time of day to seconds since 1970-01-01T00:00:00,
 * using jsondec_epochdays() for the date part. No UTC offset is applied. */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  int64_t total = jsondec_epochdays(y, m, d);
  total *= 86400; /* Seconds per day. */
  total += h * 3600;
  total += min * 60;
  total += s;
  return total;
}
1110
/* Decodes a JSON string such as "1972-01-01T01:00:00.5+08:00" or
 * "1972-01-01T01:00:00Z" into a Timestamp message.
 *
 * Field 1 (seconds) and field 2 (nanos) of `msg` are set on success. Input
 * that does not match the expected layout, or that denotes an instant outside
 * 0001-01-01T00:00:00Z .. 9999-12-31T23:59:59Z, is reported through
 * jsondec_err(). */
static void jsondec_timestamp(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  /* Seconds since the Unix epoch for 0001-01-01T00:00:00Z and
   * 9999-12-31T23:59:59Z respectively. */
  const int64_t min_seconds = -62135596800;
  const int64_t max_seconds = 253402300799;
  upb_MessageValue seconds;
  upb_MessageValue nanos;
  upb_StringView str = jsondec_string(d);
  const char* ptr = str.data;
  const char* end = ptr + str.size;

  /* The fixed-width date/time prefix below consumes 19 characters and at
   * least one more is needed for the zone, so anything shorter is malformed.
   * This also keeps the jsondec_tsdigits() reads in bounds. */
  if (str.size < 20) goto malformed;

  {
    /* 1972-01-01T01:00:00 */
    int year = jsondec_tsdigits(d, &ptr, 4, "-");
    int mon = jsondec_tsdigits(d, &ptr, 2, "-");
    int day = jsondec_tsdigits(d, &ptr, 2, "T");
    int hour = jsondec_tsdigits(d, &ptr, 2, ":");
    int min = jsondec_tsdigits(d, &ptr, 2, ":");
    int sec = jsondec_tsdigits(d, &ptr, 2, NULL);

    seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);
  }

  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  {
    /* [+-]08:00 or Z */
    int ofs_hour = 0;
    int ofs_min = 0;
    bool neg = false;

    if (ptr == end) goto malformed;

    switch (*ptr++) {
      case '-':
        neg = true;
        /* fallthrough */
      case '+':
        /* Exactly "HH:MM" must remain. */
        if ((end - ptr) != 5) goto malformed;
        ofs_hour = jsondec_tsdigits(d, &ptr, 2, ":");
        ofs_min = jsondec_tsdigits(d, &ptr, 2, NULL);
        ofs_min = ((ofs_hour * 60) + ofs_min) * 60; /* Now in seconds. */
        /* A "+HH:MM" zone is ahead of UTC, so its offset is subtracted to get
         * UTC; a "-HH:MM" zone is behind, so it is added. */
        seconds.int64_val += (neg ? ofs_min : -ofs_min);
        break;
      case 'Z':
        if (ptr != end) goto malformed;
        break;
      default:
        goto malformed;
    }
  }

  /* The UTC offset can push an otherwise valid 4-digit-year time past either
   * end of the representable range, so check both bounds after applying it. */
  if (seconds.int64_val < min_seconds || seconds.int64_val > max_seconds) {
    jsondec_err(d, "Timestamp out of range");
  }

  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 1),
                            seconds, d->arena);
  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos,
                            d->arena);
  return;

malformed:
  jsondec_err(d, "Malformed timestamp");
}
1176
/* Decodes a JSON string such as "3s", "-0.5s" or "3.000000001s" into a
 * Duration message.
 *
 * Field 1 (seconds) and field 2 (nanos) of `msg` are set. nanos takes the
 * sign reported by jsondec_buftoint64(). A missing trailing 's' or a seconds
 * value beyond +/- 3652500 days is reported through jsondec_err(). */
static void jsondec_duration(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const int64_t limit = (uint64_t)3652500 * 86400;
  upb_MessageValue secs;
  upb_MessageValue frac;
  bool negative = false;
  upb_StringView text = jsondec_string(d);
  const char* cur = text.data;
  const char* const end = cur + text.size;

  /* Integer part, then the optional ".nnn" fraction. */
  cur = jsondec_buftoint64(d, cur, end, &secs.int64_val, &negative);
  frac.int32_val = jsondec_nanos(d, &cur, end);

  /* Exactly one character, 's', must remain. */
  const bool has_suffix = (end - cur == 1) && (*cur == 's');
  if (!has_suffix) {
    jsondec_err(d, "Malformed duration");
  }

  if (secs.int64_val > limit || secs.int64_val < -limit) {
    jsondec_err(d, "Duration out of range");
  }

  if (negative) {
    frac.int32_val = -frac.int32_val;
  }

  const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_Message_SetFieldByDef(msg, seconds_f, secs, d->arena);

  const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
  upb_Message_SetFieldByDef(msg, nanos_f, frac, d->arena);
}
1209
/* Decodes a JSON array into a ListValue message.
 *
 * Each element becomes a new message of the sub-message type of field 1. It is
 * appended to that repeated field and then filled in by
 * jsondec_wellknownvalue(). */
static void jsondec_listvalue(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const upb_FieldDef* list_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* elem_m = upb_FieldDef_MessageSubDef(list_f);
  const upb_MiniTable* elem_layout = upb_MessageDef_MiniTable(elem_m);
  upb_Array* list = upb_Message_Mutable(msg, list_f, d->arena).array;

  for (jsondec_arrstart(d); jsondec_arrnext(d);) {
    upb_Message* elem_msg = upb_Message_New(elem_layout, d->arena);
    upb_MessageValue elem = {.msg_val = elem_msg};

    /* Append first, then parse into the message the array now refers to. */
    upb_Array_Append(list, elem, d->arena);
    jsondec_wellknownvalue(d, elem_msg, elem_m);
  }

  jsondec_arrend(d);
}
1228
/* Decodes a JSON object into a Struct message.
 *
 * Field 1 of `m` is a map. For every JSON member a new message of the map's
 * value type (field 2 of the map entry) is created, inserted under the member
 * name, and then filled in by jsondec_wellknownvalue(). */
static void jsondec_struct(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const upb_FieldDef* map_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* map_entry_m = upb_FieldDef_MessageSubDef(map_f);
  const upb_FieldDef* entry_value_f =
      upb_MessageDef_FindFieldByNumber(map_entry_m, 2);
  const upb_MessageDef* elem_m = upb_FieldDef_MessageSubDef(entry_value_f);
  const upb_MiniTable* elem_layout = upb_MessageDef_MiniTable(elem_m);
  upb_Map* map = upb_Message_Mutable(msg, map_f, d->arena).map;

  for (jsondec_objstart(d); jsondec_objnext(d);) {
    upb_Message* elem_msg = upb_Message_New(elem_layout, d->arena);
    upb_MessageValue map_key;
    upb_MessageValue map_val;

    map_key.str_val = jsondec_string(d);
    map_val.msg_val = elem_msg;

    /* Insert first, then parse the member's value into the stored message. */
    upb_Map_Set(map, map_key, map_val, d->arena);
    jsondec_entrysep(d);
    jsondec_wellknownvalue(d, elem_msg, elem_m);
  }

  jsondec_objend(d);
}
1251
/* Decodes an arbitrary JSON value into a Value message.
 *
 * The kind of the next JSON token (from jsondec_peek()) selects which field of
 * `m` is populated: null -> 1, number -> 2, string -> 3, true/false -> 4,
 * object -> 5 (Struct), array -> 6 (ListValue). */
static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
                                   const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const int token = jsondec_peek(d);
  const upb_FieldDef* target_f;
  upb_MessageValue scalar;

  /* Composite values are parsed directly into the mutable sub-message, so no
   * explicit field set is needed afterwards. */
  if (token == JD_OBJECT) {
    /* Struct struct_value = 5; */
    target_f = upb_MessageDef_FindFieldByNumber(m, 5);
    upb_Message* nested = upb_Message_Mutable(msg, target_f, d->arena).msg;
    jsondec_struct(d, nested, upb_FieldDef_MessageSubDef(target_f));
    return;
  }

  if (token == JD_ARRAY) {
    /* ListValue list_value = 6; */
    target_f = upb_MessageDef_FindFieldByNumber(m, 6);
    upb_Message* nested = upb_Message_Mutable(msg, target_f, d->arena).msg;
    jsondec_listvalue(d, nested, upb_FieldDef_MessageSubDef(target_f));
    return;
  }

  switch (token) {
    case JD_NULL:
      /* NullValue null_value = 1; */
      target_f = upb_MessageDef_FindFieldByNumber(m, 1);
      scalar.int32_val = 0;
      jsondec_null(d);
      break;

    case JD_NUMBER:
      /* double number_value = 2; */
      target_f = upb_MessageDef_FindFieldByNumber(m, 2);
      scalar.double_val = jsondec_number(d);
      break;

    case JD_STRING:
      /* string string_value = 3; */
      target_f = upb_MessageDef_FindFieldByNumber(m, 3);
      scalar.str_val = jsondec_string(d);
      break;

    case JD_TRUE:
      /* bool bool_value = 4; */
      target_f = upb_MessageDef_FindFieldByNumber(m, 4);
      scalar.bool_val = true;
      jsondec_true(d);
      break;

    case JD_FALSE:
      /* bool bool_value = 4; */
      target_f = upb_MessageDef_FindFieldByNumber(m, 4);
      scalar.bool_val = false;
      jsondec_false(d);
      break;

    default:
      UPB_UNREACHABLE();
  }

  upb_Message_SetFieldByDef(msg, target_f, scalar, d->arena);
}
1307
jsondec_mask(jsondec * d,const char * buf,const char * end)1308 static upb_StringView jsondec_mask(jsondec* d, const char* buf,
1309 const char* end) {
1310 /* FieldMask fields grow due to inserted '_' characters, so we can't do the
1311 * transform in place. */
1312 const char* ptr = buf;
1313 upb_StringView ret;
1314 char* out;
1315
1316 ret.size = end - ptr;
1317 while (ptr < end) {
1318 ret.size += (*ptr >= 'A' && *ptr <= 'Z');
1319 ptr++;
1320 }
1321
1322 out = upb_Arena_Malloc(d->arena, ret.size);
1323 ptr = buf;
1324 ret.data = out;
1325
1326 while (ptr < end) {
1327 char ch = *ptr++;
1328 if (ch >= 'A' && ch <= 'Z') {
1329 *out++ = '_';
1330 *out++ = ch + 32;
1331 } else if (ch == '_') {
1332 jsondec_err(d, "field mask may not contain '_'");
1333 } else {
1334 *out++ = ch;
1335 }
1336 }
1337
1338 return ret;
1339 }
1340
/* Decodes a JSON string holding comma-separated paths into a FieldMask.
 *
 * Each comma-delimited piece is converted by jsondec_mask() and appended to
 * the repeated field number 1 of `msg`. */
static void jsondec_fieldmask(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  /* repeated string paths = 1; */
  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_Array* paths = upb_Message_Mutable(msg, paths_f, d->arena).array;
  upb_StringView text = jsondec_string(d);
  const char* const end = text.data + text.size;

  for (const char* cur = text.data; cur < end;) {
    const char* comma = memchr(cur, ',', end - cur);
    const char* piece_end = comma ? comma : end;
    upb_MessageValue path;

    path.str_val = jsondec_mask(d, cur, piece_end);
    upb_Array_Append(paths, path, d->arena);

    /* Step over the comma, or finish if this was the last piece. */
    cur = comma ? comma + 1 : end;
  }
}
1364
/* Decodes one member (other than "@type") of a JSON-encoded Any into the
 * payload message `msg` of type `m`. */
static void jsondec_anyfield(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));

  if (upb_MessageDef_WellKnownType(m) != kUpb_WellKnown_Unspecified) {
    /* For well-known types: {"@type": "[well-known type]", "value": <X>}
     * where <X> is whatever encoding the WKT normally uses. */
    upb_StringView key = jsondec_string(d);
    jsondec_entrysep(d);
    if (!jsondec_streql(key, "value")) {
      jsondec_err(d, "Key for well-known type must be 'value'");
    }
    jsondec_wellknown(d, msg, m);
    return;
  }

  /* For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
   * where f1, f2, etc. are the normal fields of this type. */
  jsondec_field(d, msg, m);
}
1383
/* Parses the string value of an Any's "@type" member.
 *
 * The whole URL is stored in field 1 of `msg`. The text after the last '/' is
 * then looked up in d->symtab as a message name. Returns the message
 * definition found; a URL without a '/' or with nothing before it, or an
 * unknown type name, is reported through jsondec_err(). */
static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
                                             const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_StringView url = jsondec_string(d);
  const char* const begin = url.data;
  const char* const end = begin + url.size;
  upb_MessageValue url_val;

  url_val.str_val = url;
  upb_Message_SetFieldByDef(msg, type_url_f, url_val, d->arena);

  /* Find message name after the last '/' */
  const char* slash = end;
  while (slash > begin) {
    slash--;
    if (*slash == '/') break;
  }

  /* slash == begin: no '/' at all, or the '/' is the first character.
   * slash == end:   the URL is empty. */
  if (slash == begin || slash == end) {
    jsondec_err(d, "Type url must have at least one '/' and non-empty host");
  }

  const char* const name = slash + 1;
  const upb_MessageDef* found =
      upb_DefPool_FindMessageByNameWithSize(d->symtab, name, end - name);

  if (!found) {
    jsondec_err(d, "Type was not found");
  }

  return found;
}
1414
/* Decodes a JSON object into an Any message.
 *
 * The "@type" member selects the payload type and is stored in field 1. All
 * other members are decoded into a temporary payload message, which is then
 * wire-encoded and stored in field 2. "@type" need not come first: members
 * seen before it are skipped on the first pass and re-parsed from a copy once
 * the payload type is known.
 *
 * A missing "@type", allocation failure, or a payload that cannot be encoded
 * is reported through jsondec_err(). */
static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  /* string type_url = 1;
   * bytes value = 2; */
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
  upb_Message* any_msg;
  const upb_MessageDef* any_m = NULL;
  const char* pre_type_data = NULL;
  const char* pre_type_end = NULL;
  upb_MessageValue encoded;

  jsondec_objstart(d);

  /* Scan looking for "@type", which is not necessarily first. */
  while (!any_m && jsondec_objnext(d)) {
    const char* start = d->ptr;
    upb_StringView name = jsondec_string(d);
    jsondec_entrysep(d);
    if (jsondec_streql(name, "@type")) {
      any_m = jsondec_typeurl(d, msg, m);
      if (pre_type_data) {
        /* Back up to the ',' that separates the skipped members from "@type";
         * one must exist because at least one member precedes it. */
        pre_type_end = start;
        while (*pre_type_end != ',') pre_type_end--;
      }
    } else {
      /* Remember where the first skipped member starts. */
      if (!pre_type_data) pre_type_data = start;
      jsondec_skipval(d);
    }
  }

  if (!any_m) {
    jsondec_err(d, "Any object didn't contain a '@type' field");
  }

  const upb_MiniTable* any_layout = upb_MessageDef_MiniTable(any_m);
  any_msg = upb_Message_New(any_layout, d->arena);
  if (!any_msg) {
    jsondec_err(d, "Out of memory");
  }

  if (pre_type_data) {
    /* Re-parse the members that preceded "@type" from a private copy that is
     * terminated with '}' in place of the separating ','. */
    size_t len = pre_type_end - pre_type_data + 1;
    char* tmp = upb_Arena_Malloc(d->arena, len);
    if (!tmp) {
      jsondec_err(d, "Out of memory");
    }
    const char* saved_ptr = d->ptr;
    const char* saved_end = d->end;
    memcpy(tmp, pre_type_data, len - 1);
    tmp[len - 1] = '}';
    d->ptr = tmp;
    d->end = tmp + len;
    d->is_first = true;
    while (jsondec_objnext(d)) {
      jsondec_anyfield(d, any_msg, any_m);
    }
    d->ptr = saved_ptr;
    d->end = saved_end;
  }

  /* Members that follow "@type" in the original input. */
  while (jsondec_objnext(d)) {
    jsondec_anyfield(d, any_msg, any_m);
  }

  jsondec_objend(d);

  upb_EncodeStatus status =
      upb_Encode(any_msg, any_layout, 0, d->arena,
                 (char**)&encoded.str_val.data, &encoded.str_val.size);
  if (status != kUpb_EncodeStatus_Ok) {
    /* Fail the decode rather than store an unset `encoded` value. */
    jsondec_err(d, "Error encoding Any value");
  }
  upb_Message_SetFieldByDef(msg, value_f, encoded, d->arena);
}
1482
/* Decodes a bare JSON value into a wrapper message: the value is parsed
 * according to field 1 of `m` and stored in that field. */
static void jsondec_wrapper(jsondec* d, upb_Message* msg,
                            const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const upb_FieldDef* inner_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_JsonMessageValue parsed = jsondec_value(d, inner_f);

  UPB_ASSUME(!parsed.ignore);  // Wrapper cannot be an enum.
  upb_Message_SetFieldByDef(msg, inner_f, parsed.value, d->arena);
}
1491
/* Decodes the special JSON representation of a well-known type into `msg`.
 *
 * `m` must be one of the well-known types listed below; any other type is
 * unreachable. */
static void jsondec_wellknown(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));

  switch (upb_MessageDef_WellKnownType(m)) {
    /* All wrapper types share one decoder. */
    case kUpb_WellKnown_BoolValue:
    case kUpb_WellKnown_BytesValue:
    case kUpb_WellKnown_StringValue:
    case kUpb_WellKnown_UInt32Value:
    case kUpb_WellKnown_Int32Value:
    case kUpb_WellKnown_UInt64Value:
    case kUpb_WellKnown_Int64Value:
    case kUpb_WellKnown_FloatValue:
    case kUpb_WellKnown_DoubleValue:
      jsondec_wrapper(d, msg, m);
      return;

    case kUpb_WellKnown_Struct:
      jsondec_struct(d, msg, m);
      return;

    case kUpb_WellKnown_ListValue:
      jsondec_listvalue(d, msg, m);
      return;

    case kUpb_WellKnown_Value:
      jsondec_wellknownvalue(d, msg, m);
      return;

    case kUpb_WellKnown_Timestamp:
      jsondec_timestamp(d, msg, m);
      return;

    case kUpb_WellKnown_Duration:
      jsondec_duration(d, msg, m);
      return;

    case kUpb_WellKnown_FieldMask:
      jsondec_fieldmask(d, msg, m);
      return;

    case kUpb_WellKnown_Any:
      jsondec_any(d, msg, m);
      return;

    default:
      UPB_UNREACHABLE();
  }
}
1532
/* Runs the decoder over the whole input held in `d`.
 *
 * Returns kUpb_JsonDecodeResult_Error if UPB_SETJMP(d->err) returns nonzero
 * (presumably the error path unwinding back to here) or if non-whitespace
 * input remains after the top-level value. Otherwise returns d->result. */
static int upb_JsonDecoder_Decode(jsondec* const d, upb_Message* const msg,
                                  const upb_MessageDef* const m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  if (UPB_SETJMP(d->err)) return kUpb_JsonDecodeResult_Error;

  jsondec_tomsg(d, msg, m);

  // Trailing whitespace is allowed, so skip it before checking that the
  // entire input was consumed.
  jsondec_consumews(d);

  if (d->ptr != d->end) {
    jsondec_seterrmsg(d, "unexpected trailing characters");
    return kUpb_JsonDecodeResult_Error;
  }

  return d->result;
}
1551
/* Decodes the JSON text in [buf, buf + size) into `msg`, a message of type
 * `m`.
 *
 * `symtab` is used to resolve type names inside Any values, `arena` supplies
 * all memory for decoded data, `options` is stored for the parsing helpers,
 * and `status` is stored for error reporting. `msg` must not be frozen.
 *
 * Returns a kUpb_JsonDecodeResult_* code. Empty input leaves `msg` untouched
 * and yields kUpb_JsonDecodeResult_Ok. */
int upb_JsonDecodeDetectingNonconformance(const char* buf, size_t size,
                                          upb_Message* msg,
                                          const upb_MessageDef* m,
                                          const upb_DefPool* symtab,
                                          int options, upb_Arena* arena,
                                          upb_Status* status) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  jsondec d;

  /* Return a result code, not a bool: `true` is not a
   * kUpb_JsonDecodeResult_* value. */
  if (size == 0) return kUpb_JsonDecodeResult_Ok;

  d.ptr = buf;
  d.end = buf + size;
  d.arena = arena;
  d.symtab = symtab;
  d.status = status;
  d.options = options;
  d.depth = 64; /* Initial value of the depth counter. */
  d.result = kUpb_JsonDecodeResult_Ok;
  d.line = 1;
  d.line_begin = d.ptr;
  d.debug_field = NULL;
  d.is_first = false;

  return upb_JsonDecoder_Decode(&d, msg, m);
}
1578