• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "upb/json/decode.h"
9 
10 #include <errno.h>
11 #include <float.h>
12 #include <inttypes.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <setjmp.h>
16 #include <stdarg.h>
17 #include <stddef.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 #include "upb/base/descriptor_constants.h"
23 #include "upb/base/status.h"
24 #include "upb/base/string_view.h"
25 #include "upb/lex/atoi.h"
26 #include "upb/lex/unicode.h"
27 #include "upb/mem/arena.h"
28 #include "upb/message/array.h"
29 #include "upb/message/map.h"
30 #include "upb/message/message.h"
31 #include "upb/mini_table/message.h"
32 #include "upb/reflection/def.h"
33 #include "upb/reflection/message.h"
34 #include "upb/wire/encode.h"
35 
36 // Must be last.
37 #include "upb/port/def.inc"
38 
39 typedef struct {
40   const char *ptr, *end;
41   upb_Arena* arena; /* TODO: should we have a tmp arena for tmp data? */
42   const upb_DefPool* symtab;
43   int depth;
44   int result;
45   upb_Status* status;
46   jmp_buf err;
47   int line;
48   const char* line_begin;
49   bool is_first;
50   int options;
51   const upb_FieldDef* debug_field;
52 } jsondec;
53 
/* A decoded field value together with a flag telling the caller to drop it.
 * jsondec_enum() sets `ignore` for an unknown enumerator name when the
 * upb_JsonDecode_IgnoreUnknown option is set. */
typedef struct {
  upb_MessageValue value;
  bool ignore;
} upb_JsonMessageValue;
58 
/* Kinds of JSON value, as classified by jsondec_rawpeek() from the first
 * character of the value. */
enum {
  JD_OBJECT,
  JD_ARRAY,
  JD_STRING,
  JD_NUMBER,
  JD_TRUE,
  JD_FALSE,
  JD_NULL
};
60 
/* Forward declarations of mutually-recursive functions.  These are referenced
 * by the composite-type parsers before their definitions appear. */
static void jsondec_wellknown(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m);
static upb_JsonMessageValue jsondec_value(jsondec* d, const upb_FieldDef* f);
static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
                                   const upb_MessageDef* m);
static void jsondec_object(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m);
69 
/* Returns true iff `str` has exactly the same bytes as the NUL-terminated
 * literal `lit`. */
static bool jsondec_streql(upb_StringView str, const char* lit) {
  const size_t lit_len = strlen(lit);
  if (str.size != lit_len) return false;
  return memcmp(str.data, lit, str.size) == 0;
}
73 
jsondec_isnullvalue(const upb_FieldDef * f)74 static bool jsondec_isnullvalue(const upb_FieldDef* f) {
75   return upb_FieldDef_CType(f) == kUpb_CType_Enum &&
76          strcmp(upb_EnumDef_FullName(upb_FieldDef_EnumSubDef(f)),
77                 "google.protobuf.NullValue") == 0;
78 }
79 
jsondec_isvalue(const upb_FieldDef * f)80 static bool jsondec_isvalue(const upb_FieldDef* f) {
81   return (upb_FieldDef_CType(f) == kUpb_CType_Message &&
82           upb_MessageDef_WellKnownType(upb_FieldDef_MessageSubDef(f)) ==
83               kUpb_WellKnown_Value) ||
84          jsondec_isnullvalue(f);
85 }
86 
/* Stores `msg` in d->status, prefixed with the current line and the offset of
 * d->ptr from d->line_begin. */
static void jsondec_seterrmsg(jsondec* d, const char* msg) {
  const int column = (int)(d->ptr - d->line_begin);
  upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: %s", d->line,
                            column, msg);
}
91 
/* Records `msg` as the decode error and abandons parsing by jumping to d->err.
 * Does not return. */
UPB_NORETURN static void jsondec_err(jsondec* d, const char* msg) {
  jsondec_seterrmsg(d, msg);
  UPB_LONGJMP(d->err, 1);
}
96 
97 UPB_PRINTF(2, 3)
jsondec_errf(jsondec * d,const char * fmt,...)98 UPB_NORETURN static void jsondec_errf(jsondec* d, const char* fmt, ...) {
99   va_list argp;
100   upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: ", d->line,
101                             (int)(d->ptr - d->line_begin));
102   va_start(argp, fmt);
103   upb_Status_VAppendErrorFormat(d->status, fmt, argp);
104   va_end(argp);
105   UPB_LONGJMP(d->err, 1);
106 }
107 
108 // Advances d->ptr until the next non-whitespace character or to the end of
109 // the buffer.
jsondec_consumews(jsondec * d)110 static void jsondec_consumews(jsondec* d) {
111   while (d->ptr != d->end) {
112     switch (*d->ptr) {
113       case '\n':
114         d->line++;
115         d->line_begin = d->ptr;
116         /* Fallthrough. */
117       case '\r':
118       case '\t':
119       case ' ':
120         d->ptr++;
121         break;
122       default:
123         return;
124     }
125   }
126 }
127 
128 // Advances d->ptr until the next non-whitespace character. Postcondition that
129 // d->ptr is pointing at a valid non-whitespace character (will err if end of
130 // buffer is reached).
jsondec_skipws(jsondec * d)131 static void jsondec_skipws(jsondec* d) {
132   jsondec_consumews(d);
133   if (d->ptr == d->end) {
134     jsondec_err(d, "Unexpected EOF");
135   }
136 }
137 
/* Consumes `ch` if it is the next input character.  Returns whether it was
 * consumed; at end of input nothing is consumed. */
static bool jsondec_tryparsech(jsondec* d, char ch) {
  if (d->ptr != d->end && *d->ptr == ch) {
    d->ptr++;
    return true;
  }
  return false;
}
143 
/* Consumes the exact literal `lit` from the input, or raises a decode error if
 * the remaining input does not start with it. */
static void jsondec_parselit(jsondec* d, const char* lit) {
  const size_t lit_len = strlen(lit);
  const size_t remaining = d->end - d->ptr;
  const bool matches =
      remaining >= lit_len && memcmp(d->ptr, lit, lit_len) == 0;

  if (!matches) jsondec_errf(d, "Expected: '%s'", lit);
  d->ptr += lit_len;
}
152 
/* Skips whitespace, then requires and consumes the character `ch`. */
static void jsondec_wsch(jsondec* d, char ch) {
  jsondec_skipws(d);
  if (jsondec_tryparsech(d, ch)) return;
  jsondec_errf(d, "Expected: '%c'", ch);
}
159 
/* Each of these consumes the corresponding JSON keyword at d->ptr, raising a
 * decode error if the input does not match.  No whitespace is skipped. */
static void jsondec_true(jsondec* d) { jsondec_parselit(d, "true"); }
static void jsondec_false(jsondec* d) { jsondec_parselit(d, "false"); }
static void jsondec_null(jsondec* d) { jsondec_parselit(d, "null"); }
163 
/* Skips whitespace and consumes the ':' separating an object key from its
 * value. */
static void jsondec_entrysep(jsondec* d) { jsondec_wsch(d, ':'); }
168 
/* Classifies the JSON value starting at d->ptr by its first character and
 * returns the matching JD_* constant.  Nothing is consumed and no whitespace
 * is skipped.  End of input or an unrecognized character is a decode error. */
static int jsondec_rawpeek(jsondec* d) {
  if (d->ptr == d->end) jsondec_err(d, "Unexpected EOF");

  const char ch = *d->ptr;
  if (ch == '-' || (ch >= '0' && ch <= '9')) return JD_NUMBER;

  switch (ch) {
    case '{':
      return JD_OBJECT;
    case '[':
      return JD_ARRAY;
    case '"':
      return JD_STRING;
    case 't':
      return JD_TRUE;
    case 'f':
      return JD_FALSE;
    case 'n':
      return JD_NULL;
    default:
      jsondec_errf(d, "Unexpected character: '%c'", ch);
  }
}
203 
204 /* JSON object/array **********************************************************/
205 
206 /* These are used like so:
207  *
208  * jsondec_objstart(d);
209  * while (jsondec_objnext(d)) {
210  *   ...
211  * }
212  * jsondec_objend(d) */
213 
/* Skips whitespace and returns the JD_* kind of the next value without
 * consuming it.  End of input is a decode error. */
static int jsondec_peek(jsondec* d) {
  jsondec_skipws(d);
  return jsondec_rawpeek(d);
}
218 
/* Enters one nesting level: spends one unit of d->depth (erroring once it
 * drops below zero) and marks the new sequence as having no elements yet. */
static void jsondec_push(jsondec* d) {
  d->depth--;
  if (d->depth < 0) jsondec_err(d, "Recursion limit exceeded");
  d->is_first = true;
}
225 
/* Advances to the next element of an array or object.  Returns false (without
 * consuming anything) when the next non-whitespace character is `end_ch`.
 * Otherwise consumes the ',' separator, which is required before every element
 * except the first, and returns true. */
static bool jsondec_seqnext(jsondec* d, char end_ch) {
  const bool need_comma = !d->is_first;
  d->is_first = false;

  jsondec_skipws(d);
  if (*d->ptr == end_ch) return false;

  if (need_comma) jsondec_parselit(d, ",");
  return true;
}
234 
/* Begins parsing an array: enters a nesting level, then consumes '['. */
static void jsondec_arrstart(jsondec* d) {
  jsondec_push(d);
  jsondec_wsch(d, '[');
}
239 
/* Finishes parsing an array: restores the depth budget taken by
 * jsondec_arrstart(), then consumes ']'. */
static void jsondec_arrend(jsondec* d) {
  d->depth++;
  jsondec_wsch(d, ']');
}
244 
/* Returns true if another array element follows; see jsondec_seqnext(). */
static bool jsondec_arrnext(jsondec* d) { return jsondec_seqnext(d, ']'); }
246 
/* Begins parsing an object: enters a nesting level, then consumes '{'. */
static void jsondec_objstart(jsondec* d) {
  jsondec_push(d);
  jsondec_wsch(d, '{');
}
251 
/* Finishes parsing an object: restores the depth budget taken by
 * jsondec_objstart(), then consumes '}'. */
static void jsondec_objend(jsondec* d) {
  d->depth++;
  jsondec_wsch(d, '}');
}
256 
/* Returns true if another object member follows, leaving d->ptr at its key.
 * A member whose key is not a string is a decode error. */
static bool jsondec_objnext(jsondec* d) {
  if (!jsondec_seqnext(d, '}')) return false;
  if (jsondec_peek(d) == JD_STRING) return true;
  jsondec_err(d, "Object must start with string");
}
264 
265 /* JSON number ****************************************************************/
266 
/* Consumes a (possibly empty) run of ASCII digits.  Returns true iff at least
 * one digit was consumed. */
static bool jsondec_tryskipdigits(jsondec* d) {
  const char* const first = d->ptr;
  while (d->ptr < d->end && *d->ptr >= '0' && *d->ptr <= '9') d->ptr++;
  return d->ptr != first;
}
279 
/* Consumes one or more ASCII digits; zero digits is a decode error. */
static void jsondec_skipdigits(jsondec* d) {
  if (jsondec_tryskipdigits(d)) return;
  jsondec_err(d, "Expected one or more digits");
}
285 
/* Parses the JSON number at d->ptr and returns it as a double.
 *
 * The JSON number grammar is validated by hand first; the validated text is
 * then handed to strtod(), which accepts a superset of JSON syntax.  Tokens
 * longer than 63 bytes and values outside [-DBL_MAX, DBL_MAX] are decode
 * errors. */
static double jsondec_number(jsondec* d) {
  const char* const start = d->ptr;

  UPB_ASSERT(jsondec_rawpeek(d) == JD_NUMBER);

  /* Optional sign. */
  if (*d->ptr == '-') d->ptr++;

  /* Integer part: either a lone '0' or a digit run that does not start with
   * '0'. */
  if (!jsondec_tryparsech(d, '0')) {
    jsondec_skipdigits(d);
  } else if (jsondec_tryskipdigits(d)) {
    jsondec_err(d, "number cannot have leading zero");
  }

  /* Optional fraction.  jsondec_tryparsech() returns false at end of input. */
  if (jsondec_tryparsech(d, '.')) jsondec_skipdigits(d);

  /* Optional exponent. */
  if (d->ptr != d->end && (*d->ptr == 'e' || *d->ptr == 'E')) {
    d->ptr++;
    if (d->ptr == d->end) jsondec_err(d, "Unexpected EOF in number");
    if (*d->ptr == '+' || *d->ptr == '-') d->ptr++;
    jsondec_skipdigits(d);
  }

  errno = 0;

  /* strtod() requires NUL-terminated input, so copy the token into a scratch
   * buffer first. */
  char scratch[64];
  const ptrdiff_t len = d->ptr - start;
  if (len > (ptrdiff_t)(sizeof(scratch) - 1)) {
    jsondec_err(d, "excessively long number");
  }
  memcpy(scratch, start, len);
  scratch[len] = '\0';

  char* parsed_end;
  const double val = strtod(scratch, &parsed_end);
  UPB_ASSERT(parsed_end - scratch == len);

  /* errno == ERANGE is intentionally not treated as an error here: the
   * min/max-val conformance tests currently fail if it is.  Whether the tests
   * or strtod() are at fault still needs investigation. */
  if (val > DBL_MAX || val < -DBL_MAX) {
    jsondec_err(d, "Number out of range");
  }

  return val;
}
354 
355 /* JSON string ****************************************************************/
356 
/* Consumes the character following a backslash and returns the byte it stands
 * for.  The caller must ensure d->ptr is not at end of input.  Handles every
 * single-character escape; "\u" is handled by the caller. */
static char jsondec_escape(jsondec* d) {
  const char code = *d->ptr++;
  switch (code) {
    case '"':
    case '\\':
    case '/':
      /* These escape to themselves. */
      return code;
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    default:
      jsondec_err(d, "Invalid escape char");
  }
}
379 
/* Consumes exactly four hex digits (either case) and returns their value.
 * Fewer than four remaining bytes, or a non-hex character, is a decode
 * error. */
static uint32_t jsondec_codepoint(jsondec* d) {
  if (d->end - d->ptr < 4) jsondec_err(d, "EOF inside string");

  uint32_t cp = 0;
  for (int i = 0; i < 4; i++) {
    const char ch = *d->ptr++;
    uint32_t digit;
    if (ch >= '0' && ch <= '9') {
      digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      digit = ch - 'a' + 10;
    } else if (ch >= 'A' && ch <= 'F') {
      digit = ch - 'A' + 10;
    } else {
      jsondec_err(d, "Invalid hex digit");
    }
    cp = (cp << 4) | digit;
  }

  return cp;
}
405 
406 /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
jsondec_unicode(jsondec * d,char * out)407 static size_t jsondec_unicode(jsondec* d, char* out) {
408   uint32_t cp = jsondec_codepoint(d);
409   if (upb_Unicode_IsHigh(cp)) {
410     /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
411     jsondec_parselit(d, "\\u");
412     uint32_t low = jsondec_codepoint(d);
413     if (!upb_Unicode_IsLow(low)) jsondec_err(d, "Invalid low surrogate");
414     cp = upb_Unicode_FromPair(cp, low);
415   } else if (upb_Unicode_IsLow(cp)) {
416     jsondec_err(d, "Unpaired low surrogate");
417   }
418 
419   /* Write to UTF-8 */
420   int bytes = upb_Unicode_ToUTF8(cp, out);
421   if (bytes == 0) jsondec_err(d, "Invalid codepoint");
422   return bytes;
423 }
424 
/* Grows the arena-allocated string buffer described by [*buf, *buf_end) to
 * twice its capacity (minimum 8 bytes).  `*end`, the write cursor, keeps its
 * offset from the start.  All three pointers are updated to refer to the new
 * allocation.  Allocation failure is a decode error. */
static void jsondec_resize(jsondec* d, char** buf, char** end, char** buf_end) {
  const size_t used = *end - *buf;
  const size_t old_cap = *buf_end - *buf;
  const size_t new_cap = UPB_MAX(8, 2 * old_cap);

  *buf = upb_Arena_Realloc(d->arena, *buf, used, new_cap);
  if (*buf == NULL) jsondec_err(d, "Out of memory");

  *buf_end = *buf + new_cap;
  *end = *buf + used;
}
436 
jsondec_string(jsondec * d)437 static upb_StringView jsondec_string(jsondec* d) {
438   char* buf = NULL;
439   char* end = NULL;
440   char* buf_end = NULL;
441 
442   jsondec_skipws(d);
443 
444   if (*d->ptr++ != '"') {
445     jsondec_err(d, "Expected string");
446   }
447 
448   while (d->ptr < d->end) {
449     char ch = *d->ptr++;
450 
451     if (end == buf_end) {
452       jsondec_resize(d, &buf, &end, &buf_end);
453     }
454 
455     switch (ch) {
456       case '"': {
457         upb_StringView ret;
458         ret.data = buf;
459         ret.size = end - buf;
460         *end = '\0'; /* Needed for possible strtod(). */
461         return ret;
462       }
463       case '\\':
464         if (d->ptr == d->end) goto eof;
465         if (*d->ptr == 'u') {
466           d->ptr++;
467           if (buf_end - end < 4) {
468             /* Allow space for maximum-sized codepoint (4 bytes). */
469             jsondec_resize(d, &buf, &end, &buf_end);
470           }
471           end += jsondec_unicode(d, end);
472         } else {
473           *end++ = jsondec_escape(d);
474         }
475         break;
476       default:
477         if ((unsigned char)ch < 0x20) {
478           jsondec_err(d, "Invalid char in JSON string");
479         }
480         *end++ = ch;
481         break;
482     }
483   }
484 
485 eof:
486   jsondec_err(d, "EOF inside string");
487 }
488 
/* Parses and discards one complete JSON value of any kind, recursing into
 * arrays and objects.  The value is still fully syntax-checked. */
static void jsondec_skipval(jsondec* d) {
  const int kind = jsondec_peek(d);

  switch (kind) {
    case JD_NULL:
      jsondec_null(d);
      return;
    case JD_TRUE:
      jsondec_true(d);
      return;
    case JD_FALSE:
      jsondec_false(d);
      return;
    case JD_NUMBER:
      jsondec_number(d);
      return;
    case JD_STRING:
      jsondec_string(d);
      return;
    case JD_ARRAY:
      jsondec_arrstart(d);
      while (jsondec_arrnext(d)) jsondec_skipval(d);
      jsondec_arrend(d);
      return;
    case JD_OBJECT:
      jsondec_objstart(d);
      while (jsondec_objnext(d)) {
        jsondec_string(d); /* Key. */
        jsondec_entrysep(d);
        jsondec_skipval(d); /* Value. */
      }
      jsondec_objend(d);
      return;
  }
}
524 
525 /* Base64 decoding for bytes fields. ******************************************/
526 
/* Maps one base64 character to its 6-bit value.
 *
 * Both the standard alphabet ('+', '/') and the URL-safe alphabet ('-', '_')
 * are accepted.  Any other byte, including '=', yields (unsigned int)-1, so
 * the high bit of the result is set and survives the shifts and ORs the
 * callers apply; they detect invalid input by testing the combined value for
 * negativity. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Indexed by byte value.  static so the table is not rebuilt on each call. */
  static const signed char table[256] = {
      /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x20: '+' is 0x2B, '-' is 0x2D, '/' is 0x2F */
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
      /* 0x30: '0'..'9' */
      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
      /* 0x40: 'A'..'O' start at 0x41 */
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
      /* 0x50: 'P'..'Z', then '_' at 0x5F */
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
      /* 0x60: 'a'..'o' start at 0x61 */
      -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
      /* 0x70: 'p'..'z' */
      41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
      /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};

  /* Index by the byte value, not by the (possibly signed) char.  Where char is
   * signed, converting a byte >= 0x80 straight to unsigned gives a huge index
   * and reads far outside the table.
   *
   * The signed table entry is converted to unsigned on return, so -1 becomes
   * a value with the high bit set. */
  return table[(unsigned char)ch];
}
571 
/* Decodes a trailing base64 group of 2 or 3 characters in [ptr, end) into 1 or
 * 2 bytes at `out`, returning the advanced output pointer.  Any other group
 * length, or an invalid character in the group, is a decode error. */
static char* jsondec_partialbase64(jsondec* d, const char* ptr, const char* end,
                                   char* out) {
  const ptrdiff_t remaining = end - ptr;
  int32_t val = -1; /* Stays negative unless a supported length is decoded. */

  if (remaining == 2) {
    val = jsondec_base64_tablelookup(ptr[0]) << 18 |
          jsondec_base64_tablelookup(ptr[1]) << 12;
    out[0] = val >> 16;
    out += 1;
  } else if (remaining == 3) {
    val = jsondec_base64_tablelookup(ptr[0]) << 18 |
          jsondec_base64_tablelookup(ptr[1]) << 12 |
          jsondec_base64_tablelookup(ptr[2]) << 6;
    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out += 2;
  }

  if (val < 0) jsondec_err(d, "Corrupt base64");
  return out;
}
599 
/* Base64-decodes `str` in place and returns the decoded length in bytes.
 *
 * Decoding in place is safe because the buffer is freshly allocated (it does
 * not alias the input) and every 4 input bytes shrink to 3 output bytes.
 * Trailing '=' padding is accepted but not required. */
static size_t jsondec_base64(jsondec* d, upb_StringView str) {
  char* out = (char*)str.data;
  const char* in = str.data;
  const char* in_end = in + str.size;
  /* Round down to multiple of 4. */
  const char* const quads_end = in + (str.size & -4);

  while (in < quads_end) {
    const int val = jsondec_base64_tablelookup(in[0]) << 18 |
                    jsondec_base64_tablelookup(in[1]) << 12 |
                    jsondec_base64_tablelookup(in[2]) << 6 |
                    jsondec_base64_tablelookup(in[3]) << 0;

    if (val < 0) {
      /* Junk chars or padding.  If this is the final group and it ends in
       * '=', trim the padding so the remainder is handled below. */
      if (in_end - in == 4 && in[3] == '=') {
        in_end -= (in[2] == '=') ? 2 : 1;
      }
      break;
    }

    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out[2] = val & 0xff;

    in += 4;
    out += 3;
  }

  if (in < in_end) {
    /* Process remaining chars. We do not require padding. */
    out = jsondec_partialbase64(d, in, in_end, out);
  }

  return out - str.data;
}
638 
639 /* Low-level integer parsing **************************************************/
640 
/* Wrapper around upb_BufToUint64() that turns its NULL return into an
 * "Integer overflow" decode error.  Returns upb_BufToUint64()'s result. */
static const char* jsondec_buftouint64(jsondec* d, const char* ptr,
                                       const char* end, uint64_t* val) {
  const char* stop = upb_BufToUint64(ptr, end, val);
  if (stop) return stop;
  jsondec_err(d, "Integer overflow");
}
647 
/* Wrapper around upb_BufToInt64() that turns its NULL return into an
 * "Integer overflow" decode error.  Returns upb_BufToInt64()'s result. */
static const char* jsondec_buftoint64(jsondec* d, const char* ptr,
                                      const char* end, int64_t* val,
                                      bool* is_neg) {
  const char* stop = upb_BufToInt64(ptr, end, val, is_neg);
  if (stop) return stop;
  jsondec_err(d, "Integer overflow");
}
655 
/* Parses all of `str` as an unsigned 64-bit integer.  It is a decode error if
 * the parse stops before the end of the string. */
static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {
  const char* const limit = str.data + str.size;
  uint64_t parsed;

  const char* stop = jsondec_buftouint64(d, str.data, limit, &parsed);
  if (stop != limit) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
664 
/* Parses all of `str` as a signed 64-bit integer.  It is a decode error if the
 * parse stops before the end of the string. */
static int64_t jsondec_strtoint64(jsondec* d, upb_StringView str) {
  const char* const limit = str.data + str.size;
  int64_t parsed;

  const char* stop = jsondec_buftoint64(d, str.data, limit, &parsed, NULL);
  if (stop != limit) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
673 
/* If `str` is empty, downgrades the decode result to
 * kUpb_JsonDecodeResult_OkWithEmptyStringNumerics and records a warning naming
 * field `f` in d->status.  Parsing continues either way. */
static void jsondec_checkempty(jsondec* d, upb_StringView str,
                               const upb_FieldDef* f) {
  if (str.size == 0) {
    d->result = kUpb_JsonDecodeResult_OkWithEmptyStringNumerics;
    upb_Status_SetErrorFormat(d->status,
                              "Empty string is not a valid number (field: %s). "
                              "This will be an error in a future version.",
                              upb_FieldDef_FullName(f));
  }
}
683 
684 /* Primitive value types ******************************************************/
685 
686 /* Parse INT32 or INT64 value. */
jsondec_int(jsondec * d,const upb_FieldDef * f)687 static upb_MessageValue jsondec_int(jsondec* d, const upb_FieldDef* f) {
688   upb_MessageValue val;
689 
690   switch (jsondec_peek(d)) {
691     case JD_NUMBER: {
692       double dbl = jsondec_number(d);
693       if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
694         jsondec_err(d, "JSON number is out of range.");
695       }
696       val.int64_val = dbl; /* must be guarded, overflow here is UB */
697       if (val.int64_val != dbl) {
698         jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
699                      val.int64_val);
700       }
701       break;
702     }
703     case JD_STRING: {
704       upb_StringView str = jsondec_string(d);
705       jsondec_checkempty(d, str, f);
706       val.int64_val = jsondec_strtoint64(d, str);
707       break;
708     }
709     default:
710       jsondec_err(d, "Expected number or string");
711   }
712 
713   if (upb_FieldDef_CType(f) == kUpb_CType_Int32 ||
714       upb_FieldDef_CType(f) == kUpb_CType_Enum) {
715     if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
716       jsondec_err(d, "Integer out of range.");
717     }
718     val.int32_val = (int32_t)val.int64_val;
719   }
720 
721   return val;
722 }
723 
724 /* Parse UINT32 or UINT64 value. */
jsondec_uint(jsondec * d,const upb_FieldDef * f)725 static upb_MessageValue jsondec_uint(jsondec* d, const upb_FieldDef* f) {
726   upb_MessageValue val;
727 
728   switch (jsondec_peek(d)) {
729     case JD_NUMBER: {
730       double dbl = jsondec_number(d);
731       if (dbl > 18446744073709549568.0 || dbl < 0) {
732         jsondec_err(d, "JSON number is out of range.");
733       }
734       val.uint64_val = dbl; /* must be guarded, overflow here is UB */
735       if (val.uint64_val != dbl) {
736         jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
737                      val.uint64_val);
738       }
739       break;
740     }
741     case JD_STRING: {
742       upb_StringView str = jsondec_string(d);
743       jsondec_checkempty(d, str, f);
744       val.uint64_val = jsondec_strtouint64(d, str);
745       break;
746     }
747     default:
748       jsondec_err(d, "Expected number or string");
749   }
750 
751   if (upb_FieldDef_CType(f) == kUpb_CType_UInt32) {
752     if (val.uint64_val > UINT32_MAX) {
753       jsondec_err(d, "Integer out of range.");
754     }
755     val.uint32_val = (uint32_t)val.uint64_val;
756   }
757 
758   return val;
759 }
760 
761 /* Parse DOUBLE or FLOAT value. */
jsondec_double(jsondec * d,const upb_FieldDef * f)762 static upb_MessageValue jsondec_double(jsondec* d, const upb_FieldDef* f) {
763   upb_StringView str;
764   upb_MessageValue val;
765 
766   switch (jsondec_peek(d)) {
767     case JD_NUMBER:
768       val.double_val = jsondec_number(d);
769       break;
770     case JD_STRING:
771       str = jsondec_string(d);
772       if (str.size == 0) {
773         jsondec_checkempty(d, str, f);
774         val.double_val = 0.0;
775       } else if (jsondec_streql(str, "NaN")) {
776         val.double_val = NAN;
777       } else if (jsondec_streql(str, "Infinity")) {
778         val.double_val = INFINITY;
779       } else if (jsondec_streql(str, "-Infinity")) {
780         val.double_val = -INFINITY;
781       } else {
782         char* end;
783         val.double_val = strtod(str.data, &end);
784         if (end != str.data + str.size) {
785           d->result = kUpb_JsonDecodeResult_OkWithEmptyStringNumerics;
786           upb_Status_SetErrorFormat(
787               d->status,
788               "Non-number characters in quoted number (field: %s). "
789               "This will be an error in a future version.",
790               upb_FieldDef_FullName(f));
791         }
792       }
793       break;
794     default:
795       jsondec_err(d, "Expected number or string");
796   }
797 
798   if (upb_FieldDef_CType(f) == kUpb_CType_Float) {
799     float f = val.double_val;
800     if (val.double_val != INFINITY && val.double_val != -INFINITY) {
801       if (f == INFINITY || f == -INFINITY) jsondec_err(d, "Float out of range");
802     }
803     val.float_val = f;
804   }
805 
806   return val;
807 }
808 
809 /* Parse STRING or BYTES value. */
jsondec_strfield(jsondec * d,const upb_FieldDef * f)810 static upb_MessageValue jsondec_strfield(jsondec* d, const upb_FieldDef* f) {
811   upb_MessageValue val;
812   val.str_val = jsondec_string(d);
813   if (upb_FieldDef_CType(f) == kUpb_CType_Bytes) {
814     val.str_val.size = jsondec_base64(d, val.str_val);
815   }
816   return val;
817 }
818 
jsondec_enum(jsondec * d,const upb_FieldDef * f)819 static upb_JsonMessageValue jsondec_enum(jsondec* d, const upb_FieldDef* f) {
820   switch (jsondec_peek(d)) {
821     case JD_STRING: {
822       upb_StringView str = jsondec_string(d);
823       const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
824       const upb_EnumValueDef* ev =
825           upb_EnumDef_FindValueByNameWithSize(e, str.data, str.size);
826       upb_JsonMessageValue val = {.ignore = false};
827       if (ev) {
828         val.value.int32_val = upb_EnumValueDef_Number(ev);
829       } else {
830         if (d->options & upb_JsonDecode_IgnoreUnknown) {
831           val.ignore = true;
832         } else {
833           jsondec_errf(d, "Unknown enumerator: '" UPB_STRINGVIEW_FORMAT "'",
834                        UPB_STRINGVIEW_ARGS(str));
835         }
836       }
837       return val;
838     }
839     case JD_NULL: {
840       if (jsondec_isnullvalue(f)) {
841         upb_JsonMessageValue val = {.ignore = false};
842         jsondec_null(d);
843         val.value.int32_val = 0;
844         return val;
845       }
846     }
847       /* Fallthrough. */
848     default:
849       return (upb_JsonMessageValue){.value = jsondec_int(d, f),
850                                     .ignore = false};
851   }
852 }
853 
jsondec_bool(jsondec * d,const upb_FieldDef * f)854 static upb_MessageValue jsondec_bool(jsondec* d, const upb_FieldDef* f) {
855   bool is_map_key = upb_FieldDef_Number(f) == 1 &&
856                     upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f));
857   upb_MessageValue val;
858 
859   if (is_map_key) {
860     upb_StringView str = jsondec_string(d);
861     if (jsondec_streql(str, "true")) {
862       val.bool_val = true;
863     } else if (jsondec_streql(str, "false")) {
864       val.bool_val = false;
865     } else {
866       jsondec_err(d, "Invalid boolean map key");
867     }
868   } else {
869     switch (jsondec_peek(d)) {
870       case JD_TRUE:
871         val.bool_val = true;
872         jsondec_true(d);
873         break;
874       case JD_FALSE:
875         val.bool_val = false;
876         jsondec_false(d);
877         break;
878       default:
879         jsondec_err(d, "Expected true or false");
880     }
881   }
882 
883   return val;
884 }
885 
886 /* Composite types (array/message/map) ****************************************/
887 
/* Parses a JSON array and appends each element to repeated field `f` of `msg`.
 * Elements flagged `ignore` by jsondec_value() are dropped.  Failure to
 * allocate the array or to grow it is reported as an "Out of memory" decode
 * error rather than being silently ignored. */
static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  upb_Array* arr = upb_Message_Mutable(msg, f, d->arena).array;
  if (!arr) jsondec_err(d, "Out of memory");

  jsondec_arrstart(d);
  while (jsondec_arrnext(d)) {
    upb_JsonMessageValue elem = jsondec_value(d, f);
    if (elem.ignore) continue;
    if (!upb_Array_Append(arr, elem.value, d->arena)) {
      jsondec_err(d, "Out of memory");
    }
  }
  jsondec_arrend(d);
}
901 
/* Parses a JSON object into map field `f` of `msg`.  Each member's key and
 * value are parsed according to fields 1 and 2 of the map-entry message.
 * Entries whose value is flagged `ignore` are dropped.  Failure to allocate
 * the map or to insert into it is reported as an "Out of memory" decode error
 * rather than being silently ignored. */
static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  upb_Map* map = upb_Message_Mutable(msg, f, d->arena).map;
  if (!map) jsondec_err(d, "Out of memory");

  const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1);
  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2);

  jsondec_objstart(d);
  while (jsondec_objnext(d)) {
    upb_JsonMessageValue key, val;
    key = jsondec_value(d, key_f);
    UPB_ASSUME(!key.ignore);  // Map key cannot be enum.
    jsondec_entrysep(d);
    val = jsondec_value(d, val_f);
    if (val.ignore) continue;
    if (!upb_Map_Set(map, key.value, val.value, d->arena)) {
      jsondec_err(d, "Out of memory");
    }
  }
  jsondec_objend(d);
}
922 
/* Decodes the next JSON value into the existing message |msg| of type |m|.
 *
 * Well-known types are routed to jsondec_wellknown(); every other message
 * type is parsed as an ordinary JSON object by jsondec_object(). */
static void jsondec_tomsg(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  if (upb_MessageDef_WellKnownType(m) != kUpb_WellKnown_Unspecified) {
    jsondec_wellknown(d, msg, m);
    return;
  }
  jsondec_object(d, msg, m);
}
932 
jsondec_msg(jsondec * d,const upb_FieldDef * f)933 static upb_MessageValue jsondec_msg(jsondec* d, const upb_FieldDef* f) {
934   const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
935   const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
936   upb_Message* msg = upb_Message_New(layout, d->arena);
937   upb_MessageValue val;
938 
939   jsondec_tomsg(d, msg, m);
940   val.msg_val = msg;
941   return val;
942 }
943 
/* Parses one `"name": value` member of a JSON object into |msg| (type |m|).
 *
 * - A name of the form "[full.name]" is looked up as an extension in
 *   d->symtab; it is an error if the extension extends a message other
 *   than |m|.  Any other name is looked up by JSON name in |m|.
 * - An unknown name is an error unless upb_JsonDecode_IgnoreUnknown is set
 *   in d->options, in which case the value is skipped.
 * - A JSON null is consumed without setting anything unless
 *   jsondec_isvalue(f) holds for the field.
 * - It is an error if the field belongs to a real oneof that already has a
 *   member set in |msg|.
 *
 * d->debug_field points at the field being decoded for the duration of the
 * value parse and is restored afterwards. */
static void jsondec_field(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const upb_FieldDef* f;
  const upb_StringView key = jsondec_string(d);
  jsondec_entrysep(d);

  const bool bracketed = key.size >= 2 && key.data[0] == '[' &&
                         key.data[key.size - 1] == ']';
  if (!bracketed) {
    f = upb_MessageDef_FindByJsonNameWithSize(m, key.data, key.size);
  } else {
    /* Strip the surrounding brackets before the extension lookup. */
    f = upb_DefPool_FindExtensionByNameWithSize(d->symtab, key.data + 1,
                                                key.size - 2);
    if (f != NULL && upb_FieldDef_ContainingType(f) != m) {
      jsondec_errf(
          d, "Extension %s extends message %s, but was seen in message %s",
          upb_FieldDef_FullName(f),
          upb_MessageDef_FullName(upb_FieldDef_ContainingType(f)),
          upb_MessageDef_FullName(m));
    }
  }

  if (f == NULL) {
    const bool tolerate_unknown =
        (d->options & upb_JsonDecode_IgnoreUnknown) != 0;
    if (!tolerate_unknown) {
      jsondec_errf(d, "No such field: " UPB_STRINGVIEW_FORMAT,
                   UPB_STRINGVIEW_ARGS(key));
    }
    jsondec_skipval(d);
    return;
  }

  if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
    /* JSON "null" indicates a default value, so no need to set anything. */
    jsondec_null(d);
    return;
  }

  if (upb_FieldDef_RealContainingOneof(f) &&
      upb_Message_WhichOneofByDef(msg, upb_FieldDef_ContainingOneof(f))) {
    jsondec_err(d, "More than one field for this oneof.");
  }

  const upb_FieldDef* const outer_field = d->debug_field;
  d->debug_field = f;

  if (upb_FieldDef_IsMap(f)) {
    jsondec_map(d, msg, f);
  } else if (upb_FieldDef_IsRepeated(f)) {
    jsondec_array(d, msg, f);
  } else if (upb_FieldDef_IsSubMessage(f)) {
    /* Decode straight into the (possibly newly created) sub-message. */
    upb_Message* const child = upb_Message_Mutable(msg, f, d->arena).msg;
    jsondec_tomsg(d, child, upb_FieldDef_MessageSubDef(f));
  } else {
    const upb_JsonMessageValue scalar = jsondec_value(d, f);
    if (!scalar.ignore) {
      upb_Message_SetFieldByDef(msg, f, scalar.value, d->arena);
    }
  }

  d->debug_field = outer_field;
}
1009 
/* Parses a JSON object into |msg| (type |m|), handing each member to
 * jsondec_field(). */
static void jsondec_object(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  for (jsondec_objstart(d); jsondec_objnext(d);) {
    jsondec_field(d, msg, m);
  }
  jsondec_objend(d);
}
1019 
jsondec_nonenum(jsondec * d,const upb_FieldDef * f)1020 static upb_MessageValue jsondec_nonenum(jsondec* d, const upb_FieldDef* f) {
1021   switch (upb_FieldDef_CType(f)) {
1022     case kUpb_CType_Bool:
1023       return jsondec_bool(d, f);
1024     case kUpb_CType_Float:
1025     case kUpb_CType_Double:
1026       return jsondec_double(d, f);
1027     case kUpb_CType_UInt32:
1028     case kUpb_CType_UInt64:
1029       return jsondec_uint(d, f);
1030     case kUpb_CType_Int32:
1031     case kUpb_CType_Int64:
1032       return jsondec_int(d, f);
1033     case kUpb_CType_String:
1034     case kUpb_CType_Bytes:
1035       return jsondec_strfield(d, f);
1036     case kUpb_CType_Message:
1037       return jsondec_msg(d, f);
1038     case kUpb_CType_Enum:
1039     default:
1040       UPB_UNREACHABLE();
1041   }
1042 }
1043 
jsondec_value(jsondec * d,const upb_FieldDef * f)1044 static upb_JsonMessageValue jsondec_value(jsondec* d, const upb_FieldDef* f) {
1045   if (upb_FieldDef_CType(f) == kUpb_CType_Enum) {
1046     return jsondec_enum(d, f);
1047   } else {
1048     return (upb_JsonMessageValue){.value = jsondec_nonenum(d, f),
1049                                   .ignore = false};
1050   }
1051 }
1052 
1053 /* Well-known types ***********************************************************/
1054 
/* Reads exactly |digits| decimal digits starting at *ptr, optionally followed
 * by the literal string |after| (may be NULL for "nothing expected").
 *
 * On success advances *ptr past the digits and the literal and returns the
 * parsed number; otherwise reports "Malformed timestamp".  The caller must
 * guarantee that digits + strlen(after) bytes are readable at *ptr; this
 * function has no knowledge of the buffer end. */
static int jsondec_tsdigits(jsondec* d, const char** ptr, size_t digits,
                            const char* after) {
  UPB_ASSERT(digits <= 9); /* int can't overflow. */

  const char* const first = *ptr;
  const char* const stop = first + digits;
  const size_t suffix_len = (after != NULL) ? strlen(after) : 0;
  uint64_t parsed;

  /* The digit run must span the whole [first, stop) window. */
  const bool digits_ok = jsondec_buftouint64(d, first, stop, &parsed) == stop;
  if (!digits_ok ||
      (suffix_len != 0 && memcmp(stop, after, suffix_len) != 0)) {
    jsondec_err(d, "Malformed timestamp");
  }

  UPB_ASSERT(parsed < INT_MAX);

  *ptr = stop + suffix_len;
  return (int)parsed;
}
1074 
/* Parses an optional fractional-seconds suffix (".ddd", at most 9 digits) at
 * *ptr and returns it scaled to nanoseconds.
 *
 * If *ptr is at |end| or does not point at '.', returns 0 and leaves *ptr
 * untouched.  Otherwise *ptr is advanced past the digits.  More than nine
 * digits is reported as an error. */
static int jsondec_nanos(jsondec* d, const char** ptr, const char* end) {
  const char* const cur = *ptr;
  if (cur == end || *cur != '.') return 0;

  uint64_t frac = 0;
  const char* const frac_begin = cur + 1;
  const char* const frac_end = jsondec_buftouint64(d, frac_begin, end, &frac);
  const int ndigits = (int)(frac_end - frac_begin);

  if (ndigits > 9) {
    jsondec_err(d, "Too many digits for partial seconds");
  }

  /* Right-pad to nine digits: ".5" -> 500000000. */
  for (int pad = 9 - ndigits; pad > 0; pad--) {
    frac *= 10;
  }
  *ptr = frac_end;

  UPB_ASSERT(frac < INT_MAX);

  return (int)frac;
}
1094 
/* Converts a calendar date to days since the Unix epoch:
 * jsondec_epochdays(1970, 1, 1) == 0, jsondec_epochdays(1969, 12, 31) == -1.
 *
 * The computation uses a March-based year so that the leap day falls at the
 * end of the (shifted) year.  All intermediate arithmetic is deliberately
 * done in uint32_t and relies on unsigned wrap-around. */
int jsondec_epochdays(int y, int m, int d) {
  const uint32_t base_year = 4800; /* Before min year, multiple of 400. */
  const uint32_t shifted_month = m - 3; /* March == 0; wraps when m < 3. */
  /* For m in {1, 2} the subtraction wrapped, so borrow one year. */
  const uint32_t borrow = (shifted_month > (uint32_t)m) ? 1 : 0;
  const uint32_t month_fix = borrow ? 12 : 0;
  const uint32_t year = y + base_year - borrow;
  /* Days elapsed before the start of the shifted month. */
  const uint32_t days_before_month =
      ((shifted_month + month_fix) * 62719 + 769) / 2048;
  const uint32_t leaps = year / 4 - year / 100 + year / 400;
  /* 2472632 is the day count this formula gives for 1970-01-01. */
  return year * 365 + leaps + days_before_month + (d - 1) - 2472632;
}
1106 
/* Converts a broken-down date and time of day to seconds since the Unix
 * epoch.  No time-zone offset is applied here. */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  const int64_t whole_days = (int64_t)jsondec_epochdays(y, m, d) * 86400;
  return whole_days + h * 3600 + min * 60 + s;
}
1110 
/* Parses a google.protobuf.Timestamp from its JSON string form, e.g.
 * "1972-01-01T10:00:20.021Z" or "1972-01-01T10:00:20+08:00", and stores the
 * result in fields 1 (seconds) and 2 (nanos) of |msg|.
 *
 * Errors (reported through jsondec_err):
 *   - "Malformed timestamp" for any syntax problem;
 *   - "Timestamp out of range" if the instant, after applying the UTC
 *     offset, falls outside 0001-01-01T00:00:00Z .. 9999-12-31T23:59:59Z.
 *
 * NOTE(review): the individual components (month, day, hour, ...) are not
 * range-checked here; only the digit counts and separators are. */
static void jsondec_timestamp(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  /* Seconds since the epoch for 0001-01-01T00:00:00Z and
   * 9999-12-31T23:59:59Z respectively. */
  const int64_t min_seconds = INT64_C(-62135596800);
  const int64_t max_seconds = INT64_C(253402300799);
  upb_MessageValue seconds;
  upb_MessageValue nanos;
  upb_StringView str = jsondec_string(d);
  const char* ptr = str.data;
  const char* end = ptr + str.size;

  /* Shortest valid form is "YYYY-MM-DDTHH:MM:SSZ" (20 bytes).  This also
   * guarantees the fixed-width reads below stay inside the string. */
  if (str.size < 20) goto malformed;

  {
    /* 1972-01-01T01:00:00 */
    int year = jsondec_tsdigits(d, &ptr, 4, "-");
    int mon = jsondec_tsdigits(d, &ptr, 2, "-");
    int day = jsondec_tsdigits(d, &ptr, 2, "T");
    int hour = jsondec_tsdigits(d, &ptr, 2, ":");
    int min = jsondec_tsdigits(d, &ptr, 2, ":");
    int sec = jsondec_tsdigits(d, &ptr, 2, NULL);

    seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);
  }

  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  {
    /* [+-]08:00 or Z */
    int ofs_hour = 0;
    int ofs_min = 0;
    bool neg = false;

    if (ptr == end) goto malformed;

    switch (*ptr++) {
      case '-':
        neg = true;
        /* fallthrough */
      case '+':
        /* Exactly "HH:MM" must remain. */
        if ((end - ptr) != 5) goto malformed;
        ofs_hour = jsondec_tsdigits(d, &ptr, 2, ":");
        ofs_min = jsondec_tsdigits(d, &ptr, 2, NULL);
        ofs_min = ((ofs_hour * 60) + ofs_min) * 60;
        /* A local time east of UTC (+) is later than UTC, so subtract. */
        seconds.int64_val += (neg ? ofs_min : -ofs_min);
        break;
      case 'Z':
        if (ptr != end) goto malformed;
        break;
      default:
        goto malformed;
    }
  }

  /* The offset can push an otherwise valid-looking date past either end of
   * the representable range, so check both bounds after applying it. */
  if (seconds.int64_val < min_seconds || seconds.int64_val > max_seconds) {
    jsondec_err(d, "Timestamp out of range");
  }

  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 1),
                            seconds, d->arena);
  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos,
                            d->arena);
  return;

malformed:
  jsondec_err(d, "Malformed timestamp");
}
1176 
/* Parses a google.protobuf.Duration from its JSON string form
 * ("3.000000001s", "3s", "-1.5s", ...) and stores the result in fields 1
 * (seconds) and 2 (nanos) of |msg|.
 *
 * The string must end in a single 's'.  The seconds part must lie within
 * +/- 3652500 days (10000 * 365.25) expressed in seconds.  For a negative
 * duration the nanos are negated as well. */
static void jsondec_duration(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const int64_t limit = (uint64_t)3652500 * 86400;
  upb_MessageValue secs_val;
  upb_MessageValue nanos_val;
  bool negative = false;

  const upb_StringView text = jsondec_string(d);
  const char* const stop = text.data + text.size;

  /* Integer seconds first, then the optional ".fraction". */
  const char* cur =
      jsondec_buftoint64(d, text.data, stop, &secs_val.int64_val, &negative);
  nanos_val.int32_val = jsondec_nanos(d, &cur, stop);

  const bool has_unit = (stop - cur == 1) && (*cur == 's');
  if (!has_unit) {
    jsondec_err(d, "Malformed duration");
  }

  if (secs_val.int64_val < -limit || secs_val.int64_val > limit) {
    jsondec_err(d, "Duration out of range");
  }

  if (negative) {
    nanos_val.int32_val = -nanos_val.int32_val;
  }

  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 1),
                            secs_val, d->arena);
  upb_Message_SetFieldByDef(msg, upb_MessageDef_FindFieldByNumber(m, 2),
                            nanos_val, d->arena);
}
1209 
/* Parses a JSON array into a ListValue message |msg| (type |m|).
 *
 * Field 1 of |m| is the repeated element field; its sub-message type is
 * used for every element.  Each element message is appended to the array
 * first and then filled in by jsondec_wellknownvalue(). */
static void jsondec_listvalue(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const upb_FieldDef* const list_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* const elem_m = upb_FieldDef_MessageSubDef(list_f);
  const upb_MiniTable* const elem_layout = upb_MessageDef_MiniTable(elem_m);
  upb_Array* const list = upb_Message_Mutable(msg, list_f, d->arena).array;

  for (jsondec_arrstart(d); jsondec_arrnext(d);) {
    upb_Message* const elem = upb_Message_New(elem_layout, d->arena);
    upb_MessageValue boxed;
    boxed.msg_val = elem;
    upb_Array_Append(list, boxed, d->arena);
    jsondec_wellknownvalue(d, elem, elem_m);
  }
  jsondec_arrend(d);
}
1228 
/* Parses a JSON object into a Struct message |msg| (type |m|).
 *
 * Field 1 of |m| is the map field; field 2 of its entry type gives the
 * message type used for every member value.  Each member is inserted into
 * the map under its key first, and the value message is then filled in by
 * jsondec_wellknownvalue(). */
static void jsondec_struct(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const upb_FieldDef* const map_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* const entry_def = upb_FieldDef_MessageSubDef(map_f);
  const upb_FieldDef* const entry_value_f =
      upb_MessageDef_FindFieldByNumber(entry_def, 2);
  const upb_MessageDef* const member_m =
      upb_FieldDef_MessageSubDef(entry_value_f);
  const upb_MiniTable* const member_layout = upb_MessageDef_MiniTable(member_m);
  upb_Map* const members = upb_Message_Mutable(msg, map_f, d->arena).map;

  for (jsondec_objstart(d); jsondec_objnext(d);) {
    upb_Message* const member = upb_Message_New(member_layout, d->arena);
    upb_MessageValue map_key;
    upb_MessageValue map_val;

    map_key.str_val = jsondec_string(d);
    map_val.msg_val = member;
    upb_Map_Set(members, map_key, map_val, d->arena);

    jsondec_entrysep(d);
    jsondec_wellknownvalue(d, member, member_m);
  }
  jsondec_objend(d);
}
1251 
/* Parses any JSON value into a Value message |msg| (type |m|), choosing the
 * member field by the kind of JSON token that comes next:
 *
 *   null    -> field 1 (null_value, set to 0)
 *   number  -> field 2 (number_value)
 *   string  -> field 3 (string_value)
 *   boolean -> field 4 (bool_value)
 *   object  -> field 5 (struct_value), parsed by jsondec_struct()
 *   array   -> field 6 (list_value), parsed by jsondec_listvalue() */
static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
                                   const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const int token = jsondec_peek(d);

  /* Composite cases: upb_Message_Mutable() already attaches the sub-message
   * to |msg|, so nothing further needs to be set afterwards. */
  if (token == JD_OBJECT) {
    /* Struct struct_value = 5; */
    const upb_FieldDef* const struct_f = upb_MessageDef_FindFieldByNumber(m, 5);
    upb_Message* const child = upb_Message_Mutable(msg, struct_f, d->arena).msg;
    jsondec_struct(d, child, upb_FieldDef_MessageSubDef(struct_f));
    return;
  }
  if (token == JD_ARRAY) {
    /* ListValue list_value = 6; */
    const upb_FieldDef* const list_f = upb_MessageDef_FindFieldByNumber(m, 6);
    upb_Message* const child = upb_Message_Mutable(msg, list_f, d->arena).msg;
    jsondec_listvalue(d, child, upb_FieldDef_MessageSubDef(list_f));
    return;
  }

  /* Scalar cases: pick the field, parse the token, then set it below. */
  const upb_FieldDef* target;
  upb_MessageValue scalar;

  switch (token) {
    case JD_NULL:
      /* NullValue null_value = 1; */
      target = upb_MessageDef_FindFieldByNumber(m, 1);
      scalar.int32_val = 0;
      jsondec_null(d);
      break;
    case JD_NUMBER:
      /* double number_value = 2; */
      target = upb_MessageDef_FindFieldByNumber(m, 2);
      scalar.double_val = jsondec_number(d);
      break;
    case JD_STRING:
      /* string string_value = 3; */
      target = upb_MessageDef_FindFieldByNumber(m, 3);
      scalar.str_val = jsondec_string(d);
      break;
    case JD_TRUE:
      /* bool bool_value = 4; */
      target = upb_MessageDef_FindFieldByNumber(m, 4);
      scalar.bool_val = true;
      jsondec_true(d);
      break;
    case JD_FALSE:
      /* bool bool_value = 4; */
      target = upb_MessageDef_FindFieldByNumber(m, 4);
      scalar.bool_val = false;
      jsondec_false(d);
      break;
    default:
      UPB_UNREACHABLE();
  }

  upb_Message_SetFieldByDef(msg, target, scalar, d->arena);
}
1307 
jsondec_mask(jsondec * d,const char * buf,const char * end)1308 static upb_StringView jsondec_mask(jsondec* d, const char* buf,
1309                                    const char* end) {
1310   /* FieldMask fields grow due to inserted '_' characters, so we can't do the
1311    * transform in place. */
1312   const char* ptr = buf;
1313   upb_StringView ret;
1314   char* out;
1315 
1316   ret.size = end - ptr;
1317   while (ptr < end) {
1318     ret.size += (*ptr >= 'A' && *ptr <= 'Z');
1319     ptr++;
1320   }
1321 
1322   out = upb_Arena_Malloc(d->arena, ret.size);
1323   ptr = buf;
1324   ret.data = out;
1325 
1326   while (ptr < end) {
1327     char ch = *ptr++;
1328     if (ch >= 'A' && ch <= 'Z') {
1329       *out++ = '_';
1330       *out++ = ch + 32;
1331     } else if (ch == '_') {
1332       jsondec_err(d, "field mask may not contain '_'");
1333     } else {
1334       *out++ = ch;
1335     }
1336   }
1337 
1338   return ret;
1339 }
1340 
/* Parses a FieldMask from its JSON string form: a comma-separated list of
 * paths.  Each piece is converted by jsondec_mask() and appended to the
 * repeated field number 1 of |msg|.
 *
 * An empty string yields no paths, and a trailing comma does not produce a
 * trailing empty path. */
static void jsondec_fieldmask(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  /* repeated string paths = 1; */
  const upb_FieldDef* const paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_Array* const paths = upb_Message_Mutable(msg, paths_f, d->arena).array;
  const upb_StringView text = jsondec_string(d);
  const char* const stop = text.data + text.size;

  for (const char* cur = text.data; cur < stop;) {
    const char* const comma = memchr(cur, ',', stop - cur);
    /* Without a further comma, the piece runs to the end of the string. */
    const char* const piece_end = comma ? comma : stop;
    upb_MessageValue path;

    path.str_val = jsondec_mask(d, cur, piece_end);
    cur = comma ? comma + 1 : stop;
    upb_Array_Append(paths, path, d->arena);
  }
}
1364 
/* Parses one member (other than "@type") of a JSON-encoded Any into the
 * payload message |msg| of type |m|.
 *
 * For well-known types:  {"@type": "[well-known type]", "value": <X>}
 *   where <X> is whatever encoding the WKT normally uses; the member key
 *   must be exactly "value".
 * For regular types:     {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
 *   where f1, f2, etc. are the normal fields of this type. */
static void jsondec_anyfield(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  if (upb_MessageDef_WellKnownType(m) != kUpb_WellKnown_Unspecified) {
    const upb_StringView key = jsondec_string(d);
    jsondec_entrysep(d);
    if (!jsondec_streql(key, "value")) {
      jsondec_err(d, "Key for well-known type must be 'value'");
    }
    jsondec_wellknown(d, msg, m);
    return;
  }

  jsondec_field(d, msg, m);
}
1383 
/* Parses the "@type" string of an Any, stores it in field 1 (type_url) of
 * |msg|, and returns the message definition it names.
 *
 * The type name is everything after the last '/'.  It is an error if the
 * URL has no '/', if the only '/' is its first character, or if the named
 * type is not present in d->symtab. */
static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
                                             const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const upb_FieldDef* const url_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_StringView url = jsondec_string(d);
  const char* const begin = url.data;
  const char* const end = begin + url.size;
  upb_MessageValue url_val;

  url_val.str_val = url;
  upb_Message_SetFieldByDef(msg, url_f, url_val, d->arena);

  /* Find message name after the last '/' */
  const char* slash = end;
  while (slash > begin) {
    slash--;
    if (*slash == '/') break;
  }

  /* slash == begin: no '/' at all, or nothing before it.
   * slash == end:   the URL was empty. */
  if (slash == begin || slash == end) {
    jsondec_err(d, "Type url must have at least one '/' and non-empty host");
  }

  const char* const type_name = slash + 1;
  const upb_MessageDef* const found = upb_DefPool_FindMessageByNameWithSize(
      d->symtab, type_name, end - type_name);

  if (found == NULL) {
    jsondec_err(d, "Type was not found");
  }

  return found;
}
1414 
/* Parses a JSON-encoded google.protobuf.Any into |msg| (type |m|).
 *
 * The payload type is only known once "@type" has been seen, and "@type" is
 * not required to be the first member.  Members that precede it are
 * therefore skipped on the first pass, then copied into a temporary buffer
 * and re-parsed once the type is known; the members after "@type" are
 * parsed in place.  The decoded payload is finally serialized with
 * upb_Encode() and stored in field 2 (value).
 *
 * Allocation failures and a failing upb_Encode() are reported as decode
 * errors through jsondec_err(). */
static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  /* string type_url = 1;
   * bytes value = 2; */
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
  upb_Message* any_msg;
  const upb_MessageDef* any_m = NULL;
  const char* pre_type_data = NULL;
  const char* pre_type_end = NULL;
  upb_MessageValue encoded;

  jsondec_objstart(d);

  /* Scan looking for "@type", which is not necessarily first. */
  while (!any_m && jsondec_objnext(d)) {
    const char* start = d->ptr;
    upb_StringView name = jsondec_string(d);
    jsondec_entrysep(d);
    if (jsondec_streql(name, "@type")) {
      any_m = jsondec_typeurl(d, msg, m);
      if (pre_type_data) {
        /* Walk back from the "@type" key to the comma that separates it
         * from the preceding member; that comma ends the skipped region. */
        pre_type_end = start;
        while (*pre_type_end != ',') pre_type_end--;
      }
    } else {
      /* Remember where the first skipped member begins. */
      if (!pre_type_data) pre_type_data = start;
      jsondec_skipval(d);
    }
  }

  if (!any_m) {
    jsondec_err(d, "Any object didn't contain a '@type' field");
  }

  const upb_MiniTable* any_layout = upb_MessageDef_MiniTable(any_m);
  any_msg = upb_Message_New(any_layout, d->arena);
  if (!any_msg) {
    jsondec_err(d, "Out of memory");
  }

  if (pre_type_data) {
    /* Replay the members seen before "@type": copy them and replace the
     * trailing comma with '}' so they form the tail of an object. */
    size_t len = pre_type_end - pre_type_data + 1;
    char* tmp = upb_Arena_Malloc(d->arena, len);
    const char* saved_ptr = d->ptr;
    const char* saved_end = d->end;
    if (!tmp) {
      jsondec_err(d, "Out of memory");
    }
    memcpy(tmp, pre_type_data, len - 1);
    tmp[len - 1] = '}';
    d->ptr = tmp;
    d->end = tmp + len;
    d->is_first = true;
    while (jsondec_objnext(d)) {
      jsondec_anyfield(d, any_msg, any_m);
    }
    d->ptr = saved_ptr;
    d->end = saved_end;
  }

  /* Members that follow "@type" in the original input. */
  while (jsondec_objnext(d)) {
    jsondec_anyfield(d, any_msg, any_m);
  }

  jsondec_objend(d);

  upb_EncodeStatus status =
      upb_Encode(any_msg, any_layout, 0, d->arena,
                 (char**)&encoded.str_val.data, &encoded.str_val.size);
  /* Fail the decode rather than storing an undefined buffer; an assert
   * alone would vanish in builds where UPB_ASSERT is compiled out. */
  if (status != kUpb_EncodeStatus_Ok) {
    jsondec_err(d, "Error encoding Any value");
  }
  upb_Message_SetFieldByDef(msg, value_f, encoded, d->arena);
}
1482 
/* Parses a wrapper well-known type (the *Value messages handled in
 * jsondec_wellknown()): the JSON value is decoded directly as the wrapper's
 * field number 1 and stored there. */
static void jsondec_wrapper(jsondec* d, upb_Message* msg,
                            const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  const upb_FieldDef* const inner_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_JsonMessageValue parsed = jsondec_value(d, inner_f);
  UPB_ASSUME(!parsed.ignore);  // Wrapper cannot be an enum.
  upb_Message_SetFieldByDef(msg, inner_f, parsed.value, d->arena);
}
1491 
/* Decodes the next JSON value into |msg|, whose type |m| must be one of the
 * well-known types, by dispatching to the matching specialised parser.
 * Reaching this function with any other type is treated as unreachable. */
static void jsondec_wellknown(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  switch (upb_MessageDef_WellKnownType(m)) {
    /* Wrapper types all share one parser. */
    case kUpb_WellKnown_BoolValue:
    case kUpb_WellKnown_BytesValue:
    case kUpb_WellKnown_StringValue:
    case kUpb_WellKnown_UInt32Value:
    case kUpb_WellKnown_Int32Value:
    case kUpb_WellKnown_UInt64Value:
    case kUpb_WellKnown_Int64Value:
    case kUpb_WellKnown_FloatValue:
    case kUpb_WellKnown_DoubleValue:
      jsondec_wrapper(d, msg, m);
      return;

    case kUpb_WellKnown_Struct:
      jsondec_struct(d, msg, m);
      return;
    case kUpb_WellKnown_ListValue:
      jsondec_listvalue(d, msg, m);
      return;
    case kUpb_WellKnown_Value:
      jsondec_wellknownvalue(d, msg, m);
      return;

    case kUpb_WellKnown_Timestamp:
      jsondec_timestamp(d, msg, m);
      return;
    case kUpb_WellKnown_Duration:
      jsondec_duration(d, msg, m);
      return;

    case kUpb_WellKnown_FieldMask:
      jsondec_fieldmask(d, msg, m);
      return;
    case kUpb_WellKnown_Any:
      jsondec_any(d, msg, m);
      return;

    default:
      UPB_UNREACHABLE();
  }
}
1532 
/* Runs the decoder |d| over its whole input, filling |msg| (type |m|).
 *
 * Returns d->result when the entire input was consumed, or
 * kUpb_JsonDecodeResult_Error when trailing non-whitespace remains or when
 * control comes back through the UPB_SETJMP() on d->err (presumably the
 * jsondec_err* helpers jump there on failure). */
static int upb_JsonDecoder_Decode(jsondec* const d, upb_Message* const msg,
                                  const upb_MessageDef* const m) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  if (UPB_SETJMP(d->err)) return kUpb_JsonDecodeResult_Error;

  jsondec_tomsg(d, msg, m);

  // Trailing whitespace is allowed; anything else after the value is not.
  jsondec_consumews(d);

  if (d->ptr != d->end) {
    jsondec_seterrmsg(d, "unexpected trailing characters");
    return kUpb_JsonDecodeResult_Error;
  }
  return d->result;
}
1551 
/* Decodes the JSON text in [buf, buf + size) into |msg| (type |m|).
 *
 * |symtab| is used to resolve extensions and Any type URLs, |options| is a
 * bit set (e.g. upb_JsonDecode_IgnoreUnknown), decoded data is allocated
 * from |arena|, and |status| is handed to the decoder for error reporting.
 *
 * Returns one of the kUpb_JsonDecodeResult_* codes.  Empty input is accepted
 * and leaves |msg| untouched; it reports kUpb_JsonDecodeResult_Ok so that
 * this path uses the same result codes as every other return. */
int upb_JsonDecodeDetectingNonconformance(const char* buf, size_t size,
                                          upb_Message* msg,
                                          const upb_MessageDef* m,
                                          const upb_DefPool* symtab,
                                          int options, upb_Arena* arena,
                                          upb_Status* status) {
  UPB_ASSERT(!upb_Message_IsFrozen(msg));
  jsondec d;

  if (size == 0) return kUpb_JsonDecodeResult_Ok;

  d.ptr = buf;
  d.end = buf + size;
  d.arena = arena;
  d.symtab = symtab;
  d.status = status;
  d.options = options;
  d.depth = 64; /* Initial value of the decoder's depth counter. */
  d.result = kUpb_JsonDecodeResult_Ok;
  d.line = 1;
  d.line_begin = d.ptr;
  d.debug_field = NULL;
  d.is_first = false;

  return upb_JsonDecoder_Decode(&d, msg, m);
}
1578