• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Amalgamated source file */
2 #define _XOPEN_SOURCE 700
3 #include "upb.h"
4 
/* UPB_SIZE(size32, size64) picks the constant matching the pointer width of
 * the build: the first argument when uintptr_t is 32 bits wide, the second
 * otherwise.  The result is parenthesized so it can be embedded in a larger
 * expression safely. */
#if UINTPTR_MAX == 0xffffffff
#define UPB_SIZE(size32, size64) (size32)
#else
#define UPB_SIZE(size32, size64) (size64)
#endif

/* Yields an lvalue of type `fieldtype` located `offset` bytes past `msg`.
 * `offset` and the whole expansion are parenthesized so that operator
 * precedence at the call site cannot change what is addressed. */
#define UPB_FIELD_AT(msg, fieldtype, offset) \
  (*(fieldtype*)((const char*)(msg) + (offset)))

/* Evaluates to the field stored at `offset` when the int at `case_offset`
 * equals `case_val`, and to `default` otherwise.  The expansion is a single
 * parenthesized expression. */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  (UPB_FIELD_AT(msg, int, case_offset) == (case_val)                           \
       ? UPB_FIELD_AT(msg, fieldtype, offset)                                  \
       : (default))

/* Stores `case_val` into the int at `case_offset`, then `value` into the
 * field at `offset`.  Both stores form one comma expression, so an unbraced
 * `if` in front of the macro guards both of them.  The trailing semicolon is
 * kept so existing call sites (with or without their own semicolon) still
 * compile. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  ((void)(UPB_FIELD_AT(msg, int, case_offset) = (case_val)),                  \
   (void)(UPB_FIELD_AT(msg, fieldtype, offset) = (value)));
22 /* This file was generated by upbc (the upb compiler) from the input
23  * file:
24  *
25  *     google/protobuf/descriptor.proto
26  *
27  * Do not edit -- your changes will be discarded when the file is
28  * regenerated. */
29 
30 #include <stddef.h>
31 
32 
33 static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = {
34   &google_protobuf_FileDescriptorProto_msginit,
35 };
36 
37 static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = {
38   {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
39 };
40 
41 const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
42   &google_protobuf_FileDescriptorSet_submsgs[0],
43   &google_protobuf_FileDescriptorSet__fields[0],
44   UPB_SIZE(4, 8), 1, false,
45 };
46 
47 static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
48   &google_protobuf_DescriptorProto_msginit,
49   &google_protobuf_EnumDescriptorProto_msginit,
50   &google_protobuf_FieldDescriptorProto_msginit,
51   &google_protobuf_FileOptions_msginit,
52   &google_protobuf_ServiceDescriptorProto_msginit,
53   &google_protobuf_SourceCodeInfo_msginit,
54 };
55 
56 static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
57   {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
58   {2, UPB_SIZE(12, 24), 2, 0, 9, 1},
59   {3, UPB_SIZE(36, 72), 0, 0, 9, 3},
60   {4, UPB_SIZE(40, 80), 0, 0, 11, 3},
61   {5, UPB_SIZE(44, 88), 0, 1, 11, 3},
62   {6, UPB_SIZE(48, 96), 0, 4, 11, 3},
63   {7, UPB_SIZE(52, 104), 0, 2, 11, 3},
64   {8, UPB_SIZE(28, 56), 4, 3, 11, 1},
65   {9, UPB_SIZE(32, 64), 5, 5, 11, 1},
66   {10, UPB_SIZE(56, 112), 0, 0, 5, 3},
67   {11, UPB_SIZE(60, 120), 0, 0, 5, 3},
68   {12, UPB_SIZE(20, 40), 3, 0, 9, 1},
69 };
70 
71 const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
72   &google_protobuf_FileDescriptorProto_submsgs[0],
73   &google_protobuf_FileDescriptorProto__fields[0],
74   UPB_SIZE(64, 128), 12, false,
75 };
76 
77 static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
78   &google_protobuf_DescriptorProto_msginit,
79   &google_protobuf_DescriptorProto_ExtensionRange_msginit,
80   &google_protobuf_DescriptorProto_ReservedRange_msginit,
81   &google_protobuf_EnumDescriptorProto_msginit,
82   &google_protobuf_FieldDescriptorProto_msginit,
83   &google_protobuf_MessageOptions_msginit,
84   &google_protobuf_OneofDescriptorProto_msginit,
85 };
86 
87 static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
88   {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
89   {2, UPB_SIZE(16, 32), 0, 4, 11, 3},
90   {3, UPB_SIZE(20, 40), 0, 0, 11, 3},
91   {4, UPB_SIZE(24, 48), 0, 3, 11, 3},
92   {5, UPB_SIZE(28, 56), 0, 1, 11, 3},
93   {6, UPB_SIZE(32, 64), 0, 4, 11, 3},
94   {7, UPB_SIZE(12, 24), 2, 5, 11, 1},
95   {8, UPB_SIZE(36, 72), 0, 6, 11, 3},
96   {9, UPB_SIZE(40, 80), 0, 2, 11, 3},
97   {10, UPB_SIZE(44, 88), 0, 0, 9, 3},
98 };
99 
100 const upb_msglayout google_protobuf_DescriptorProto_msginit = {
101   &google_protobuf_DescriptorProto_submsgs[0],
102   &google_protobuf_DescriptorProto__fields[0],
103   UPB_SIZE(48, 96), 10, false,
104 };
105 
106 static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
107   &google_protobuf_ExtensionRangeOptions_msginit,
108 };
109 
110 static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
111   {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
112   {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
113   {3, UPB_SIZE(12, 16), 3, 0, 11, 1},
114 };
115 
116 const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = {
117   &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
118   &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
119   UPB_SIZE(16, 24), 3, false,
120 };
121 
122 static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
123   {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
124   {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
125 };
126 
127 const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
128   NULL,
129   &google_protobuf_DescriptorProto_ReservedRange__fields[0],
130   UPB_SIZE(12, 12), 2, false,
131 };
132 
133 static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
134   &google_protobuf_UninterpretedOption_msginit,
135 };
136 
137 static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = {
138   {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
139 };
140 
141 const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
142   &google_protobuf_ExtensionRangeOptions_submsgs[0],
143   &google_protobuf_ExtensionRangeOptions__fields[0],
144   UPB_SIZE(4, 8), 1, false,
145 };
146 
147 static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
148   &google_protobuf_FieldOptions_msginit,
149 };
150 
151 static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[10] = {
152   {1, UPB_SIZE(32, 32), 5, 0, 9, 1},
153   {2, UPB_SIZE(40, 48), 6, 0, 9, 1},
154   {3, UPB_SIZE(24, 24), 3, 0, 5, 1},
155   {4, UPB_SIZE(8, 8), 1, 0, 14, 1},
156   {5, UPB_SIZE(16, 16), 2, 0, 14, 1},
157   {6, UPB_SIZE(48, 64), 7, 0, 9, 1},
158   {7, UPB_SIZE(56, 80), 8, 0, 9, 1},
159   {8, UPB_SIZE(72, 112), 10, 0, 11, 1},
160   {9, UPB_SIZE(28, 28), 4, 0, 5, 1},
161   {10, UPB_SIZE(64, 96), 9, 0, 9, 1},
162 };
163 
164 const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = {
165   &google_protobuf_FieldDescriptorProto_submsgs[0],
166   &google_protobuf_FieldDescriptorProto__fields[0],
167   UPB_SIZE(80, 128), 10, false,
168 };
169 
170 static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = {
171   &google_protobuf_OneofOptions_msginit,
172 };
173 
174 static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
175   {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
176   {2, UPB_SIZE(12, 24), 2, 0, 11, 1},
177 };
178 
179 const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = {
180   &google_protobuf_OneofDescriptorProto_submsgs[0],
181   &google_protobuf_OneofDescriptorProto__fields[0],
182   UPB_SIZE(16, 32), 2, false,
183 };
184 
185 static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = {
186   &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
187   &google_protobuf_EnumOptions_msginit,
188   &google_protobuf_EnumValueDescriptorProto_msginit,
189 };
190 
191 static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
192   {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
193   {2, UPB_SIZE(16, 32), 0, 2, 11, 3},
194   {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
195   {4, UPB_SIZE(20, 40), 0, 0, 11, 3},
196   {5, UPB_SIZE(24, 48), 0, 0, 9, 3},
197 };
198 
199 const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
200   &google_protobuf_EnumDescriptorProto_submsgs[0],
201   &google_protobuf_EnumDescriptorProto__fields[0],
202   UPB_SIZE(32, 64), 5, false,
203 };
204 
205 static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
206   {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
207   {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
208 };
209 
210 const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
211   NULL,
212   &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
213   UPB_SIZE(12, 12), 2, false,
214 };
215 
216 static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
217   &google_protobuf_EnumValueOptions_msginit,
218 };
219 
220 static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
221   {1, UPB_SIZE(8, 8), 2, 0, 9, 1},
222   {2, UPB_SIZE(4, 4), 1, 0, 5, 1},
223   {3, UPB_SIZE(16, 24), 3, 0, 11, 1},
224 };
225 
226 const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = {
227   &google_protobuf_EnumValueDescriptorProto_submsgs[0],
228   &google_protobuf_EnumValueDescriptorProto__fields[0],
229   UPB_SIZE(24, 32), 3, false,
230 };
231 
232 static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = {
233   &google_protobuf_MethodDescriptorProto_msginit,
234   &google_protobuf_ServiceOptions_msginit,
235 };
236 
237 static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
238   {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
239   {2, UPB_SIZE(16, 32), 0, 0, 11, 3},
240   {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
241 };
242 
243 const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = {
244   &google_protobuf_ServiceDescriptorProto_submsgs[0],
245   &google_protobuf_ServiceDescriptorProto__fields[0],
246   UPB_SIZE(24, 48), 3, false,
247 };
248 
249 static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = {
250   &google_protobuf_MethodOptions_msginit,
251 };
252 
253 static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
254   {1, UPB_SIZE(4, 8), 3, 0, 9, 1},
255   {2, UPB_SIZE(12, 24), 4, 0, 9, 1},
256   {3, UPB_SIZE(20, 40), 5, 0, 9, 1},
257   {4, UPB_SIZE(28, 56), 6, 0, 11, 1},
258   {5, UPB_SIZE(1, 1), 1, 0, 8, 1},
259   {6, UPB_SIZE(2, 2), 2, 0, 8, 1},
260 };
261 
262 const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
263   &google_protobuf_MethodDescriptorProto_submsgs[0],
264   &google_protobuf_MethodDescriptorProto__fields[0],
265   UPB_SIZE(32, 64), 6, false,
266 };
267 
268 static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
269   &google_protobuf_UninterpretedOption_msginit,
270 };
271 
272 static const upb_msglayout_field google_protobuf_FileOptions__fields[19] = {
273   {1, UPB_SIZE(28, 32), 11, 0, 9, 1},
274   {8, UPB_SIZE(36, 48), 12, 0, 9, 1},
275   {9, UPB_SIZE(8, 8), 1, 0, 14, 1},
276   {10, UPB_SIZE(16, 16), 2, 0, 8, 1},
277   {11, UPB_SIZE(44, 64), 13, 0, 9, 1},
278   {16, UPB_SIZE(17, 17), 3, 0, 8, 1},
279   {17, UPB_SIZE(18, 18), 4, 0, 8, 1},
280   {18, UPB_SIZE(19, 19), 5, 0, 8, 1},
281   {20, UPB_SIZE(20, 20), 6, 0, 8, 1},
282   {23, UPB_SIZE(21, 21), 7, 0, 8, 1},
283   {27, UPB_SIZE(22, 22), 8, 0, 8, 1},
284   {31, UPB_SIZE(23, 23), 9, 0, 8, 1},
285   {36, UPB_SIZE(52, 80), 14, 0, 9, 1},
286   {37, UPB_SIZE(60, 96), 15, 0, 9, 1},
287   {39, UPB_SIZE(68, 112), 16, 0, 9, 1},
288   {40, UPB_SIZE(76, 128), 17, 0, 9, 1},
289   {41, UPB_SIZE(84, 144), 18, 0, 9, 1},
290   {42, UPB_SIZE(24, 24), 10, 0, 8, 1},
291   {999, UPB_SIZE(92, 160), 0, 0, 11, 3},
292 };
293 
294 const upb_msglayout google_protobuf_FileOptions_msginit = {
295   &google_protobuf_FileOptions_submsgs[0],
296   &google_protobuf_FileOptions__fields[0],
297   UPB_SIZE(96, 176), 19, false,
298 };
299 
300 static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = {
301   &google_protobuf_UninterpretedOption_msginit,
302 };
303 
304 static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = {
305   {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
306   {2, UPB_SIZE(2, 2), 2, 0, 8, 1},
307   {3, UPB_SIZE(3, 3), 3, 0, 8, 1},
308   {7, UPB_SIZE(4, 4), 4, 0, 8, 1},
309   {999, UPB_SIZE(8, 8), 0, 0, 11, 3},
310 };
311 
312 const upb_msglayout google_protobuf_MessageOptions_msginit = {
313   &google_protobuf_MessageOptions_submsgs[0],
314   &google_protobuf_MessageOptions__fields[0],
315   UPB_SIZE(12, 16), 5, false,
316 };
317 
318 static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
319   &google_protobuf_UninterpretedOption_msginit,
320 };
321 
322 static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = {
323   {1, UPB_SIZE(8, 8), 1, 0, 14, 1},
324   {2, UPB_SIZE(24, 24), 3, 0, 8, 1},
325   {3, UPB_SIZE(25, 25), 4, 0, 8, 1},
326   {5, UPB_SIZE(26, 26), 5, 0, 8, 1},
327   {6, UPB_SIZE(16, 16), 2, 0, 14, 1},
328   {10, UPB_SIZE(27, 27), 6, 0, 8, 1},
329   {999, UPB_SIZE(28, 32), 0, 0, 11, 3},
330 };
331 
332 const upb_msglayout google_protobuf_FieldOptions_msginit = {
333   &google_protobuf_FieldOptions_submsgs[0],
334   &google_protobuf_FieldOptions__fields[0],
335   UPB_SIZE(32, 40), 7, false,
336 };
337 
338 static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = {
339   &google_protobuf_UninterpretedOption_msginit,
340 };
341 
342 static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = {
343   {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
344 };
345 
346 const upb_msglayout google_protobuf_OneofOptions_msginit = {
347   &google_protobuf_OneofOptions_submsgs[0],
348   &google_protobuf_OneofOptions__fields[0],
349   UPB_SIZE(4, 8), 1, false,
350 };
351 
352 static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
353   &google_protobuf_UninterpretedOption_msginit,
354 };
355 
356 static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = {
357   {2, UPB_SIZE(1, 1), 1, 0, 8, 1},
358   {3, UPB_SIZE(2, 2), 2, 0, 8, 1},
359   {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
360 };
361 
362 const upb_msglayout google_protobuf_EnumOptions_msginit = {
363   &google_protobuf_EnumOptions_submsgs[0],
364   &google_protobuf_EnumOptions__fields[0],
365   UPB_SIZE(8, 16), 3, false,
366 };
367 
368 static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = {
369   &google_protobuf_UninterpretedOption_msginit,
370 };
371 
372 static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = {
373   {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
374   {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
375 };
376 
377 const upb_msglayout google_protobuf_EnumValueOptions_msginit = {
378   &google_protobuf_EnumValueOptions_submsgs[0],
379   &google_protobuf_EnumValueOptions__fields[0],
380   UPB_SIZE(8, 16), 2, false,
381 };
382 
383 static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = {
384   &google_protobuf_UninterpretedOption_msginit,
385 };
386 
387 static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = {
388   {33, UPB_SIZE(1, 1), 1, 0, 8, 1},
389   {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
390 };
391 
392 const upb_msglayout google_protobuf_ServiceOptions_msginit = {
393   &google_protobuf_ServiceOptions_submsgs[0],
394   &google_protobuf_ServiceOptions__fields[0],
395   UPB_SIZE(8, 16), 2, false,
396 };
397 
398 static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = {
399   &google_protobuf_UninterpretedOption_msginit,
400 };
401 
402 static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = {
403   {33, UPB_SIZE(16, 16), 2, 0, 8, 1},
404   {34, UPB_SIZE(8, 8), 1, 0, 14, 1},
405   {999, UPB_SIZE(20, 24), 0, 0, 11, 3},
406 };
407 
408 const upb_msglayout google_protobuf_MethodOptions_msginit = {
409   &google_protobuf_MethodOptions_submsgs[0],
410   &google_protobuf_MethodOptions__fields[0],
411   UPB_SIZE(24, 32), 3, false,
412 };
413 
414 static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = {
415   &google_protobuf_UninterpretedOption_NamePart_msginit,
416 };
417 
418 static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
419   {2, UPB_SIZE(56, 80), 0, 0, 11, 3},
420   {3, UPB_SIZE(32, 32), 4, 0, 9, 1},
421   {4, UPB_SIZE(8, 8), 1, 0, 4, 1},
422   {5, UPB_SIZE(16, 16), 2, 0, 3, 1},
423   {6, UPB_SIZE(24, 24), 3, 0, 1, 1},
424   {7, UPB_SIZE(40, 48), 5, 0, 12, 1},
425   {8, UPB_SIZE(48, 64), 6, 0, 9, 1},
426 };
427 
428 const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
429   &google_protobuf_UninterpretedOption_submsgs[0],
430   &google_protobuf_UninterpretedOption__fields[0],
431   UPB_SIZE(64, 96), 7, false,
432 };
433 
434 static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
435   {1, UPB_SIZE(4, 8), 2, 0, 9, 2},
436   {2, UPB_SIZE(1, 1), 1, 0, 8, 2},
437 };
438 
439 const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = {
440   NULL,
441   &google_protobuf_UninterpretedOption_NamePart__fields[0],
442   UPB_SIZE(16, 32), 2, false,
443 };
444 
445 static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = {
446   &google_protobuf_SourceCodeInfo_Location_msginit,
447 };
448 
449 static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = {
450   {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
451 };
452 
453 const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
454   &google_protobuf_SourceCodeInfo_submsgs[0],
455   &google_protobuf_SourceCodeInfo__fields[0],
456   UPB_SIZE(4, 8), 1, false,
457 };
458 
459 static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
460   {1, UPB_SIZE(20, 40), 0, 0, 5, 3},
461   {2, UPB_SIZE(24, 48), 0, 0, 5, 3},
462   {3, UPB_SIZE(4, 8), 1, 0, 9, 1},
463   {4, UPB_SIZE(12, 24), 2, 0, 9, 1},
464   {6, UPB_SIZE(28, 56), 0, 0, 9, 3},
465 };
466 
467 const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
468   NULL,
469   &google_protobuf_SourceCodeInfo_Location__fields[0],
470   UPB_SIZE(32, 64), 5, false,
471 };
472 
473 static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = {
474   &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
475 };
476 
477 static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = {
478   {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
479 };
480 
481 const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
482   &google_protobuf_GeneratedCodeInfo_submsgs[0],
483   &google_protobuf_GeneratedCodeInfo__fields[0],
484   UPB_SIZE(4, 8), 1, false,
485 };
486 
487 static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
488   {1, UPB_SIZE(20, 32), 0, 0, 5, 3},
489   {2, UPB_SIZE(12, 16), 3, 0, 9, 1},
490   {3, UPB_SIZE(4, 4), 1, 0, 5, 1},
491   {4, UPB_SIZE(8, 8), 2, 0, 5, 1},
492 };
493 
494 const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
495   NULL,
496   &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
497   UPB_SIZE(24, 48), 4, false,
498 };
499 
500 
501 
502 
503 /* Maps descriptor type -> upb field type.  */
504 const uint8_t upb_desctype_to_fieldtype[] = {
505   UPB_WIRE_TYPE_END_GROUP,  /* ENDGROUP */
506   UPB_TYPE_DOUBLE,          /* DOUBLE */
507   UPB_TYPE_FLOAT,           /* FLOAT */
508   UPB_TYPE_INT64,           /* INT64 */
509   UPB_TYPE_UINT64,          /* UINT64 */
510   UPB_TYPE_INT32,           /* INT32 */
511   UPB_TYPE_UINT64,          /* FIXED64 */
512   UPB_TYPE_UINT32,          /* FIXED32 */
513   UPB_TYPE_BOOL,            /* BOOL */
514   UPB_TYPE_STRING,          /* STRING */
515   UPB_TYPE_MESSAGE,         /* GROUP */
516   UPB_TYPE_MESSAGE,         /* MESSAGE */
517   UPB_TYPE_BYTES,           /* BYTES */
518   UPB_TYPE_UINT32,          /* UINT32 */
519   UPB_TYPE_ENUM,            /* ENUM */
520   UPB_TYPE_INT32,           /* SFIXED32 */
521   UPB_TYPE_INT64,           /* SFIXED64 */
522   UPB_TYPE_INT32,           /* SINT32 */
523   UPB_TYPE_INT64,           /* SINT64 */
524 };
525 
/* State that belongs to the parse as a whole rather than to one message. */
typedef struct {
  /* Read cursor into the input.  It stays at the first byte of the field
   * being decoded until that field has been consumed completely. */
  const char *ptr;
} upb_decstate;
532 
533 /* Data pertaining to a single message frame. */
534 typedef struct {
535   const char *limit;
536   int32_t group_number;  /* 0 if we are not parsing a group. */
537 
538   /* These members are unset for an unknown group frame. */
539   char *msg;
540   const upb_msglayout *m;
541 } upb_decframe;
542 
543 #define CHK(x) if (!(x)) { return false; }
544 
545 static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
546                                   const char *limit);
547 static bool upb_decode_message(upb_decstate *d, const char *limit,
548                                int group_number, char *msg,
549                                const upb_msglayout *l);
550 
/* Decodes one base-128 varint.
 *
 * Reading starts at *ptr and never touches `limit` or anything past it.  Each
 * byte contributes its low seven bits; a set high bit means another byte
 * follows.  On success *val holds the value, *ptr is moved past the last byte
 * consumed and true is returned.  The function returns false (leaving *ptr
 * untouched) if the input ends early or the varint runs past ten bytes. */
static bool upb_decode_varint(const char **ptr, const char *limit,
                              uint64_t *val) {
  const char *cur = *ptr;
  int shift = 0;
  uint8_t b;

  *val = 0;

  for (;;) {
    /* shift takes the values 0, 7, ..., 63 for bytes one through ten; an
     * eleventh byte would arrive with shift == 70 and is rejected. */
    CHK(shift < 70 && cur < limit);
    b = *cur++;
    *val |= (uint64_t)(b & 0x7F) << shift;
    shift += 7;
    if (!(b & 0x80)) {
      break;
    }
  }

  *ptr = cur;
  return true;
}
569 
/* Decodes a varint that must fit in 32 bits.
 *
 * Returns false when the underlying varint is malformed or when its value
 * exceeds UINT32_MAX.  In the second case *ptr has already been advanced by
 * upb_decode_varint(). */
static bool upb_decode_varint32(const char **ptr, const char *limit,
                                uint32_t *val) {
  uint64_t wide;

  CHK(upb_decode_varint(ptr, limit, &wide));
  CHK(wide <= UINT32_MAX);
  *val = (uint32_t)wide;
  return true;
}
577 
/* Reads a fixed eight-byte value.
 *
 * The bytes are copied verbatim into *val with memcpy (no byte-order
 * conversion happens here) and *ptr is advanced by eight.  Returns false,
 * consuming nothing, when fewer than eight bytes remain before `limit`. */
static bool upb_decode_64bit(const char **ptr, const char *limit,
                             uint64_t *val) {
  const char *src = *ptr;

  CHK(limit - src >= 8);
  memcpy(val, src, 8);
  *ptr = src + 8;
  return true;
}
585 
/* Reads a fixed four-byte value.
 *
 * The bytes are copied verbatim into *val with memcpy (no byte-order
 * conversion happens here) and *ptr is advanced by four.  Returns false,
 * consuming nothing, when fewer than four bytes remain before `limit`. */
static bool upb_decode_32bit(const char **ptr, const char *limit,
                             uint32_t *val) {
  const char *src = *ptr;

  CHK(limit - src >= 4);
  memcpy(val, src, 4);
  *ptr = src + 4;
  return true;
}
593 
/* Decodes a field tag and splits it into its two parts.
 *
 * The tag is read as a 32-bit varint; its low three bits are stored in
 * *wire_type and the remaining upper bits in *field_number.  Returns false
 * if the varint cannot be decoded. */
static bool upb_decode_tag(const char **ptr, const char *limit,
                           int *field_number, int *wire_type) {
  uint32_t raw = 0;

  if (!upb_decode_varint32(ptr, limit, &raw)) {
    return false;
  }

  *field_number = raw >> 3;
  *wire_type = raw & 7;
  return true;
}
602 
/* Undoes zig-zag encoding of a 32-bit value: even inputs map to n / 2, odd
 * inputs map to -(n + 1) / 2. */
static int32_t upb_zzdecode_32(uint32_t n) {
  uint32_t magnitude = n >> 1;
  /* All ones when the low bit is set, zero otherwise. */
  uint32_t sign_mask = (uint32_t)0 - (n & 1);
  return (int32_t)(magnitude ^ sign_mask);
}
606 
/* Undoes zig-zag encoding of a 64-bit value: even inputs map to n / 2, odd
 * inputs map to -(n + 1) / 2. */
static int64_t upb_zzdecode_64(uint64_t n) {
  uint64_t magnitude = n >> 1;
  /* All ones when the low bit is set, zero otherwise. */
  uint64_t sign_mask = (uint64_t)0 - (n & 1);
  return (int64_t)(magnitude ^ sign_mask);
}
610 
/* Decodes a length-delimited value.
 *
 * A 32-bit varint length is read first; the view stored in *val then refers
 * to that many bytes directly inside the input (nothing is copied), and *ptr
 * is advanced past them.  Returns false when the length cannot be decoded,
 * is not below INT32_MAX, or exceeds the bytes left before `limit`. */
static bool upb_decode_string(const char **ptr, const char *limit,
                              upb_strview *val) {
  uint32_t len;

  CHK(upb_decode_varint32(ptr, limit, &len));
  CHK(len < INT32_MAX);
  CHK(limit - *ptr >= (int32_t)len);

  *val = upb_strview_make(*ptr, len);
  *ptr += len;
  return true;
}
623 
/* Writes the four bytes of `val` at byte offset `ofs` inside `msg`.  memcpy
 * is used so the destination does not have to be aligned for uint32_t. */
static void upb_set32(void *msg, size_t ofs, uint32_t val) {
  char *dest = (char*)msg + ofs;
  memcpy(dest, &val, sizeof(uint32_t));
}
627 
/* Hands the bytes from `start` up to the current read cursor to
 * upb_msg_addunknown() for the frame's message.  Always returns true. */
static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame,
                               const char *start) {
  const char *end = d->ptr;

  upb_msg_addunknown(frame->msg, start, end - start);
  return true;
}
633 
/* Advances the read cursor past the payload of a field whose tag has already
 * been read, according to `wire_type`.
 *
 * Scalar and delimited payloads are decoded into scratch variables and
 * dropped.  A start-group tag delegates to upb_skip_unknowngroup().  An
 * end-group tag must carry the frame's own group number; it then ends the
 * frame by pulling frame->limit back to the cursor.  Returns false for a
 * malformed payload, a mismatched end-group, or an unrecognized wire type. */
static bool upb_skip_unknownfielddata(upb_decstate *d, upb_decframe *frame,
                                      int field_number, int wire_type) {
  uint64_t scratch64;
  uint32_t scratch32;
  upb_strview scratch_str;

  switch (wire_type) {
    case UPB_WIRE_TYPE_VARINT:
      return upb_decode_varint(&d->ptr, frame->limit, &scratch64);
    case UPB_WIRE_TYPE_32BIT:
      return upb_decode_32bit(&d->ptr, frame->limit, &scratch32);
    case UPB_WIRE_TYPE_64BIT:
      return upb_decode_64bit(&d->ptr, frame->limit, &scratch64);
    case UPB_WIRE_TYPE_DELIMITED:
      return upb_decode_string(&d->ptr, frame->limit, &scratch_str);
    case UPB_WIRE_TYPE_START_GROUP:
      return upb_skip_unknowngroup(d, field_number, frame->limit);
    case UPB_WIRE_TYPE_END_GROUP:
      if (field_number != frame->group_number) {
        return false;
      }
      frame->limit = d->ptr;
      return true;
    default:
      break;
  }

  return false;
}
662 
/* Enlarges the storage of `arr` so that `elements` more items fit after the
 * current ones.
 *
 * The capacity starts from max(current capacity, 8) and is doubled until it
 * covers len + elements.  The live elements are carried over by
 * upb_realloc().  Returns false -- leaving `arr` unchanged -- if the
 * allocation fails or if any size computation would wrap around size_t.
 * arr->len is not modified. */
static bool upb_array_grow(upb_array *arr, size_t elements) {
  const size_t size_max = (size_t)-1;
  size_t needed;
  size_t new_size = UPB_MAX(arr->size, 8);
  size_t new_bytes;
  size_t old_bytes;
  void *new_data;
  upb_alloc *alloc = upb_arena_alloc(arr->arena);

  /* len + elements must be representable. */
  CHK(elements <= size_max - arr->len);
  needed = arr->len + elements;

  while (new_size < needed) {
    /* A wrapped doubling would yield 0 and the loop would never end. */
    CHK(new_size <= size_max / 2);
    new_size *= 2;
  }

  /* The byte count handed to the allocator must not wrap either, otherwise
   * the buffer would be smaller than the capacity recorded below. */
  CHK(arr->element_size == 0 || new_size <= size_max / arr->element_size);

  /* Only the live elements have to survive the reallocation. */
  old_bytes = arr->len * arr->element_size;
  new_bytes = new_size * arr->element_size;
  new_data = upb_realloc(alloc, arr->data, old_bytes, new_bytes);
  CHK(new_data);

  arr->data = new_data;
  arr->size = new_size;
  return true;
}
684 
/* Makes sure `arr` has room for `elements` more items and returns the address
 * where the first of them would be stored (just past the current last
 * element).  arr->len is not changed.  Returns NULL if the array had to grow
 * and growing failed. */
static void *upb_array_reserve(upb_array *arr, size_t elements) {
  if (arr->size - arr->len < elements) {
    /* Spelled out instead of CHK(): this function returns a pointer, so the
     * failure value has to be NULL rather than `false`. */
    if (!upb_array_grow(arr, elements)) {
      return NULL;
    }
  }
  return (char*)arr->data + (arr->len * arr->element_size);
}
691 
/* Appends room for `elements` items to `arr` and returns the address of the
 * first new slot; the caller fills the slots in.  When the reservation fails
 * the result is NULL and arr->len is left untouched, so the array never
 * claims elements it has no storage for. */
static void *upb_array_add(upb_array *arr, size_t elements) {
  void *ret = upb_array_reserve(arr, elements);

  if (ret) {
    arr->len += elements;
  }
  return ret;
}
697 
upb_getarr(upb_decframe * frame,const upb_msglayout_field * field)698 static upb_array *upb_getarr(upb_decframe *frame,
699                              const upb_msglayout_field *field) {
700   UPB_ASSERT(field->label == UPB_LABEL_REPEATED);
701   return *(upb_array**)&frame->msg[field->offset];
702 }
703 
upb_getorcreatearr(upb_decframe * frame,const upb_msglayout_field * field)704 static upb_array *upb_getorcreatearr(upb_decframe *frame,
705                                      const upb_msglayout_field *field) {
706   upb_array *arr = upb_getarr(frame, field);
707 
708   if (!arr) {
709     upb_fieldtype_t type = upb_desctype_to_fieldtype[field->descriptortype];
710     arr = upb_array_new(type, upb_msg_arena(frame->msg));
711     if (!arr) {
712       return NULL;
713     }
714     *(upb_array**)&frame->msg[field->offset] = arr;
715   }
716 
717   return arr;
718 }
719 
/* Marks a field as present by setting its has-bit.  field->presence (which
 * must be positive) is the bit index, counted from the start of the message
 * storage. */
static void upb_sethasbit(upb_decframe *frame,
                          const upb_msglayout_field *field) {
  int32_t bit_index = field->presence;
  char *byte;

  UPB_ASSERT(bit_index > 0);
  byte = &frame->msg[bit_index / 8];
  *byte |= (1 << (bit_index % 8));
}
726 
/* Records which member of a oneof is set.  For such fields field->presence
 * is negative and its bitwise complement is the byte offset of the case
 * slot; the field number is written there as a 32-bit value. */
static void upb_setoneofcase(upb_decframe *frame,
                             const upb_msglayout_field *field) {
  size_t case_ofs;

  UPB_ASSERT(field->presence < 0);
  case_ofs = ~field->presence;
  upb_set32(frame->msg, case_ofs, field->number);
}
732 
/* Returns the address a decoded value for `field` should be written to.
 *
 * For a non-repeated field this is the field's own storage inside the
 * message.  For a repeated field the array is fetched (or created) and room
 * for one more element is reserved; the array length is not bumped here --
 * upb_decode_setpresent() does that once the value has been stored.
 *
 * Returns NULL if the array cannot be created or grown. */
static char *upb_decode_prepareslot(upb_decframe *frame,
                                    const upb_msglayout_field *field) {
  upb_array *arr;

  if (field->label != UPB_LABEL_REPEATED) {
    return frame->msg + field->offset;
  }

  arr = upb_getorcreatearr(frame, field);
  if (!arr) {
    /* Array creation failed; reserving on a NULL array would dereference
     * it. */
    return NULL;
  }

  return upb_array_reserve(arr, 1);
}
745 
/* Commits a value that has just been written for `field`.
 *
 * Repeated field: the array length grows by one (the slot was reserved
 * beforehand).  Oneof member (negative presence): the oneof case is
 * recorded.  Field with a has-bit (positive presence): the bit is set.
 * Presence 0: nothing is recorded. */
static void upb_decode_setpresent(upb_decframe *frame,
                                  const upb_msglayout_field *field) {
  upb_array *arr;

  if (field->label == UPB_LABEL_REPEATED) {
    arr = upb_getarr(frame, field);
    UPB_ASSERT(arr->len < arr->size);
    arr->len++;
    return;
  }

  if (field->presence < 0) {
    upb_setoneofcase(frame, field);
  } else if (field->presence > 0) {
    upb_sethasbit(frame, field);
  }
}
758 
/* Decodes a sub-message (or group) for `field` from the bytes up to `limit`.
 *
 * The destination slot comes from upb_decode_prepareslot().  A non-repeated
 * field reuses the message already stored in its slot, if there is one; in
 * every other case a new message is created in the parent's arena and stored
 * in the slot.  `group_number` is passed through to upb_decode_message().
 *
 * Returns false if no slot is available, if the message cannot be created,
 * or if decoding the nested message fails. */
static bool upb_decode_submsg(upb_decstate *d, upb_decframe *frame,
                              const char *limit,
                              const upb_msglayout_field *field,
                              int group_number) {
  char *submsg_slot = upb_decode_prepareslot(frame, field);
  char *submsg = NULL;
  const upb_msglayout *subm;

  /* The slot is NULL when the backing array could not be created or grown. */
  CHK(submsg_slot);

  subm = frame->m->submsgs[field->submsg_index];
  UPB_ASSERT(subm);

  /* A repeated field's slot is storage that upb_array_reserve() has only
   * set aside; it does not hold a message pointer yet, so it is never read.
   * Each repeated occurrence gets its own message. */
  if (field->label != UPB_LABEL_REPEATED) {
    submsg = *(void **)submsg_slot;
  }

  if (!submsg) {
    submsg = upb_msg_new(subm, upb_msg_arena(frame->msg));
    CHK(submsg);
    *(void**)submsg_slot = submsg;
  }

  /* Propagate nested failures so malformed input is not reported as a
   * successful parse. */
  CHK(upb_decode_message(d, limit, group_number, submsg, subm));

  return true;
}
780 
/* Decodes a varint-encoded value and stores it in `field`.
 *
 * The raw varint is converted according to the field's descriptor type:
 * 64-bit integers are stored as read, 32-bit integers and enums are
 * truncated to 32 bits, bools become `raw != 0`, and sint32/sint64 are
 * zig-zag decoded.  For any other descriptor type the bytes from
 * `field_start` to the cursor are kept as unknown data instead and the field
 * is not marked present.
 *
 * Returns false if no destination slot is available or the varint is
 * malformed. */
static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame,
                                   const char *field_start,
                                   const upb_msglayout_field *field) {
  uint64_t raw;
  void *slot = upb_decode_prepareslot(frame, field);

  CHK(slot);
  CHK(upb_decode_varint(&d->ptr, frame->limit, &raw));

  switch ((upb_descriptortype_t)field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      memcpy(slot, &raw, sizeof(raw));
      break;
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_ENUM: {
      uint32_t narrow = (uint32_t)raw;
      memcpy(slot, &narrow, sizeof(narrow));
      break;
    }
    case UPB_DESCRIPTOR_TYPE_BOOL: {
      bool flag = (raw != 0);
      memcpy(slot, &flag, sizeof(flag));
      break;
    }
    case UPB_DESCRIPTOR_TYPE_SINT32: {
      int32_t plain32 = upb_zzdecode_32((uint32_t)raw);
      memcpy(slot, &plain32, sizeof(plain32));
      break;
    }
    case UPB_DESCRIPTOR_TYPE_SINT64: {
      int64_t plain64 = upb_zzdecode_64(raw);
      memcpy(slot, &plain64, sizeof(plain64));
      break;
    }
    default:
      /* Wire type and declared type disagree: keep the bytes as unknown. */
      return upb_append_unknown(d, frame, field_start);
  }

  upb_decode_setpresent(frame, field);
  return true;
}
825 
/* Decodes a fixed 64-bit value and stores it in `field`.
 *
 * The eight bytes are stored as read when the field is a double, fixed64 or
 * sfixed64.  For any other descriptor type the bytes from `field_start` to
 * the cursor are kept as unknown data and the field is not marked present.
 *
 * Returns false if no destination slot is available or fewer than eight
 * bytes remain in the frame. */
static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame,
                                  const char *field_start,
                                  const upb_msglayout_field *field) {
  void *slot = upb_decode_prepareslot(frame, field);
  upb_descriptortype_t type;
  uint64_t raw;

  CHK(slot);
  CHK(upb_decode_64bit(&d->ptr, frame->limit, &raw));

  type = (upb_descriptortype_t)field->descriptortype;
  if (type != UPB_DESCRIPTOR_TYPE_DOUBLE &&
      type != UPB_DESCRIPTOR_TYPE_FIXED64 &&
      type != UPB_DESCRIPTOR_TYPE_SFIXED64) {
    return upb_append_unknown(d, frame, field_start);
  }

  memcpy(slot, &raw, sizeof(raw));
  upb_decode_setpresent(frame, field);
  return true;
}
849 
/* Handles a field that arrived with the 32-bit wire type.
 *
 * Order of operations: obtain the destination slot via
 * upb_decode_prepareslot(), read the value with upb_decode_32bit() (bounded
 * by frame->limit), then dispatch on the declared descriptor type.  FLOAT,
 * FIXED32 and SFIXED32 get the raw 32 bits copied into the slot and the field
 * is flagged via upb_decode_setpresent(); every other declared type is
 * forwarded to upb_append_unknown() starting at |field_start|. */
static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame,
                                  const char *field_start,
                                  const upb_msglayout_field *field) {
  uint32_t raw;
  void *slot = upb_decode_prepareslot(frame, field);

  CHK(slot);
  CHK(upb_decode_32bit(&d->ptr, frame->limit, &raw));

  switch ((upb_descriptortype_t)field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_FLOAT:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      /* All three share the same in-memory width, so copy the bits as-is. */
      memcpy(slot, &raw, sizeof(raw));
      upb_decode_setpresent(frame, field);
      return true;
    default:
      /* Wire type does not match the declared type. */
      return upb_append_unknown(d, frame, field_start);
  }
}
873 
/* Appends a packed run of fixed-width elements to |arr|.
 *
 * |data| is the payload of a length-delimited field and |elem_size| the width
 * of one element in bytes.  The payload must be an exact multiple of
 * |elem_size|; otherwise the CHK() fails.  On success the bytes are copied
 * verbatim into the space returned by upb_array_add().
 *
 * The element count is computed in size_t: the previous int arithmetic
 * narrowed data.size and could overflow in the multiplication used for the
 * divisibility test.  A non-positive |elem_size| is rejected up front instead
 * of dividing by zero. */
static bool upb_decode_fixedpacked(upb_array *arr, upb_strview data,
                                   int elem_size) {
  size_t width;
  size_t count;
  void *field_mem;

  CHK(elem_size > 0);
  width = (size_t)elem_size;

  /* Reject payloads that end in the middle of an element. */
  CHK(data.size % width == 0);
  count = data.size / width;

  field_mem = upb_array_add(arr, count);
  CHK(field_mem);
  memcpy(field_mem, data.data, data.size);
  return true;
}
885 
/* Decodes the length-delimited payload |val| into the repeated field |field|.
 *
 * Dispatch is on the field's declared descriptor type:
 *   - STRING / BYTES: the string view itself is appended as one element.
 *   - fixed-width scalars: the payload is treated as a packed run and copied
 *     by upb_decode_fixedpacked().
 *   - varint scalars: the payload is treated as a packed run of varints, each
 *     decoded and appended individually.
 *   - MESSAGE: a new sub-message is created, appended, and decoded in place.
 *   - GROUP: not valid for this wire type; kept as unknown data.
 *
 * |field_start| points at the start of the field's tag and is only used when
 * the data is preserved as unknown. */
static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame,
                               const char *field_start,
                               const upb_msglayout_field *field,
                               upb_strview val) {
  upb_array *arr = upb_getorcreatearr(frame, field);

  /* Every branch below hands |arr| to upb_array_add(), so stop here if no
   * array could be obtained (presumably an allocation failure -- the helper
   * is defined elsewhere in this file). */
  CHK(arr);

/* Expands to a block that walks the payload as consecutive varints, converts
 * each with |decode| (a cast or a zig-zag decoder), and appends the result as
 * a |ctype| element.  The block always returns from the enclosing function. */
#define VARINT_CASE(ctype, decode) { \
  const char *ptr = val.data; \
  const char *limit = ptr + val.size; \
  while (ptr < limit) { \
    uint64_t raw; \
    void *field_mem; \
    ctype decoded; \
    CHK(upb_decode_varint(&ptr, limit, &raw)); \
    decoded = (decode)(raw); \
    field_mem = upb_array_add(arr, 1); \
    CHK(field_mem); \
    memcpy(field_mem, &decoded, sizeof(ctype)); \
  } \
  return true; \
}

  switch ((upb_descriptortype_t)field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      void *field_mem = upb_array_add(arr, 1);
      CHK(field_mem);
      memcpy(field_mem, &val, sizeof(val));
      return true;
    }
    case UPB_DESCRIPTOR_TYPE_FLOAT:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      return upb_decode_fixedpacked(arr, val, sizeof(int32_t));
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
      return upb_decode_fixedpacked(arr, val, sizeof(int64_t));
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* TODO: proto2 enum field that isn't in the enum. */
      VARINT_CASE(uint32_t, uint32_t);
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      VARINT_CASE(uint64_t, uint64_t);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      VARINT_CASE(bool, bool);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      VARINT_CASE(int32_t, upb_zzdecode_32);
    case UPB_DESCRIPTOR_TYPE_SINT64:
      VARINT_CASE(int64_t, upb_zzdecode_64);
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      const upb_msglayout *subm;
      char *submsg;
      void *field_mem;

      CHK(val.size <= (size_t)(frame->limit - val.data));
      /* Rewind the read pointer by the payload size so the nested decode
       * below consumes the payload itself. */
      d->ptr -= val.size;

      /* Create element message. */
      subm = frame->m->submsgs[field->submsg_index];
      UPB_ASSERT(subm);

      submsg = upb_msg_new(subm, upb_msg_arena(frame->msg));
      CHK(submsg);

      field_mem = upb_array_add(arr, 1);
      CHK(field_mem);
      *(void**)field_mem = submsg;

      return upb_decode_message(
          d, val.data + val.size, frame->group_number, submsg, subm);
    }
    case UPB_DESCRIPTOR_TYPE_GROUP:
      return upb_append_unknown(d, frame, field_start);
  }
#undef VARINT_CASE
  UPB_UNREACHABLE();
}
966 
/* Handles a field that arrived with the length-delimited wire type.
 *
 * The payload is read with upb_decode_string().  Repeated fields are handed
 * to upb_decode_toarray().  For singular fields, STRING/BYTES store the string
 * view in the field's slot, MESSAGE decodes a sub-message from the payload,
 * and any other declared type is preserved as unknown data. */
static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame,
                                      const char *field_start,
                                      const upb_msglayout_field *field) {
  upb_strview payload;

  CHK(upb_decode_string(&d->ptr, frame->limit, &payload));

  if (field->label == UPB_LABEL_REPEATED) {
    return upb_decode_toarray(d, frame, field_start, field, payload);
  }

  switch ((upb_descriptortype_t)field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      void *slot = upb_decode_prepareslot(frame, field);
      CHK(slot);
      memcpy(slot, &payload, sizeof(payload));
      break;
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE:
      CHK(payload.size <= (size_t)(frame->limit - payload.data));
      /* Rewind by the payload size so the sub-message decode consumes it. */
      d->ptr -= payload.size;
      CHK(upb_decode_submsg(d, frame, payload.data + payload.size, field, 0));
      break;
    default:
      /* TODO(haberman): should we accept the last element of a packed? */
      return upb_append_unknown(d, frame, field_start);
  }

  upb_decode_setpresent(frame, field);
  return true;
}
998 
upb_find_field(const upb_msglayout * l,uint32_t field_number)999 static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
1000                                                  uint32_t field_number) {
1001   /* Lots of optimization opportunities here. */
1002   int i;
1003   for (i = 0; i < l->field_count; i++) {
1004     if (l->fields[i].number == field_number) {
1005       return &l->fields[i];
1006     }
1007   }
1008 
1009   return NULL;  /* Unknown field. */
1010 }
1011 
/* Decodes one field (tag plus value) from d->ptr into |frame|.
 *
 * The tag is read with upb_decode_tag() and the field number is looked up in
 * the frame's layout.  Fields not in the layout are skipped and then preserved
 * via upb_append_unknown(); field number 0 is rejected.  Known fields are
 * dispatched on wire type.  An END_GROUP tag must match the frame's group
 * number, and ends the frame by pulling frame->limit back to the current
 * read position. */
static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) {
  int field_number;
  int wire_type;
  const char *field_start = d->ptr;
  const upb_msglayout_field *field;

  CHK(upb_decode_tag(&d->ptr, frame->limit, &field_number, &wire_type));
  field = upb_find_field(frame->m, field_number);

  if (!field) {
    /* Not in the layout: skip over the value, then keep the raw bytes. */
    CHK(field_number != 0);
    CHK(upb_skip_unknownfielddata(d, frame, field_number, wire_type));
    CHK(upb_append_unknown(d, frame, field_start));
    return true;
  }

  switch (wire_type) {
    case UPB_WIRE_TYPE_VARINT:
      return upb_decode_varintfield(d, frame, field_start, field);
    case UPB_WIRE_TYPE_32BIT:
      return upb_decode_32bitfield(d, frame, field_start, field);
    case UPB_WIRE_TYPE_64BIT:
      return upb_decode_64bitfield(d, frame, field_start, field);
    case UPB_WIRE_TYPE_DELIMITED:
      return upb_decode_delimitedfield(d, frame, field_start, field);
    case UPB_WIRE_TYPE_START_GROUP:
      CHK(field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP);
      return upb_decode_submsg(d, frame, frame->limit, field, field_number);
    case UPB_WIRE_TYPE_END_GROUP:
      CHK(frame->group_number == field_number);
      /* Shrinking the limit to the read position terminates the frame's
       * decode loop. */
      frame->limit = d->ptr;
      return true;
    default:
      return false;
  }
}
1048 
/* Skips over the contents of a group whose field number is not in the layout.
 *
 * A temporary frame with no message and no layout is set up for the group
 * numbered |field_number|; tags are then read and their values skipped with
 * upb_skip_unknownfielddata() until d->ptr reaches the frame's limit (which
 * starts at |limit|). */
static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
                                  const char *limit) {
  upb_decframe frame;

  frame.limit = limit;
  frame.group_number = field_number;
  frame.m = NULL;
  frame.msg = NULL;

  while (d->ptr < frame.limit) {
    int tag_wire_type;
    int tag_number;

    CHK(upb_decode_tag(&d->ptr, frame.limit, &tag_number, &tag_wire_type));
    CHK(upb_skip_unknownfielddata(d, &frame, tag_number, tag_wire_type));
  }

  return true;
}
1067 
/* Decodes fields into |msg| (described by layout |l|) until d->ptr reaches
 * the frame limit.
 *
 * |limit| is the initial end of input for this message and |group_number| is
 * recorded in the frame so an END_GROUP tag can be matched against it.  The
 * loop re-reads frame.limit on every iteration because field decoding may
 * move it. */
static bool upb_decode_message(upb_decstate *d, const char *limit,
                               int group_number, char *msg,
                               const upb_msglayout *l) {
  upb_decframe frame;

  frame.msg = msg;
  frame.m = l;
  frame.limit = limit;
  frame.group_number = group_number;

  for (;;) {
    if (!(d->ptr < frame.limit)) {
      break;
    }
    CHK(upb_decode_field(d, &frame));
  }

  return true;
}
1083 
/* Public entry point: decodes the bytes in |buf| into |msg| using layout |l|.
 *
 * Decoding starts at buf.data, stops at buf.data + buf.size, and uses group
 * number 0 for the top-level message.  Returns the result of
 * upb_decode_message(). */
bool upb_decode(upb_strview buf, void *msg, const upb_msglayout *l) {
  const char *end = buf.data + buf.size;
  upb_decstate state;

  state.ptr = buf.data;
  return upb_decode_message(&state, end, 0, msg, l);
}
1090 
1091 #undef CHK
1092 
1093 
1094 #include <ctype.h>
1095 #include <stdlib.h>
1096 #include <string.h>
1097 
/* Length-prefixed string stored in a single allocation.
 *
 * The one-element |str| array is deliberate: newstr() allocates
 * sizeof(str_t) + len bytes, so that built-in byte provides the room for the
 * trailing '\0'. */
typedef struct {
  size_t len;   /* Number of bytes in |str|, excluding the terminator. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
1102 
newstr(const char * data,size_t len)1103 static str_t *newstr(const char *data, size_t len) {
1104   str_t *ret = upb_gmalloc(sizeof(*ret) + len);
1105   if (!ret) return NULL;
1106   ret->len = len;
1107   memcpy(ret->str, data, len);
1108   ret->str[len] = '\0';
1109   return ret;
1110 }
1111 
/* Releases a string created by newstr(). */
static void freestr(str_t *s) {
  upb_gfree(s);
}
1113 
/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */

/* Returns true when |c| lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  return !(c < low || c > high);
}
1118 
/* Returns true for 'A'-'Z', 'a'-'z' and the underscore. */
static bool upb_isletter(char c) {
  if (c == '_') {
    return true;
  }
  return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z');
}
1122 
/* Returns true for anything upb_isletter() accepts, plus '0'-'9'. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) {
    return true;
  }
  return upb_isbetween(c, '0', '9');
}
1126 
/* Checks that the first |len| bytes of |str| form a valid identifier.
 *
 * Each path component must start with a letter (per upb_isletter()) and
 * continue with alphanumerics (per upb_isalphanum()).  When |full| is true,
 * components may be separated by single '.' characters; when false, any '.'
 * is an error.  An empty string, or one ending right after a '.', is rejected
 * without a status message.  On the other failures an explanatory message is
 * written to |s|. */
static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
  bool at_component_start = true;
  size_t pos = 0;

  while (pos < len) {
    const char ch = str[pos++];

    if (ch == '.') {
      if (at_component_start || !full) {
        upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
        return false;
      }
      at_component_start = true;
      continue;
    }

    if (at_component_start) {
      if (!upb_isletter(ch)) {
        upb_status_seterrf(
            s, "invalid name: path components must start with a letter (%s)",
            str);
        return false;
      }
      at_component_start = false;
      continue;
    }

    if (!upb_isalphanum(ch)) {
      upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
                         str);
      return false;
    }
  }

  /* Still expecting a component start means the name was empty or ended
   * with a '.'. */
  return !at_component_start;
}
1156 
upb_isoneof(const upb_refcounted * def)1157 static bool upb_isoneof(const upb_refcounted *def) {
1158   return def->vtbl == &upb_oneofdef_vtbl;
1159 }
1160 
upb_isfield(const upb_refcounted * def)1161 static bool upb_isfield(const upb_refcounted *def) {
1162   return def->vtbl == &upb_fielddef_vtbl;
1163 }
1164 
upb_trygetoneof(const upb_refcounted * def)1165 static const upb_oneofdef *upb_trygetoneof(const upb_refcounted *def) {
1166   return upb_isoneof(def) ? (const upb_oneofdef*)def : NULL;
1167 }
1168 
upb_trygetfield(const upb_refcounted * def)1169 static const upb_fielddef *upb_trygetfield(const upb_refcounted *def) {
1170   return upb_isfield(def) ? (const upb_fielddef*)def : NULL;
1171 }
1172 
1173 
1174 /* upb_def ********************************************************************/
1175 
upb_def_type(const upb_def * d)1176 upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
1177 
upb_def_fullname(const upb_def * d)1178 const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
1179 
upb_def_name(const upb_def * d)1180 const char *upb_def_name(const upb_def *d) {
1181   const char *p;
1182 
1183   if (d->fullname == NULL) {
1184     return NULL;
1185   } else if ((p = strrchr(d->fullname, '.')) == NULL) {
1186     /* No '.' in the name, return the full string. */
1187     return d->fullname;
1188   } else {
1189     /* Return one past the last '.'. */
1190     return p + 1;
1191   }
1192 }
1193 
/* Replaces the def's full name with a private copy of |fullname|.
 *
 * The def must not be frozen.  |fullname| must pass upb_isident() as a full
 * (dotted) identifier; otherwise false is returned with the reason in |s|.
 * If the copy cannot be allocated, an out-of-memory status is set and the
 * previous name is left untouched. */
bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
  char *copy;

  UPB_ASSERT(!upb_def_isfrozen(def));

  if (!upb_isident(fullname, strlen(fullname), true, s)) {
    return false;
  }

  copy = upb_gstrdup(fullname);
  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* Release the old name only once the replacement is secured. */
  upb_gfree((void*)def->fullname);
  def->fullname = copy;
  return true;
}
1210 
upb_def_file(const upb_def * d)1211 const upb_filedef *upb_def_file(const upb_def *d) { return d->file; }
1212 
/* Initializes the common upb_def portion of a def.
 *
 * The refcounted base is set up first with |vtbl| and |owner|; if that fails
 * the def's own members are left untouched and false is returned.  Otherwise
 * the type is recorded and the remaining members start out empty. */
static bool upb_def_init(upb_def *def, upb_deftype_t type,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
  upb_refcounted *base = upb_def_upcast_mutable(def);

  if (!upb_refcounted_init(base, vtbl, owner)) {
    return false;
  }

  def->file = NULL;
  def->fullname = NULL;
  def->came_from_user = false;
  def->type = type;
  return true;
}
1223 
/* Releases the storage owned by the common upb_def portion: the full name. */
static void upb_def_uninit(upb_def *def) {
  void *owned_name = (void*)def->fullname;
  upb_gfree(owned_name);
}
1227 
/* Returns the msgdef's full name for use in diagnostics, substituting
 * "(anonymous)" when no name has been set. */
static const char *msgdef_name(const upb_msgdef *m) {
  const char *name = upb_def_fullname(upb_msgdef_upcast(m));

  if (name == NULL) {
    return "(anonymous)";
  }
  return name;
}
1232 
/* Validates a single fielddef as part of freezing its message.
 *
 * Checks, in order: name and number are set; the type was initialized; only
 * message-typed fields are lazy; a required subdef is resolved, present, and
 * either frozen or part of the set being frozen; enum defaults resolve both
 * by name and by number; and mapentry messages are referenced only from
 * repeated fields.  On the first failure a message is stored in |s| and false
 * is returned.
 *
 * Side effect: for enum fields the effective numeric default is written back
 * into the field. */
static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "fielddef must have name and number set");
    return false;
  }

  if (!f->type_is_set_) {
    upb_status_seterrmsg(s, "fielddef type was not initialized");
    return false;
  }

  if (upb_fielddef_lazy(f) &&
      upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
    upb_status_seterrmsg(s,
                         "only length-delimited submessage fields may be lazy");
    return false;
  }

  if (upb_fielddef_hassubdef(f)) {
    const upb_def *target;

    /* A symbolic subdef is still a name that was never resolved. */
    if (f->subdef_is_symbolic) {
      upb_status_seterrf(s, "field '%s.%s' has not been resolved",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    target = upb_fielddef_subdef(f);
    if (target == NULL) {
      upb_status_seterrf(s, "field %s.%s is missing required subdef",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    /* came_from_user marks defs that belong to the set being frozen now. */
    if (!upb_def_isfrozen(target) && !target->came_from_user) {
      upb_status_seterrf(s,
                         "subdef of field %s.%s is not frozen or being frozen",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }
  }

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    bool default_has_name = upb_fielddef_enumhasdefaultstr(f);
    bool default_has_number = upb_fielddef_enumhasdefaultint32(f);

    /* Previously verified by upb_validate_enumdef(). */
    UPB_ASSERT(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);

    /* The enumdef exists and is non-empty, so at least one form of the
     * default must be available: either the enum's own default was inherited,
     * or the user supplied one as an int32 or as a string. */
    UPB_ASSERT(default_has_name || default_has_number);

    if (!default_has_name) {
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%d) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultint32(f));
      return false;
    }

    if (!default_has_number) {
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%s) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultstr(f, NULL));
      return false;
    }

    /* Store the effective numeric default on the field itself, in case it
     * was only being read "by reference" from the enumdef. */
    upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
  }

  /* MapEntry submessages may only be the target of repeated fields, never of
   * optional/required (singular) ones. */
  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
      upb_fielddef_msgsubdef(f) != NULL) {
    const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
    if (upb_msgdef_mapentry(entry) && !upb_fielddef_isseq(f)) {
      upb_status_seterrf(s,
                         "Field %s refers to mapentry message but is not "
                         "a repeated field",
                         upb_fielddef_name(f) ? upb_fielddef_name(f) :
                         "(unnamed)");
      return false;
    }
  }

  return true;
}
1327 
/* Validates an enumdef for freezing: it must define at least one value.
 * On failure an explanatory message naming the enum is stored in |s|. */
static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
  if (upb_enumdef_numvals(e) != 0) {
    return true;
  }

  upb_status_seterrf(s, "enum %s has no members (must have at least one)",
                     upb_enumdef_fullname(e));
  return false;
}
1337 
1338 /* All submessage fields are lower than all other fields.
1339  * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)1340 uint32_t field_rank(const upb_fielddef *f) {
1341   uint32_t ret = upb_fielddef_number(f);
1342   const uint32_t high_bit = 1 << 30;
1343   UPB_ASSERT(ret < high_bit);
1344   if (!upb_fielddef_issubmsg(f))
1345     ret |= high_bit;
1346   return ret;
1347 }
1348 
cmp_fields(const void * p1,const void * p2)1349 int cmp_fields(const void *p1, const void *p2) {
1350   const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
1351   const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
1352   return field_rank(f1) - field_rank(f2);
1353 }
1354 
/* Validates every field of |m| and assigns field indexes, handler selector
 * bases and oneof indexes.
 *
 * Fields are sorted with cmp_fields() so that submessage fields receive the
 * lowest indexes; upb relies on this internally but does not guarantee it
 * publicly.  Selectors for the fields start after the static selectors plus
 * one per submessage field.  In debug builds the selectors are additionally
 * inserted into a table to verify they are all distinct.
 *
 * Returns false (with |s| set) if a field fails validation or the scratch
 * array cannot be allocated.  A message with no fields returns early, before
 * any oneof index is assigned. */
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
  upb_msg_field_iter field_it;
  upb_msg_oneof_iter oneof_it;
  int i;
  uint32_t next_selector;
  int field_total = upb_msgdef_numfields(m);
  upb_fielddef **sorted;

  if (field_total == 0) {
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    m->submsg_field_count = 0;
    return true;
  }

  sorted = upb_gmalloc(field_total * sizeof(*sorted));
  if (sorted == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* Collect the fields, validating each and counting the submessage ones. */
  m->submsg_field_count = 0;
  for (upb_msg_field_begin(&field_it, m), i = 0;
       !upb_msg_field_done(&field_it);
       upb_msg_field_next(&field_it), i++) {
    upb_fielddef *cur = upb_msg_iter_field(&field_it);
    UPB_ASSERT(cur->msg.def == m);
    if (!upb_validate_field(cur, s)) {
      upb_gfree(sorted);
      return false;
    }
    if (upb_fielddef_issubmsg(cur)) {
      m->submsg_field_count++;
    }
    sorted[i] = cur;
  }

  qsort(sorted, field_total, sizeof(*sorted), cmp_fields);

  /* Hand out indexes and selector ranges in sorted order. */
  next_selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (i = 0; i < field_total; i++) {
    upb_fielddef *cur = sorted[i];
    cur->index_ = i;
    cur->selector_base = next_selector + upb_handlers_selectorbaseoffset(cur);
    next_selector += upb_handlers_selectorcount(cur);
  }
  m->selector_count = next_selector;

#ifndef NDEBUG
  {
    /* Verify that all selectors for the message are distinct. */
#define TRY(type) \
    if (upb_handlers_getselector(cur, type, &sel)) { \
      upb_inttable_insert(&seen, sel, marker); \
    }

    upb_inttable seen;
    upb_value marker;
    upb_selector_t sel;

    upb_inttable_init(&seen, UPB_CTYPE_BOOL);
    marker = upb_value_bool(true);
    upb_inttable_insert(&seen, UPB_STARTMSG_SELECTOR, marker);
    upb_inttable_insert(&seen, UPB_ENDMSG_SELECTOR, marker);
    upb_inttable_insert(&seen, UPB_UNKNOWN_SELECTOR, marker);
    for (upb_msg_field_begin(&field_it, m);
         !upb_msg_field_done(&field_it);
         upb_msg_field_next(&field_it)) {
      upb_fielddef *cur = upb_msg_iter_field(&field_it);
      /* These calls will assert-fail in upb_table if the value already
       * exists. */
      TRY(UPB_HANDLER_INT32)
      TRY(UPB_HANDLER_INT64)
      TRY(UPB_HANDLER_UINT32)
      TRY(UPB_HANDLER_UINT64)
      TRY(UPB_HANDLER_FLOAT)
      TRY(UPB_HANDLER_DOUBLE)
      TRY(UPB_HANDLER_BOOL)
      TRY(UPB_HANDLER_STARTSTR)
      TRY(UPB_HANDLER_STRING)
      TRY(UPB_HANDLER_ENDSTR)
      TRY(UPB_HANDLER_STARTSUBMSG)
      TRY(UPB_HANDLER_ENDSUBMSG)
      TRY(UPB_HANDLER_STARTSEQ)
      TRY(UPB_HANDLER_ENDSEQ)
    }
    upb_inttable_uninit(&seen);
  }
#undef TRY
#endif

  /* Oneofs are numbered in iteration order. */
  for (i = 0, upb_msg_oneof_begin(&oneof_it, m);
       !upb_msg_oneof_done(&oneof_it);
       upb_msg_oneof_next(&oneof_it), i++) {
    upb_oneofdef *o = upb_msg_iter_oneof(&oneof_it);
    o->index = i;
  }

  upb_gfree(sorted);
  return true;
}
1455 
/* Sets m->well_known_type from the message's full name.
 *
 * The name is compared against a fixed table of google.protobuf.* types; an
 * unnamed message, or one whose name is not in the table, is tagged
 * UPB_WELLKNOWN_UNSPECIFIED. */
static void assign_msg_wellknowntype(upb_msgdef *m) {
  static const struct {
    const char *fullname;
    int type;
  } known[] = {
    {"google.protobuf.Any", UPB_WELLKNOWN_ANY},
    {"google.protobuf.FieldMask", UPB_WELLKNOWN_FIELDMASK},
    {"google.protobuf.Duration", UPB_WELLKNOWN_DURATION},
    {"google.protobuf.Timestamp", UPB_WELLKNOWN_TIMESTAMP},
    {"google.protobuf.DoubleValue", UPB_WELLKNOWN_DOUBLEVALUE},
    {"google.protobuf.FloatValue", UPB_WELLKNOWN_FLOATVALUE},
    {"google.protobuf.Int64Value", UPB_WELLKNOWN_INT64VALUE},
    {"google.protobuf.UInt64Value", UPB_WELLKNOWN_UINT64VALUE},
    {"google.protobuf.Int32Value", UPB_WELLKNOWN_INT32VALUE},
    {"google.protobuf.UInt32Value", UPB_WELLKNOWN_UINT32VALUE},
    {"google.protobuf.BoolValue", UPB_WELLKNOWN_BOOLVALUE},
    {"google.protobuf.StringValue", UPB_WELLKNOWN_STRINGVALUE},
    {"google.protobuf.BytesValue", UPB_WELLKNOWN_BYTESVALUE},
    {"google.protobuf.Value", UPB_WELLKNOWN_VALUE},
    {"google.protobuf.ListValue", UPB_WELLKNOWN_LISTVALUE},
    {"google.protobuf.Struct", UPB_WELLKNOWN_STRUCT}
  };
  const char *name = upb_msgdef_fullname(m);
  size_t i;

  if (name != NULL) {
    for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
      if (strcmp(name, known[i].fullname) == 0) {
        m->well_known_type = known[i].type;
        return;
      }
    }
  }

  m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
}
1498 
/* Validates the |n| defs in |defs| as a unit, in preparation for freezing.
 *
 * Pass 1 rejects defs that are already frozen and standalone fielddefs, marks
 * each remaining def as came_from_user, and validates enumdefs.  Marking
 * every def first lets pass 2 check that subdefs are inside the set without
 * searching for them.
 *
 * Pass 2 compacts the lookup tables of messages and enums and, for messages,
 * assigns indexes/selectors and the well-known-type tag.
 *
 * On failure the came_from_user marks are cleared on all |n| defs, |s| holds
 * the reason, and false is returned. */
bool _upb_def_validate(upb_def *const*defs, size_t n, upb_status *s) {
  size_t i;

  for (i = 0; i < n; i++) {
    upb_def *def = defs[i];

    if (upb_def_isfrozen(def)) {
      /* Could relax this requirement if it's annoying. */
      upb_status_seterrmsg(s, "def is already frozen");
      goto fail;
    }

    if (def->type == UPB_DEF_FIELD) {
      upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
      goto fail;
    }

    /* Set now to detect transitive closure in the second pass. */
    def->came_from_user = true;

    if (def->type == UPB_DEF_ENUM &&
        !upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
      goto fail;
    }
  }

  for (i = 0; i < n; i++) {
    upb_def *def = defs[i];
    upb_msgdef *msg = upb_dyncast_msgdef_mutable(def);
    upb_enumdef *en = upb_dyncast_enumdef_mutable(def);

    if (msg) {
      upb_inttable_compact(&msg->itof);
      if (!assign_msg_indices(msg, s)) {
        goto fail;
      }
      assign_msg_wellknowntype(msg);
    } else if (en) {
      upb_inttable_compact(&en->iton);
    }
  }

  return true;

fail:
  /* Undo the pass-1 marks so a later attempt starts clean. */
  for (i = 0; i < n; i++) {
    defs[i]->came_from_user = false;
  }
  UPB_ASSERT(!(s && upb_ok(s)));
  return false;
}
1552 
/* Validates and then freezes the |n| defs in |defs|.
 *
 * Returns false without freezing anything if _upb_def_validate() fails;
 * otherwise returns the result of upb_refcounted_freeze(). */
bool upb_def_freeze(upb_def *const* defs, size_t n, upb_status *s) {
  /* Def graph contains FieldDefs between each MessageDef, so double the
   * limit. */
  const size_t maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
  bool valid = _upb_def_validate(defs, n, s);

  if (!valid) {
    return false;
  }

  /* Validation all passed; freeze the objects. */
  return upb_refcounted_freeze((upb_refcounted *const*)defs, n, s, maxdepth);
}
1566 
1567 
1568 /* upb_enumdef ****************************************************************/
1569 
/* Refcounted "visit" callback for enumdefs: reports the edge from the enum
 * to its filedef, if it has one. */
static void visitenum(const upb_refcounted *r, upb_refcounted_visit *visit,
                      void *closure) {
  const upb_enumdef *e = (const upb_enumdef*)r;
  const upb_filedef *file = upb_def_file(upb_enumdef_upcast(e));

  if (file) {
    visit(r, upb_filedef_upcast(file), closure);
  }
}
1578 
/* Refcounted "free" callback for enumdefs: releases the name strings held by
 * the number-to-name table, both tables, the common def state, and finally
 * the enumdef itself. */
static void freeenum(upb_refcounted *r) {
  upb_enumdef *e = (upb_enumdef*)r;
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &e->iton);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    /* To clean up the upb_gstrdup() from upb_enumdef_addval(). */
    upb_gfree(upb_value_getcstr(upb_inttable_iter_value(&it)));
  }

  upb_strtable_uninit(&e->ntoi);
  upb_inttable_uninit(&e->iton);
  upb_def_uninit(upb_enumdef_upcast_mutable(e));
  upb_gfree(e);
}
1592 
1593 const struct upb_refcounted_vtbl upb_enumdef_vtbl = {&visitenum, &freeenum};
1594 
upb_enumdef_new(const void * owner)1595 upb_enumdef *upb_enumdef_new(const void *owner) {
1596   upb_enumdef *e = upb_gmalloc(sizeof(*e));
1597   if (!e) return NULL;
1598 
1599   if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM,
1600                     &upb_enumdef_vtbl, owner)) {
1601     goto err2;
1602   }
1603 
1604   if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
1605   if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
1606   return e;
1607 
1608 err1:
1609   upb_strtable_uninit(&e->ntoi);
1610 err2:
1611   upb_gfree(e);
1612   return NULL;
1613 }
1614 
/* Freezes a single enumdef by passing it to upb_def_freeze() as a one-element
 * set. */
bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
  upb_def *single[1];

  single[0] = upb_enumdef_upcast_mutable(e);
  return upb_def_freeze(single, 1, status);
}
1619 
upb_enumdef_fullname(const upb_enumdef * e)1620 const char *upb_enumdef_fullname(const upb_enumdef *e) {
1621   return upb_def_fullname(upb_enumdef_upcast(e));
1622 }
1623 
upb_enumdef_name(const upb_enumdef * e)1624 const char *upb_enumdef_name(const upb_enumdef *e) {
1625   return upb_def_name(upb_enumdef_upcast(e));
1626 }
1627 
/* Sets the enum's full name; forwards to upb_def_setfullname(). */
bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
                             upb_status *s) {
  upb_def *base = upb_enumdef_upcast_mutable(e);
  return upb_def_setfullname(base, fullname, s);
}
1632 
/* Adds the value |name| = |num| to the enum.
 *
 * |name| must be a simple (undotted) identifier that is not already defined
 * in this enum.  Several names may share one number; the number-to-name table
 * keeps only the first name registered for a given number.  The first value
 * ever added also becomes the enum's default.
 *
 * Returns false with |status| set on an invalid or duplicate name, or when
 * memory runs out.  On an out-of-memory failure while recording the reverse
 * mapping, the name-to-number entry is rolled back and the duplicated name
 * string is released. */
bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
                        upb_status *status) {
  char *name2;

  if (!upb_isident(name, strlen(name), false, status)) {
    return false;
  }

  if (upb_enumdef_ntoiz(e, name, NULL)) {
    upb_status_seterrf(status, "name '%s' is already defined", name);
    return false;
  }

  if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
    upb_status_seterrmsg(status, "out of memory");
    return false;
  }

  if (!upb_inttable_lookup(&e->iton, num, NULL)) {
    bool stored = false;

    name2 = upb_gstrdup(name);
    if (name2) {
      stored = upb_inttable_insert(&e->iton, num, upb_value_cstr(name2));
      if (!stored) {
        /* The table did not take ownership, so the copy must be freed here;
         * previously it leaked on this path. */
        upb_gfree(name2);
      }
    }

    if (!stored) {
      upb_status_seterrmsg(status, "out of memory");
      upb_strtable_remove(&e->ntoi, name, NULL);
      return false;
    }
  }

  if (upb_enumdef_numvals(e) == 1) {
    bool ok = upb_enumdef_setdefault(e, num, NULL);
    UPB_ASSERT(ok);
  }

  return true;
}
1667 
upb_enumdef_default(const upb_enumdef * e)1668 int32_t upb_enumdef_default(const upb_enumdef *e) {
1669   UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
1670   return e->defaultval;
1671 }
1672 
/* Sets the enum's default to |val|, which must already be a number in the
 * enum.  The enum must not be frozen.  Returns false with a message in |s|
 * when |val| is not in the enum. */
bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
  UPB_ASSERT(!upb_enumdef_isfrozen(e));

  if (upb_enumdef_iton(e, val)) {
    e->defaultval = val;
    return true;
  }

  upb_status_seterrf(s, "number '%d' is not in the enum.", val);
  return false;
}
1682 
upb_enumdef_numvals(const upb_enumdef * e)1683 int upb_enumdef_numvals(const upb_enumdef *e) {
1684   return upb_strtable_count(&e->ntoi);
1685 }
1686 
/* Positions iterator |i| at the start of enum |e|'s values.  Iteration runs
 * over the name->number table so that names sharing a number are each
 * visited. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  const upb_strtable *by_name = &e->ntoi;
  upb_strtable_begin(i, by_name);
}
1691 
/* Advances the enum iterator by forwarding to upb_strtable_next(). */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns the result of upb_strtable_done() for the enum iterator. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
1694 
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)1695 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
1696                       size_t len, int32_t *num) {
1697   upb_value v;
1698   if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
1699     return false;
1700   }
1701   if (num) *num = upb_value_getint32(v);
1702   return true;
1703 }
1704 
upb_enumdef_iton(const upb_enumdef * def,int32_t num)1705 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
1706   upb_value v;
1707   return upb_inttable_lookup32(&def->iton, num, &v) ?
1708       upb_value_getcstr(v) : NULL;
1709 }
1710 
/* Returns the key (the value's name) at the iterator's current position. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  const char *key = upb_strtable_iter_key(iter);
  return key;
}
1714 
/* Returns the int32 number stored at the iterator's current position. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  upb_value current = upb_strtable_iter_value(iter);
  return upb_value_getint32(current);
}
1718 
1719 
1720 /* upb_fielddef ***************************************************************/
1721 
1722 static void upb_fielddef_init_default(upb_fielddef *f);
1723 
/* Frees the string held as the field's default, if there is one.  Nothing is
 * done unless the type has been set, the default is flagged as a string, and
 * the stored pointer is non-NULL (checked in that order). */
static void upb_fielddef_uninit_default(upb_fielddef *f) {
  if (!f->type_is_set_) {
    return;
  }
  if (!f->default_is_string) {
    return;
  }
  if (f->defaultval.bytes) {
    freestr(f->defaultval.bytes);
  }
}
1728 
upb_fielddef_fullname(const upb_fielddef * e)1729 const char *upb_fielddef_fullname(const upb_fielddef *e) {
1730   return upb_def_fullname(upb_fielddef_upcast(e));
1731 }
1732 
/* Refcounting "visit" callback for a fielddef.  Invokes |visit| once for each
 * object this field currently points at: its containing message, its
 * containing oneof, its subdef, and the file of its def -- each only when
 * non-NULL. */
static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_fielddef *f = (const upb_fielddef*)r;
  const upb_def *def = upb_fielddef_upcast(f);
  const upb_msgdef *msg;
  const upb_oneofdef *oneof;
  const upb_def *sub;
  const upb_filedef *file;

  msg = upb_fielddef_containingtype(f);
  if (msg) {
    visit(r, upb_msgdef_upcast2(msg), closure);
  }

  oneof = upb_fielddef_containingoneof(f);
  if (oneof) {
    visit(r, upb_oneofdef_upcast(oneof), closure);
  }

  sub = upb_fielddef_subdef(f);
  if (sub) {
    visit(r, upb_def_upcast(sub), closure);
  }

  file = upb_def_file(def);
  if (file) {
    visit(r, upb_filedef_upcast(file), closure);
  }
}
1750 
/* Refcounting "free" callback for a fielddef: releases the default value,
 * any symbolic (by-name) subdef / containing-type strings, the upb_def base,
 * and finally the fielddef itself. */
static void freefield(upb_refcounted *r) {
  upb_fielddef *field = (upb_fielddef*)r;

  upb_fielddef_uninit_default(field);

  if (field->subdef_is_symbolic) {
    upb_gfree(field->sub.name);
  }
  if (field->msg_is_symbolic) {
    upb_gfree(field->msg.name);
  }

  upb_def_uninit(upb_fielddef_upcast_mutable(field));
  upb_gfree(field);
}
1761 
enumdefaultstr(const upb_fielddef * f)1762 static const char *enumdefaultstr(const upb_fielddef *f) {
1763   const upb_enumdef *e;
1764   UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1765   e = upb_fielddef_enumsubdef(f);
1766   if (f->default_is_string && f->defaultval.bytes) {
1767     /* Default was explicitly set as a string. */
1768     str_t *s = f->defaultval.bytes;
1769     return s->str;
1770   } else if (e) {
1771     if (!f->default_is_string) {
1772       /* Default was explicitly set as an integer; look it up in enumdef. */
1773       const char *name = upb_enumdef_iton(e, f->defaultval.sint);
1774       if (name) {
1775         return name;
1776       }
1777     } else {
1778       /* Default is completely unset; pull enumdef default. */
1779       if (upb_enumdef_numvals(e) > 0) {
1780         const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
1781         UPB_ASSERT(name);
1782         return name;
1783       }
1784     }
1785   }
1786   return NULL;
1787 }
1788 
enumdefaultint32(const upb_fielddef * f,int32_t * val)1789 static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
1790   const upb_enumdef *e;
1791   UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1792   e = upb_fielddef_enumsubdef(f);
1793   if (!f->default_is_string) {
1794     /* Default was explicitly set as an integer. */
1795     *val = f->defaultval.sint;
1796     return true;
1797   } else if (e) {
1798     if (f->defaultval.bytes) {
1799       /* Default was explicitly set as a str; try to lookup corresponding int. */
1800       str_t *s = f->defaultval.bytes;
1801       if (upb_enumdef_ntoiz(e, s->str, val)) {
1802         return true;
1803       }
1804     } else {
1805       /* Default is unset; try to pull in enumdef default. */
1806       if (upb_enumdef_numvals(e) > 0) {
1807         *val = upb_enumdef_default(e);
1808         return true;
1809       }
1810     }
1811   }
1812   return false;
1813 }
1814 
1815 const struct upb_refcounted_vtbl upb_fielddef_vtbl = {visitfield, freefield};
1816 
upb_fielddef_new(const void * o)1817 upb_fielddef *upb_fielddef_new(const void *o) {
1818   upb_fielddef *f = upb_gmalloc(sizeof(*f));
1819   if (!f) return NULL;
1820   if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD,
1821                     &upb_fielddef_vtbl, o)) {
1822     upb_gfree(f);
1823     return NULL;
1824   }
1825   f->msg.def = NULL;
1826   f->sub.def = NULL;
1827   f->oneof = NULL;
1828   f->subdef_is_symbolic = false;
1829   f->msg_is_symbolic = false;
1830   f->label_ = UPB_LABEL_OPTIONAL;
1831   f->type_ = UPB_TYPE_INT32;
1832   f->number_ = 0;
1833   f->type_is_set_ = false;
1834   f->tagdelim = false;
1835   f->is_extension_ = false;
1836   f->lazy_ = false;
1837   f->packed_ = true;
1838 
1839   /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
1840    * with all integer types and is in some since more "default" since the most
1841    * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
1842    *
1843    * Other options to consider:
1844    * - there is no default; users must set this manually (like type).
1845    * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
1846    *   be an optimal default for signed integers. */
1847   f->intfmt = UPB_INTFMT_VARIABLE;
1848   return f;
1849 }
1850 
upb_fielddef_typeisset(const upb_fielddef * f)1851 bool upb_fielddef_typeisset(const upb_fielddef *f) {
1852   return f->type_is_set_;
1853 }
1854 
upb_fielddef_type(const upb_fielddef * f)1855 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
1856   UPB_ASSERT(f->type_is_set_);
1857   return f->type_;
1858 }
1859 
upb_fielddef_index(const upb_fielddef * f)1860 uint32_t upb_fielddef_index(const upb_fielddef *f) {
1861   return f->index_;
1862 }
1863 
upb_fielddef_label(const upb_fielddef * f)1864 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
1865   return f->label_;
1866 }
1867 
upb_fielddef_intfmt(const upb_fielddef * f)1868 upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
1869   return f->intfmt;
1870 }
1871 
upb_fielddef_istagdelim(const upb_fielddef * f)1872 bool upb_fielddef_istagdelim(const upb_fielddef *f) {
1873   return f->tagdelim;
1874 }
1875 
upb_fielddef_number(const upb_fielddef * f)1876 uint32_t upb_fielddef_number(const upb_fielddef *f) {
1877   return f->number_;
1878 }
1879 
upb_fielddef_isextension(const upb_fielddef * f)1880 bool upb_fielddef_isextension(const upb_fielddef *f) {
1881   return f->is_extension_;
1882 }
1883 
upb_fielddef_lazy(const upb_fielddef * f)1884 bool upb_fielddef_lazy(const upb_fielddef *f) {
1885   return f->lazy_;
1886 }
1887 
upb_fielddef_packed(const upb_fielddef * f)1888 bool upb_fielddef_packed(const upb_fielddef *f) {
1889   return f->packed_;
1890 }
1891 
upb_fielddef_name(const upb_fielddef * f)1892 const char *upb_fielddef_name(const upb_fielddef *f) {
1893   return upb_def_fullname(upb_fielddef_upcast(f));
1894 }
1895 
/* Writes the JSON name of field |f| into |buf| (capacity |len| bytes).
 *
 * The JSON name is the field name with every underscore removed and the
 * character following an underscore upper-cased.
 *
 * Returns the number of bytes the full result occupies, including the
 * terminating NUL.  If that exceeds |len| the output is truncated, and when
 * |len| is non-zero the last byte of |buf| is set to NUL.  Nothing is written
 * when |len| is 0.  If the field has no name, an empty string is written
 * (when it fits) and 0 is returned. */
size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) {
  const char *name = upb_fielddef_name(f);
  size_t src, dst = 0;
  bool ucase_next = false;

/* Appends one byte to the logical output.  Wrapped in do/while so that it
 * behaves as a single statement wherever it is used. */
#define WRITE(byte)                             \
  do {                                          \
    ++dst;                                      \
    if (dst < len) buf[dst - 1] = (byte);       \
    else if (dst == len) buf[dst - 1] = '\0';   \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() is only defined for values representable as unsigned char
       * (or EOF), so convert before the call instead of passing a possibly
       * negative plain char. */
      WRITE(toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
1934 
upb_fielddef_containingtype(const upb_fielddef * f)1935 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
1936   return f->msg_is_symbolic ? NULL : f->msg.def;
1937 }
1938 
upb_fielddef_containingoneof(const upb_fielddef * f)1939 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
1940   return f->oneof;
1941 }
1942 
upb_fielddef_containingtype_mutable(upb_fielddef * f)1943 upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
1944   return (upb_msgdef*)upb_fielddef_containingtype(f);
1945 }
1946 
/* Returns the symbolic containing-type name, or NULL when the containing
 * type is not stored by name. */
const char *upb_fielddef_containingtypename(upb_fielddef *f) {
  if (!f->msg_is_symbolic) {
    return NULL;
  }
  return f->msg.name;
}
1950 
/* Frees the symbolic containing-type name, if the field holds one. */
static void release_containingtype(upb_fielddef *f) {
  if (!f->msg_is_symbolic) {
    return;
  }
  upb_gfree(f->msg.name);
}
1954 
/* Records |name| as the (symbolic) name of the field's containing type.
 *
 * The field must not be frozen (asserted).  Fails with an error in |s| if the
 * field already has a resolved containing message, or if copying |name|
 * fails.  On success any previously stored symbolic name is freed and
 * replaced by a private copy of |name|. */
bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
                                        upb_status *s) {
  char *owned_name;

  UPB_ASSERT(!upb_fielddef_isfrozen(f));

  if (upb_fielddef_containingtype(f) != NULL) {
    upb_status_seterrmsg(s, "field has already been added to a message.");
    return false;
  }

  /* TODO: validate name (upb_isident() doesn't quite work atm because this
   * name may have a leading "."). */

  owned_name = upb_gstrdup(name);
  if (owned_name == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  release_containingtype(f);
  f->msg_is_symbolic = true;
  f->msg.name = owned_name;
  return true;
}
1977 
/* Sets the field's name.  Refused (error in |s|, returns false) once the
 * field has a containing message or oneof; otherwise the result of
 * upb_def_setfullname() is returned. */
bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
  const bool attached =
      upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f);

  if (!attached) {
    return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
  }

  upb_status_seterrmsg(s, "Already added to message or oneof");
  return false;
}
1985 
/* Asserts that field |f| has its type set and that the type equals |type|.
 * The UPB_UNUSED markers cover builds in which the assertion does not use
 * its arguments. */
static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
  UPB_ASSERT(f->type_is_set_ && upb_fielddef_type(f) == type);
  UPB_UNUSED(type);
  UPB_UNUSED(f);
}
1991 
upb_fielddef_defaultint64(const upb_fielddef * f)1992 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
1993   chkdefaulttype(f, UPB_TYPE_INT64);
1994   return f->defaultval.sint;
1995 }
1996 
upb_fielddef_defaultint32(const upb_fielddef * f)1997 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
1998   if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
1999     int32_t val;
2000     bool ok = enumdefaultint32(f, &val);
2001     UPB_ASSERT(ok);
2002     return val;
2003   } else {
2004     chkdefaulttype(f, UPB_TYPE_INT32);
2005     return f->defaultval.sint;
2006   }
2007 }
2008 
upb_fielddef_defaultuint64(const upb_fielddef * f)2009 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
2010   chkdefaulttype(f, UPB_TYPE_UINT64);
2011   return f->defaultval.uint;
2012 }
2013 
upb_fielddef_defaultuint32(const upb_fielddef * f)2014 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
2015   chkdefaulttype(f, UPB_TYPE_UINT32);
2016   return f->defaultval.uint;
2017 }
2018 
upb_fielddef_defaultbool(const upb_fielddef * f)2019 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
2020   chkdefaulttype(f, UPB_TYPE_BOOL);
2021   return f->defaultval.uint;
2022 }
2023 
upb_fielddef_defaultfloat(const upb_fielddef * f)2024 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
2025   chkdefaulttype(f, UPB_TYPE_FLOAT);
2026   return f->defaultval.flt;
2027 }
2028 
upb_fielddef_defaultdouble(const upb_fielddef * f)2029 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
2030   chkdefaulttype(f, UPB_TYPE_DOUBLE);
2031   return f->defaultval.dbl;
2032 }
2033 
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)2034 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
2035   UPB_ASSERT(f->type_is_set_);
2036   UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
2037          upb_fielddef_type(f) == UPB_TYPE_BYTES ||
2038          upb_fielddef_type(f) == UPB_TYPE_ENUM);
2039 
2040   if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
2041     const char *ret = enumdefaultstr(f);
2042     UPB_ASSERT(ret);
2043     /* Enum defaults can't have embedded NULLs. */
2044     if (len) *len = strlen(ret);
2045     return ret;
2046   }
2047 
2048   if (f->default_is_string) {
2049     str_t *str = f->defaultval.bytes;
2050     if (len) *len = str->len;
2051     return str->str;
2052   }
2053 
2054   return NULL;
2055 }
2056 
/* Resets the field's default to the initial value for its current type:
 * zero for numeric and bool types, a newly created empty string for
 * string/bytes, and the "not set" sentinel (string flag set, NULL pointer)
 * for enums.  Message fields get no default value. */
static void upb_fielddef_init_default(upb_fielddef *f) {
  f->default_is_string = false;

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_MESSAGE:
      break;

    case UPB_TYPE_ENUM:
      /* This is our special sentinel that indicates "not set" for an enum. */
      f->default_is_string = true;
      f->defaultval.bytes = NULL;
      break;

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      f->defaultval.bytes = newstr("", 0);
      f->default_is_string = true;
      break;

    case UPB_TYPE_BOOL:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      f->defaultval.uint = 0;
      break;

    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
      f->defaultval.sint = 0;
      break;

    case UPB_TYPE_FLOAT:
      f->defaultval.flt = 0;
      break;

    case UPB_TYPE_DOUBLE:
      f->defaultval.dbl = 0;
      break;
  }
}
2080 
upb_fielddef_subdef(const upb_fielddef * f)2081 const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
2082   return f->subdef_is_symbolic ? NULL : f->sub.def;
2083 }
2084 
upb_fielddef_msgsubdef(const upb_fielddef * f)2085 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
2086   const upb_def *def = upb_fielddef_subdef(f);
2087   return def ? upb_dyncast_msgdef(def) : NULL;
2088 }
2089 
upb_fielddef_enumsubdef(const upb_fielddef * f)2090 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
2091   const upb_def *def = upb_fielddef_subdef(f);
2092   return def ? upb_dyncast_enumdef(def) : NULL;
2093 }
2094 
upb_fielddef_subdef_mutable(upb_fielddef * f)2095 upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
2096   return (upb_def*)upb_fielddef_subdef(f);
2097 }
2098 
upb_fielddef_subdefname(const upb_fielddef * f)2099 const char *upb_fielddef_subdefname(const upb_fielddef *f) {
2100   if (f->subdef_is_symbolic) {
2101     return f->sub.name;
2102   } else if (f->sub.def) {
2103     return upb_def_fullname(f->sub.def);
2104   } else {
2105     return NULL;
2106   }
2107 }
2108 
/* Sets the field number.  Fails (error in |s|, returns false) if the field
 * already has a resolved containing message, or if |number| is 0. */
bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
  if (upb_fielddef_containingtype(f) != NULL) {
    upb_status_seterrmsg(
        s, "cannot change field number after adding to a message");
    return false;
  }

  if (number != 0) {
    f->number_ = number;
    return true;
  }

  upb_status_seterrf(s, "invalid field number (%u)", number);
  return false;
}
2122 
/* Sets the field's type.  The field must not be frozen and |type| must pass
 * upb_fielddef_checktype() (both asserted).  Any default stored for the old
 * type is released first, and the default is then re-initialized for the new
 * type. */
void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  UPB_ASSERT(upb_fielddef_checktype(type));

  /* Must run while the old type information is still in place. */
  upb_fielddef_uninit_default(f);

  f->type_is_set_ = true;
  f->type_ = type;

  upb_fielddef_init_default(f);
}
2131 
/* Configures the field from a descriptor type: sets the field type, the
 * integer format (fixed for [s]fixed32/64, zigzag for sint32/64, variable
 * otherwise) and the tag-delimited flag (true only for groups).
 *
 * The field must not be frozen (asserted).  An unrecognized |type| trips an
 * assertion; if assertions are disabled the field type is left untouched,
 * while intfmt is still set to variable and tagdelim to false. */
void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
  upb_fieldtype_t field_type = UPB_TYPE_INT32;
  upb_intfmt_t fmt = UPB_INTFMT_VARIABLE;
  bool recognized = true;

  UPB_ASSERT(!upb_fielddef_isfrozen(f));

  switch (type) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      field_type = UPB_TYPE_DOUBLE;
      break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      field_type = UPB_TYPE_FLOAT;
      break;

    case UPB_DESCRIPTOR_TYPE_INT64:
      field_type = UPB_TYPE_INT64;
      break;
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
      field_type = UPB_TYPE_INT64;
      fmt = UPB_INTFMT_FIXED;
      break;
    case UPB_DESCRIPTOR_TYPE_SINT64:
      field_type = UPB_TYPE_INT64;
      fmt = UPB_INTFMT_ZIGZAG;
      break;

    case UPB_DESCRIPTOR_TYPE_UINT64:
      field_type = UPB_TYPE_UINT64;
      break;
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      field_type = UPB_TYPE_UINT64;
      fmt = UPB_INTFMT_FIXED;
      break;

    case UPB_DESCRIPTOR_TYPE_INT32:
      field_type = UPB_TYPE_INT32;
      break;
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      field_type = UPB_TYPE_INT32;
      fmt = UPB_INTFMT_FIXED;
      break;
    case UPB_DESCRIPTOR_TYPE_SINT32:
      field_type = UPB_TYPE_INT32;
      fmt = UPB_INTFMT_ZIGZAG;
      break;

    case UPB_DESCRIPTOR_TYPE_UINT32:
      field_type = UPB_TYPE_UINT32;
      break;
    case UPB_DESCRIPTOR_TYPE_FIXED32:
      field_type = UPB_TYPE_UINT32;
      fmt = UPB_INTFMT_FIXED;
      break;

    case UPB_DESCRIPTOR_TYPE_BOOL:
      field_type = UPB_TYPE_BOOL;
      break;
    case UPB_DESCRIPTOR_TYPE_STRING:
      field_type = UPB_TYPE_STRING;
      break;
    case UPB_DESCRIPTOR_TYPE_BYTES:
      field_type = UPB_TYPE_BYTES;
      break;

    case UPB_DESCRIPTOR_TYPE_GROUP:
    case UPB_DESCRIPTOR_TYPE_MESSAGE:
      field_type = UPB_TYPE_MESSAGE;
      break;

    case UPB_DESCRIPTOR_TYPE_ENUM:
      field_type = UPB_TYPE_ENUM;
      break;

    default:
      UPB_ASSERT(false);
      recognized = false;
      break;
  }

  if (recognized) {
    upb_fielddef_settype(f, field_type);
  }
  upb_fielddef_setintfmt(f, fmt);
  upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
}
2192 
upb_fielddef_descriptortype(const upb_fielddef * f)2193 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
2194   switch (upb_fielddef_type(f)) {
2195     case UPB_TYPE_FLOAT:  return UPB_DESCRIPTOR_TYPE_FLOAT;
2196     case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
2197     case UPB_TYPE_BOOL:   return UPB_DESCRIPTOR_TYPE_BOOL;
2198     case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
2199     case UPB_TYPE_BYTES:  return UPB_DESCRIPTOR_TYPE_BYTES;
2200     case UPB_TYPE_ENUM:   return UPB_DESCRIPTOR_TYPE_ENUM;
2201     case UPB_TYPE_INT32:
2202       switch (upb_fielddef_intfmt(f)) {
2203         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
2204         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED32;
2205         case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT32;
2206       }
2207     case UPB_TYPE_INT64:
2208       switch (upb_fielddef_intfmt(f)) {
2209         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
2210         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED64;
2211         case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT64;
2212       }
2213     case UPB_TYPE_UINT32:
2214       switch (upb_fielddef_intfmt(f)) {
2215         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
2216         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED32;
2217         case UPB_INTFMT_ZIGZAG:   return -1;
2218       }
2219     case UPB_TYPE_UINT64:
2220       switch (upb_fielddef_intfmt(f)) {
2221         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
2222         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED64;
2223         case UPB_INTFMT_ZIGZAG:   return -1;
2224       }
2225     case UPB_TYPE_MESSAGE:
2226       return upb_fielddef_istagdelim(f) ?
2227           UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
2228   }
2229   return 0;
2230 }
2231 
/* Stores the is-extension flag.  The field must not be frozen (asserted). */
void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
  const bool frozen = upb_fielddef_isfrozen(f);
  UPB_ASSERT(!frozen);
  UPB_UNUSED(frozen);
  f->is_extension_ = is_extension;
}
2236 
/* Stores the lazy flag.  The field must not be frozen (asserted). */
void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
  const bool frozen = upb_fielddef_isfrozen(f);
  UPB_ASSERT(!frozen);
  UPB_UNUSED(frozen);
  f->lazy_ = lazy;
}
2241 
/* Stores the packed flag.  The field must not be frozen (asserted). */
void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
  const bool frozen = upb_fielddef_isfrozen(f);
  UPB_ASSERT(!frozen);
  UPB_UNUSED(frozen);
  f->packed_ = packed;
}
2246 
/* Stores the field label.  The field must not be frozen and |label| must
 * pass upb_fielddef_checklabel() (both asserted). */
void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  UPB_ASSERT(upb_fielddef_checklabel(label));

  f->label_ = label;
}
2252 
/* Stores the integer format.  The field must not be frozen and |fmt| must
 * pass upb_fielddef_checkintfmt() (both asserted). */
void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  UPB_ASSERT(upb_fielddef_checkintfmt(fmt));

  f->intfmt = fmt;
}
2258 
/* Stores the tag-delimited flag.  The field must not be frozen (asserted).
 * The flag is written once; a redundant second identical store was
 * removed. */
void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  f->tagdelim = tag_delim;
}
2264 
/* Prepares field |f| for receiving a non-string default of type |type|.
 *
 * Asserts (and returns false when assertions are disabled) unless the type
 * is set, the field is not frozen, and its type equals |type|.  If the
 * current default is a string it is freed -- a NULL string is tolerated only
 * for enums -- and the string flag is cleared.  Returns true when the caller
 * may store the new value. */
static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
  const bool settable = f->type_is_set_ && !upb_fielddef_isfrozen(f) &&
                        upb_fielddef_type(f) == type;

  if (!settable) {
    UPB_ASSERT(false);
    return false;
  }

  if (f->default_is_string) {
    str_t *old_str = f->defaultval.bytes;
    UPB_ASSERT(old_str || type == UPB_TYPE_ENUM);
    if (old_str) {
      freestr(old_str);
    }
  }

  f->default_is_string = false;
  return true;
}
2279 
/* Stores |value| as the default of an INT64 field; does nothing when
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
  if (!checksetdefault(f, UPB_TYPE_INT64)) {
    return;
  }
  f->defaultval.sint = value;
}
2284 
/* Stores |value| as the default of an INT32 or ENUM field.  An enum field is
 * first checked as ENUM; if the field is not an enum, or that check fails,
 * it is checked as INT32.  The value is stored only when a check passes. */
void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
  bool accepted;

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM &&
      checksetdefault(f, UPB_TYPE_ENUM)) {
    accepted = true;
  } else {
    accepted = checksetdefault(f, UPB_TYPE_INT32);
  }

  if (accepted) {
    f->defaultval.sint = value;
  }
}
2292 
/* Stores |value| as the default of a UINT64 field; does nothing when
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT64)) {
    return;
  }
  f->defaultval.uint = value;
}
2297 
/* Stores |value| as the default of a UINT32 field; does nothing when
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT32)) {
    return;
  }
  f->defaultval.uint = value;
}
2302 
/* Stores |value| as the default of a BOOL field (in the unsigned slot of the
 * default union); does nothing when checksetdefault() rejects the field. */
void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
  if (!checksetdefault(f, UPB_TYPE_BOOL)) {
    return;
  }
  f->defaultval.uint = value;
}
2307 
/* Stores |value| as the default of a FLOAT field; does nothing when
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
  if (!checksetdefault(f, UPB_TYPE_FLOAT)) {
    return;
  }
  f->defaultval.flt = value;
}
2312 
/* Stores |value| as the default of a DOUBLE field; does nothing when
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
  if (!checksetdefault(f, UPB_TYPE_DOUBLE)) {
    return;
  }
  f->defaultval.dbl = value;
}
2317 
/* Sets the default of a STRING, BYTES or ENUM field to the |len| bytes at
 * |str|.
 *
 * The field must be a string/bytes field or an enum (asserted).  For enum
 * fields the value must be accepted by upb_isident(); otherwise false is
 * returned with the error left in |s| by that call.
 *
 * The new string is created before the old default is released, so when the
 * allocation fails the previous default stays intact, an out-of-memory error
 * is recorded in |s|, and false is returned.
 * NOTE(review): this assumes newstr() signals allocation failure by
 * returning NULL -- confirm against its definition.
 *
 * Returns true on success. */
bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
                                upb_status *s) {
  str_t *new_default;
  str_t *old_default = NULL;

  UPB_ASSERT(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
  if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
    return false;

  if (f->default_is_string) {
    /* A NULL string default is only valid as the enum "not set" sentinel. */
    old_default = f->defaultval.bytes;
    UPB_ASSERT(old_default || f->type_ == UPB_TYPE_ENUM);
  } else {
    /* Only enums can currently hold a non-string (integer) default here. */
    UPB_ASSERT(f->type_ == UPB_TYPE_ENUM);
  }

  new_default = newstr(str, len);
  if (!new_default) {
    upb_upberr_setoom(s);
    return false;
  }

  if (old_default) freestr(old_default);
  f->defaultval.bytes = new_default;
  f->default_is_string = true;
  return true;
}
2338 
/* Convenience wrapper around upb_fielddef_setdefaultstr() for NUL-terminated
 * strings.  A NULL |str| is forwarded with length 0.  The field's type must
 * be set (asserted).  The result of the underlying call is not returned. */
void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
                                 upb_status *s) {
  size_t len = 0;

  UPB_ASSERT(f->type_is_set_);
  if (str) {
    len = strlen(str);
  }
  upb_fielddef_setdefaultstr(f, str, len, s);
}
2344 
upb_fielddef_enumhasdefaultint32(const upb_fielddef * f)2345 bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
2346   int32_t val;
2347   UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
2348   return enumdefaultint32(f, &val);
2349 }
2350 
upb_fielddef_enumhasdefaultstr(const upb_fielddef * f)2351 bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
2352   UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
2353   return enumdefaultstr(f) != NULL;
2354 }
2355 
/* Checks that |subdef| is an acceptable subdef for field |f|: a message def
 * for message fields, an enum def for enum fields.  Any other field type is
 * rejected.  On rejection an error is recorded in |s| and false returned. */
static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
                                 upb_status *s) {
  switch (f->type_) {
    case UPB_TYPE_MESSAGE:
      if (upb_dyncast_msgdef(subdef)) {
        return true;
      }
      upb_status_seterrmsg(s, "invalid subdef type for this submessage field");
      return false;

    case UPB_TYPE_ENUM:
      if (upb_dyncast_enumdef(subdef)) {
        return true;
      }
      upb_status_seterrmsg(s, "invalid subdef type for this enum field");
      return false;

    default:
      upb_status_seterrmsg(s, "only message and enum fields can have a subdef");
      return false;
  }
}
2371 
/* Releases whatever the field currently holds as its subdef: frees the name
 * if it is symbolic, otherwise drops the field's ref on the subdef if one is
 * set.  The stored pointer itself is not cleared. */
static void release_subdef(upb_fielddef *f) {
  if (f->subdef_is_symbolic) {
    upb_gfree(f->sub.name);
    return;
  }
  if (f->sub.def) {
    upb_unref2(f->sub.def, f);
  }
}
2379 
/* Points the field at |subdef| (NULL clears it).  The field must not be
 * frozen and must be of a type that takes a subdef (both asserted).  A
 * non-NULL |subdef| must pass upb_subdef_typecheck(), otherwise false is
 * returned and nothing changes.  On success the previous subdef (or symbolic
 * name) is released and a ref is taken on the new subdef. */
bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
                            upb_status *s) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  UPB_ASSERT(upb_fielddef_hassubdef(f));

  if (subdef != NULL && !upb_subdef_typecheck(f, subdef, s)) {
    return false;
  }

  release_subdef(f);
  f->subdef_is_symbolic = false;
  f->sub.def = subdef;

  if (f->sub.def != NULL) {
    upb_ref2(f->sub.def, f);
  }
  return true;
}
2391 
/* Typed wrapper: upcasts the message def and forwards to
 * upb_fielddef_setsubdef(). */
bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
                               upb_status *s) {
  const upb_def *as_def = upb_msgdef_upcast(subdef);
  return upb_fielddef_setsubdef(f, as_def, s);
}
2396 
/* Typed wrapper: upcasts the enum def and forwards to
 * upb_fielddef_setsubdef(). */
bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
                                upb_status *s) {
  const upb_def *as_def = upb_enumdef_upcast(subdef);
  return upb_fielddef_setsubdef(f, as_def, s);
}
2401 
/* Records |name| as the symbolic (by-name) subdef of the field.
 *
 * The field must not be frozen (asserted).  Fails with an error in |s| if
 * the field's type does not take a subdef, or if copying |name| fails.  On
 * success the previous subdef (or symbolic name) is released and replaced by
 * a private copy of |name|. */
bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
                                upb_status *s) {
  char *owned_name;

  UPB_ASSERT(!upb_fielddef_isfrozen(f));

  if (!upb_fielddef_hassubdef(f)) {
    upb_status_seterrmsg(s, "field type does not accept a subdef");
    return false;
  }

  owned_name = upb_gstrdup(name);
  if (owned_name == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* TODO: validate name (upb_isident() doesn't quite work atm because this
   * name may have a leading "."). */
  release_subdef(f);
  f->subdef_is_symbolic = true;
  f->sub.name = owned_name;
  return true;
}
2424 
upb_fielddef_issubmsg(const upb_fielddef * f)2425 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
2426   return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
2427 }
2428 
upb_fielddef_isstring(const upb_fielddef * f)2429 bool upb_fielddef_isstring(const upb_fielddef *f) {
2430   return upb_fielddef_type(f) == UPB_TYPE_STRING ||
2431          upb_fielddef_type(f) == UPB_TYPE_BYTES;
2432 }
2433 
upb_fielddef_isseq(const upb_fielddef * f)2434 bool upb_fielddef_isseq(const upb_fielddef *f) {
2435   return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
2436 }
2437 
/* True when the field is neither a string/bytes field nor a submessage. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  return !(upb_fielddef_isstring(f) || upb_fielddef_issubmsg(f));
}
2441 
/* True when the field is a repeated submessage field whose message subdef is
 * reported as a map entry by upb_msgdef_mapentry(). */
bool upb_fielddef_ismap(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) {
    return false;
  }
  if (!upb_fielddef_issubmsg(f)) {
    return false;
  }
  return upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
}
2446 
upb_fielddef_haspresence(const upb_fielddef * f)2447 bool upb_fielddef_haspresence(const upb_fielddef *f) {
2448   if (upb_fielddef_isseq(f)) return false;
2449   if (upb_fielddef_issubmsg(f)) return true;
2450 
2451   /* Primitive field: return true unless there is a message that specifies
2452    * presence should not exist. */
2453   if (f->msg_is_symbolic || !f->msg.def) return true;
2454   return f->msg.def->syntax == UPB_SYNTAX_PROTO2;
2455 }
2456 
upb_fielddef_hassubdef(const upb_fielddef * f)2457 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
2458   return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
2459 }
2460 
/* True when |x| lies in the closed interval [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) {
    return false;
  }
  return x <= high;
}
2464 
/* True when |label| is within the accepted range 1..3. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* True when |type| is within the accepted range 1..11. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* True when |fmt| is within the accepted range 1..3. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
2468 
/* Returns true iff |type| lies in the inclusive range [1, 18]. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const bool in_range = between(type, 1, 18);
  return in_range;
}
2472 
2473 /* upb_msgdef *****************************************************************/
2474 
/* Refcounted "visit" callback for msgdefs: calls |visit| once for each field
 * of the message, once for each of its oneofs, and once for the file that
 * owns the def (when there is one). */
static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
                     void *closure) {
  const upb_msgdef *m = (const upb_msgdef*)r;
  const upb_def *def = upb_msgdef_upcast(m);
  upb_msg_field_iter field_it;
  upb_msg_oneof_iter oneof_it;

  upb_msg_field_begin(&field_it, m);
  while (!upb_msg_field_done(&field_it)) {
    upb_fielddef *field = upb_msg_iter_field(&field_it);
    visit(r, upb_fielddef_upcast2(field), closure);
    upb_msg_field_next(&field_it);
  }

  upb_msg_oneof_begin(&oneof_it, m);
  while (!upb_msg_oneof_done(&oneof_it)) {
    upb_oneofdef *oneof = upb_msg_iter_oneof(&oneof_it);
    visit(r, upb_oneofdef_upcast(oneof), closure);
    upb_msg_oneof_next(&oneof_it);
  }

  if (upb_def_file(def)) {
    visit(r, upb_filedef_upcast(upb_def_file(def)), closure);
  }
}
2497 
/* Refcounted "free" callback for msgdefs: tears down both lookup tables and
 * the base def, then releases the msgdef allocation itself. */
static void freemsg(upb_refcounted *r) {
  upb_msgdef *msg = (upb_msgdef*)r;

  upb_strtable_uninit(&msg->ntof);  /* name -> field/oneof */
  upb_inttable_uninit(&msg->itof);  /* number -> field */
  upb_def_uninit(upb_msgdef_upcast_mutable(msg));
  upb_gfree(msg);
}
2505 
2506 const struct upb_refcounted_vtbl upb_msgdef_vtbl = {visitmsg, freemsg};
2507 
upb_msgdef_new(const void * owner)2508 upb_msgdef *upb_msgdef_new(const void *owner) {
2509   upb_msgdef *m = upb_gmalloc(sizeof(*m));
2510   if (!m) return NULL;
2511 
2512   if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &upb_msgdef_vtbl,
2513                     owner)) {
2514     goto err2;
2515   }
2516 
2517   if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
2518   if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
2519   m->map_entry = false;
2520   m->syntax = UPB_SYNTAX_PROTO2;
2521   return m;
2522 
2523 err1:
2524   upb_inttable_uninit(&m->itof);
2525 err2:
2526   upb_gfree(m);
2527   return NULL;
2528 }
2529 
/* Freezes |m| by passing it to upb_def_freeze() as a one-element def list.
 * Returns that call's result; errors are reported through |status|. */
bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
  upb_def *defs[1];
  defs[0] = upb_msgdef_upcast_mutable(m);
  return upb_def_freeze(defs, 1, status);
}
2534 
/* Returns the full name of |m| as reported by upb_def_fullname() on its base
 * def. */
const char *upb_msgdef_fullname(const upb_msgdef *m) {
  const char *fullname = upb_def_fullname(upb_msgdef_upcast(m));
  return fullname;
}
2538 
/* Returns the name of |m| as reported by upb_def_name() on its base def. */
const char *upb_msgdef_name(const upb_msgdef *m) {
  const char *name = upb_def_name(upb_msgdef_upcast(m));
  return name;
}
2542 
/* Sets the full name of |m| by delegating to upb_def_setfullname() on its
 * base def.  Returns that call's result; errors are reported through |s|. */
bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
                            upb_status *s) {
  const bool ok =
      upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
  return ok;
}
2547 
/* Stores |syntax| on |m|.  Only UPB_SYNTAX_PROTO2 and UPB_SYNTAX_PROTO3 are
 * accepted; any other value leaves |m| untouched and returns false. */
bool upb_msgdef_setsyntax(upb_msgdef *m, upb_syntax_t syntax) {
  if (syntax == UPB_SYNTAX_PROTO2 || syntax == UPB_SYNTAX_PROTO3) {
    m->syntax = syntax;
    return true;
  }
  return false;
}
2556 
upb_msgdef_syntax(const upb_msgdef * m)2557 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
2558   return m->syntax;
2559 }
2560 
2561 /* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
2562  * on status |s| and return false if not. */
check_field_add(const upb_msgdef * m,const upb_fielddef * f,upb_status * s)2563 static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
2564                             upb_status *s) {
2565   if (upb_fielddef_containingtype(f) != NULL) {
2566     upb_status_seterrmsg(s, "fielddef already belongs to a message");
2567     return false;
2568   } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
2569     upb_status_seterrmsg(s, "field name or number were not set");
2570     return false;
2571   } else if (upb_msgdef_itof(m, upb_fielddef_number(f))) {
2572     upb_status_seterrmsg(s, "duplicate field number");
2573     return false;
2574   } else if (upb_strtable_lookup(&m->ntof, upb_fielddef_name(f), NULL)) {
2575     upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
2576     return false;
2577   }
2578   return true;
2579 }
2580 
/* Unconditionally attaches |f| to |m|; callers are expected to have validated
 * the addition beforehand.
 *
 * Points the field at its new containing message, registers it in both the
 * number and name tables, and links the two objects with ref2 in each
 * direction.  If |ref_donor| is non-NULL, the ref it holds on |f| is
 * released. */
static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
  /* Drop any previous containing-type state before setting the new one. */
  release_containingtype(f);
  f->msg.def = m;
  f->msg_is_symbolic = false;

  upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));

  upb_ref2(f, m);
  upb_ref2(m, f);

  if (ref_donor) {
    upb_fielddef_unref(f, ref_donor);
  }
}
2591 
/* Adds field |f| to msgdef |m|.
 *
 * Returns true on success, in which case the ref held by |ref_donor| (if
 * non-NULL) is released.  On failure an error is stored in |s|, false is
 * returned and nothing is modified. */
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
                         upb_status *s) {
  /* TODO: extensions need to have a separate namespace, because proto2 allows a
   * top-level extension (ie. one not in any package) to have the same name as a
   * field from the message.
   *
   * This also implies that there needs to be a separate lookup-by-name method
   * for extensions.  It seems desirable for iteration to return both extensions
   * and non-extensions though.
   *
   * We also need to validate that the field number is in an extension range iff
   * it is an extension. */

  /* Idempotence: a field that already lives in |m| only needs its donated ref
   * released. */
  if (upb_fielddef_containingtype(f) == m) {
    if (ref_donor) {
      upb_fielddef_unref(f, ref_donor);
    }
    return true;
  }

  /* Validate everything up front so that failure leaves |m| untouched. */
  if (!check_field_add(m, f, s)) {
    return false;
  }
  if (upb_fielddef_containingoneof(f) != NULL) {
    /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
    upb_status_seterrmsg(s, "fielddef is part of a oneof");
    return false;
  }

  add_field(m, f, ref_donor);
  return true;
}
2625 
/* Adds oneof |o|, together with every field it contains, to msgdef |m|.
 *
 * All validation happens before any mutation.  On failure an error is stored
 * in |s| and false is returned.  On success the ref held by |ref_donor| (if
 * non-NULL) on |o| is released. */
bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
                         upb_status *s) {
  upb_oneof_iter it;

  /* The oneof itself must be unattached, named, and not clash by name. */
  if (upb_oneofdef_containingtype(o)) {
    upb_status_seterrmsg(s, "oneofdef already belongs to a message");
    return false;
  }
  if (upb_oneofdef_name(o) == NULL) {
    upb_status_seterrmsg(s, "oneofdef name was not set");
    return false;
  }
  if (upb_strtable_lookup(&m->ntof, upb_oneofdef_name(o), NULL)) {
    upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
    return false;
  }

  /* None of the oneof's fields may conflict with names or numbers of fields
   * already in the message. */
  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    const upb_fielddef *candidate = upb_oneof_iter_field(&it);
    if (!check_field_add(m, candidate, s)) {
      return false;
    }
    upb_oneof_next(&it);
  }

  /* Commit: register the oneof first... */
  o->parent = m;
  upb_strtable_insert(&m->ntof, upb_oneofdef_name(o), upb_value_ptr(o));
  upb_ref2(o, m);
  upb_ref2(m, o);

  /* ...then add each of its fields directly to the msgdef. */
  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    upb_fielddef *member = upb_oneof_iter_field(&it);
    add_field(m, member, NULL);
    upb_oneof_next(&it);
  }

  if (ref_donor) {
    upb_oneofdef_unref(o, ref_donor);
  }
  return true;
}
2669 
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)2670 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
2671   upb_value val;
2672   return upb_inttable_lookup32(&m->itof, i, &val) ?
2673       upb_value_getptr(val) : NULL;
2674 }
2675 
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)2676 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
2677                                     size_t len) {
2678   upb_value val;
2679 
2680   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
2681     return NULL;
2682   }
2683 
2684   return upb_trygetfield(upb_value_getptr(val));
2685 }
2686 
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)2687 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
2688                                     size_t len) {
2689   upb_value val;
2690 
2691   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
2692     return NULL;
2693   }
2694 
2695   return upb_trygetoneof(upb_value_getptr(val));
2696 }
2697 
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)2698 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
2699                            const upb_fielddef **f, const upb_oneofdef **o) {
2700   upb_value val;
2701 
2702   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
2703     return false;
2704   }
2705 
2706   *o = upb_trygetoneof(upb_value_getptr(val));
2707   *f = upb_trygetfield(upb_value_getptr(val));
2708   UPB_ASSERT((*o != NULL) ^ (*f != NULL));  /* Exactly one of the two should be set. */
2709   return true;
2710 }
2711 
upb_msgdef_numfields(const upb_msgdef * m)2712 int upb_msgdef_numfields(const upb_msgdef *m) {
2713   /* The number table contains only fields. */
2714   return upb_inttable_count(&m->itof);
2715 }
2716 
upb_msgdef_numoneofs(const upb_msgdef * m)2717 int upb_msgdef_numoneofs(const upb_msgdef *m) {
2718   /* The name table includes oneofs, and the number table does not. */
2719   return upb_strtable_count(&m->ntof) - upb_inttable_count(&m->itof);
2720 }
2721 
/* Sets the map-entry flag of |m|.  Asserts that |m| is not frozen. */
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
  UPB_ASSERT(!upb_msgdef_isfrozen(m));

  m->map_entry = map_entry;
}
2726 
upb_msgdef_mapentry(const upb_msgdef * m)2727 bool upb_msgdef_mapentry(const upb_msgdef *m) {
2728   return m->map_entry;
2729 }
2730 
upb_msgdef_wellknowntype(const upb_msgdef * m)2731 upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
2732   return m->well_known_type;
2733 }
2734 
upb_msgdef_isnumberwrapper(const upb_msgdef * m)2735 bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
2736   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
2737   return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
2738          type <= UPB_WELLKNOWN_UINT32VALUE;
2739 }
2740 
/* Starts iteration over the fields of |m| by positioning |iter| at the
 * beginning of the message's number table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  upb_inttable_begin(iter, &m->itof);
}
2744 
/* Advances |iter| to the next field. */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
2746 
/* Returns true once |iter| has moved past the last field. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}
2750 
upb_msg_iter_field(const upb_msg_field_iter * iter)2751 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
2752   return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
2753 }
2754 
/* Forces |iter| into the "done" state by delegating to
 * upb_inttable_iter_setdone(). */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
2758 
/* Starts iteration over the oneofs of |m|.  The name table mixes fields and
 * oneofs, so |iter| is advanced past any leading field entries until it rests
 * on a oneof or reaches the end. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  for (upb_strtable_begin(iter, &m->ntof);
       !upb_strtable_done(iter) &&
       !upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter)));
       upb_strtable_next(iter)) {
    /* Skipping a field entry. */
  }
}
2767 
/* Advances |iter| to the next oneof, stepping over any field entries found
 * along the way.  Always moves at least one position. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  for (;;) {
    upb_strtable_next(iter);
    if (upb_strtable_done(iter)) {
      break;
    }
    if (upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter)))) {
      break;
    }
  }
}
2775 
/* Returns true once |iter| has moved past the last entry of the name table. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  bool finished = upb_strtable_done(iter);
  return finished;
}
2779 
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)2780 upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
2781   return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
2782 }
2783 
/* Forces |iter| into the "done" state by delegating to
 * upb_strtable_iter_setdone(). */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_strtable_iter_setdone(iter);
}
2787 
2788 /* upb_oneofdef ***************************************************************/
2789 
/* Refcounted "visit" callback for oneofdefs: calls |visit| once for each
 * member field and once for the parent msgdef, when one is set. */
static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_oneofdef *o = (const upb_oneofdef*)r;
  upb_oneof_iter it;

  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    const upb_fielddef *member = upb_oneof_iter_field(&it);
    visit(r, upb_fielddef_upcast2(member), closure);
    upb_oneof_next(&it);
  }

  if (o->parent) {
    visit(r, upb_msgdef_upcast2(o->parent), closure);
  }
}
2802 
/* Refcounted "free" callback for oneofdefs: tears down both lookup tables,
 * frees the name string, then releases the oneofdef allocation itself. */
static void freeoneof(upb_refcounted *r) {
  upb_oneofdef *oneof = (upb_oneofdef*)r;

  upb_strtable_uninit(&oneof->ntof);
  upb_inttable_uninit(&oneof->itof);
  upb_gfree((void*)oneof->name);
  upb_gfree(oneof);
}
2810 
2811 const struct upb_refcounted_vtbl upb_oneofdef_vtbl = {visitoneof, freeoneof};
2812 
upb_oneofdef_new(const void * owner)2813 upb_oneofdef *upb_oneofdef_new(const void *owner) {
2814   upb_oneofdef *o = upb_gmalloc(sizeof(*o));
2815 
2816   if (!o) {
2817     return NULL;
2818   }
2819 
2820   o->parent = NULL;
2821   o->name = NULL;
2822 
2823   if (!upb_refcounted_init(upb_oneofdef_upcast_mutable(o), &upb_oneofdef_vtbl,
2824                            owner)) {
2825     goto err2;
2826   }
2827 
2828   if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
2829   if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
2830 
2831   return o;
2832 
2833 err1:
2834   upb_inttable_uninit(&o->itof);
2835 err2:
2836   upb_gfree(o);
2837   return NULL;
2838 }
2839 
upb_oneofdef_name(const upb_oneofdef * o)2840 const char *upb_oneofdef_name(const upb_oneofdef *o) { return o->name; }
2841 
/* Sets the name of |o| to a private copy of |name|.
 *
 * Asserts that |o| is not frozen.  Fails, storing an error in |s| and leaving
 * the current name untouched, if |o| has already been added to a message, if
 * |name| is rejected by upb_isident(), or if the copy cannot be allocated.
 * On success the previous name is freed and true is returned. */
bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s) {
  UPB_ASSERT(!upb_oneofdef_isfrozen(o));
  if (upb_oneofdef_containingtype(o)) {
    upb_status_seterrmsg(s, "oneof already added to a message");
    return false;
  }

  if (!upb_isident(name, strlen(name), true, s)) {
    return false;
  }

  /* Duplicate before freeing the old name so that failure changes nothing. */
  name = upb_gstrdup(name);
  if (!name) {
    upb_status_seterrmsg(s, "Out of memory");
    return false;
  }

  upb_gfree((void*)o->name);
  o->name = name;
  return true;
}
2863 
upb_oneofdef_containingtype(const upb_oneofdef * o)2864 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
2865   return o->parent;
2866 }
2867 
upb_oneofdef_numfields(const upb_oneofdef * o)2868 int upb_oneofdef_numfields(const upb_oneofdef *o) {
2869   return upb_strtable_count(&o->ntof);
2870 }
2871 
upb_oneofdef_index(const upb_oneofdef * o)2872 uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
2873   return o->index;
2874 }
2875 
/* Adds field |f| to oneof |o|.
 *
 * Asserts that neither |o| nor its parent msgdef (if any) is frozen.  The
 * field must carry the OPTIONAL label, have a name and a nonzero number, not
 * clash with an existing member of |o|, and not belong to another oneof.  If
 * |o| has a parent msgdef, |f| must be unattached or already in that msgdef;
 * an unattached field is added to the parent as well.  If |o| has no parent,
 * |f| must be unattached.
 *
 * Returns true on success, in which case the ref held by |ref_donor| (if
 * non-NULL) on |f| is released.  On failure an error is stored in |s| and
 * false is returned. */
bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
                           const void *ref_donor,
                           upb_status *s) {
  UPB_ASSERT(!upb_oneofdef_isfrozen(o));
  UPB_ASSERT(!o->parent || !upb_msgdef_isfrozen(o->parent));

  /* This method is idempotent. Check if |f| is already part of this oneofdef
   * and return immediately if so.  The donated ref is still consumed, as on
   * every other success path and as upb_msgdef_addfield() does, so that the
   * caller's ref is not leaked. */
  if (upb_fielddef_containingoneof(f) == o) {
    if (ref_donor) upb_fielddef_unref(f, ref_donor);
    return true;
  }

  /* The field must have an OPTIONAL label. */
  if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
    upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
    return false;
  }

  /* Check that no field with this name or number exists already in the oneof.
   * Also check that the field is not already part of a oneof. */
  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "field name or number were not set");
    return false;
  } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
             upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
    upb_status_seterrmsg(s, "duplicate field name or number");
    return false;
  } else if (upb_fielddef_containingoneof(f) != NULL) {
    upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
    return false;
  }

  /* We allow adding a field to the oneof either if the field is not part of a
   * msgdef, or if it is and we are also part of the same msgdef. */
  if (o->parent == NULL) {
    /* If we're not in a msgdef, the field cannot be either. Otherwise we would
     * need to magically add this oneof to a msgdef to remain consistent, which
     * is surprising behavior. */
    if (upb_fielddef_containingtype(f) != NULL) {
      upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
                              "oneof does not");
      return false;
    }
  } else {
    /* If we're in a msgdef, the user can add fields that either aren't in any
     * msgdef (in which case they're added to our msgdef) or already a part of
     * our msgdef. */
    if (upb_fielddef_containingtype(f) != NULL &&
        upb_fielddef_containingtype(f) != o->parent) {
      upb_status_seterrmsg(s, "fielddef belongs to a different message "
                              "than oneof");
      return false;
    }
  }

  /* Commit phase. First add the field to our parent msgdef, if any, because
   * that may fail; then add the field to our own tables. */

  if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
    if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
      return false;
    }
  }

  release_containingtype(f);
  f->oneof = o;
  upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
  upb_ref2(f, o);
  upb_ref2(o, f);
  if (ref_donor) upb_fielddef_unref(f, ref_donor);

  return true;
}
2950 
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)2951 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
2952                                       const char *name, size_t length) {
2953   upb_value val;
2954   return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
2955       upb_value_getptr(val) : NULL;
2956 }
2957 
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)2958 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
2959   upb_value val;
2960   return upb_inttable_lookup32(&o->itof, num, &val) ?
2961       upb_value_getptr(val) : NULL;
2962 }
2963 
/* Starts iteration over the fields of |o| by positioning |iter| at the
 * beginning of the oneof's number table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  upb_inttable_begin(iter, &o->itof);
}
2967 
/* Advances |iter| to the next field of the oneof. */
void upb_oneof_next(upb_oneof_iter *iter) { upb_inttable_next(iter); }
2971 
/* Returns true once |iter| has moved past the last field of the oneof. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}
2975 
upb_oneof_iter_field(const upb_oneof_iter * iter)2976 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
2977   return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
2978 }
2979 
/* Forces |iter| into the "done" state by delegating to
 * upb_inttable_iter_setdone(). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
2983 
2984 /* upb_filedef ****************************************************************/
2985 
/* Refcounted "visit" callback for filedefs: calls |visit| once for every def
 * registered in the file. */
static void visitfiledef(const upb_refcounted *r, upb_refcounted_visit *visit,
                         void *closure) {
  const upb_filedef *f = (const upb_filedef*)r;
  size_t idx = 0;

  while (idx < upb_filedef_defcount(f)) {
    visit(r, upb_def_upcast(upb_filedef_def(f, idx)), closure);
    idx++;
  }
}
2995 
/* Refcounted "free" callback for filedefs: drops the ref held on every
 * dependency, tears down the def and dep tables, frees the owned strings and
 * finally releases the filedef allocation itself. */
static void freefiledef(upb_refcounted *r) {
  upb_filedef *f = (upb_filedef*)r;
  size_t idx = 0;

  while (idx < upb_filedef_depcount(f)) {
    upb_filedef_unref(upb_filedef_dep(f, idx), f);
    idx++;
  }

  upb_inttable_uninit(&f->defs);
  upb_inttable_uninit(&f->deps);

  upb_gfree((void*)f->name);
  upb_gfree((void*)f->package);
  upb_gfree((void*)f->phpprefix);
  upb_gfree((void*)f->phpnamespace);

  upb_gfree(f);
}
3012 
3013 const struct upb_refcounted_vtbl upb_filedef_vtbl = {visitfiledef, freefiledef};
3014 
upb_filedef_new(const void * owner)3015 upb_filedef *upb_filedef_new(const void *owner) {
3016   upb_filedef *f = upb_gmalloc(sizeof(*f));
3017 
3018   if (!f) {
3019     return NULL;
3020   }
3021 
3022   f->package = NULL;
3023   f->name = NULL;
3024   f->phpprefix = NULL;
3025   f->phpnamespace = NULL;
3026   f->syntax = UPB_SYNTAX_PROTO2;
3027 
3028   if (!upb_refcounted_init(upb_filedef_upcast_mutable(f), &upb_filedef_vtbl,
3029                            owner)) {
3030     goto err;
3031   }
3032 
3033   if (!upb_inttable_init(&f->defs, UPB_CTYPE_CONSTPTR)) {
3034     goto err;
3035   }
3036 
3037   if (!upb_inttable_init(&f->deps, UPB_CTYPE_CONSTPTR)) {
3038     goto err2;
3039   }
3040 
3041   return f;
3042 
3043 
3044 err2:
3045   upb_inttable_uninit(&f->defs);
3046 
3047 err:
3048   upb_gfree(f);
3049   return NULL;
3050 }
3051 
upb_filedef_name(const upb_filedef * f)3052 const char *upb_filedef_name(const upb_filedef *f) {
3053   return f->name;
3054 }
3055 
upb_filedef_package(const upb_filedef * f)3056 const char *upb_filedef_package(const upb_filedef *f) {
3057   return f->package;
3058 }
3059 
upb_filedef_phpprefix(const upb_filedef * f)3060 const char *upb_filedef_phpprefix(const upb_filedef *f) {
3061   return f->phpprefix;
3062 }
3063 
upb_filedef_phpnamespace(const upb_filedef * f)3064 const char *upb_filedef_phpnamespace(const upb_filedef *f) {
3065   return f->phpnamespace;
3066 }
3067 
upb_filedef_syntax(const upb_filedef * f)3068 upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
3069   return f->syntax;
3070 }
3071 
upb_filedef_defcount(const upb_filedef * f)3072 size_t upb_filedef_defcount(const upb_filedef *f) {
3073   return upb_inttable_count(&f->defs);
3074 }
3075 
upb_filedef_depcount(const upb_filedef * f)3076 size_t upb_filedef_depcount(const upb_filedef *f) {
3077   return upb_inttable_count(&f->deps);
3078 }
3079 
upb_filedef_def(const upb_filedef * f,size_t i)3080 const upb_def *upb_filedef_def(const upb_filedef *f, size_t i) {
3081   upb_value v;
3082 
3083   if (upb_inttable_lookup32(&f->defs, i, &v)) {
3084     return upb_value_getconstptr(v);
3085   } else {
3086     return NULL;
3087   }
3088 }
3089 
upb_filedef_dep(const upb_filedef * f,size_t i)3090 const upb_filedef *upb_filedef_dep(const upb_filedef *f, size_t i) {
3091   upb_value v;
3092 
3093   if (upb_inttable_lookup32(&f->deps, i, &v)) {
3094     return upb_value_getconstptr(v);
3095   } else {
3096     return NULL;
3097   }
3098 }
3099 
/* Replaces the name of |f| with a private copy of |name|.  If the copy cannot
 * be allocated, an out-of-memory error is stored in |s|, the old name is kept
 * and false is returned. */
bool upb_filedef_setname(upb_filedef *f, const char *name, upb_status *s) {
  char *copy = upb_gstrdup(name);

  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->name);
  f->name = copy;
  return true;
}
3110 
/* Replaces the package of |f| with a private copy of |package|.
 *
 * Returns false, keeping the old package, when |package| is rejected by
 * upb_isident() or when the copy cannot be allocated (an out-of-memory error
 * is then stored in |s|). */
bool upb_filedef_setpackage(upb_filedef *f, const char *package,
                            upb_status *s) {
  char *copy;

  if (!upb_isident(package, strlen(package), true, s)) {
    return false;
  }

  copy = upb_gstrdup(package);
  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->package);
  f->package = copy;
  return true;
}
3123 
/* Replaces the PHP prefix of |f| with a private copy of |phpprefix|.  If the
 * copy cannot be allocated, an out-of-memory error is stored in |s|, the old
 * value is kept and false is returned. */
bool upb_filedef_setphpprefix(upb_filedef *f, const char *phpprefix,
                              upb_status *s) {
  char *copy = upb_gstrdup(phpprefix);

  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->phpprefix);
  f->phpprefix = copy;
  return true;
}
3135 
/* Replaces the PHP namespace of |f| with a private copy of |phpnamespace|.
 * If the copy cannot be allocated, an out-of-memory error is stored in |s|,
 * the old value is kept and false is returned. */
bool upb_filedef_setphpnamespace(upb_filedef *f, const char *phpnamespace,
                                 upb_status *s) {
  char *copy = upb_gstrdup(phpnamespace);

  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->phpnamespace);
  f->phpnamespace = copy;
  return true;
}
3147 
/* Sets the syntax of |f| and of every msgdef currently registered in it.
 *
 * Only UPB_SYNTAX_PROTO2 and UPB_SYNTAX_PROTO3 are accepted; for any other
 * value an error is stored in |s|, nothing is modified and false is
 * returned. */
bool upb_filedef_setsyntax(upb_filedef *f, upb_syntax_t syntax,
                           upb_status *s) {
  size_t i;

  if (syntax != UPB_SYNTAX_PROTO2 &&
      syntax != UPB_SYNTAX_PROTO3) {
    upb_status_seterrmsg(s, "Unknown syntax value.");
    return false;
  }
  f->syntax = syntax;

  /* Set all messages in this file to match. */
  for (i = 0; i < upb_filedef_defcount(f); i++) {
    /* Casting const away is safe since all defs in mutable filedef must
     * also be mutable. */
    upb_def *def = (upb_def*)upb_filedef_def(f, i);
    upb_msgdef *m = upb_dyncast_msgdef_mutable(def);

    if (m) {
      m->syntax = syntax;
    }
  }

  return true;
}
3175 
/* Registers |def| in filedef |f|.
 *
 * Fails with an error in |s| if |def| already has a file set, or if appending
 * to the def table fails (reported as out-of-memory).  On success the def and
 * the file are linked with ref2 in both directions, the ref held by
 * |ref_donor| (if non-NULL) on |def| is released, and a msgdef inherits the
 * file's syntax. */
bool upb_filedef_adddef(upb_filedef *f, upb_def *def, const void *ref_donor,
                        upb_status *s) {
  if (def->file) {
    upb_status_seterrmsg(s, "Def is already part of another filedef.");
    return false;
  }

  if (!upb_inttable_push(&f->defs, upb_value_constptr(def))) {
    upb_upberr_setoom(s);
    return false;
  }

  def->file = f;
  upb_ref2(def, f);
  upb_ref2(f, def);
  if (ref_donor) {
    upb_def_unref(def, ref_donor);
  }
  if (def->type == UPB_DEF_MSG) {
    upb_downcast_msgdef_mutable(def)->syntax = f->syntax;
  }
  return true;
}
3197 
/* Appends |dep| to the dependency table of |f| and takes a ref on it owned by
 * |f|.  Returns false, without taking a ref, if the append fails. */
bool upb_filedef_adddep(upb_filedef *f, const upb_filedef *dep) {
  if (!upb_inttable_push(&f->deps, upb_value_constptr(dep))) {
    return false;
  }

  /* Regular ref instead of ref2 because files can't form cycles. */
  upb_filedef_ref(dep, f);
  return true;
}
3207 
/* Destroys symtab |s|: drops the ref the symtab holds on every def in its
 * table, tears the table down and frees the symtab itself. */
void upb_symtab_free(upb_symtab *s) {
  upb_strtable_iter it;

  upb_strtable_begin(&it, &s->symtab);
  while (!upb_strtable_done(&it)) {
    const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&it));
    upb_def_unref(def, s);
    upb_strtable_next(&it);
  }

  upb_strtable_uninit(&s->symtab);
  upb_gfree(s);
}
3218 
upb_symtab_new()3219 upb_symtab *upb_symtab_new() {
3220   upb_symtab *s = upb_gmalloc(sizeof(*s));
3221   if (!s) {
3222     return NULL;
3223   }
3224 
3225   upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
3226   return s;
3227 }
3228 
upb_symtab_lookup(const upb_symtab * s,const char * sym)3229 const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
3230   upb_value v;
3231   upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
3232       upb_value_getptr(v) : NULL;
3233   return ret;
3234 }
3235 
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)3236 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
3237   upb_value v;
3238   upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3239       upb_value_getptr(v) : NULL;
3240   return def ? upb_dyncast_msgdef(def) : NULL;
3241 }
3242 
upb_symtab_lookupmsg2(const upb_symtab * s,const char * sym,size_t len)3243 const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
3244                                         size_t len) {
3245   upb_value v;
3246   upb_def *def = upb_strtable_lookup2(&s->symtab, sym, len, &v) ?
3247       upb_value_getptr(v) : NULL;
3248   return def ? upb_dyncast_msgdef(def) : NULL;
3249 }
3250 
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)3251 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
3252   upb_value v;
3253   upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3254       upb_value_getptr(v) : NULL;
3255   return def ? upb_dyncast_enumdef(def) : NULL;
3256 }
3257 
3258 /* Given a symbol and the base symbol inside which it is defined, find the
3259  * symbol's definition in t. */
upb_resolvename(const upb_strtable * t,const char * base,const char * sym)3260 static upb_def *upb_resolvename(const upb_strtable *t,
3261                                 const char *base, const char *sym) {
3262   if(strlen(sym) == 0) return NULL;
3263   if(sym[0] == '.') {
3264     /* Symbols starting with '.' are absolute, so we do a single lookup.
3265      * Slice to omit the leading '.' */
3266     upb_value v;
3267     return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
3268   } else {
3269     /* Remove components from base until we find an entry or run out.
3270      * TODO: This branch is totally broken, but currently not used. */
3271     (void)base;
3272     UPB_ASSERT(false);
3273     return NULL;
3274   }
3275 }
3276 
upb_symtab_resolve(const upb_symtab * s,const char * base,const char * sym)3277 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
3278                                   const char *sym) {
3279   upb_def *ret = upb_resolvename(&s->symtab, base, sym);
3280   return ret;
3281 }
3282 
3283 /* TODO(haberman): we need a lot more testing of error conditions. */
/* Adds the |n| defs in |defs| to symtab |s|, optionally freezing |freeze_also|
 * together with them.
 *
 * Steps, in order:
 *   1. Stage every def in a temporary table (addtab) keyed by full name,
 *      rejecting frozen defs, anonymous defs, name conflicts and fielddefs.
 *   2. Resolve unresolved subdef names of message fields against addtab and
 *      then against the existing symtab.
 *   3. Validate and freeze the staged defs (plus |freeze_also| if given).
 *   4. Insert the staged defs into s->symtab.
 *
 * Each staged def has its ref donated from |ref_donor| to |s|; on failure the
 * refs of all staged defs are donated back.  Returns true on success.  On
 * failure returns false with |status| holding an error. */
symtab_add(upb_symtab * s,upb_def * const * defs,size_t n,void * ref_donor,upb_refcounted * freeze_also,upb_status * status)3284 static bool symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
3285                        void *ref_donor, upb_refcounted *freeze_also,
3286                        upb_status *status) {
3287   size_t i;
3288   size_t add_n;
3289   size_t freeze_n;
3290   upb_strtable_iter iter;
3291   upb_refcounted **add_objs = NULL;
3292   upb_def **add_defs = NULL;
3293   size_t add_objs_size;
3294   upb_strtable addtab;
3295 
  /* Nothing to add and nothing to freeze. */
3296   if (n == 0 && !freeze_also) {
3297     return true;
3298   }
3299 
3300   if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
3301     upb_status_seterrmsg(status, "out of memory");
3302     return false;
3303   }
3304 
3305   /* Add new defs to our "add" set. */
3306   for (i = 0; i < n; i++) {
3307     upb_def *def = defs[i];
3308     const char *fullname;
3309     upb_fielddef *f;
3310 
3311     if (upb_def_isfrozen(def)) {
3312       upb_status_seterrmsg(status, "added defs must be mutable");
3313       goto err;
3314     }
3315     UPB_ASSERT(!upb_def_isfrozen(def));
3316     fullname = upb_def_fullname(def);
3317     if (!fullname) {
3318       upb_status_seterrmsg(
3319           status, "Anonymous defs cannot be added to a symtab");
3320       goto err;
3321     }
3322 
3323     f = upb_dyncast_fielddef_mutable(def);
3324 
3325     if (f) {
3326       if (!upb_fielddef_containingtypename(f)) {
3327         upb_status_seterrmsg(status,
3328                              "Standalone fielddefs must have a containing type "
3329                              "(extendee) name set");
3330         goto err;
3331       }
3332     } else {
      /* Non-field def: the name must be unique among the new defs and the
       * existing symtab before it is staged. */
3333       if (upb_strtable_lookup(&addtab, fullname, NULL)) {
3334         upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
3335         goto err;
3336       }
3337       if (upb_strtable_lookup(&s->symtab, fullname, NULL)) {
3338         upb_status_seterrf(status, "Symtab already has a def named '%s'",
3339                            fullname);
3340         goto err;
3341       }
3342       if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
3343         goto oom_err;
      /* The ref moves only after the def is staged, so the err path (which
       * walks addtab) donates back exactly the refs taken here. */
3344       upb_def_donateref(def, ref_donor, s);
3345     }
3346 
    /* Fielddefs that passed the check above are still rejected here, so no
     * fielddef is ever staged. */
3347     if (upb_dyncast_fielddef_mutable(def)) {
3348       /* TODO(haberman): allow adding extensions attached to files. */
3349       upb_status_seterrf(status, "Can't add extensions to symtab.\n");
3350       goto err;
3351     }
3352   }
3353 
3354   /* Now using the table, resolve symbolic references for subdefs. */
3355   upb_strtable_begin(&iter, &addtab);
3356   for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3357     const char *base;
3358     upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3359     upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
3360     upb_msg_field_iter j;
3361 
    /* Only message defs have fields to resolve. */
3362     if (!m) continue;
3363     /* Type names are resolved relative to the message in which they appear. */
3364     base = upb_msgdef_fullname(m);
3365 
3366     for(upb_msg_field_begin(&j, m);
3367         !upb_msg_field_done(&j);
3368         upb_msg_field_next(&j)) {
3369       upb_fielddef *f = upb_msg_iter_field(&j);
3370       const char *name = upb_fielddef_subdefname(f);
      /* Only fields that name a subdef but do not have one set yet. */
3371       if (name && !upb_fielddef_subdef(f)) {
3372         /* Try the lookup in the current set of to-be-added defs first. If not
3373          * there, try existing defs. */
3374         upb_def *subdef = upb_resolvename(&addtab, base, name);
3375         if (subdef == NULL) {
3376           subdef = upb_resolvename(&s->symtab, base, name);
3377         }
3378         if (subdef == NULL) {
3379           upb_status_seterrf(
3380               status, "couldn't resolve name '%s' in message '%s'", name, base);
3381           goto err;
3382         } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
3383           goto err;
3384         }
3385       }
3386     }
3387   }
3388 
3389   /* We need an array of the defs in addtab, for passing to
3390    * upb_refcounted_freeze(). */
3391   add_objs_size = upb_strtable_count(&addtab);
  /* Reserve one extra slot for freeze_also, appended after validation. */
3392   if (freeze_also) {
3393     add_objs_size++;
3394   }
3395 
3396   add_defs = upb_gmalloc(sizeof(void*) * add_objs_size);
3397   if (add_defs == NULL) goto oom_err;
3398   upb_strtable_begin(&iter, &addtab);
3399   for (add_n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3400     add_defs[add_n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
3401   }
3402 
3403   /* Validate defs. */
3404   if (!_upb_def_validate(add_defs, add_n, status)) {
3405     goto err;
3406   }
3407 
3408   /* Cheat a little and give the array a new type.
3409    * This is probably undefined behavior, but this code will be deleted soon. */
3410   add_objs = (upb_refcounted**)add_defs;
3411 
  /* freeze_n counts the staged defs plus freeze_also; add_n stays the number
   * of defs that go into the symtab. */
3412   freeze_n = add_n;
3413   if (freeze_also) {
3414     add_objs[freeze_n++] = freeze_also;
3415   }
3416 
3417   if (!upb_refcounted_freeze(add_objs, freeze_n, status,
3418                              UPB_MAX_MESSAGE_DEPTH * 2)) {
3419     goto err;
3420   }
3421 
3422   /* This must be delayed until all errors have been detected, since error
3423    * recovery code uses this table to cleanup defs. */
3424   upb_strtable_uninit(&addtab);
3425 
3426   /* TODO(haberman) we don't properly handle errors after this point (like
3427    * OOM in upb_strtable_insert() below). */
3428   for (i = 0; i < add_n; i++) {
3429     upb_def *def = (upb_def*)add_objs[i];
3430     const char *name = upb_def_fullname(def);
3431     bool success;
3432     success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
3433     UPB_ASSERT(success);
3434   }
  /* add_objs aliases add_defs, so this releases the single array. */
3435   upb_gfree(add_defs);
3436   return true;
3437 
/* oom_err sets the status message and then falls through into err. */
3438 oom_err:
3439   upb_status_seterrmsg(status, "out of memory");
3440 err: {
3441     /* We need to donate the refs back. */
3442     upb_strtable_begin(&iter, &addtab);
3443     for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3444       upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3445       upb_def_donateref(def, s, ref_donor);
3446     }
3447   }
3448   upb_strtable_uninit(&addtab);
  /* add_defs is still NULL if the failure happened before it was allocated. */
3449   upb_gfree(add_defs);
3450   UPB_ASSERT(!upb_ok(status));
3451   return false;
3452 }
3453 
/* Adds |n| defs to the symtab.  Thin wrapper over symtab_add() that passes no
 * additional object to freeze; the result of symtab_add() is returned
 * unchanged. */
bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
                    void *ref_donor, upb_status *status) {
  upb_refcounted *nothing_extra = NULL;
  return symtab_add(s, defs, n, ref_donor, nothing_extra, status);
}
3458 
/* Adds every def of |file| to the symtab, passing the file itself to
 * symtab_add() as the extra object to freeze.
 *
 * A file with no defs is a no-op that returns true.  Returns false with
 * |status| set when the temporary def array cannot be allocated; otherwise
 * returns the result of symtab_add(). */
bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status *status) {
  size_t count = upb_filedef_defcount(file);
  size_t idx;
  upb_def **list;
  bool ok;

  if (count == 0) {
    return true;
  }

  list = upb_gmalloc(sizeof(*list) * count);
  if (!list) {
    upb_status_seterrmsg(status, "Out of memory");
    return false;
  }

  /* Snapshot the file's mutable defs into a flat array for symtab_add(). */
  idx = 0;
  while (idx < count) {
    list[idx] = upb_filedef_mutabledef(file, idx);
    idx++;
  }

  ok = symtab_add(s, list, count, NULL, upb_filedef_upcast_mutable(file),
                  status);

  /* The array is only scratch space for the call above. */
  upb_gfree(list);
  return ok;
}
3485 
3486 /* Iteration. */
3487 
/* Moves |iter| forward until it rests on a def whose type equals iter->type,
 * or until the underlying table iterator is exhausted.  With UPB_DEF_ANY
 * every entry matches, so the iterator is left untouched. */
static void advance_to_matching(upb_symtab_iter *iter) {
  if (iter->type != UPB_DEF_ANY) {
    for (;;) {
      if (upb_strtable_done(&iter->iter)) {
        break;
      }
      if (upb_symtab_iter_def(iter)->type == iter->type) {
        break;
      }
      upb_strtable_next(&iter->iter);
    }
  }
}
3497 
/* Starts iterating over the defs of |s| whose type is |type| (UPB_DEF_ANY
 * selects all of them), leaving |iter| on the first matching entry. */
void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
                      upb_deftype_t type) {
  iter->type = type;
  upb_strtable_begin(&iter->iter, &s->symtab);
  advance_to_matching(iter);
}
3504 
/* Steps past the current entry, then skips ahead to the next entry that
 * matches the iterator's type filter. */
void upb_symtab_next(upb_symtab_iter *iter) {
  upb_strtable_iter *pos = &iter->iter;
  upb_strtable_next(pos);
  advance_to_matching(iter);
}
3509 
upb_symtab_done(const upb_symtab_iter * iter)3510 bool upb_symtab_done(const upb_symtab_iter *iter) {
3511   return upb_strtable_done(&iter->iter);
3512 }
3513 
upb_symtab_iter_def(const upb_symtab_iter * iter)3514 const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
3515   return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
3516 }
3517 /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
3518 
3519 
/* Maximum encoded size of one varint: a 64-bit value at 7 payload bits per
 * byte needs at most 10 bytes.  upb_put_varint() reserves this many bytes
 * before encoding. */
3520 #define UPB_PB_VARINT_MAX_LEN 10
/* Evaluates x; if it is false, makes the enclosing function return false.
 * Only usable inside functions returning bool. */
3521 #define CHK(x) do { if (!(x)) { return false; } } while(0)
3522 
/* Lookup table indexed by a field's descriptortype value; upb_encode_array()
 * uses it to assert that an array's element type matches the field.  Several
 * descriptor types share one field type (e.g. INT32, SFIXED32 and SINT32 all
 * map to UPB_TYPE_INT32).
 * NOTE(review): slot 0 holds a wire-type constant rather than a field type;
 * presumably a placeholder because descriptor types start at 1 -- confirm. */
3523 /* Maps descriptor type -> upb field type.  */
3524 static const uint8_t upb_desctype_to_fieldtype2[] = {
3525   UPB_WIRE_TYPE_END_GROUP,  /* ENDGROUP */
3526   UPB_TYPE_DOUBLE,          /* DOUBLE */
3527   UPB_TYPE_FLOAT,           /* FLOAT */
3528   UPB_TYPE_INT64,           /* INT64 */
3529   UPB_TYPE_UINT64,          /* UINT64 */
3530   UPB_TYPE_INT32,           /* INT32 */
3531   UPB_TYPE_UINT64,          /* FIXED64 */
3532   UPB_TYPE_UINT32,          /* FIXED32 */
3533   UPB_TYPE_BOOL,            /* BOOL */
3534   UPB_TYPE_STRING,          /* STRING */
3535   UPB_TYPE_MESSAGE,         /* GROUP */
3536   UPB_TYPE_MESSAGE,         /* MESSAGE */
3537   UPB_TYPE_BYTES,           /* BYTES */
3538   UPB_TYPE_UINT32,          /* UINT32 */
3539   UPB_TYPE_ENUM,            /* ENUM */
3540   UPB_TYPE_INT32,           /* SFIXED32 */
3541   UPB_TYPE_INT64,           /* SFIXED64 */
3542   UPB_TYPE_INT32,           /* SINT32 */
3543   UPB_TYPE_INT64,           /* SINT64 */
3544 };
3545 
/* Writes |val| to |buf| as a base-128 varint, least-significant group first,
 * setting the high bit on every byte except the last.  Returns the number of
 * bytes written (1 for values below 128, including zero).  |buf| must have
 * room for the encoding; a 64-bit value needs at most 10 bytes. */
static size_t upb_encode_varint(uint64_t val, char *buf) {
  size_t written = 0;

  do {
    uint8_t low7 = (uint8_t)(val & 0x7fU);
    val >>= 7;
    /* More groups follow iff bits remain after the shift. */
    buf[written++] = (char)(val ? (low7 | 0x80U) : low7);
  } while (val);

  return written;
}
3558 
/* ZigZag-encodes a signed 32-bit value: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 * The arithmetic is done on unsigned values because left-shifting a negative
 * signed integer is undefined behavior and right-shifting one is
 * implementation-defined. */
static uint32_t upb_zzencode_32(int32_t n) {
  uint32_t bits = (uint32_t)n;
  /* All ones when n is negative, zero otherwise. */
  uint32_t sign_mask = (uint32_t)0 - (bits >> 31);
  return (bits << 1) ^ sign_mask;
}
/* ZigZag-encodes a signed 64-bit value: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 * The arithmetic is done on unsigned values because left-shifting a negative
 * signed integer is undefined behavior and right-shifting one is
 * implementation-defined. */
static uint64_t upb_zzencode_64(int64_t n) {
  uint64_t bits = (uint64_t)n;
  /* All ones when n is negative, zero otherwise. */
  uint64_t sign_mask = (uint64_t)0 - (bits >> 63);
  return (bits << 1) ^ sign_mask;
}
3561 
3562 typedef struct {
3563   upb_alloc *alloc;
3564   char *buf, *ptr, *limit;
3565 } upb_encstate;
3566 
/* Returns the smallest power of two that is >= |bytes|, with a floor of 128.
 *
 * If |bytes| is larger than the largest power of two representable in size_t,
 * doubling would wrap to 0 and never terminate; in that case |bytes| itself is
 * returned so the caller still requests at least what it needs (and the
 * allocation can fail normally). */
static size_t upb_roundup_pow2(size_t bytes) {
  const size_t max_doubling = ((size_t)-1) / 2;
  size_t ret = 128;

  while (ret < bytes) {
    if (ret > max_doubling) {
      return bytes;
    }
    ret *= 2;
  }
  return ret;
}
3574 
/* Grows the encoder buffer so that at least |bytes| more bytes fit in front of
 * the data already written.  The existing contents are moved to the end of the
 * new buffer, because the encoder writes back-to-front.  Returns false if the
 * reallocation fails, in which case |e| is left unchanged. */
static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
  size_t old_size = e->limit - e->buf;
  size_t used = e->limit - e->ptr;
  size_t new_size = upb_roundup_pow2(bytes + used);
  char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
  CHK(new_buf);

  /* We want previous data at the end, realloc() put it at the beginning.
   * Read it from new_buf: after a successful realloc the old pointer
   * (e->buf) must no longer be dereferenced. */
  if (old_size > 0) {
    memmove(new_buf + new_size - old_size, new_buf, old_size);
  }

  e->ptr = new_buf + new_size - used;
  e->limit = new_buf + new_size;
  e->buf = new_buf;
  return true;
}
3589 
3590 /* Call to ensure that at least "bytes" bytes are available for writing at
3591  * e->ptr.  Returns false if the bytes could not be allocated. */
upb_encode_reserve(upb_encstate * e,size_t bytes)3592 static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
3593   CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) ||
3594       upb_encode_growbuffer(e, bytes));
3595 
3596   e->ptr -= bytes;
3597   return true;
3598 }
3599 
3600 /* Writes the given bytes to the buffer, handling reserve/advance. */
upb_put_bytes(upb_encstate * e,const void * data,size_t len)3601 static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
3602   CHK(upb_encode_reserve(e, len));
3603   memcpy(e->ptr, data, len);
3604   return true;
3605 }
3606 
/* Writes the 8 bytes of |val| in host byte order.  Returns false if the
 * underlying upb_put_bytes() fails. */
static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return upb_put_bytes(e, raw, sizeof(val));
}
3611 
/* Writes the 4 bytes of |val| in host byte order.  Returns false if the
 * underlying upb_put_bytes() fails. */
static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return upb_put_bytes(e, raw, sizeof(val));
}
3616 
/* Writes |val| as a varint in front of the data written so far.
 *
 * The maximum varint size is reserved up front, the value is encoded at the
 * start of that scratch area, and the encoded bytes are then slid to its end
 * so they sit directly before the previous data.  Returns false if the space
 * cannot be reserved. */
static bool upb_put_varint(upb_encstate *e, uint64_t val) {
  char *scratch;
  size_t used;

  if (!upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    return false;
  }

  scratch = e->ptr;
  used = upb_encode_varint(val, scratch);

  /* Give back the unused front part of the reservation. */
  e->ptr = scratch + (UPB_PB_VARINT_MAX_LEN - used);
  memmove(e->ptr, scratch, used);
  return true;
}
3627 
/* Writes the bit pattern of |d| as a fixed 64-bit value.  The bits are copied
 * with memcpy() rather than reinterpreted through a pointer cast. */
static bool upb_put_double(upb_encstate *e, double d) {
  uint64_t bits;
  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed64(e, bits);
}
3634 
/* Writes the bit pattern of |d| as a fixed 32-bit value.  The bits are copied
 * with memcpy() rather than reinterpreted through a pointer cast. */
static bool upb_put_float(upb_encstate *e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed32(e, bits);
}
3641 
upb_readcase(const char * msg,const upb_msglayout_field * f)3642 static uint32_t upb_readcase(const char *msg, const upb_msglayout_field *f) {
3643   uint32_t ret;
3644   uint32_t offset = ~f->presence;
3645   memcpy(&ret, msg + offset, sizeof(ret));
3646   return ret;
3647 }
3648 
upb_readhasbit(const char * msg,const upb_msglayout_field * f)3649 static bool upb_readhasbit(const char *msg, const upb_msglayout_field *f) {
3650   uint32_t hasbit = f->presence;
3651   UPB_ASSERT(f->presence > 0);
3652   return msg[hasbit / 8] & (1 << (hasbit % 8));
3653 }
3654 
/* Writes a field tag: the varint of (field_number << 3) | wire_type.
 *
 * The shift is done in unsigned 64-bit arithmetic.  Shifting in int would
 * overflow for field numbers >= 2^28 (undefined behavior), and a negative
 * int result would be sign-extended into an oversized varint. */
static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
  uint64_t tag = ((uint64_t)(uint32_t)field_number << 3) |
                 (uint64_t)(uint32_t)wire_type;
  return upb_put_varint(e, tag);
}
3658 
/* Writes the raw contents of |arr| (arr->len elements of |size| bytes each)
 * followed by a varint with the total byte count.  Because the encoder writes
 * back-to-front, the length ends up in front of the payload in the output.
 * Returns false if either write fails. */
static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
                               size_t size) {
  size_t payload = arr->len * size;

  if (!upb_put_bytes(e, arr->data, payload)) {
    return false;
  }
  return upb_put_varint(e, payload);
}
3664 
/* Forward declaration: upb_encode_array() and upb_encode_scalarfield() below
 * call upb_encode_message() for submessages before its definition appears. */
3665 bool upb_encode_message(upb_encstate *e, const char *msg,
3666                         const upb_msglayout *m, size_t *size);
3667 
/* Encodes the repeated field |f| whose array pointer is stored at |field_mem|.
 * |m| is the layout of the containing message and is used to find submessage
 * layouts.  A NULL or empty array writes nothing.
 *
 * Output is produced back-to-front, so elements are visited from last to
 * first and each length/tag is written after the payload it precedes.
 * Fixed-width and varint element types are written as one packed block: the
 * case writes payload and length, then breaks to the shared tag write at the
 * bottom.  String/bytes, group and message elements are each written with
 * their own tag, and those cases return directly.
 *
 * Returns false as soon as any write fails. */
upb_encode_array(upb_encstate * e,const char * field_mem,const upb_msglayout * m,const upb_msglayout_field * f)3668 static bool upb_encode_array(upb_encstate *e, const char *field_mem,
3669                              const upb_msglayout *m,
3670                              const upb_msglayout_field *f) {
3671   const upb_array *arr = *(const upb_array**)field_mem;
3672 
3673   if (arr == NULL || arr->len == 0) {
3674     return true;
3675   }
3676 
3677   UPB_ASSERT(arr->type == upb_desctype_to_fieldtype2[f->descriptortype]);
3678 
/* VARINT_CASE(ctype, encode): treats the array data as ctype elements, writes
 * each one (last to first) as the varint of |encode| (an expression in terms
 * of ptr), then writes the total number of payload bytes as a varint.  The
 * expansion ends with `break;` followed by an empty do/while that absorbs the
 * semicolon at the use site, so every use leaves the switch. */
3679 #define VARINT_CASE(ctype, encode) { \
3680   ctype *start = arr->data; \
3681   ctype *ptr = start + arr->len; \
3682   size_t pre_len = e->limit - e->ptr; \
3683   do { \
3684     ptr--; \
3685     CHK(upb_put_varint(e, encode)); \
3686   } while (ptr != start); \
3687   CHK(upb_put_varint(e, e->limit - e->ptr - pre_len)); \
3688 } \
3689 break; \
3690 do { ; } while(0)
3691 
3692   switch (f->descriptortype) {
3693     case UPB_DESCRIPTOR_TYPE_DOUBLE:
3694       CHK(upb_put_fixedarray(e, arr, sizeof(double)));
3695       break;
3696     case UPB_DESCRIPTOR_TYPE_FLOAT:
3697       CHK(upb_put_fixedarray(e, arr, sizeof(float)));
3698       break;
3699     case UPB_DESCRIPTOR_TYPE_SFIXED64:
3700     case UPB_DESCRIPTOR_TYPE_FIXED64:
3701       CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t)));
3702       break;
3703     case UPB_DESCRIPTOR_TYPE_FIXED32:
3704     case UPB_DESCRIPTOR_TYPE_SFIXED32:
3705       CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t)));
3706       break;
3707     case UPB_DESCRIPTOR_TYPE_INT64:
3708     case UPB_DESCRIPTOR_TYPE_UINT64:
3709       VARINT_CASE(uint64_t, *ptr);
3710     case UPB_DESCRIPTOR_TYPE_UINT32:
3711       VARINT_CASE(uint32_t, *ptr);
3712     case UPB_DESCRIPTOR_TYPE_INT32:
3713     case UPB_DESCRIPTOR_TYPE_ENUM:
      /* The int64_t cast sign-extends negative values before they are
       * converted to the uint64_t varint argument. */
3714       VARINT_CASE(int32_t, (int64_t)*ptr);
3715     case UPB_DESCRIPTOR_TYPE_BOOL:
3716       VARINT_CASE(bool, *ptr);
3717     case UPB_DESCRIPTOR_TYPE_SINT32:
3718       VARINT_CASE(int32_t, upb_zzencode_32(*ptr));
3719     case UPB_DESCRIPTOR_TYPE_SINT64:
3720       VARINT_CASE(int64_t, upb_zzencode_64(*ptr));
3721     case UPB_DESCRIPTOR_TYPE_STRING:
3722     case UPB_DESCRIPTOR_TYPE_BYTES: {
3723       upb_strview *start = arr->data;
3724       upb_strview *ptr = start + arr->len;
      /* One payload + length + tag per element; returns without reaching the
       * packed tag write below. */
3725       do {
3726         ptr--;
3727         CHK(upb_put_bytes(e, ptr->data, ptr->size) &&
3728             upb_put_varint(e, ptr->size) &&
3729             upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
3730       } while (ptr != start);
3731       return true;
3732     }
3733     case UPB_DESCRIPTOR_TYPE_GROUP: {
3734       void **start = arr->data;
3735       void **ptr = start + arr->len;
3736       const upb_msglayout *subm = m->submsgs[f->submsg_index];
      /* END_GROUP is written first so that it follows the body in the
       * back-to-front output; the size result is not needed for groups. */
3737       do {
3738         size_t size;
3739         ptr--;
3740         CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
3741             upb_encode_message(e, *ptr, subm, &size) &&
3742             upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP));
3743       } while (ptr != start);
3744       return true;
3745     }
3746     case UPB_DESCRIPTOR_TYPE_MESSAGE: {
3747       void **start = arr->data;
3748       void **ptr = start + arr->len;
3749       const upb_msglayout *subm = m->submsgs[f->submsg_index];
      /* Each submessage is written as body, then its size, then the tag. */
3750       do {
3751         size_t size;
3752         ptr--;
3753         CHK(upb_encode_message(e, *ptr, subm, &size) &&
3754             upb_put_varint(e, size) &&
3755             upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
3756       } while (ptr != start);
3757       return true;
3758     }
3759   }
3760 #undef VARINT_CASE
3761 
3762   /* We encode all primitive arrays as packed, regardless of what was specified
3763    * in the .proto file.  Could special case 1-sized arrays. */
3764   CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
3765   return true;
3766 }
3767 
/* Encodes the single (non-repeated) field |f| whose value is stored at
 * |field_mem|.  |m| is the layout of the containing message and is used to
 * find submessage layouts.
 *
 * When |skip_zero_value| is true, numeric values equal to 0 and empty
 * strings/bytes are not written.  Submessage and group fields are skipped
 * when their pointer is NULL, regardless of |skip_zero_value|.
 *
 * Output is produced back-to-front, so the value is written before its tag.
 * Every case of the switch returns; returns false if any write fails. */
upb_encode_scalarfield(upb_encstate * e,const char * field_mem,const upb_msglayout * m,const upb_msglayout_field * f,bool skip_zero_value)3768 static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem,
3769                                    const upb_msglayout *m,
3770                                    const upb_msglayout_field *f,
3771                                    bool skip_zero_value) {
/* CASE(ctype, type, wire_type, encodeval): reads the field as ctype into
 * `val`, optionally skips a zero value, then returns the result of writing
 * |encodeval| with upb_put_<type>() followed by the tag. */
3772 #define CASE(ctype, type, wire_type, encodeval) do { \
3773   ctype val = *(ctype*)field_mem; \
3774   if (skip_zero_value && val == 0) { \
3775     return true; \
3776   } \
3777   return upb_put_ ## type(e, encodeval) && \
3778       upb_put_tag(e, f->number, wire_type); \
3779 } while(0)
3780 
3781   switch (f->descriptortype) {
3782     case UPB_DESCRIPTOR_TYPE_DOUBLE:
3783       CASE(double, double, UPB_WIRE_TYPE_64BIT, val);
3784     case UPB_DESCRIPTOR_TYPE_FLOAT:
3785       CASE(float, float, UPB_WIRE_TYPE_32BIT, val);
3786     case UPB_DESCRIPTOR_TYPE_INT64:
3787     case UPB_DESCRIPTOR_TYPE_UINT64:
3788       CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
3789     case UPB_DESCRIPTOR_TYPE_UINT32:
3790       CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
3791     case UPB_DESCRIPTOR_TYPE_INT32:
3792     case UPB_DESCRIPTOR_TYPE_ENUM:
      /* The int64_t cast sign-extends negative values before they are
       * converted to the uint64_t varint argument. */
3793       CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
3794     case UPB_DESCRIPTOR_TYPE_SFIXED64:
3795     case UPB_DESCRIPTOR_TYPE_FIXED64:
3796       CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
3797     case UPB_DESCRIPTOR_TYPE_FIXED32:
3798     case UPB_DESCRIPTOR_TYPE_SFIXED32:
3799       CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
3800     case UPB_DESCRIPTOR_TYPE_BOOL:
3801       CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
3802     case UPB_DESCRIPTOR_TYPE_SINT32:
3803       CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val));
3804     case UPB_DESCRIPTOR_TYPE_SINT64:
3805       CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val));
3806     case UPB_DESCRIPTOR_TYPE_STRING:
3807     case UPB_DESCRIPTOR_TYPE_BYTES: {
3808       upb_strview view = *(upb_strview*)field_mem;
3809       if (skip_zero_value && view.size == 0) {
3810         return true;
3811       }
3812       return upb_put_bytes(e, view.data, view.size) &&
3813           upb_put_varint(e, view.size) &&
3814           upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
3815     }
3816     case UPB_DESCRIPTOR_TYPE_GROUP: {
3817       size_t size;
3818       void *submsg = *(void **)field_mem;
3819       const upb_msglayout *subm = m->submsgs[f->submsg_index];
3820       if (submsg == NULL) {
3821         return true;
3822       }
      /* END_GROUP is written first so that it follows the body in the
       * back-to-front output; the size result is not needed for groups. */
3823       return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
3824           upb_encode_message(e, submsg, subm, &size) &&
3825           upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
3826     }
3827     case UPB_DESCRIPTOR_TYPE_MESSAGE: {
3828       size_t size;
3829       void *submsg = *(void **)field_mem;
3830       const upb_msglayout *subm = m->submsgs[f->submsg_index];
3831       if (submsg == NULL) {
3832         return true;
3833       }
      /* Body first, then its size, then the tag. */
3834       return upb_encode_message(e, submsg, subm, &size) &&
3835           upb_put_varint(e, size) &&
3836           upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
3837     }
3838   }
3839 #undef CASE
  /* Reached only if descriptortype matched none of the cases above. */
3840   UPB_UNREACHABLE();
3841 }
3842 
/* Encodes message |msg|, laid out according to |m|, in front of the data
 * already in |e|, and stores the number of bytes this call added in |*size|.
 *
 * Fields are visited from last to first because output is produced
 * back-to-front.  Repeated fields go through upb_encode_array().  For other
 * fields f->presence selects the presence rule:
 *   == 0  no explicit presence; zero/empty values are skipped,
 *   >  0  hasbit index; the field is skipped when its hasbit is clear,
 *   <  0  oneof member; the field is skipped unless it is the active case.
 * Unknown-field bytes, if any, are written last.
 *
 * Returns false if any write fails, including the unknown-field write;
 * |*size| is not set in that case. */
bool upb_encode_message(upb_encstate *e, const char *msg,
                        const upb_msglayout *m, size_t *size) {
  int i;
  size_t pre_len = e->limit - e->ptr;
  const char *unknown;
  size_t unknown_size;

  for (i = m->field_count - 1; i >= 0; i--) {
    const upb_msglayout_field *f = &m->fields[i];

    if (f->label == UPB_LABEL_REPEATED) {
      CHK(upb_encode_array(e, msg + f->offset, m, f));
    } else {
      bool skip_empty = false;
      if (f->presence == 0) {
        /* Proto3 presence. */
        skip_empty = true;
      } else if (f->presence > 0) {
        /* Proto2 presence: hasbit. */
        if (!upb_readhasbit(msg, f)) {
          continue;
        }
      } else {
        /* Field is in a oneof. */
        if (upb_readcase(msg, f) != f->number) {
          continue;
        }
      }
      CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty));
    }
  }

  unknown = upb_msg_getunknown(msg, &unknown_size);

  if (unknown) {
    /* Propagate allocation failure instead of silently dropping the unknown
     * fields and reporting success. */
    CHK(upb_put_bytes(e, unknown, unknown_size));
  }

  *size = (e->limit - e->ptr) - pre_len;
  return true;
}
3884 
/* Serializes |msg| (laid out per |m|) into memory obtained from |arena|.
 *
 * On success returns a pointer to the encoded bytes and stores their count in
 * |*size|.  An empty encoding returns a non-NULL pointer to a static byte with
 * |*size| == 0.  On failure returns NULL and sets |*size| to 0. */
char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
                 size_t *size) {
  /* Gives empty output a valid, non-NULL address. */
  static char ch;
  upb_encstate e;

  e.alloc = upb_arena_alloc(arena);
  e.buf = NULL;
  e.ptr = NULL;
  e.limit = NULL;

  if (!upb_encode_message(&e, msg, m, size)) {
    *size = 0;
    return NULL;
  }

  *size = e.limit - e.ptr;
  if (*size == 0) {
    return &ch;
  }

  UPB_ASSERT(e.ptr);
  return e.ptr;
}
3908 
3909 #undef CHK
3910 /*
3911 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
3912 ** UPB_ASSERT() or return false.
3913 */
3914 
3915 
3916 #include <string.h>
3917 
3918 
/* Allocates |size| bytes with upb_gmalloc() and zero-fills them.  Returns NULL
 * if the allocation fails. */
static void *upb_calloc(size_t size) {
  void *mem = upb_gmalloc(size);
  if (!mem) {
    return NULL;
  }
  memset(mem, 0, size);
  return mem;
}
3926 
/* Non-static object, so it has external linkage.  Per the note below, only
 * its address is meaningful; the stored value is never relevant. */
3927 /* Defined for the sole purpose of having a unique pointer value for
3928  * UPB_NO_CLOSURE. */
3929 char _upb_noclosure;
3930 
/* Refcounting "free" callback for upb_handlers.
 *
 * Runs every registered cleanup function on its associated pointer (the
 * cleanup_ table maps pointer -> free function), then releases the cleanup
 * table, the ref on the message def, the subhandlers array and finally the
 * handlers object itself. */
static void freehandlers(upb_refcounted *r) {
  upb_handlers *h = (upb_handlers*)r;
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &h->cleanup_);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    void *target = (void*)upb_inttable_iter_key(&it);
    upb_value stored = upb_inttable_iter_value(&it);
    upb_handlerfree *cleanup = upb_value_getfptr(stored);
    cleanup(target);
  }

  upb_inttable_uninit(&h->cleanup_);
  upb_msgdef_unref(h->msg, h);
  upb_gfree(h->sub);
  upb_gfree(h);
}
3948 
/* Refcounting "visit" callback for upb_handlers: calls |visit| once for every
 * submessage field of the handlers' message that has subhandlers set, passing
 * |r|, the upcast subhandlers and |closure|. */
static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
                          void *closure) {
  const upb_handlers *h = (const upb_handlers*)r;
  upb_msg_field_iter it;

  upb_msg_field_begin(&it, h->msg);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *f = upb_msg_iter_field(&it);

    if (upb_fielddef_issubmsg(f)) {
      const upb_handlers *sub = upb_handlers_getsubhandlers(h, f);
      if (sub) {
        visit(r, upb_handlers_upcast(sub), closure);
      }
    }
    upb_msg_field_next(&it);
  }
}
3963 
/* Refcounting vtable for upb_handlers, initialized with the visit callback
 * followed by the free callback defined above. */
3964 static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
3965 
/* State threaded through the recursive newformsg() traversal: the table of
 * handlers created so far, plus the callback (and its closure argument) that
 * newformsg() invokes on each newly created handlers object. */
3966 typedef struct {
3967   upb_inttable tab;  /* maps upb_msgdef* -> upb_handlers*. */
3968   upb_handlers_callback *callback;
3969   const void *closure;
3970 } dfs_state;
3971 
3972 /* TODO(haberman): discard upb_handlers* objects that do not actually have any
3973  * handlers set and cannot reach any upb_handlers* object that does.  This is
3974  * slightly tricky to do correctly. */
newformsg(const upb_msgdef * m,const void * owner,dfs_state * s)3975 static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
3976                                dfs_state *s) {
3977   upb_msg_field_iter i;
3978   upb_handlers *h = upb_handlers_new(m, owner);
3979   if (!h) return NULL;
3980   if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
3981 
3982   s->callback(s->closure, h);
3983 
3984   /* For each submessage field, get or create a handlers object and set it as
3985    * the subhandlers. */
3986   for(upb_msg_field_begin(&i, m);
3987       !upb_msg_field_done(&i);
3988       upb_msg_field_next(&i)) {
3989     upb_fielddef *f = upb_msg_iter_field(&i);
3990     const upb_msgdef *subdef;
3991     upb_value subm_ent;
3992 
3993     if (!upb_fielddef_issubmsg(f)) continue;
3994 
3995     subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
3996     if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
3997       upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
3998     } else {
3999       upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
4000       if (!sub_mh) goto oom;
4001       upb_handlers_setsubhandlers(h, f, sub_mh);
4002       upb_handlers_unref(sub_mh, &sub_mh);
4003     }
4004   }
4005   return h;
4006 
4007 oom:
4008   upb_handlers_unref(h, owner);
4009   return NULL;
4010 }
4011 
/* Given a selector for a STARTSUBMSG handler, resolves to the slot holding the
 * subhandlers for this submessage field.  Expands to an lvalue.  Arguments
 * and the whole expansion are parenthesized so that expression arguments
 * expand correctly. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, (f)->index_)
4018 
/* Computes the selector for handler |type| on field |f| of handlers |h|,
 * which must not be frozen.
 *
 * Returns the selector, or -1 after recording an error in h->status_ when
 * |f| does not belong to the handlers' message or when no selector exists for
 * this field/handler-type combination. */
static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
                         upb_handlertype_t type) {
  upb_selector_t sel;

  UPB_ASSERT(!upb_handlers_isfrozen(h));

  if (upb_fielddef_containingtype(f) != upb_handlers_msgdef(h)) {
    upb_status_seterrf(
        &h->status_, "type mismatch: field %s does not belong to message %s",
        upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h)));
    return -1;
  }

  if (upb_handlers_getselector(f, type, &sel)) {
    return sel;
  }

  upb_status_seterrf(
      &h->status_,
      "type mismatch: cannot register handler type %d for field %s",
      type, upb_fielddef_name(f));
  return -1;
}
4038 
handlers_getsel(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)4039 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
4040                              upb_handlertype_t type) {
4041   int32_t sel = trygetsel(h, f, type);
4042   UPB_ASSERT(sel >= 0);
4043   return sel;
4044 }
4045 
/* Returns the address of the return-closure-type slot stored in the handler
 * table entry for handler |type| on field |f|. */
static const void **returntype(upb_handlers *h, const upb_fielddef *f,
                               upb_handlertype_t type) {
  upb_selector_t sel = handlers_getsel(h, f, type);
  return &h->table[sel].attr.return_closure_type_;
}
4050 
/* Registers |func| with attributes |attr| (defaults when NULL) in the handler
 * table slot |sel| of |h|, which must not be frozen.  |f| is the field the
 * handler is for, or NULL for a message-level handler.
 *
 * Fails (returns false with an error in h->status_) when:
 *   - |sel| is negative,
 *   - the slot already has a handler,
 *   - the attr's closure type conflicts with the closure type already
 *     established for this context, or
 *   - for STARTSEQ/STARTSTR handlers, the attr's return closure type
 *     conflicts with one already recorded in the slot.
 *
 * On success the closure type (if given) becomes the context's established
 * type and the slot receives |func| and the effective attributes. */
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
                  upb_handlertype_t type, upb_func *func,
                  upb_handlerattr *attr) {
  upb_handlerattr effective = UPB_HANDLERATTR_INITIALIZER;
  const void *wanted_closure;
  const void **ctx_closure_slot;

  UPB_ASSERT(!upb_handlers_isfrozen(h));

  if (sel < 0) {
    upb_status_seterrmsg(&h->status_,
                         "incorrect handler type for this field.");
    return false;
  }

  if (h->table[sel].func) {
    upb_status_seterrmsg(&h->status_,
                         "cannot change handler once it has been set.");
    return false;
  }

  if (attr) {
    effective = *attr;
  }

  wanted_closure = upb_handlerattr_closuretype(&effective);

  /* Find where the closure type for this handler's context is recorded:
   * string data runs in the closure returned by STARTSTR, other handlers of
   * a sequence field run in the closure returned by STARTSEQ, and everything
   * else runs in the top-level closure. */
  if (type == UPB_HANDLER_STRING) {
    ctx_closure_slot = returntype(h, f, UPB_HANDLER_STARTSTR);
  } else if (f && upb_fielddef_isseq(f) &&
             type != UPB_HANDLER_STARTSEQ &&
             type != UPB_HANDLER_ENDSEQ) {
    ctx_closure_slot = returntype(h, f, UPB_HANDLER_STARTSEQ);
  } else {
    ctx_closure_slot = &h->top_closure_type;
  }

  if (wanted_closure) {
    /* A closure type already established for this context must agree. */
    if (*ctx_closure_slot && *ctx_closure_slot != wanted_closure) {
      /* TODO(haberman): better message for debugging. */
      if (f) {
        upb_status_seterrf(&h->status_,
                           "closure type does not match for field %s",
                           upb_fielddef_name(f));
      } else {
        upb_status_seterrmsg(
            &h->status_,
            "closure type does not match for message-level handler");
      }
      return false;
    }
    *ctx_closure_slot = wanted_closure;
  }

  /* STARTSEQ/STARTSTR handlers return a closure; check it against any return
   * type already expected for this slot and inherit that type when the new
   * attr leaves it unspecified. */
  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
    const void *new_ret = upb_handlerattr_returnclosuretype(&effective);
    const void *old_ret =
        upb_handlerattr_returnclosuretype(&h->table[sel].attr);

    if (new_ret && old_ret && new_ret != old_ret) {
      upb_status_seterrmsg(&h->status_, "closure return type does not match");
      return false;
    }

    if (old_ret && !new_ret) {
      upb_handlerattr_setreturnclosuretype(&effective, old_ret);
    }
  }

  h->table[sel].func = (upb_func*)func;
  h->table[sel].attr = effective;
  return true;
}
4126 
4127 /* Returns the effective closure type for this handler (which will propagate
4128  * from outer frames if this frame has no START* handler).  Not implemented for
4129  * UPB_HANDLER_STRING at the moment since this is not needed.  Returns NULL is
4130  * the effective closure type is unspecified (either no handler was registered
4131  * to specify it or the handler that was registered did not specify the closure
4132  * type). */
effective_closure_type(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)4133 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
4134                                    upb_handlertype_t type) {
4135   const void *ret;
4136   upb_selector_t sel;
4137 
4138   UPB_ASSERT(type != UPB_HANDLER_STRING);
4139   ret = h->top_closure_type;
4140 
4141   if (upb_fielddef_isseq(f) &&
4142       type != UPB_HANDLER_STARTSEQ &&
4143       type != UPB_HANDLER_ENDSEQ &&
4144       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
4145     ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
4146   }
4147 
4148   if (type == UPB_HANDLER_STRING &&
4149       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
4150     ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
4151   }
4152 
4153   /* The effective type of the submessage; not used yet.
4154    * if (type == SUBMESSAGE &&
4155    *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
4156    *   ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
4157    * } */
4158 
4159   return ret;
4160 }
4161 
4162 /* Checks whether the START* handler specified by f & type is missing even
4163  * though it is required to convert the established type of an outer frame
4164  * ("closure_type") into the established type of an inner frame (represented in
4165  * the return closure type of this handler's attr. */
checkstart(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type,upb_status * status)4166 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
4167                 upb_status *status) {
4168   const void *closure_type;
4169   const upb_handlerattr *attr;
4170   const void *return_closure_type;
4171 
4172   upb_selector_t sel = handlers_getsel(h, f, type);
4173   if (h->table[sel].func) return true;
4174   closure_type = effective_closure_type(h, f, type);
4175   attr = &h->table[sel].attr;
4176   return_closure_type = upb_handlerattr_returnclosuretype(attr);
4177   if (closure_type && return_closure_type &&
4178       closure_type != return_closure_type) {
4179     upb_status_seterrf(status,
4180                        "expected start handler to return sub type for field %f",
4181                        upb_fielddef_name(f));
4182     return false;
4183   }
4184   return true;
4185 }
4186 
4187 /* Public interface ***********************************************************/
4188 
upb_handlers_new(const upb_msgdef * md,const void * owner)4189 upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
4190   int extra;
4191   upb_handlers *h;
4192 
4193   UPB_ASSERT(upb_msgdef_isfrozen(md));
4194 
4195   extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
4196   h = upb_calloc(sizeof(*h) + extra);
4197   if (!h) return NULL;
4198 
4199   h->msg = md;
4200   upb_msgdef_ref(h->msg, h);
4201   upb_status_clear(&h->status_);
4202 
4203   if (md->submsg_field_count > 0) {
4204     h->sub = upb_calloc(md->submsg_field_count * sizeof(*h->sub));
4205     if (!h->sub) goto oom;
4206   } else {
4207     h->sub = 0;
4208   }
4209 
4210   if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
4211     goto oom;
4212   if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
4213 
4214   /* calloc() above initialized all handlers to NULL. */
4215   return h;
4216 
4217 oom:
4218   freehandlers(upb_handlers_upcast_mutable(h));
4219   return NULL;
4220 }
4221 
upb_handlers_newfrozen(const upb_msgdef * m,const void * owner,upb_handlers_callback * callback,const void * closure)4222 const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
4223                                            const void *owner,
4224                                            upb_handlers_callback *callback,
4225                                            const void *closure) {
4226   dfs_state state;
4227   upb_handlers *ret;
4228   bool ok;
4229   upb_refcounted *r;
4230 
4231   state.callback = callback;
4232   state.closure = closure;
4233   if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
4234 
4235   ret = newformsg(m, owner, &state);
4236 
4237   upb_inttable_uninit(&state.tab);
4238   if (!ret) return NULL;
4239 
4240   r = upb_handlers_upcast_mutable(ret);
4241   ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
4242   UPB_ASSERT(ok);
4243 
4244   return ret;
4245 }
4246 
upb_handlers_status(upb_handlers * h)4247 const upb_status *upb_handlers_status(upb_handlers *h) {
4248   UPB_ASSERT(!upb_handlers_isfrozen(h));
4249   return &h->status_;
4250 }
4251 
/* Clears the status object embedded in "h".  Asserts that the handlers are
 * not frozen. */
void upb_handlers_clearerr(upb_handlers *h) {
  upb_status *status = &h->status_;
  UPB_ASSERT(!upb_handlers_isfrozen(h));
  upb_status_clear(status);
}
4256 
4257 #define SETTER(name, handlerctype, handlertype) \
4258   bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
4259                                 handlerctype func, upb_handlerattr *attr) { \
4260     int32_t sel = trygetsel(h, f, handlertype); \
4261     return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
4262   }
4263 
SETTER(int32,upb_int32_handlerfunc *,UPB_HANDLER_INT32)4264 SETTER(int32,       upb_int32_handlerfunc*,       UPB_HANDLER_INT32)
4265 SETTER(int64,       upb_int64_handlerfunc*,       UPB_HANDLER_INT64)
4266 SETTER(uint32,      upb_uint32_handlerfunc*,      UPB_HANDLER_UINT32)
4267 SETTER(uint64,      upb_uint64_handlerfunc*,      UPB_HANDLER_UINT64)
4268 SETTER(float,       upb_float_handlerfunc*,       UPB_HANDLER_FLOAT)
4269 SETTER(double,      upb_double_handlerfunc*,      UPB_HANDLER_DOUBLE)
4270 SETTER(bool,        upb_bool_handlerfunc*,        UPB_HANDLER_BOOL)
4271 SETTER(startstr,    upb_startstr_handlerfunc*,    UPB_HANDLER_STARTSTR)
4272 SETTER(string,      upb_string_handlerfunc*,      UPB_HANDLER_STRING)
4273 SETTER(endstr,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSTR)
4274 SETTER(startseq,    upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSEQ)
4275 SETTER(startsubmsg, upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSUBMSG)
4276 SETTER(endsubmsg,   upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSUBMSG)
4277 SETTER(endseq,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSEQ)
4278 
4279 #undef SETTER
4280 
4281 bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func,
4282                              upb_handlerattr *attr) {
4283   return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32,
4284                (upb_func *)func, attr);
4285 }
4286 
/* Registers the start-of-message handler at UPB_STARTMSG_SELECTOR.  This is a
 * message-level handler, so no field is passed to doset(). */
bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
                              upb_handlerattr *attr) {
  upb_func *untyped = (upb_func *)func;
  return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, untyped,
               attr);
}
4292 
/* Registers the end-of-message handler at UPB_ENDMSG_SELECTOR.  Asserts that
 * the handlers are not frozen.  This is a message-level handler, so no field
 * is passed to doset(). */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            upb_handlerattr *attr) {
  upb_func *untyped = (upb_func *)func;
  UPB_ASSERT(!upb_handlers_isfrozen(h));
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, untyped,
               attr);
}
4299 
/* Attaches "sub" as the handlers for submessage field "f" and takes a ref on
 * it on behalf of "h".  Returns false, changing nothing, if subhandlers were
 * already set for the field or if the message def of "sub" is not the
 * field's subdef. */
bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
                                 const upb_handlers *sub) {
  UPB_ASSERT(sub);
  UPB_ASSERT(!upb_handlers_isfrozen(h));
  UPB_ASSERT(upb_fielddef_issubmsg(f));

  /* Subhandlers cannot be reset, and must describe the field's own type. */
  if (SUBH_F(h, f) ||
      upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
    return false;
  }

  SUBH_F(h, f) = sub;
  upb_ref2(sub, h);
  return true;
}
4313 
/* Returns the subhandlers slot stored for submessage field "f".  Asserts that
 * "f" is a submessage field. */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  const upb_handlers *sub;
  UPB_ASSERT(upb_fielddef_issubmsg(f));
  sub = SUBH_F(h, f);
  return sub;
}
4319 
/* Copies the attributes stored for selector "sel" into "*attr".  Returns
 * false, leaving "*attr" untouched, if no handler is registered for that
 * selector. */
bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
                          upb_handlerattr *attr) {
  if (upb_handlers_gethandler(h, sel)) {
    *attr = h->table[sel].attr;
    return true;
  }
  return false;
}
4327 
/* Returns the subhandlers for a STARTSUBMSG selector.  The selector minus
 * UPB_STATIC_SELECTOR_COUNT is used as the index into the subhandlers
 * array. */
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
                                                    upb_selector_t sel) {
  const upb_handlers *sub = SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
  return sub;
}
4333 
upb_handlers_msgdef(const upb_handlers * h)4334 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
4335 
/* Registers "func" in the cleanup table of "h", keyed by pointer "p".
 *
 * Returns false if "p" is already registered, or if the entry could not be
 * inserted into the cleanup table.  Previously an insertion failure was only
 * asserted, so in builds where the assertion is disabled the function
 * reported success even though nothing had been registered. */
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
  bool ok;
  if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
    return false;
  }
  ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
  UPB_ASSERT(ok);
  return ok;
}
4345 
4346 
4347 /* "Static" methods ***********************************************************/
4348 
/* Validates and freezes the "n" handlers objects in "handlers".
 *
 * For each handlers object this:
 *   1. fails if the object carries an error status,
 *   2. checks every sequence/string field for a missing START* handler that
 *      would be needed to convert closure types (see checkstart()),
 *   3. installs empty subhandlers for submessage fields that have
 *      start/end handlers but no subhandlers.
 * Finally the whole set is frozen with upb_refcounted_freeze().
 *
 * Returns false, with the reason in "s", if any step fails. */
bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
  /* TODO: verify we have a transitive closure. */
  int i;
  for (i = 0; i < n; i++) {
    upb_msg_field_iter j;
    upb_handlers *h = handlers[i];

    if (!upb_ok(&h->status_)) {
      upb_status_seterrf(s, "handlers for message %s had error status: %s",
                         upb_msgdef_fullname(upb_handlers_msgdef(h)),
                         upb_status_errmsg(&h->status_));
      return false;
    }

    /* Check that there are no closure mismatches due to missing Start* handlers
     * or subhandlers with different type-level types. */
    for(upb_msg_field_begin(&j, h->msg);
        !upb_msg_field_done(&j);
        upb_msg_field_next(&j)) {

      const upb_fielddef *f = upb_msg_iter_field(&j);
      if (upb_fielddef_isseq(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
          return false;
      }

      if (upb_fielddef_isstring(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
          return false;
      }

      if (upb_fielddef_issubmsg(f)) {
        bool hashandler = false;
        if (upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
            upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
          hashandler = true;
        }

        if (upb_fielddef_isseq(f) &&
            (upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
             upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
          hashandler = true;
        }

        if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
          /* For now we add an empty subhandlers in this case.  It makes the
           * decoder code generator simpler, because it only has to handle two
           * cases (submessage has handlers or not) as opposed to three
           * (submessage has handlers in enclosing message but no subhandlers).
           *
           * This makes parsing less efficient in the case that we want to
           * notice a submessage but skip its contents (like if we're testing
           * for submessage presence or counting the number of repeated
           * submessages).  In this case we will end up parsing the submessage
           * field by field and throwing away the results for each, instead of
           * skipping the whole delimited thing at once.  If this is an issue we
           * can revisit it, but do remember that this only arises when you have
           * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
           * submessage but no subhandlers.  The use cases for this are
           * limited. */
          upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
          if (!sub) {
            /* upb_handlers_new() returns NULL on allocation failure, and
             * upb_handlers_setsubhandlers() requires a non-NULL argument. */
            upb_status_seterrmsg(s, "out of memory");
            return false;
          }
          upb_handlers_setsubhandlers(h, f, sub);
          upb_handlers_unref(sub, &sub);
        }

        /* TODO(haberman): check type of submessage.
         * This is slightly tricky; also consider whether we should check that
         * they match at setsubhandlers time. */
      }
    }
  }

  if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
                             UPB_MAX_HANDLER_DEPTH)) {
    return false;
  }

  return true;
}
4432 
upb_handlers_getprimitivehandlertype(const upb_fielddef * f)4433 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
4434   switch (upb_fielddef_type(f)) {
4435     case UPB_TYPE_INT32:
4436     case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
4437     case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
4438     case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
4439     case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
4440     case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
4441     case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
4442     case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
4443     default: UPB_ASSERT(false); return -1;  /* Invalid input. */
4444   }
4445 }
4446 
/* Computes the selector for handler "type" on field "f" and stores it in
 * "*s".  Returns false, without writing "*s", if that handler type does not
 * apply to the field.
 *
 * Selector layout relative to f->selector_base, as encoded below:
 *   base - 2  STARTSEQ (sequences only)
 *   base - 1  ENDSEQ   (sequences only)
 *   base      primitive value / STRING (string fields) / ENDSUBMSG
 *   base + 1  STARTSTR
 *   base + 2  ENDSTR
 *   base + 3  STRING (lazy fields)
 * STARTSUBMSG is instead derived from the field index. */
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
                              upb_selector_t *s) {
  switch (type) {
    case UPB_HANDLER_INT32:
    case UPB_HANDLER_INT64:
    case UPB_HANDLER_UINT32:
    case UPB_HANDLER_UINT64:
    case UPB_HANDLER_FLOAT:
    case UPB_HANDLER_DOUBLE:
    case UPB_HANDLER_BOOL:
      /* The handler type must match the field's primitive type exactly. */
      if (upb_fielddef_isprimitive(f) &&
          upb_handlers_getprimitivehandlertype(f) == type) {
        *s = f->selector_base;
        break;
      }
      return false;

    case UPB_HANDLER_STRING:
      if (upb_fielddef_isstring(f)) {
        *s = f->selector_base;
        break;
      }
      if (upb_fielddef_lazy(f)) {
        *s = f->selector_base + 3;
        break;
      }
      return false;

    case UPB_HANDLER_STARTSTR:
      if (!upb_fielddef_isstring(f) && !upb_fielddef_lazy(f)) return false;
      *s = f->selector_base + 1;
      break;

    case UPB_HANDLER_ENDSTR:
      if (!upb_fielddef_isstring(f) && !upb_fielddef_lazy(f)) return false;
      *s = f->selector_base + 2;
      break;

    case UPB_HANDLER_STARTSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = f->selector_base - 2;
      break;

    case UPB_HANDLER_ENDSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = f->selector_base - 1;
      break;

    case UPB_HANDLER_STARTSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      /* STARTSUBMSG selectors sit at the beginning of the table so that the
       * selector can double as an index into the "sub" array of subhandlers.
       * The two indexes are the same, except that in the handler table the
       * static selectors come first. */
      *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
      break;

    case UPB_HANDLER_ENDSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      *s = f->selector_base;
      break;
  }

  UPB_ASSERT((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
  return true;
}
4509 
/* Returns 2 for sequence fields and 0 for all other fields. */
uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
4513 
/* Returns the selector count computed for field "f": one, plus extra
 * selectors depending on whether the field is a sequence, a string, or a
 * lazy submessage. */
uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;

  if (upb_fielddef_isseq(f)) {
    count += 2;  /* STARTSEQ/ENDSEQ */
  }
  if (upb_fielddef_isstring(f)) {
    count += 2;  /* [STRING]/STARTSTR/ENDSTR */
  }
  if (upb_fielddef_issubmsg(f)) {
    /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
    count += 0;
    if (upb_fielddef_lazy(f)) {
      /* STARTSTR/ENDSTR/STRING (for lazy) */
      count += 3;
    }
  }

  return count;
}
4528 
4529 
4530 /* upb_handlerattr ************************************************************/
4531 
/* Resets "*attr" to the values of UPB_HANDLERATTR_INITIALIZER. */
void upb_handlerattr_init(upb_handlerattr *attr) {
  const upb_handlerattr defaults = UPB_HANDLERATTR_INITIALIZER;
  memcpy(attr, &defaults, sizeof(defaults));
}
4536 
/* Counterpart of upb_handlerattr_init(); currently does nothing. */
void upb_handlerattr_uninit(upb_handlerattr *attr) {
  UPB_UNUSED(attr);
}
4540 
/* Stores "hd" as the handler data of "attr".  Always returns true. */
bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
  return (attr->handler_data_ = hd, true);
}
4545 
/* Stores "type" as the closure type of "attr".  Always returns true. */
bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
  return (attr->closure_type_ = type, true);
}
4550 
upb_handlerattr_closuretype(const upb_handlerattr * attr)4551 const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
4552   return attr->closure_type_;
4553 }
4554 
/* Stores "type" as the return closure type of "attr".  Always returns
 * true. */
bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
                                          const void *type) {
  return (attr->return_closure_type_ = type, true);
}
4560 
upb_handlerattr_returnclosuretype(const upb_handlerattr * attr)4561 const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
4562   return attr->return_closure_type_;
4563 }
4564 
/* Stores the "alwaysok" flag in "attr".  Always returns true. */
bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
  return (attr->alwaysok_ = alwaysok, true);
}
4569 
upb_handlerattr_alwaysok(const upb_handlerattr * attr)4570 bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
4571   return attr->alwaysok_;
4572 }
4573 
4574 /* upb_bufhandle **************************************************************/
4575 
upb_bufhandle_objofs(const upb_bufhandle * h)4576 size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
4577   return h->objofs_;
4578 }
4579 
4580 /* upb_byteshandler ***********************************************************/
4581 
/* Zero-fills the whole bytes handler object. */
void upb_byteshandler_init(upb_byteshandler* h) {
  memset(h, 0, sizeof(upb_byteshandler));
}
4585 
4586 /* For when we support handlerfree callbacks. */
upb_byteshandler_uninit(upb_byteshandler * h)4587 void upb_byteshandler_uninit(upb_byteshandler* h) {
4588   UPB_UNUSED(h);
4589 }
4590 
/* Installs "func" and its handler data "d" in the UPB_STARTSTR_SELECTOR slot.
 * Always returns true. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
                                  upb_startstr_handlerfunc *func, void *d) {
  upb_func *untyped = (upb_func*)func;
  h->table[UPB_STARTSTR_SELECTOR].func = untyped;
  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
  return true;
}
4597 
/* Installs "func" and its handler data "d" in the UPB_STRING_SELECTOR slot.
 * Always returns true. */
bool upb_byteshandler_setstring(upb_byteshandler *h,
                                upb_string_handlerfunc *func, void *d) {
  upb_func *untyped = (upb_func*)func;
  h->table[UPB_STRING_SELECTOR].func = untyped;
  h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
  return true;
}
4604 
/* Installs "func" and its handler data "d" in the UPB_ENDSTR_SELECTOR slot.
 * Always returns true. */
bool upb_byteshandler_setendstr(upb_byteshandler *h,
                                upb_endfield_handlerfunc *func, void *d) {
  upb_func *untyped = (upb_func*)func;
  h->table[UPB_ENDSTR_SELECTOR].func = untyped;
  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
  return true;
}
4611 
4612 
4613 /** Handlers for upb_msg ******************************************************/
4614 
/* Handler data for the scalar writers below. */
typedef struct {
  size_t offset;   /* Byte offset of the field within the message. */
  int32_t hasbit;  /* Bit index of the hasbit; not set unless > 0. */
} upb_msg_handlerdata;

/* Fallback implementation if the handler is not specialized by the producer.
 *
 * Generates upb_msg_set<type>(): sets the hasbit (when one is configured) and
 * stores "val" at the configured offset of the message passed as the closure.
 * Always returns true. */
#define MSG_WRITER(type, ctype)                                               \
  bool upb_msg_set ## type (void *c, const void *hd, ctype val) {             \
    const upb_msg_handlerdata *data = hd;                                     \
    uint8_t *base = c;                                                        \
    if (data->hasbit > 0) {                                                   \
      base[data->hasbit / 8] |= 1 << (data->hasbit % 8);                      \
    }                                                                         \
    *(ctype*)&base[data->offset] = val;                                       \
    return true;                                                              \
  }

MSG_WRITER(double, double)
MSG_WRITER(float,  float)
MSG_WRITER(int32,  int32_t)
MSG_WRITER(int64,  int64_t)
MSG_WRITER(uint32, uint32_t)
MSG_WRITER(uint64, uint64_t)
MSG_WRITER(bool,   bool)
4638 
4639 bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f,
4640                               size_t offset, int32_t hasbit) {
4641   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
4642   bool ok;
4643 
4644   upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d));
4645   if (!d) return false;
4646   d->offset = offset;
4647   d->hasbit = hasbit;
4648 
4649   upb_handlerattr_sethandlerdata(&attr, d);
4650   upb_handlerattr_setalwaysok(&attr, true);
4651   upb_handlers_addcleanup(h, d, upb_gfree);
4652 
4653 #define TYPE(u, l) \
4654   case UPB_TYPE_##u: \
4655     ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break;
4656 
4657   ok = false;
4658 
4659   switch (upb_fielddef_type(f)) {
4660     TYPE(INT64,  int64);
4661     TYPE(INT32,  int32);
4662     TYPE(ENUM,   int32);
4663     TYPE(UINT64, uint64);
4664     TYPE(UINT32, uint32);
4665     TYPE(DOUBLE, double);
4666     TYPE(FLOAT,  float);
4667     TYPE(BOOL,   bool);
4668     default: UPB_ASSERT(false); break;
4669   }
4670 #undef TYPE
4671 
4672   upb_handlerattr_uninit(&attr);
4673   return ok;
4674 }
4675 
/* If the handler registered for selector "s" is one of the generic
 * upb_msg_set<type>() writers, reports the matching field type in "*type"
 * and the writer's configuration in "*offset" and "*hasbit", and returns
 * true.  Otherwise returns false and writes nothing. */
bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
                                  upb_selector_t s,
                                  upb_fieldtype_t *type,
                                  size_t *offset,
                                  int32_t *hasbit) {
  const upb_msg_handlerdata *data;
  upb_fieldtype_t detected;
  upb_func *fn = upb_handlers_gethandler(h, s);

  /* Identify the writer by comparing function pointers. */
  if ((upb_int64_handlerfunc*)fn == upb_msg_setint64) {
    detected = UPB_TYPE_INT64;
  } else if ((upb_int32_handlerfunc*)fn == upb_msg_setint32) {
    detected = UPB_TYPE_INT32;
  } else if ((upb_uint64_handlerfunc*)fn == upb_msg_setuint64) {
    detected = UPB_TYPE_UINT64;
  } else if ((upb_uint32_handlerfunc*)fn == upb_msg_setuint32) {
    detected = UPB_TYPE_UINT32;
  } else if ((upb_double_handlerfunc*)fn == upb_msg_setdouble) {
    detected = UPB_TYPE_DOUBLE;
  } else if ((upb_float_handlerfunc*)fn == upb_msg_setfloat) {
    detected = UPB_TYPE_FLOAT;
  } else if ((upb_bool_handlerfunc*)fn == upb_msg_setbool) {
    detected = UPB_TYPE_BOOL;
  } else {
    return false;
  }
  *type = detected;

  data = upb_handlers_gethandlerdata(h, s);
  *offset = data->offset;
  *hasbit = data->hasbit;
  return true;
}
4707 
4708 
/* Returns true for bool, the 32/64-bit signed and unsigned integer types, and
 * string; false for every other field type. */
bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_STRING:
      return true;
    default:
      return false;
  }
}
4714 
/* PTR_AT: pointer to a "type" located "ofs" bytes from "msg".  The arguments
 * and the whole expansion are parenthesized so that compound argument
 * expressions and postfix operators applied to the result bind as expected. */
#define PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))
/* VOIDPTR_AT: same as PTR_AT, but yields a void pointer. */
#define VOIDPTR_AT(msg, ofs) PTR_AT(msg, ofs, void)
#define ENCODE_MAX_NESTING 64
/* CHECK_TRUE: returns false from the enclosing function if "x" is false. */
#define CHECK_TRUE(x) if (!(x)) { return false; }
4719 
4720 /** upb_msgval ****************************************************************/
4721 
/* Offset of a "t" member placed directly after a single char in a struct. */
#define upb_alignof(t) offsetof(struct { char c; t x; }, x)
4723 
4724 /* These functions will generate real memcpy() calls on ARM sadly, because
4725  * the compiler assumes they might not be aligned. */
4726 
upb_msgval_read(const void * p,size_t ofs,uint8_t size)4727 static upb_msgval upb_msgval_read(const void *p, size_t ofs,
4728                                   uint8_t size) {
4729   upb_msgval val;
4730   p = (char*)p + ofs;
4731   memcpy(&val, p, size);
4732   return val;
4733 }
4734 
/* Copies the first "size" bytes of "val" to the location "ofs" bytes past
 * "p". */
static void upb_msgval_write(void *p, size_t ofs, upb_msgval val,
                             uint8_t size) {
  char *dst = (char*)p + ofs;
  memcpy(dst, &val, size);
}
4740 
/* Returns the number of bytes this function assigns to a value of field type
 * "type".  Every listed type returns; falling out of the switch is marked
 * unreachable. */
static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;

    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;

    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);

    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
4762 
upb_msg_fieldsize(const upb_msglayout_field * field)4763 static uint8_t upb_msg_fieldsize(const upb_msglayout_field *field) {
4764   if (field->label == UPB_LABEL_REPEATED) {
4765     return sizeof(void*);
4766   } else {
4767     return upb_msgval_sizeof(upb_desctype_to_fieldtype[field->descriptortype]);
4768   }
4769 }
4770 
4771 /* TODO(haberman): this is broken right now because upb_msgval can contain
4772  * a char* / size_t pair, which is too big for a upb_value.  To fix this
4773  * we'll probably need to dynamically allocate a upb_msgval and store a
4774  * pointer to that in the tables for extensions/maps. */
upb_toval(upb_msgval val)4775 static upb_value upb_toval(upb_msgval val) {
4776   upb_value ret;
4777   UPB_UNUSED(val);
4778   memset(&ret, 0, sizeof(upb_value));  /* XXX */
4779   return ret;
4780 }
4781 
upb_msgval_fromval(upb_value val)4782 static upb_msgval upb_msgval_fromval(upb_value val) {
4783   upb_msgval ret;
4784   UPB_UNUSED(val);
4785   memset(&ret, 0, sizeof(upb_msgval));  /* XXX */
4786   return ret;
4787 }
4788 
upb_fieldtotabtype(upb_fieldtype_t type)4789 static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) {
4790   switch (type) {
4791     case UPB_TYPE_FLOAT: return UPB_CTYPE_FLOAT;
4792     case UPB_TYPE_DOUBLE: return UPB_CTYPE_DOUBLE;
4793     case UPB_TYPE_BOOL: return UPB_CTYPE_BOOL;
4794     case UPB_TYPE_BYTES:
4795     case UPB_TYPE_MESSAGE:
4796     case UPB_TYPE_STRING: return UPB_CTYPE_CONSTPTR;
4797     case UPB_TYPE_ENUM:
4798     case UPB_TYPE_INT32: return UPB_CTYPE_INT32;
4799     case UPB_TYPE_UINT32: return UPB_CTYPE_UINT32;
4800     case UPB_TYPE_INT64: return UPB_CTYPE_INT64;
4801     case UPB_TYPE_UINT64: return UPB_CTYPE_UINT64;
4802     default: UPB_ASSERT(false); return 0;
4803   }
4804 }
4805 
4806 
4807 /** upb_msg *******************************************************************/
4808 
/* Dereferences the "type" located "ofs" bytes from "msg".  As long as every
 * address is consistently read and written as the same type, this should not
 * violate aliasing rules. */
#define DEREF(msg, ofs, type) *PTR_AT(msg, ofs, type)
4813 
4814 /* Internal members of a upb_msg.  We can change this without breaking binary
4815  * compatibility.  We put these before the user's data.  The user's upb_msg*
4816  * points after the upb_msg_internal. */
4817 
4818 /* Used when a message is not extendable. */
4819 typedef struct {
4820   /* TODO(haberman): use pointer tagging so we we are slim when known unknown
4821    * fields are not present. */
4822   upb_arena *arena;
4823   char *unknown;
4824   size_t unknown_len;
4825   size_t unknown_size;
4826 } upb_msg_internal;
4827 
4828 /* Used when a message is extendable. */
4829 typedef struct {
4830   upb_inttable *extdict;
4831   upb_msg_internal base;
4832 } upb_msg_internal_withext;
4833 
upb_msg_internalsize(const upb_msglayout * l)4834 static int upb_msg_internalsize(const upb_msglayout *l) {
4835   return sizeof(upb_msg_internal) - l->extendable * sizeof(void *);
4836 }
4837 
upb_msg_getinternal(upb_msg * msg)4838 static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
4839   return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
4840 }
4841 
upb_msg_getinternal_const(const upb_msg * msg)4842 static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
4843   return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
4844 }
4845 
upb_msg_getinternalwithext(upb_msg * msg,const upb_msglayout * l)4846 static upb_msg_internal_withext *upb_msg_getinternalwithext(
4847     upb_msg *msg, const upb_msglayout *l) {
4848   UPB_ASSERT(l->extendable);
4849   return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext));
4850 }
4851 
/* Appends "len" bytes from "data" to the unknown-field buffer of "msg",
 * growing the buffer from the message's arena when there is not enough room.
 *
 * The function cannot report errors.  If the buffer cannot be grown, the
 * existing contents are left intact and the new bytes are dropped, instead of
 * overwriting the buffer pointer with NULL and copying into it.  A
 * zero-length append is a no-op. */
void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len) {
  upb_msg_internal* in = upb_msg_getinternal(msg);

  if (len == 0) {
    return;  /* Nothing to copy; also avoids memcpy() on a NULL buffer. */
  }

  if (len > in->unknown_size - in->unknown_len) {
    upb_alloc *alloc = upb_arena_alloc(in->arena);
    size_t need = in->unknown_size + len;
    size_t newsize = UPB_MAX(in->unknown_size * 2, need);
    char *grown = upb_realloc(alloc, in->unknown, in->unknown_size, newsize);
    if (!grown) {
      return;  /* Keep the old buffer and its bookkeeping untouched. */
    }
    in->unknown = grown;
    in->unknown_size = newsize;
  }
  memcpy(in->unknown + in->unknown_len, data, len);
  in->unknown_len += len;
}
4864 
upb_msg_getunknown(const upb_msg * msg,size_t * len)4865 const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
4866   const upb_msg_internal* in = upb_msg_getinternal_const(msg);
4867   *len = in->unknown_len;
4868   return in->unknown;
4869 }
4870 
upb_msg_checkfield(int field_index,const upb_msglayout * l)4871 static const upb_msglayout_field *upb_msg_checkfield(int field_index,
4872                                                      const upb_msglayout *l) {
4873   UPB_ASSERT(field_index >= 0 && field_index < l->field_count);
4874   return &l->fields[field_index];
4875 }
4876 
upb_msg_inoneof(const upb_msglayout_field * field)4877 static bool upb_msg_inoneof(const upb_msglayout_field *field) {
4878   return field->presence < 0;
4879 }
4880 
upb_msg_oneofcase(const upb_msg * msg,int field_index,const upb_msglayout * l)4881 static uint32_t *upb_msg_oneofcase(const upb_msg *msg, int field_index,
4882                                    const upb_msglayout *l) {
4883   const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4884   UPB_ASSERT(upb_msg_inoneof(field));
4885   return PTR_AT(msg, ~field->presence, uint32_t);
4886 }
4887 
upb_msg_sizeof(const upb_msglayout * l)4888 static size_t upb_msg_sizeof(const upb_msglayout *l) {
4889   return l->size + upb_msg_internalsize(l);
4890 }
4891 
upb_msg_new(const upb_msglayout * l,upb_arena * a)4892 upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) {
4893   upb_alloc *alloc = upb_arena_alloc(a);
4894   void *mem = upb_malloc(alloc, upb_msg_sizeof(l));
4895   upb_msg_internal *in;
4896   upb_msg *msg;
4897 
4898   if (!mem) {
4899     return NULL;
4900   }
4901 
4902   msg = VOIDPTR_AT(mem, upb_msg_internalsize(l));
4903 
4904   /* Initialize normal members. */
4905   memset(msg, 0, l->size);
4906 
4907   /* Initialize internal members. */
4908   in = upb_msg_getinternal(msg);
4909   in->arena = a;
4910   in->unknown = NULL;
4911   in->unknown_len = 0;
4912   in->unknown_size = 0;
4913 
4914   if (l->extendable) {
4915     upb_msg_getinternalwithext(msg, l)->extdict = NULL;
4916   }
4917 
4918   return msg;
4919 }
4920 
upb_msg_arena(const upb_msg * msg)4921 upb_arena *upb_msg_arena(const upb_msg *msg) {
4922   return upb_msg_getinternal_const(msg)->arena;
4923 }
4924 
upb_msg_has(const upb_msg * msg,int field_index,const upb_msglayout * l)4925 bool upb_msg_has(const upb_msg *msg,
4926                  int field_index,
4927                  const upb_msglayout *l) {
4928   const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4929 
4930   UPB_ASSERT(field->presence);
4931 
4932   if (upb_msg_inoneof(field)) {
4933     /* Oneofs are set when the oneof number is set to this field. */
4934     return *upb_msg_oneofcase(msg, field_index, l) == field->number;
4935   } else {
4936     /* Other fields are set when their hasbit is set. */
4937     uint32_t hasbit = field->presence;
4938     return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8));
4939   }
4940 }
4941 
upb_msg_get(const upb_msg * msg,int field_index,const upb_msglayout * l)4942 upb_msgval upb_msg_get(const upb_msg *msg, int field_index,
4943                        const upb_msglayout *l) {
4944   const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4945   int size = upb_msg_fieldsize(field);
4946   return upb_msgval_read(msg, field->offset, size);
4947 }
4948 
/* Stores |val| into the field at |field_index| of |msg|.  Only the field's
 * storage is written here. */
void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val,
                 const upb_msglayout *l) {
  const upb_msglayout_field *f = upb_msg_checkfield(field_index, l);
  int nbytes = upb_msg_fieldsize(f);

  upb_msgval_write(msg, f->offset, val, nbytes);
}
4955 
4956 
4957 /** upb_array *****************************************************************/
4958 
4959 #define DEREF_ARR(arr, i, type) ((type*)arr->data)[i]
4960 
upb_array_new(upb_fieldtype_t type,upb_arena * a)4961 upb_array *upb_array_new(upb_fieldtype_t type, upb_arena *a) {
4962   upb_alloc *alloc = upb_arena_alloc(a);
4963   upb_array *ret = upb_malloc(alloc, sizeof(upb_array));
4964 
4965   if (!ret) {
4966     return NULL;
4967   }
4968 
4969   ret->type = type;
4970   ret->data = NULL;
4971   ret->len = 0;
4972   ret->size = 0;
4973   ret->element_size = upb_msgval_sizeof(type);
4974   ret->arena = a;
4975 
4976   return ret;
4977 }
4978 
upb_array_size(const upb_array * arr)4979 size_t upb_array_size(const upb_array *arr) {
4980   return arr->len;
4981 }
4982 
upb_array_type(const upb_array * arr)4983 upb_fieldtype_t upb_array_type(const upb_array *arr) {
4984   return arr->type;
4985 }
4986 
upb_array_get(const upb_array * arr,size_t i)4987 upb_msgval upb_array_get(const upb_array *arr, size_t i) {
4988   UPB_ASSERT(i < arr->len);
4989   return upb_msgval_read(arr->data, i * arr->element_size, arr->element_size);
4990 }
4991 
/* Writes |val| at index |i|.  An index equal to the current length appends
 * one element, growing the backing store (doubling, minimum 8 elements) when
 * it is full.  Returns false if growing fails; the array is unchanged then. */
bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) {
  bool appending;

  UPB_ASSERT(i <= arr->len);
  appending = (i == arr->len);

  if (appending && i == arr->size) {
    /* Backing store is full: reallocate before extending. */
    size_t new_size = UPB_MAX(arr->size * 2, 8);
    size_t new_bytes = new_size * arr->element_size;
    size_t old_bytes = arr->size * arr->element_size;
    upb_alloc *alloc = upb_arena_alloc(arr->arena);
    upb_msgval *grown = upb_realloc(alloc, arr->data, old_bytes, new_bytes);

    if (grown == NULL) {
      return false;
    }

    arr->data = grown;
    arr->size = new_size;
  }

  if (appending) {
    arr->len = i + 1;
  }

  upb_msgval_write(arr->data, i * arr->element_size, val, arr->element_size);
  return true;
}
5021 
5022 
5023 /** upb_map *******************************************************************/
5024 
5025 struct upb_map {
5026   upb_fieldtype_t key_type;
5027   upb_fieldtype_t val_type;
5028   /* We may want to optimize this to use inttable where possible, for greater
5029    * efficiency and lower memory footprint. */
5030   upb_strtable strtab;
5031   upb_arena *arena;
5032 };
5033 
/* Converts a map key of the given |type| into the (pointer, length) byte
 * string used to index the underlying strtable.  The result points into
 * |key| (or into the string it references); nothing is copied. */
static void upb_map_tokey(upb_fieldtype_t type, upb_msgval *key,
                          const char **out_key, size_t *out_len) {
  switch (type) {
    case UPB_TYPE_BYTES:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_MESSAGE:
      /* These types cannot be map keys. */
      break;
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      /* Use the raw bytes of the key itself.  XXX: big-endian. */
      *out_len = upb_msgval_sizeof(type);
      *out_key = (const char*)key;
      return;
    case UPB_TYPE_STRING:
      /* Use the string data referenced by the key. */
      *out_len = key->str.size;
      *out_key = key->str.data;
      return;
  }
  UPB_UNREACHABLE();
}
5060 
upb_map_fromkey(upb_fieldtype_t type,const char * key,size_t len)5061 static upb_msgval upb_map_fromkey(upb_fieldtype_t type, const char *key,
5062                                   size_t len) {
5063   switch (type) {
5064     case UPB_TYPE_STRING:
5065       return upb_msgval_makestr(key, len);
5066     case UPB_TYPE_BOOL:
5067     case UPB_TYPE_INT32:
5068     case UPB_TYPE_UINT32:
5069     case UPB_TYPE_INT64:
5070     case UPB_TYPE_UINT64:
5071       return upb_msgval_read(key, 0, upb_msgval_sizeof(type));
5072     case UPB_TYPE_BYTES:
5073     case UPB_TYPE_DOUBLE:
5074     case UPB_TYPE_ENUM:
5075     case UPB_TYPE_FLOAT:
5076     case UPB_TYPE_MESSAGE:
5077       break;  /* Cannot be a map key. */
5078   }
5079   UPB_UNREACHABLE();
5080 }
5081 
upb_map_new(upb_fieldtype_t ktype,upb_fieldtype_t vtype,upb_arena * a)5082 upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype,
5083                      upb_arena *a) {
5084   upb_ctype_t vtabtype = upb_fieldtotabtype(vtype);
5085   upb_alloc *alloc = upb_arena_alloc(a);
5086   upb_map *map = upb_malloc(alloc, sizeof(upb_map));
5087 
5088   if (!map) {
5089     return NULL;
5090   }
5091 
5092   UPB_ASSERT(upb_fieldtype_mapkeyok(ktype));
5093   map->key_type = ktype;
5094   map->val_type = vtype;
5095   map->arena = a;
5096 
5097   if (!upb_strtable_init2(&map->strtab, vtabtype, alloc)) {
5098     return NULL;
5099   }
5100 
5101   return map;
5102 }
5103 
upb_map_size(const upb_map * map)5104 size_t upb_map_size(const upb_map *map) {
5105   return upb_strtable_count(&map->strtab);
5106 }
5107 
upb_map_keytype(const upb_map * map)5108 upb_fieldtype_t upb_map_keytype(const upb_map *map) {
5109   return map->key_type;
5110 }
5111 
upb_map_valuetype(const upb_map * map)5112 upb_fieldtype_t upb_map_valuetype(const upb_map *map) {
5113   return map->val_type;
5114 }
5115 
/* Looks up |key| in |map|.  On a hit the stored value is copied into |*val|
 * and true is returned; on a miss |*val| is left untouched. */
bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
  upb_value found;
  const char *kdata;
  size_t klen;

  upb_map_tokey(map->key_type, &key, &kdata, &klen);
  if (!upb_strtable_lookup2(&map->strtab, kdata, klen, &found)) {
    return false;
  }

  memcpy(val, &found, sizeof(found));
  return true;
}
5130 
/* Inserts or replaces the entry for |key| in |map|.
 *
 * If an entry for |key| already exists it is removed first and, when
 * |removed| is non-NULL, the old value is stored in |*removed|.  (The value
 * must be written through the pointer; copying into the pointer variable
 * itself would never reach the caller.)  |*removed| is left untouched when
 * there was no previous entry.
 *
 * Returns the result of the table insertion (false on allocation failure). */
bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
                 upb_msgval *removed) {
  const char *key_str;
  size_t key_len;
  upb_value tabval = upb_toval(val);
  upb_value removedtabval;
  upb_alloc *a = upb_arena_alloc(map->arena);

  upb_map_tokey(map->key_type, &key, &key_str, &key_len);

  /* TODO(haberman): add overwrite operation to minimize number of lookups. */
  if (upb_strtable_lookup2(&map->strtab, key_str, key_len, NULL)) {
    upb_strtable_remove3(&map->strtab, key_str, key_len, &removedtabval, a);
    if (removed) {
      *removed = upb_msgval_fromval(removedtabval);
    }
  }

  return upb_strtable_insert3(&map->strtab, key_str, key_len, tabval, a);
}
5149 
/* Removes the entry for |key|, returning whatever the table removal
 * reports. */
bool upb_map_del(upb_map *map, upb_msgval key) {
  const char *kdata;
  size_t klen;
  upb_alloc *alloc = upb_arena_alloc(map->arena);

  upb_map_tokey(map->key_type, &key, &kdata, &klen);
  return upb_strtable_remove3(&map->strtab, kdata, klen, NULL, alloc);
}
5158 
5159 
5160 /** upb_mapiter ***************************************************************/
5161 
5162 struct upb_mapiter {
5163   upb_strtable_iter iter;
5164   upb_fieldtype_t key_type;
5165 };
5166 
upb_mapiter_sizeof()5167 size_t upb_mapiter_sizeof() {
5168   return sizeof(upb_mapiter);
5169 }
5170 
/* Positions |i| at the first entry of |map| and records the map's key type
 * for later key decoding. */
void upb_mapiter_begin(upb_mapiter *i, const upb_map *map) {
  const upb_strtable *table = &map->strtab;

  upb_strtable_begin(&i->iter, table);
  i->key_type = map->key_type;
}
5175 
upb_mapiter_new(const upb_map * t,upb_alloc * a)5176 upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a) {
5177   upb_mapiter *ret = upb_malloc(a, upb_mapiter_sizeof());
5178 
5179   if (!ret) {
5180     return NULL;
5181   }
5182 
5183   upb_mapiter_begin(ret, t);
5184   return ret;
5185 }
5186 
/* Releases iterator |i| through allocator |a|. */
void upb_mapiter_free(upb_mapiter *i, upb_alloc *a) {
  void *mem = i;
  upb_free(a, mem);
}
5190 
/* Advances |i| to the next map entry. */
void upb_mapiter_next(upb_mapiter *i) {
  upb_strtable_iter *pos = &i->iter;
  upb_strtable_next(pos);
}
5194 
upb_mapiter_done(const upb_mapiter * i)5195 bool upb_mapiter_done(const upb_mapiter *i) {
5196   return upb_strtable_done(&i->iter);
5197 }
5198 
upb_mapiter_key(const upb_mapiter * i)5199 upb_msgval upb_mapiter_key(const upb_mapiter *i) {
5200   return upb_map_fromkey(i->key_type, upb_strtable_iter_key(&i->iter),
5201                          upb_strtable_iter_keylength(&i->iter));
5202 }
5203 
upb_mapiter_value(const upb_mapiter * i)5204 upb_msgval upb_mapiter_value(const upb_mapiter *i) {
5205   return upb_msgval_fromval(upb_strtable_iter_value(&i->iter));
5206 }
5207 
/* Puts |i| into the "done" state. */
void upb_mapiter_setdone(upb_mapiter *i) {
  upb_strtable_iter *pos = &i->iter;
  upb_strtable_iter_setdone(pos);
}
5211 
upb_mapiter_isequal(const upb_mapiter * i1,const upb_mapiter * i2)5212 bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2) {
5213   return upb_strtable_iter_isequal(&i1->iter, &i2->iter);
5214 }
5215 
5216 
/* Returns true iff |val| is a power of two (1, 2, 4, ...).
 *
 * Zero is explicitly rejected: the bit trick alone reports it as a power of
 * two, and aligning to zero would collapse any offset to 0. */
static bool is_power_of_two(size_t val) {
  return val != 0 && (val & (val - 1)) == 0;
}
5220 
/* Rounds |val| up to the next multiple of |align|, which must be a power of
 * two (asserted). */
static size_t align_up(size_t val, size_t align) {
  size_t mask;

  UPB_ASSERT(is_power_of_two(align));
  mask = align - 1;
  return (val + mask) & ~mask;
}
5226 
/* Integer division of |n| by |d|, rounded towards positive infinity. */
static size_t div_round_up(size_t n, size_t d) {
  const size_t padded = n + (d - 1);
  return padded / d;
}
5230 
/* In-message storage size, in bytes, of a single (non-repeated) value of the
 * given field type. */
static size_t upb_msgval_sizeof2(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_ENUM:
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
5252 
/* Storage size of field |f| inside a message: sequence fields are stored as
 * a pointer, everything else by value. */
static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
  return upb_fielddef_isseq(f) ? sizeof(void*)
                               : upb_msgval_sizeof2(upb_fielddef_type(f));
}
5260 
5261 
5262 /** upb_msglayout *************************************************************/
5263 
/* Frees a layout built by upb_msglayout_init(), including the |fields| and
 * |submsgs| arrays that upb_msglayout_init() allocates with upb_gmalloc().
 * A layout whose initialization failed has both pointers set to NULL (it is
 * zeroed before the arrays are allocated), which upb_gfree() accepts. */
static void upb_msglayout_free(upb_msglayout *l) {
  if (!l) {
    return;
  }

  /* The casts drop the const qualifiers the layout struct puts on these
   * arrays; the memory itself was obtained from upb_gmalloc(). */
  upb_gfree((void *)l->fields);
  upb_gfree((void *)l->submsgs);
  upb_gfree(l);
}
5267 
/* Reserves |size| bytes at the end of the layout, aligned to |size|, and
 * returns the offset of the reserved region. */
static size_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  size_t offset;

  l->size = align_up(l->size, size);
  offset = l->size;
  l->size = l->size + size;
  return offset;
}
5276 
/* Builds the in-memory layout for messages of type |m| into |l|.
 *
 * |l| is zeroed, then a field table and a sub-message layout table are
 * allocated with upb_gmalloc() and filled in.  Sub-message layouts are
 * obtained from |factory|.  Returns false (with both tables freed and |l|
 * left zeroed) if either allocation fails.
 *
 * Presence encoding for each field:
 *   presence == 0  -> the field does not track presence
 *   presence  > 0  -> index of the field's hasbit
 *   presence  < 0  -> oneof member; ~presence is the offset of the case slot
 * Hasbit indexes therefore start at 1: index 0 would be indistinguishable
 * from "no presence" (upb_msg_has() asserts a non-zero presence). */
static bool upb_msglayout_init(const upb_msgdef *m,
                               upb_msglayout *l,
                               upb_msgfactory *factory) {
  upb_msg_field_iter it;
  upb_msg_oneof_iter oit;
  size_t hasbit;
  size_t submsg_count = 0;
  const upb_msglayout **submsgs;
  upb_msglayout_field *fields;

  /* Count sub-message fields so the submsgs table can be sized. */
  for (upb_msg_field_begin(&it, m);
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    if (upb_fielddef_issubmsg(f)) {
      submsg_count++;
    }
  }

  memset(l, 0, sizeof(*l));

  fields = upb_gmalloc(upb_msgdef_numfields(m) * sizeof(*fields));
  submsgs = upb_gmalloc(submsg_count * sizeof(*submsgs));

  if ((!fields && upb_msgdef_numfields(m)) ||
      (!submsgs && submsg_count)) {
    /* OOM. */
    upb_gfree(fields);
    upb_gfree(submsgs);
    return false;
  }

  l->field_count = upb_msgdef_numfields(m);
  l->fields = fields;
  l->submsgs = submsgs;

  /* Allocate data offsets in three stages:
   *
   * 1. hasbits.
   * 2. regular fields.
   * 3. oneof fields.
   *
   * OPT: There is a lot of room for optimization here to minimize the size.
   */

  /* Allocate hasbits and set basic field attributes. */
  submsg_count = 0;
  for (upb_msg_field_begin(&it, m), hasbit = 0;
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    upb_msglayout_field *field = &fields[upb_fielddef_index(f)];

    field->number = upb_fielddef_number(f);
    field->descriptortype = upb_fielddef_descriptortype(f);
    field->label = upb_fielddef_label(f);

    if (upb_fielddef_issubmsg(f)) {
      const upb_msglayout *sub_layout =
          upb_msgfactory_getlayout(factory, upb_fielddef_msgsubdef(f));
      field->submsg_index = submsg_count++;
      submsgs[field->submsg_index] = sub_layout;
    }

    if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) {
      /* Hasbit 0 is never handed out so that 0 can mean "no presence". */
      field->presence = (++hasbit);
    } else {
      field->presence = 0;
    }
  }

  /* Account for space used by hasbits.  Bits 1..hasbit are in use (bit 0 is
   * reserved), so hasbit + 1 bits are needed when any hasbit was assigned. */
  l->size = hasbit ? div_round_up(hasbit + 1, 8) : 0;

  /* Allocate non-oneof fields. */
  for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    size_t field_size = upb_msg_fielddefsize(f);
    size_t index = upb_fielddef_index(f);

    if (upb_fielddef_containingoneof(f)) {
      /* Oneofs are handled separately below. */
      continue;
    }

    fields[index].offset = upb_msglayout_place(l, field_size);
  }

  /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
   * and space for the actual data. */
  for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
       upb_msg_oneof_next(&oit)) {
    const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
    upb_oneof_iter fit;

    size_t case_size = sizeof(uint32_t);  /* Could potentially optimize this. */
    size_t field_size = 0;
    uint32_t case_offset;
    uint32_t data_offset;

    /* Calculate field size: the max of all field sizes. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
    }

    /* Align and allocate the case slot, then the shared data slot. */
    case_offset = upb_msglayout_place(l, case_size);
    data_offset = upb_msglayout_place(l, field_size);

    /* Every member of the oneof shares the same data and case offsets. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      fields[upb_fielddef_index(f)].offset = data_offset;
      fields[upb_fielddef_index(f)].presence = ~case_offset;
    }
  }

  /* Size of the entire structure should be a multiple of its greatest
   * alignment.  TODO: track overall alignment for real? */
  l->size = align_up(l->size, 8);

  return true;
}
5405 
5406 
5407 /** upb_msgfactory ************************************************************/
5408 
5409 struct upb_msgfactory {
5410   const upb_symtab *symtab;  /* We own a ref. */
5411   upb_inttable layouts;
5412   upb_inttable mergehandlers;
5413 };
5414 
upb_msgfactory_new(const upb_symtab * symtab)5415 upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab) {
5416   upb_msgfactory *ret = upb_gmalloc(sizeof(*ret));
5417 
5418   ret->symtab = symtab;
5419   upb_inttable_init(&ret->layouts, UPB_CTYPE_PTR);
5420   upb_inttable_init(&ret->mergehandlers, UPB_CTYPE_CONSTPTR);
5421 
5422   return ret;
5423 }
5424 
/* Destroys factory |f|: frees every cached layout, drops the factory's ref on
 * every cached handlers object, then releases both tables and the factory
 * itself. */
void upb_msgfactory_free(upb_msgfactory *f) {
  upb_inttable_iter iter;

  for (upb_inttable_begin(&iter, &f->layouts);
       !upb_inttable_done(&iter);
       upb_inttable_next(&iter)) {
    upb_msglayout *layout = upb_value_getptr(upb_inttable_iter_value(&iter));
    upb_msglayout_free(layout);
  }

  for (upb_inttable_begin(&iter, &f->mergehandlers);
       !upb_inttable_done(&iter);
       upb_inttable_next(&iter)) {
    const upb_handlers *handlers =
        upb_value_getconstptr(upb_inttable_iter_value(&iter));
    upb_handlers_unref(handlers, f);
  }

  upb_inttable_uninit(&f->layouts);
  upb_inttable_uninit(&f->mergehandlers);
  upb_gfree(f);
}
5443 
upb_msgfactory_symtab(const upb_msgfactory * f)5444 const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f) {
5445   return f->symtab;
5446 }
5447 
upb_msgfactory_getlayout(upb_msgfactory * f,const upb_msgdef * m)5448 const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
5449                                               const upb_msgdef *m) {
5450   upb_value v;
5451   UPB_ASSERT(upb_symtab_lookupmsg(f->symtab, upb_msgdef_fullname(m)) == m);
5452   UPB_ASSERT(!upb_msgdef_mapentry(m));
5453 
5454   if (upb_inttable_lookupptr(&f->layouts, m, &v)) {
5455     UPB_ASSERT(upb_value_getptr(v));
5456     return upb_value_getptr(v);
5457   } else {
5458     /* In case of circular dependency, layout has to be inserted first. */
5459     upb_msglayout *l = upb_gmalloc(sizeof(*l));
5460     upb_msgfactory *mutable_f = (void*)f;
5461     upb_inttable_insertptr(&mutable_f->layouts, m, upb_value_ptr(l));
5462     UPB_ASSERT(l);
5463     if (!upb_msglayout_init(m, l, f)) {
5464       upb_msglayout_free(l);
5465     }
5466     return l;
5467   }
5468 }
5469 
5470 #if UINTPTR_MAX == 0xffffffff
5471 #define UPB_SIZE(size32, size64) size32
5472 #else
5473 #define UPB_SIZE(size32, size64) size64
5474 #endif
5475 
5476 #define UPB_FIELD_AT(msg, fieldtype, offset) \
5477   *(fieldtype*)((const char*)(msg) + offset)
5478 
5479 #define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
5480   UPB_FIELD_AT(msg, int, case_offset) == case_val                              \
5481       ? UPB_FIELD_AT(msg, fieldtype, offset)                                   \
5482       : default
5483 
5484 #define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
5485   UPB_FIELD_AT(msg, int, case_offset) = case_val;                             \
5486   UPB_FIELD_AT(msg, fieldtype, offset) = value;
5487 
5488 #undef UPB_SIZE
5489 #undef UPB_FIELD_AT
5490 #undef UPB_READ_ONEOF
5491 #undef UPB_WRITE_ONEOF
5492 /*
5493 ** upb::RefCounted Implementation
5494 **
5495 ** Our key invariants are:
5496 ** 1. reference cycles never span groups
5497 ** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
5498 **
5499 ** The previous two are how we avoid leaking cycles.  Other important
5500 ** invariants are:
5501 ** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
5502 **    this implies group(from) == group(to).  (In practice, what we implement
5503 **    is even stronger; "from" and "to" will share a group if there has *ever*
5504 **    been a ref2(to, from), but all that is necessary for correctness is the
5505 **    weaker one).
5506 ** 4. mutable and immutable objects are never in the same group.
5507 */
5508 
5509 
5510 #include <setjmp.h>
5511 
5512 static void freeobj(upb_refcounted *o);
5513 
5514 const char untracked_val;
5515 const void *UPB_UNTRACKED_REF = &untracked_val;
5516 
5517 /* arch-specific atomic primitives  *******************************************/
5518 
/* atomic_inc() adds one to *a.  atomic_dec() subtracts one from *a and
 * returns true when the counter reaches zero.
 *
 * Every variant takes a uint32_t*, because that is what refgroup() and
 * unrefgroup() pass in. */
#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/

/* Plain, non-atomic arithmetic. */
static void atomic_inc(uint32_t *a) { (*a)++; }
static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }

#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/

static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }

#elif defined(WIN32) /*-------------------------------------------------------*/

#include <Windows.h>

/* LONG is 32 bits wide on Windows, so the counter can be handed to the
 * Interlocked* functions through a cast. */
static void atomic_inc(uint32_t *a) {
  InterlockedIncrement((volatile LONG *)a);
}
static bool atomic_dec(uint32_t *a) {
  return InterlockedDecrement((volatile LONG *)a) == 0;
}

#else
#error Atomic primitives not defined for your platform/CPU.  \
       Implement them or compile with UPB_THREAD_UNSAFE.
#endif
5542 
5543 /* All static objects point to this refcount.
5544  * It is special-cased in ref/unref below.  */
5545 uint32_t static_refcount = -1;
5546 
5547 /* We can avoid atomic ops for statically-declared objects.
5548  * This is a minor optimization but nice since we can avoid degrading under
5549  * contention in this case. */
5550 
refgroup(uint32_t * group)5551 static void refgroup(uint32_t *group) {
5552   if (group != &static_refcount)
5553     atomic_inc(group);
5554 }
5555 
unrefgroup(uint32_t * group)5556 static bool unrefgroup(uint32_t *group) {
5557   if (group == &static_refcount) {
5558     return false;
5559   } else {
5560     return atomic_dec(group);
5561   }
5562 }
5563 
5564 
5565 /* Reference tracking (debug only) ********************************************/
5566 
5567 #ifdef UPB_DEBUG_REFS
5568 
#ifdef UPB_THREAD_UNSAFE

/* This build uses no-op lock and unlock. */
static void upb_lock(void) {}
static void upb_unlock(void) {}

#else

/* User must define functions that lock/unlock a global mutex and link this
 * file against them.  They are declared with (void) so that calls are checked
 * against a real prototype. */
void upb_lock(void);
void upb_unlock(void);

#endif
5582 
/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
 * code-paths that can normally never fail, like upb_refcounted_ref().  Since
 * we have no way to propagate out-of-memory errors back to the user, and since
 * these errors can only occur in UPB_DEBUG_REFS mode, we use an allocator that
 * immediately aborts on failure (avoiding the global allocator, which might
 * inject failures). */
5589 
5590 #include <stdlib.h>
5591 
/* upb_alloc callback backed directly by the C heap.  A |size| of zero frees
 * |ptr|; any other size reallocates it and aborts the process if that fails.
 * |alloc| and |oldsize| are ignored. */
static void *upb_debugrefs_allocfunc(upb_alloc *alloc, void *ptr,
                                     size_t oldsize, size_t size) {
  void *grown;

  UPB_UNUSED(alloc);
  UPB_UNUSED(oldsize);

  if (size == 0) {
    free(ptr);
    return NULL;
  }

  grown = realloc(ptr, size);
  if (grown == NULL) {
    abort();
  }
  return grown;
}
5609 
5610 upb_alloc upb_alloc_debugrefs = {&upb_debugrefs_allocfunc};
5611 
/* Debug-only record kept per (object, owner) pair. */
typedef struct {
  int count;     /* Number of refs held; only ref2s may exceed one. */
  bool is_ref2;  /* True when the record was created for a ref2. */
} trackedref;
5616 
trackedref_new(bool is_ref2)5617 static trackedref *trackedref_new(bool is_ref2) {
5618   trackedref *ret = upb_malloc(&upb_alloc_debugrefs, sizeof(*ret));
5619   ret->count = 1;
5620   ret->is_ref2 = is_ref2;
5621   return ret;
5622 }
5623 
track(const upb_refcounted * r,const void * owner,bool ref2)5624 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
5625   upb_value v;
5626 
5627   UPB_ASSERT(owner);
5628   if (owner == UPB_UNTRACKED_REF) return;
5629 
5630   upb_lock();
5631   if (upb_inttable_lookupptr(r->refs, owner, &v)) {
5632     trackedref *ref = upb_value_getptr(v);
5633     /* Since we allow multiple ref2's for the same to/from pair without
5634      * allocating separate memory for each one, we lose the fine-grained
5635      * tracking behavior we get with regular refs.  Since ref2s only happen
5636      * inside upb, we'll accept this limitation until/unless there is a really
5637      * difficult upb-internal bug that can't be figured out without it. */
5638     UPB_ASSERT(ref2);
5639     UPB_ASSERT(ref->is_ref2);
5640     ref->count++;
5641   } else {
5642     trackedref *ref = trackedref_new(ref2);
5643     upb_inttable_insertptr2(r->refs, owner, upb_value_ptr(ref),
5644                             &upb_alloc_debugrefs);
5645     if (ref2) {
5646       /* We know this cast is safe when it is a ref2, because it's coming from
5647        * another refcounted object. */
5648       const upb_refcounted *from = owner;
5649       UPB_ASSERT(!upb_inttable_lookupptr(from->ref2s, r, NULL));
5650       upb_inttable_insertptr2(from->ref2s, r, upb_value_ptr(NULL),
5651                               &upb_alloc_debugrefs);
5652     }
5653   }
5654   upb_unlock();
5655 }
5656 
untrack(const upb_refcounted * r,const void * owner,bool ref2)5657 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
5658   upb_value v;
5659   bool found;
5660   trackedref *ref;
5661 
5662   UPB_ASSERT(owner);
5663   if (owner == UPB_UNTRACKED_REF) return;
5664 
5665   upb_lock();
5666   found = upb_inttable_lookupptr(r->refs, owner, &v);
5667   /* This assert will fail if an owner attempts to release a ref it didn't have. */
5668   UPB_ASSERT(found);
5669   ref = upb_value_getptr(v);
5670   UPB_ASSERT(ref->is_ref2 == ref2);
5671   if (--ref->count == 0) {
5672     free(ref);
5673     upb_inttable_removeptr(r->refs, owner, NULL);
5674     if (ref2) {
5675       /* We know this cast is safe when it is a ref2, because it's coming from
5676        * another refcounted object. */
5677       const upb_refcounted *from = owner;
5678       bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
5679       UPB_ASSERT(removed);
5680     }
5681   }
5682   upb_unlock();
5683 }
5684 
checkref(const upb_refcounted * r,const void * owner,bool ref2)5685 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
5686   upb_value v;
5687   bool found;
5688   trackedref *ref;
5689 
5690   upb_lock();
5691   found = upb_inttable_lookupptr(r->refs, owner, &v);
5692   UPB_ASSERT(found);
5693   ref = upb_value_getptr(v);
5694   UPB_ASSERT(ref->is_ref2 == ref2);
5695   upb_unlock();
5696 }
5697 
5698 /* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
5699  * originate from the given owner. */
getref2s(const upb_refcounted * owner,upb_inttable * tab)5700 static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
5701   upb_inttable_iter i;
5702 
5703   upb_lock();
5704   upb_inttable_begin(&i, owner->ref2s);
5705   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
5706     upb_value v;
5707     upb_value count;
5708     trackedref *ref;
5709     bool found;
5710 
5711     upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
5712 
5713     /* To get the count we need to look in the target's table. */
5714     found = upb_inttable_lookupptr(to->refs, owner, &v);
5715     UPB_ASSERT(found);
5716     ref = upb_value_getptr(v);
5717     count = upb_value_int32(ref->count);
5718 
5719     upb_inttable_insertptr2(tab, to, count, &upb_alloc_debugrefs);
5720   }
5721   upb_unlock();
5722 }
5723 
5724 typedef struct {
5725   upb_inttable ref2;
5726   const upb_refcounted *obj;
5727 } check_state;
5728 
visit_check(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)5729 static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
5730                         void *closure) {
5731   check_state *s = closure;
5732   upb_inttable *ref2 = &s->ref2;
5733   upb_value v;
5734   bool removed;
5735   int32_t newcount;
5736 
5737   UPB_ASSERT(obj == s->obj);
5738   UPB_ASSERT(subobj);
5739   removed = upb_inttable_removeptr(ref2, subobj, &v);
5740   /* The following assertion will fail if the visit() function visits a subobj
5741    * that it did not have a ref2 on, or visits the same subobj too many times. */
5742   UPB_ASSERT(removed);
5743   newcount = upb_value_getint32(v) - 1;
5744   if (newcount > 0) {
5745     upb_inttable_insert2(ref2, (uintptr_t)subobj, upb_value_int32(newcount),
5746                          &upb_alloc_debugrefs);
5747   }
5748 }
5749 
/* Debug-refs wrapper around r->vtbl->visit.  Because every ref2 is tracked in
 * this mode, the exact set of children visit() must report is known; a dry
 * run with visit_check() verifies it before the real callback "v" is run. */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  check_state expected;

  expected.obj = r;
  upb_inttable_init2(&expected.ref2, UPB_CTYPE_INT32, &upb_alloc_debugrefs);
  getref2s(r, &expected.ref2);

  /* Dry run: every child in the ref2 table should get ticked off. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, visit_check, &expected);
  }

  /* Anything left over is a child that visit() failed to report. */
  UPB_ASSERT(upb_inttable_count(&expected.ref2) == 0);
  upb_inttable_uninit2(&expected.ref2, &upb_alloc_debugrefs);

  /* The real visit requested by the caller. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, v, closure);
  }
}
5768 
/* Debug-refs build: allocates and initializes the two tracking tables of "r"
 * ("refs" and "ref2s") from the debug-refs allocator. */
static void trackinit(upb_refcounted *r) {
  upb_alloc *alloc = &upb_alloc_debugrefs;

  r->refs = upb_malloc(alloc, sizeof *r->refs);
  r->ref2s = upb_malloc(alloc, sizeof *r->ref2s);
  upb_inttable_init2(r->refs, UPB_CTYPE_PTR, alloc);
  upb_inttable_init2(r->ref2s, UPB_CTYPE_PTR, alloc);
}
5775 
trackfree(const upb_refcounted * r)5776 static void trackfree(const upb_refcounted *r) {
5777   upb_inttable_uninit2(r->refs, &upb_alloc_debugrefs);
5778   upb_inttable_uninit2(r->ref2s, &upb_alloc_debugrefs);
5779   upb_free(&upb_alloc_debugrefs, r->refs);
5780   upb_free(&upb_alloc_debugrefs, r->ref2s);
5781 }
5782 
5783 #else
5784 
/* Ref tracking is compiled out without UPB_DEBUG_REFS: this is a no-op. */
static void track(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
5790 
/* Ref tracking is compiled out without UPB_DEBUG_REFS: this is a no-op. */
static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
5796 
/* Ref checking is compiled out without UPB_DEBUG_REFS: this is a no-op. */
static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
5802 
/* No tracking tables exist without UPB_DEBUG_REFS: nothing to set up. */
static void trackinit(upb_refcounted *r) {
  UPB_UNUSED(r);
}
5806 
/* No tracking tables exist without UPB_DEBUG_REFS: nothing to release. */
static void trackfree(const upb_refcounted *r) {
  UPB_UNUSED(r);
}
5810 
/* Invokes the object's own visit() function (if its vtbl provides one) with
 * callback "v" and "closure".  No verification is done in this build. */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  if (!r->vtbl->visit) return;
  r->vtbl->visit(r, v, closure);
}
5815 
5816 #endif  /* UPB_DEBUG_REFS */
5817 
5818 
5819 /* freeze() *******************************************************************/
5820 
5821 /* The freeze() operation is by far the most complicated part of this scheme.
5822  * We compute strongly-connected components and then mutate the graph such that
5823  * we preserve the invariants documented at the top of this file.  And we must
5824  * handle out-of-memory errors gracefully (without leaving the graph
5825  * inconsistent), which adds to the fun. */
5826 
/* The state used by the freeze operation (shared across many functions). */
typedef struct {
  /* Current nesting depth of tarjan_visit() calls. */
  int depth;
  /* Depth limit; tarjan_visit() aborts the freeze when depth exceeds it. */
  int maxdepth;
  /* Next Tarjan index to assign; incremented by push(). */
  uint64_t index;
  /* Maps upb_refcounted* -> attributes (color, etc).  attr layout varies by
   * color. */
  upb_inttable objattr;
  upb_inttable stack;   /* stack of upb_refcounted* for Tarjan's algorithm. */
  upb_inttable groups;  /* array of uint32_t*, malloc'd refcounts for new groups */
  /* Receives the error message when the freeze is aborted. */
  upb_status *status;
  /* longjmp() target used by err() to abandon the analysis phase. */
  jmp_buf err;
} tarjan;
5840 
5841 static void release_ref2(const upb_refcounted *obj,
5842                          const upb_refcounted *subobj,
5843                          void *closure);
5844 
5845 /* Node attributes -----------------------------------------------------------*/
5846 
5847 /* After our analysis phase all nodes will be either GRAY or WHITE. */
5848 
/* Node color, stored in the low 2 bits of an object's attr (see color()).
 * BLACK must stay 0: trygetattr() returns 0 for objects with no attr entry,
 * which is how "never seen" objects read as BLACK. */
typedef enum {
  BLACK = 0,  /* Object has not been seen. */
  GRAY,   /* Object has been found via a refgroup but may not be reachable. */
  GREEN,  /* Object is reachable and is currently on the Tarjan stack. */
  WHITE   /* Object is reachable and has been assigned a group (SCC). */
} color_t;
5855 
err(tarjan * t)5856 UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
oom(tarjan * t)5857 UPB_NORETURN static void oom(tarjan *t) {
5858   upb_status_seterrmsg(t->status, "out of memory");
5859   err(t);
5860 }
5861 
trygetattr(const tarjan * t,const upb_refcounted * r)5862 static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
5863   upb_value v;
5864   return upb_inttable_lookupptr(&t->objattr, r, &v) ?
5865       upb_value_getuint64(v) : 0;
5866 }
5867 
getattr(const tarjan * t,const upb_refcounted * r)5868 static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
5869   upb_value v;
5870   bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
5871   UPB_ASSERT(found);
5872   return upb_value_getuint64(v);
5873 }
5874 
/* Stores "attr" as the attr word for "r", replacing any previous value.
 *
 * The insertion can require memory.  If it fails the previous entry has
 * already been removed, so continuing would make later getattr() calls see a
 * missing attr; instead the freeze is abandoned through oom(), which
 * longjmp()s to t->err and does not return.  All callers visible here
 * (set_gray, push, pop, set_lowlink) run during the analysis phase of
 * freeze(), after its setjmp(). */
static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
  upb_inttable_removeptr(&t->objattr, r, NULL);
  if (!upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr))) {
    oom(t);
  }
}
5879 
color(tarjan * t,const upb_refcounted * r)5880 static color_t color(tarjan *t, const upb_refcounted *r) {
5881   return trygetattr(t, r) & 0x3;  /* Color is always stored in the low 2 bits. */
5882 }
5883 
/* Marks a not-yet-seen (BLACK) object as GRAY.  A GRAY attr carries no data
 * beyond the color itself. */
static void set_gray(tarjan *t, const upb_refcounted *r) {
  UPB_ASSERT(BLACK == color(t, r));
  setattr(t, r, GRAY);
}
5888 
5889 /* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
push(tarjan * t,const upb_refcounted * r)5890 static void push(tarjan *t, const upb_refcounted *r) {
5891   UPB_ASSERT(color(t, r) == BLACK || color(t, r) == GRAY);
5892   /* This defines the attr layout for the GREEN state.  "index" and "lowlink"
5893    * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
5894   setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
5895   if (++t->index == 0x80000000) {
5896     upb_status_seterrmsg(t->status, "too many objects to freeze");
5897     err(t);
5898   }
5899   upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
5900 }
5901 
5902 /* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
5903  * SCC group. */
pop(tarjan * t)5904 static upb_refcounted *pop(tarjan *t) {
5905   upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
5906   UPB_ASSERT(color(t, r) == GREEN);
5907   /* This defines the attr layout for nodes in the WHITE state.
5908    * Top of group stack is [group, NULL]; we point at group. */
5909   setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
5910   return r;
5911 }
5912 
/* Starts a new SCC group: allocates its refcount (initialized to 0) and
 * appends two slots to t->groups, [refcount pointer, leader], with the leader
 * left NULL until groupleader() fills it in.
 *
 * Does not return on allocation failure (see oom()).  On failure t->groups
 * never holds a pointer that has been freed here: the error path of freeze()
 * frees every pointer stored in t->groups, so freeing a pointer that is still
 * in the table would free it twice. */
static void tarjan_newgroup(tarjan *t) {
  uint32_t *group = upb_gmalloc(sizeof(*group));
  if (!group) oom(t);
  *group = 0;

  /* Push the group's refcount. */
  if (!upb_inttable_push(&t->groups, upb_value_ptr(group))) {
    upb_gfree(group);
    oom(t);
  }

  /* Push the empty group leader (we'll fill in the leader later). */
  if (!upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
    /* Take the refcount back out of the table before freeing it so the table
     * does not keep a dangling pointer. */
    upb_inttable_pop(&t->groups);
    upb_gfree(group);
    oom(t);
  }
}
5924 
/* Returns the Tarjan index of a GREEN object: the 31 bits stored directly
 * above the 2 color bits of its attr. */
static uint32_t idx(tarjan *t, const upb_refcounted *r) {
  uint64_t attr;
  UPB_ASSERT(color(t, r) == GREEN);
  attr = getattr(t, r);
  return (attr >> 2) & 0x7FFFFFFF;
}
5929 
/* Returns the lowlink of "r" (stored from bit 33 up in a GREEN attr).  Objects
 * that are not GREEN report UINT32_MAX, so they never win a UPB_MIN(). */
static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
  if (color(t, r) != GREEN) {
    return UINT32_MAX;
  }
  return getattr(t, r) >> 33;
}
5937 
/* Replaces the lowlink of a GREEN object, preserving the low 33 bits of its
 * attr (color and index). */
static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
  uint64_t color_and_index;
  uint64_t shifted_lowlink;

  UPB_ASSERT(color(t, r) == GREEN);
  shifted_lowlink = (uint64_t)lowlink << 33;
  color_and_index = getattr(t, r) & 0x1FFFFFFFF;
  setattr(t, r, shifted_lowlink | color_and_index);
}
5942 
/* Returns the refcount pointer of the new SCC group that the WHITE object
 * "r" was assigned to. */
static uint32_t *group(tarjan *t, upb_refcounted *r) {
  upb_value slot;
  uint64_t slot_num;
  bool present;

  UPB_ASSERT(color(t, r) == WHITE);
  slot_num = getattr(t, r) >> 8;  /* Group slot is stored above bit 8. */
  present = upb_inttable_lookup(&t->groups, slot_num, &slot);
  UPB_ASSERT(present);
  return upb_value_getptr(slot);
}
5954 
5955 /* If the group leader for this object's group has not previously been set,
5956  * the given object is assigned to be its leader. */
groupleader(tarjan * t,upb_refcounted * r)5957 static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
5958   uint64_t leader_slot;
5959   upb_value v;
5960   bool found;
5961 
5962   UPB_ASSERT(color(t, r) == WHITE);
5963   leader_slot = (getattr(t, r) >> 8) + 1;
5964   found = upb_inttable_lookup(&t->groups, leader_slot, &v);
5965   UPB_ASSERT(found);
5966   if (upb_value_getptr(v)) {
5967     return upb_value_getptr(v);
5968   } else {
5969     upb_inttable_remove(&t->groups, leader_slot, NULL);
5970     upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
5971     return r;
5972   }
5973 }
5974 
5975 
5976 /* Tarjan's algorithm --------------------------------------------------------*/
5977 
5978 /* See:
5979  *   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
5980 static void do_tarjan(const upb_refcounted *obj, tarjan *t);
5981 
tarjan_visit(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)5982 static void tarjan_visit(const upb_refcounted *obj,
5983                          const upb_refcounted *subobj,
5984                          void *closure) {
5985   tarjan *t = closure;
5986   if (++t->depth > t->maxdepth) {
5987     upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
5988     err(t);
5989   } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
5990     /* Do nothing: we don't want to visit or color already-frozen nodes,
5991      * and WHITE nodes have already been assigned a SCC. */
5992   } else if (color(t, subobj) < GREEN) {
5993     /* Subdef has not yet been visited; recurse on it. */
5994     do_tarjan(subobj, t);
5995     set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
5996   } else if (color(t, subobj) == GREEN) {
5997     /* Subdef is in the stack and hence in the current SCC. */
5998     set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
5999   }
6000   --t->depth;
6001 }
6002 
/* Runs Tarjan's algorithm starting at "obj": pushes it, visits its children,
 * and, if "obj" turns out to be the root of an SCC, creates a new group and
 * pops the whole component off the stack. */
static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
  if (color(t, obj) == BLACK) {
    /* First contact with this object's (old) group: every member of its
     * circular list becomes GRAY. */
    const upb_refcounted *member = obj;
    do {
      set_gray(t, member);
      member = member->next;
    } while (member != obj);
  }

  push(t, obj);
  visit(obj, tarjan_visit, t);

  if (lowlink(t, obj) == idx(t, obj)) {
    const upb_refcounted *popped;
    tarjan_newgroup(t);
    /* Everything above and including "obj" on the stack is one SCC. */
    do {
      popped = pop(t);
    } while (popped != obj);
  }
}
6018 
6019 
6020 /* freeze() ------------------------------------------------------------------*/
6021 
crossref(const upb_refcounted * r,const upb_refcounted * subobj,void * _t)6022 static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
6023                      void *_t) {
6024   tarjan *t = _t;
6025   UPB_ASSERT(color(t, r) > BLACK);
6026   if (color(t, subobj) > BLACK && r->group != subobj->group) {
6027     /* Previously this ref was not reflected in subobj->group because they
6028      * were in the same group; now that they are split a ref must be taken. */
6029     refgroup(subobj->group);
6030   }
6031 }
6032 
/* Freezes all objects reachable from the "n" objects in "roots".
 *
 * An analysis phase (Tarjan's SCC algorithm) performs every allocation up
 * front and mutates nothing; only if it succeeds are the objects regrouped
 * and marked frozen, in three passes described below.
 *
 * roots, n:  the objects to start from.
 * s:         receives an error message on failure.
 * maxdepth:  limit on the nesting depth of the graph traversal.
 *
 * Returns true on success.  Returns false, with a message recorded in "s",
 * if memory runs out (including while setting up the working tables), if the
 * graph is deeper than maxdepth, or if there are too many objects; in that
 * case the input objects are left unchanged. */
static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
                   int maxdepth) {
  /* volatile: written after setjmp() and read after a possible longjmp(). */
  volatile bool ret = false;
  int i;
  upb_inttable_iter iter;

  /* We run in two passes so that we can allocate all memory before performing
   * any mutation of the input -- this allows us to leave the input unchanged
   * in the case of memory allocation failure. */
  tarjan t;
  t.index = 0;
  t.depth = 0;
  t.maxdepth = maxdepth;
  t.status = s;
  /* A failed table init must report an error just like oom() does; otherwise
   * we would return false while "s" still says ok. */
  if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) {
    upb_status_seterrmsg(s, "out of memory");
    goto err1;
  }
  if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) {
    upb_status_seterrmsg(s, "out of memory");
    goto err2;
  }
  if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) {
    upb_status_seterrmsg(s, "out of memory");
    goto err3;
  }
  /* err() and oom() longjmp() back here; the status is already set then. */
  if (setjmp(t.err) != 0) goto err4;


  for (i = 0; i < n; i++) {
    if (color(&t, roots[i]) < GREEN) {
      do_tarjan(roots[i], &t);
    }
  }

  /* If we've made it this far, no further errors are possible so it's safe to
   * mutate the objects without risk of leaving them in an inconsistent state. */
  ret = true;

  /* The transformation that follows requires care.  The preconditions are:
   * - all objects in attr map are WHITE or GRAY, and are in mutable groups
   *   (groups of all mutable objs)
   * - no ref2(to, from) refs have incremented count(to) if both "to" and
   *   "from" are in our attr map (this follows from invariants (2) and (3)) */

  /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
   * new groups  according to the SCC's we computed.  These new groups will
   * consist of only frozen objects.  None will be immediately collectible,
   * because WHITE objects are by definition reachable from one of "roots",
   * which the caller must own refs on. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    /* Since removal from a singly-linked list requires access to the object's
     * predecessor, we consider obj->next instead of obj for moving.  With the
     * while() loop we guarantee that we will visit every node's predecessor.
     * Proof:
     *  1. every node's predecessor is in our attr map.
     *  2. though the loop body may change a node's predecessor, it will only
     *     change it to be the node we are currently operating on, so with a
     *     while() loop we guarantee ourselves the chance to remove each node. */
    while (color(&t, obj->next) == WHITE &&
           group(&t, obj->next) != obj->next->group) {
      upb_refcounted *leader;

      /* Remove from old group. */
      upb_refcounted *move = obj->next;
      if (obj == move) {
        /* Removing the last object from a group. */
        UPB_ASSERT(*obj->group == obj->individual_count);
        upb_gfree(obj->group);
      } else {
        obj->next = move->next;
        /* This may decrease to zero; we'll collect GRAY objects (if any) that
         * remain in the group in the third pass. */
        UPB_ASSERT(*move->group >= move->individual_count);
        *move->group -= move->individual_count;
      }

      /* Add to new group. */
      leader = groupleader(&t, move);
      if (move == leader) {
        /* First object added to new group is its leader. */
        move->group = group(&t, move);
        move->next = move;
        *move->group = move->individual_count;
      } else {
        /* Group already has at least one object in it. */
        UPB_ASSERT(leader->group == group(&t, move));
        move->group = group(&t, move);
        move->next = leader->next;
        leader->next = move;
        *move->group += move->individual_count;
      }

      move->is_frozen = true;
    }
  }

  /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
   * increment count(to) if group(obj) != group(to) (which could now be the
   * case if "to" was just frozen). */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    visit(obj, crossref, &t);
  }

  /* Pass 3: GRAY objects are collected if their group's refcount dropped to
   * zero when we removed its white nodes.  This can happen if they had only
   * been kept alive by virtue of sharing a group with an object that was just
   * frozen.
   *
   * It is important that we do this last, since the GRAY object's free()
   * function could call unref2() on just-frozen objects, which will decrement
   * refs that were added in pass 2. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    if (obj->group == NULL || *obj->group == 0) {
      if (obj->group) {
        upb_refcounted *o;

        /* We eagerly free() the group's count (since we can't easily determine
         * the group's remaining size it's the easiest way to ensure it gets
         * done). */
        upb_gfree(obj->group);

        /* Visit to release ref2's (done in a separate pass since release_ref2
         * depends on o->group being unmodified so it can test merged()). */
        o = obj;
        do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);

        /* Mark "group" fields as NULL so we know to free the objects later in
         * this loop, but also don't try to delete the group twice. */
        o = obj;
        do { o->group = NULL; } while ((o = o->next) != obj);
      }
      freeobj(obj);
    }
  }

err4:
  if (!ret) {
    /* The analysis was abandoned: the refcounts allocated for new groups were
     * never handed to any object, so release them here. */
    upb_inttable_begin(&iter, &t.groups);
    for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
      upb_gfree(upb_value_getptr(upb_inttable_iter_value(&iter)));
  }
  upb_inttable_uninit(&t.groups);
err3:
  upb_inttable_uninit(&t.stack);
err2:
  upb_inttable_uninit(&t.objattr);
err1:
  return ret;
}
6180 
6181 
6182 /* Misc internal functions  ***************************************************/
6183 
merged(const upb_refcounted * r,const upb_refcounted * r2)6184 static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
6185   return r->group == r2->group;
6186 }
6187 
/* Merges the group of "from" into the group of "r": the refcounts are summed
 * into r's group, from's refcount is freed, and the two circular member lists
 * are joined.  Does nothing if they already share a group. */
static void merge(upb_refcounted *r, upb_refcounted *from) {
  upb_refcounted *const first = from;
  upb_refcounted *member;
  upb_refcounted *r_successor;

  if (merged(r, from)) return;

  *r->group += *from->group;
  upb_gfree(from->group);

  /* Point every member of the "from" chain at the merged refcount.
   *
   * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
   * if the user continuously extends a group by one object.  Prevent this by
   * using one of the techniques in this paper:
   *     http://bioinfo.ict.ac.cn/~dbu/AlgorithmCourses/Lectures/Union-Find-Tarjan.pdf */
  member = first;
  do {
    member->group = r->group;
    member = member->next;
  } while (member != first);

  /* Join the two circular lists by exchanging the "next" pointers of "r" and
   * of the first member of the "from" chain. */
  r_successor = r->next;
  r->next = first->next;
  first->next = r_successor;
}
6210 
6211 static void unref(const upb_refcounted *r);
6212 
release_ref2(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)6213 static void release_ref2(const upb_refcounted *obj,
6214                          const upb_refcounted *subobj,
6215                          void *closure) {
6216   UPB_UNUSED(closure);
6217   untrack(subobj, obj, true);
6218   if (!merged(obj, subobj)) {
6219     UPB_ASSERT(subobj->is_frozen);
6220     unref(subobj);
6221   }
6222 }
6223 
unref(const upb_refcounted * r)6224 static void unref(const upb_refcounted *r) {
6225   if (unrefgroup(r->group)) {
6226     const upb_refcounted *o;
6227 
6228     upb_gfree(r->group);
6229 
6230     /* In two passes, since release_ref2 needs a guarantee that any subobjs
6231      * are alive. */
6232     o = r;
6233     do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
6234 
6235     o = r;
6236     do {
6237       const upb_refcounted *next = o->next;
6238       UPB_ASSERT(o->is_frozen || o->individual_count == 0);
6239       freeobj((upb_refcounted*)o);
6240       o = next;
6241     } while(o != r);
6242   }
6243 }
6244 
/* Releases the ref-tracking state of "o" (a no-op unless UPB_DEBUG_REFS) and
 * then hands the object to its vtbl free() function. */
static void freeobj(upb_refcounted *o) {
  trackfree(o);
  o->vtbl->free(o);
}
6249 
6250 
6251 /* Public interface ***********************************************************/
6252 
/* Initializes "r" as a mutable object that forms its own single-member group
 * and gives "owner" the first ref on it.
 *
 * Returns false if the group refcount cannot be allocated; no ref is taken
 * in that case. */
bool upb_refcounted_init(upb_refcounted *r,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
#ifndef NDEBUG
  /* Endianness check.  It has nothing to do with upb_refcounted; this is
   * simply a spot that we can be assured will run in basically every program
   * using upb. */
  const int one = 1;
#ifdef UPB_BIG_ENDIAN
  UPB_ASSERT(*(char*)&one != 1);
#else
  UPB_ASSERT(*(char*)&one == 1);
#endif
#endif

  r->next = r;  /* Circular list containing only this object. */
  r->vtbl = vtbl;
  r->individual_count = 0;
  r->is_frozen = false;

  r->group = upb_gmalloc(sizeof(*r->group));
  if (r->group == NULL) {
    return false;
  }
  *r->group = 0;

  trackinit(r);
  upb_refcounted_ref(r, owner);
  return true;
}
6279 
upb_refcounted_isfrozen(const upb_refcounted * r)6280 bool upb_refcounted_isfrozen(const upb_refcounted *r) {
6281   return r->is_frozen;
6282 }
6283 
upb_refcounted_ref(const upb_refcounted * r,const void * owner)6284 void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
6285   track(r, owner, false);
6286   if (!r->is_frozen)
6287     ((upb_refcounted*)r)->individual_count++;
6288   refgroup(r->group);
6289 }
6290 
upb_refcounted_unref(const upb_refcounted * r,const void * owner)6291 void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
6292   untrack(r, owner, false);
6293   if (!r->is_frozen)
6294     ((upb_refcounted*)r)->individual_count--;
6295   unref(r);
6296 }
6297 
/* Takes a ref2 on "r" from the mutable object "from".  A mutable "r" is
 * merged into from's group; a frozen "r" gets a ref on its group instead. */
void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
  UPB_ASSERT(!from->is_frozen);  /* Non-const pointer implies this. */
  track(r, from, true);
  if (!r->is_frozen) {
    merge((upb_refcounted*)r, from);
  } else {
    refgroup(r->group);
  }
}
6307 
/* Releases a ref2 that the mutable object "from" holds on "r".  Only a frozen
 * "r" has a group ref to give back; a mutable "r" merely has to share a group
 * with "from" (asserted). */
void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
  UPB_ASSERT(!from->is_frozen);  /* Non-const pointer implies this. */
  untrack(r, from, true);
  if (!r->is_frozen) {
    UPB_ASSERT(merged(r, from));
    return;
  }
  unref(r);
}
6317 
/* Moves a ref on "r" from owner "from" to owner "to".  The new ref is taken
 * before the old one is released.  Either owner may be NULL, in which case
 * that half of the transfer is skipped; the two must differ. */
void upb_refcounted_donateref(
    const upb_refcounted *r, const void *from, const void *to) {
  UPB_ASSERT(from != to);
  if (to != NULL) {
    upb_refcounted_ref(r, to);
  }
  if (from != NULL) {
    upb_refcounted_unref(r, from);
  }
}
6326 
/* Checks that "owner" holds a plain (non-ref2) ref on "r".  Only has an
 * effect in UPB_DEBUG_REFS builds. */
void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
  const bool is_ref2 = false;
  checkref(r, owner, is_ref2);
}
6330 
/* Public entry point for freezing: asserts that none of the "n" roots is
 * already frozen, then runs freeze().  Returns freeze()'s result, which is
 * asserted to agree with the ok-ness of "s" whenever "s" is non-NULL. */
bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
                           int maxdepth) {
  bool ok;
  int idx;

  for (idx = 0; idx < n; idx++) {
    UPB_ASSERT(!roots[idx]->is_frozen);
  }

  ok = freeze(roots, n, s, maxdepth);
  UPB_ASSERT(!s || ok == upb_ok(s));
  return ok;
}
6342 
6343 
/* Pushes the whole buffer [buf, buf+len) into "sink" as one start / putbuf /
 * end sequence.  putbuf is skipped for an empty buffer.  Returns true only if
 * the start succeeded, the sink consumed at least "len" bytes, and the end
 * succeeded. */
bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink *sink) {
  upb_bufhandle handle;
  void *subc;
  bool ok;

  upb_bufhandle_init(&handle);
  upb_bufhandle_setbuf(&handle, buf, 0);

  ok = upb_bytessink_start(sink, len, &subc);
  if (ok) {
    if (len != 0) {
      ok = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len);
    }
    if (ok) {
      ok = upb_bytessink_end(sink);
    }
  }

  upb_bufhandle_uninit(&handle);
  return ok;
}
6360 
/* A bytes sink that accumulates everything written to it in one growable
 * buffer (see upb_bufsink_string() and upb_bufsink_getdata()). */
struct upb_bufsink {
  upb_byteshandler handler;  /* Handlers wired up by upb_bufsink_new(). */
  upb_bytessink sink;        /* The sink returned by upb_bufsink_sink(). */
  upb_env *env;              /* Environment all allocations are made from. */
  char *ptr;                 /* Start of the accumulated data. */
  /* len: bytes of data currently stored; size: allocated capacity of ptr. */
  size_t len, size;
};
6368 
upb_bufsink_start(void * _sink,const void * hd,size_t size_hint)6369 static void *upb_bufsink_start(void *_sink, const void *hd, size_t size_hint) {
6370   upb_bufsink *sink = _sink;
6371   UPB_UNUSED(hd);
6372   UPB_UNUSED(size_hint);
6373   sink->len = 0;
6374   return sink;
6375 }
6376 
upb_bufsink_string(void * _sink,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)6377 static size_t upb_bufsink_string(void *_sink, const void *hd, const char *ptr,
6378                                 size_t len, const upb_bufhandle *handle) {
6379   upb_bufsink *sink = _sink;
6380   size_t new_size = sink->size;
6381 
6382   UPB_ASSERT(new_size > 0);
6383   UPB_UNUSED(hd);
6384   UPB_UNUSED(handle);
6385 
6386   while (sink->len + len > new_size) {
6387     new_size *= 2;
6388   }
6389 
6390   if (new_size != sink->size) {
6391     sink->ptr = upb_env_realloc(sink->env, sink->ptr, sink->size, new_size);
6392     sink->size = new_size;
6393   }
6394 
6395   memcpy(sink->ptr + sink->len, ptr, len);
6396   sink->len += len;
6397 
6398   return len;
6399 }
6400 
upb_bufsink_new(upb_env * env)6401 upb_bufsink *upb_bufsink_new(upb_env *env) {
6402   upb_bufsink *sink = upb_env_malloc(env, sizeof(upb_bufsink));
6403   upb_byteshandler_init(&sink->handler);
6404   upb_byteshandler_setstartstr(&sink->handler, upb_bufsink_start, NULL);
6405   upb_byteshandler_setstring(&sink->handler, upb_bufsink_string, NULL);
6406 
6407   upb_bytessink_reset(&sink->sink, &sink->handler, sink);
6408 
6409   sink->env = env;
6410   sink->size = 32;
6411   sink->ptr = upb_env_malloc(env, sink->size);
6412   sink->len = 0;
6413 
6414   return sink;
6415 }
6416 
/* Releases the sink's data buffer and then the sink itself, both through the
 * environment the sink was created with. */
void upb_bufsink_free(upb_bufsink *sink) {
  upb_env *env = sink->env;
  upb_env_free(env, sink->ptr);
  upb_env_free(env, sink);
}
6421 
upb_bufsink_sink(upb_bufsink * sink)6422 upb_bytessink *upb_bufsink_sink(upb_bufsink *sink) {
6423   return &sink->sink;
6424 }
6425 
upb_bufsink_getdata(const upb_bufsink * sink,size_t * len)6426 const char *upb_bufsink_getdata(const upb_bufsink *sink, size_t *len) {
6427   *len = sink->len;
6428   return sink->ptr;
6429 }
6430 /*
6431 ** upb_table Implementation
6432 **
6433 ** Implementation is heavily inspired by Lua's ltable.c.
6434 */
6435 
6436 
6437 #include <string.h>
6438 
/* Upper bound that log2ceil() clamps its result to (2^16 == 64k). */
#define UPB_MAXARRSIZE 16  /* 64k. */

/* From Chromium.
 * Number of elements in array "x".  The trailing division is by 1 when
 * sizeof(x) is a whole multiple of the element size and by 0 otherwise, so
 * some misuses (e.g. passing a pointer whose size is not a multiple of the
 * pointee's size) are rejected at compile time. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
6444 
/* Debug check that "a" is the allocator table "t" was initialized with.  The
 * UPB_UNUSED() markers keep the parameters referenced in builds where the
 * assertion expands to nothing. */
static void upb_check_alloc(upb_table *t, upb_alloc *a) {
  UPB_UNUSED(a);
  UPB_UNUSED(t);
  UPB_ASSERT_DEBUGVAR(t->alloc == a);
}
6450 
/* Maximum load factor of the hash part: isfull() reports a table as full when
 * adding one more entry would push count/size above this value. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
6457 
/* Returns true if "v" is zero or has exactly one bit set. */
bool is_pow2(uint64_t v) {
  if (v == 0) {
    return true;
  }
  /* Clearing the lowest set bit leaves nothing only for a single-bit value. */
  return (v & (v - 1)) == 0;
}
6459 
log2ceil(uint64_t v)6460 int log2ceil(uint64_t v) {
6461   int ret = 0;
6462   bool pow2 = is_pow2(v);
6463   while (v >>= 1) ret++;
6464   ret = pow2 ? ret : ret + 1;  /* Ceiling. */
6465   return UPB_MIN(UPB_MAXARRSIZE, ret);
6466 }
6467 
/* Duplicates the NUL-terminated string "s" using allocator "a".  Returns
 * whatever upb_strdup2() returns for the string's length (NULL on failure). */
char *upb_strdup(const char *s, upb_alloc *a) {
  size_t len = strlen(s);
  return upb_strdup2(s, len, a);
}
6471 
/* Copies the "len" bytes at "s" into a new buffer of len+1 bytes obtained
 * from allocator "a" and appends a NUL byte.  The input need not be
 * NUL-terminated and may contain arbitrary binary data.
 *
 * Returns NULL if the allocation fails or if len+1 would overflow. */
char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
  char *copy;

  /* len + 1 below must not wrap around to zero. */
  if (len == SIZE_MAX) {
    return NULL;
  }

  copy = upb_malloc(a, len + 1);
  if (copy == NULL) {
    return NULL;
  }

  memcpy(copy, s, len);
  copy[len] = 0;
  return copy;
}
6488 
/* A type to represent the lookup key of either a strtable or an inttable.
 * Exactly one member is meaningful for a given key: "num" for integer keys
 * (see intkey()) or "str" for string keys (see strkey2()). */
typedef union {
  uintptr_t num;
  struct {
    const char *str;  /* Key bytes; the length is carried separately. */
    size_t len;       /* Number of bytes at "str". */
  } str;
} lookupkey_t;
6497 
strkey2(const char * str,size_t len)6498 static lookupkey_t strkey2(const char *str, size_t len) {
6499   lookupkey_t k;
6500   k.str.str = str;
6501   k.str.len = len;
6502   return k;
6503 }
6504 
intkey(uintptr_t key)6505 static lookupkey_t intkey(uintptr_t key) {
6506   lookupkey_t k;
6507   k.num = key;
6508   return k;
6509 }
6510 
/* Computes the hash of a key as stored in a table entry. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Compares a stored table key (k1) against a lookup key (k2); findentry()
 * treats a true result as a match. */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
6513 
6514 /* Base table (shared code) ***************************************************/
6515 
6516 /* For when we need to cast away const. */
mutable_entries(upb_table * t)6517 static upb_tabent *mutable_entries(upb_table *t) {
6518   return (upb_tabent*)t->entries;
6519 }
6520 
/* Returns true if the table has no room for one more entry: either it has no
 * slots at all, or adding an entry would exceed MAX_LOAD. */
static bool isfull(upb_table *t) {
  if (upb_table_size(t) == 0) {
    return true;
  }
  return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
}
6528 
/* Initializes "t" as an empty table for values of type "ctype" with the given
 * log2 size, allocating a zero-filled entry array from "a" when the table
 * size is non-zero.
 *
 * Returns false if the entry array cannot be allocated. */
static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2,
                 upb_alloc *a) {
  size_t bytes;

  t->count = 0;
  t->ctype = ctype;
  t->size_lg2 = size_lg2;
  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
#ifndef NDEBUG
  /* Remembered so that later calls can be checked against it. */
  t->alloc = a;
#endif

  bytes = upb_table_size(t) * sizeof(upb_tabent);
  if (bytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = upb_malloc(a, bytes);
  if (!t->entries) {
    return false;
  }
  memset(mutable_entries(t), 0, bytes);
  return true;
}
6550 
/* Releases the table's entry array back to |a|.  Keys and values stored in
 * the entries are not touched. */
static void uninit(upb_table *t, upb_alloc *a) {
  upb_tabent *slots;

  upb_check_alloc(t, a);
  slots = mutable_entries(t);
  upb_free(a, slots);
}
6555 
emptyent(upb_table * t)6556 static upb_tabent *emptyent(upb_table *t) {
6557   upb_tabent *e = mutable_entries(t) + upb_table_size(t);
6558   while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
6559 }
6560 
getentry_mutable(upb_table * t,uint32_t hash)6561 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
6562   return (upb_tabent*)upb_getentry(t, hash);
6563 }
6564 
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)6565 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
6566                                    uint32_t hash, eqlfunc_t *eql) {
6567   const upb_tabent *e;
6568 
6569   if (t->size_lg2 == 0) return NULL;
6570   e = upb_getentry(t, hash);
6571   if (upb_tabent_isempty(e)) return NULL;
6572   while (1) {
6573     if (eql(e->key, key)) return e;
6574     if ((e = e->next) == NULL) return NULL;
6575   }
6576 }
6577 
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)6578 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
6579                                      uint32_t hash, eqlfunc_t *eql) {
6580   return (upb_tabent*)findentry(t, key, hash, eql);
6581 }
6582 
/* Looks |key| up in |t|.  Returns true if it is present; in that case, and if
 * |v| is non-NULL, the stored value is written to |*v|. */
static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
                   uint32_t hash, eqlfunc_t *eql) {
  const upb_tabent *hit = findentry(t, key, hash, eql);
  if (hit == NULL) return false;
  if (v != NULL) {
    _upb_value_setval(v, hit->val.val, t->ctype);
  }
  return true;
}
6595 
6596 /* The given key must not already exist in the table. */
insert(upb_table * t,lookupkey_t key,upb_tabkey tabkey,upb_value val,uint32_t hash,hashfunc_t * hashfunc,eqlfunc_t * eql)6597 static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
6598                    upb_value val, uint32_t hash,
6599                    hashfunc_t *hashfunc, eqlfunc_t *eql) {
6600   upb_tabent *mainpos_e;
6601   upb_tabent *our_e;
6602 
6603   UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
6604   UPB_ASSERT_DEBUGVAR(val.ctype == t->ctype);
6605 
6606   t->count++;
6607   mainpos_e = getentry_mutable(t, hash);
6608   our_e = mainpos_e;
6609 
6610   if (upb_tabent_isempty(mainpos_e)) {
6611     /* Our main position is empty; use it. */
6612     our_e->next = NULL;
6613   } else {
6614     /* Collision. */
6615     upb_tabent *new_e = emptyent(t);
6616     /* Head of collider's chain. */
6617     upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
6618     if (chain == mainpos_e) {
6619       /* Existing ent is in its main posisiton (it has the same hash as us, and
6620        * is the head of our chain).  Insert to new ent and append to this chain. */
6621       new_e->next = mainpos_e->next;
6622       mainpos_e->next = new_e;
6623       our_e = new_e;
6624     } else {
6625       /* Existing ent is not in its main position (it is a node in some other
6626        * chain).  This implies that no existing ent in the table has our hash.
6627        * Evict it (updating its chain) and use its ent for head of our chain. */
6628       *new_e = *mainpos_e;  /* copies next. */
6629       while (chain->next != mainpos_e) {
6630         chain = (upb_tabent*)chain->next;
6631         UPB_ASSERT(chain);
6632       }
6633       chain->next = new_e;
6634       our_e = mainpos_e;
6635       our_e->next = NULL;
6636     }
6637   }
6638   our_e->key = tabkey;
6639   our_e->val.val = val.val;
6640   UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
6641 }
6642 
/* Removes |key| from |t|.
 *
 * Returns true if the key was present.  In that case the stored value is
 * written to |*val| (if non-NULL) and the stored key to |*removed| (if
 * non-NULL); ownership of whatever the stored key refers to passes to the
 * caller.  Returns false, leaving the table untouched, if the key is absent. */
static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
  upb_tabent *chain;

  /* Same guard as findentry(): a table with size_lg2 == 0 has no slots
   * (init() leaves entries == NULL), so there is nothing to probe. */
  if (t->size_lg2 == 0) return false;

  chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;

  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val, t->ctype);
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Pull the second element into the head slot and free its old slot. */
      upb_tabent *move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0;  /* Make the slot empty. */
    } else {
      chain->key = 0;  /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table.  Stop at the node just before the match (or at the tail). */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove; unlink it. */
      upb_tabent *rm = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, chain->next->val.val, t->ctype);
      if (removed) *removed = rm->key;
      rm->key = 0;  /* Make the slot empty. */
      chain->next = rm->next;
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}
6681 
next(const upb_table * t,size_t i)6682 static size_t next(const upb_table *t, size_t i) {
6683   do {
6684     if (++i >= upb_table_size(t))
6685       return SIZE_MAX;
6686   } while(upb_tabent_isempty(&t->entries[i]));
6687 
6688   return i;
6689 }
6690 
/* Returns the index of the first non-empty slot, or SIZE_MAX if the table has
 * none.  (size_t)-1 wraps to slot 0 on next()'s first increment. */
static size_t begin(const upb_table *t) {
  const size_t before_first = (size_t)-1;
  return next(t, before_first);
}
6694 
6695 
6696 /* upb_strtable ***************************************************************/
6697 
6698 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
6699 
strcopy(lookupkey_t k2,upb_alloc * a)6700 static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
6701   uint32_t len = (uint32_t) k2.str.len;
6702   char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
6703   if (str == NULL) return 0;
6704   memcpy(str, &len, sizeof(uint32_t));
6705   memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
6706   return (uintptr_t)str;
6707 }
6708 
/* hashfunc_t for string tables: decodes the stored key with upb_tabstr() and
 * hashes its bytes with MurmurHash2 (seed 0). */
static uint32_t strhash(upb_tabkey key) {
  uint32_t nbytes;
  char *bytes = upb_tabstr(key, &nbytes);
  return MurmurHash2(bytes, nbytes, 0);
}
6714 
/* eqlfunc_t for string tables: true if stored key |k1| has the same length
 * and the same bytes as lookup key |k2|. */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t len;
  char *str = upb_tabstr(k1, &len);
  if (len != k2.str.len) return false;
  /* Two empty keys are equal.  Do not hand them to memcmp(): the lookup key's
   * pointer may be NULL, which memcmp() does not permit even for 0 bytes. */
  if (len == 0) return true;
  return memcmp(str, k2.str.str, len) == 0;
}
6720 
/* Initializes an empty string table with an initial size_lg2 of 2, allocating
 * from |a|.  Returns false on allocation failure. */
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
  upb_table *base = &t->t;
  return init(base, ctype, 2, a);
}
6724 
/* Frees every stored key (each slot's key is passed to upb_free(), including
 * the zero key of empty slots) and then the entry array itself. */
void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
  size_t slot = 0;
  while (slot < upb_table_size(&t->t)) {
    upb_free(a, (void*)t->t.entries[slot].key);
    slot++;
  }
  uninit(&t->t, a);
}
6731 
/* Rebuilds |t| as a table created with |size_lg2|, re-inserting (and thereby
 * re-copying the key of) every existing entry.
 *
 * Returns true on success.  On allocation failure returns false and leaves
 * |t| exactly as it was; the partially built replacement is freed. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
  upb_strtable new_table;
  upb_strtable_iter i;

  upb_check_alloc(&t->t, a);

  if (!init(&new_table.t, t->t.ctype, size_lg2, a))
    return false;
  upb_strtable_begin(&i, t);
  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
    if (!upb_strtable_insert3(
            &new_table,
            upb_strtable_iter_key(&i),
            upb_strtable_iter_keylength(&i),
            upb_strtable_iter_value(&i),
            a)) {
      /* Don't drop entries silently: discard the half-built table (which owns
       * its own key copies) and keep the original intact. */
      upb_strtable_uninit2(&new_table, a);
      return false;
    }
  }
  upb_strtable_uninit2(t, a);
  *t = new_table;
  return true;
}
6753 
/* Inserts the |len|-byte key |k| with value |v|.  The table stores its own
 * copy of the key, allocated from |a|.  The key must not already be present.
 * Returns false if growing the table or copying the key fails. */
bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
                          upb_value v, upb_alloc *a) {
  lookupkey_t probe;
  upb_tabkey stored;
  uint32_t hash;

  upb_check_alloc(&t->t, a);

  /* Need to resize?  Build a table with size_lg2 + 1 and move everything. */
  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
    return false;
  }

  probe = strkey2(k, len);
  stored = strcopy(probe, a);
  if (stored == 0) return false;

  hash = MurmurHash2(probe.str.str, probe.str.len, 0);
  insert(&t->t, probe, stored, v, hash, &strhash, &streql);
  return true;
}
6777 
/* Looks up the |len|-byte key |key|.  Returns true if found and, when |v| is
 * non-NULL, stores the value in |*v|. */
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
                          upb_value *v) {
  lookupkey_t probe = strkey2(key, len);
  return lookup(&t->t, probe, v, MurmurHash2(key, len, 0), &streql);
}
6783 
/* Removes the |len|-byte key |key|.  Returns true if it was present; the
 * table's copy of the key is then freed with |alloc| and, when |val| is
 * non-NULL, the removed value is stored in |*val|. */
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
                         upb_value *val, upb_alloc *alloc) {
  upb_tabkey stored;
  uint32_t hash = MurmurHash2(key, len, 0);

  if (!rm(&t->t, strkey2(key, len), val, &stored, hash, &streql)) {
    return false;
  }
  upb_free(alloc, (void*)stored);
  return true;
}
6795 
6796 /* Iteration */
6797 
str_tabent(const upb_strtable_iter * i)6798 static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
6799   return &i->t->t.entries[i->index];
6800 }
6801 
/* Positions |i| on the first occupied slot of |t| (index SIZE_MAX if the
 * table has none). */
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  size_t first = begin(&t->t);
  i->t = t;
  i->index = first;
}
6806 
/* Advances |i| to the next occupied slot (index SIZE_MAX once exhausted). */
void upb_strtable_next(upb_strtable_iter *i) {
  const upb_table *base = &i->t->t;
  i->index = next(base, i->index);
}
6810 
upb_strtable_done(const upb_strtable_iter * i)6811 bool upb_strtable_done(const upb_strtable_iter *i) {
6812   return i->index >= upb_table_size(&i->t->t) ||
6813          upb_tabent_isempty(str_tabent(i));
6814 }
6815 
upb_strtable_iter_key(const upb_strtable_iter * i)6816 const char *upb_strtable_iter_key(const upb_strtable_iter *i) {
6817   UPB_ASSERT(!upb_strtable_done(i));
6818   return upb_tabstr(str_tabent(i)->key, NULL);
6819 }
6820 
upb_strtable_iter_keylength(const upb_strtable_iter * i)6821 size_t upb_strtable_iter_keylength(const upb_strtable_iter *i) {
6822   uint32_t len;
6823   UPB_ASSERT(!upb_strtable_done(i));
6824   upb_tabstr(str_tabent(i)->key, &len);
6825   return len;
6826 }
6827 
upb_strtable_iter_value(const upb_strtable_iter * i)6828 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
6829   UPB_ASSERT(!upb_strtable_done(i));
6830   return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
6831 }
6832 
/* Forces |i| into the "done" state by giving it an index that is past the end
 * of any table. */
void upb_strtable_iter_setdone(upb_strtable_iter *i) {
  const size_t past_the_end = SIZE_MAX;
  i->index = past_the_end;
}
6836 
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)6837 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
6838                                const upb_strtable_iter *i2) {
6839   if (upb_strtable_done(i1) && upb_strtable_done(i2))
6840     return true;
6841   return i1->t == i2->t && i1->index == i2->index;
6842 }
6843 
6844 
6845 /* upb_inttable ***************************************************************/
6846 
6847 /* For inttables we use a hybrid structure where small keys are kept in an
6848  * array and large keys are put in the hash table. */
6849 
/* hashfunc_t adapter around upb_inthash() for integer tables. */
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
6851 
/* eqlfunc_t for integer tables: the stored key equals the lookup number. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  return k2.num == k1;
}
6855 
mutable_array(upb_inttable * t)6856 static upb_tabval *mutable_array(upb_inttable *t) {
6857   return (upb_tabval*)t->array;
6858 }
6859 
inttable_val(upb_inttable * t,uintptr_t key)6860 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
6861   if (key < t->array_size) {
6862     return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
6863   } else {
6864     upb_tabent *e =
6865         findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
6866     return e ? &e->val : NULL;
6867   }
6868 }
6869 
inttable_val_const(const upb_inttable * t,uintptr_t key)6870 static const upb_tabval *inttable_val_const(const upb_inttable *t,
6871                                             uintptr_t key) {
6872   return inttable_val((upb_inttable*)t, key);
6873 }
6874 
upb_inttable_count(const upb_inttable * t)6875 size_t upb_inttable_count(const upb_inttable *t) {
6876   return t->t.count + t->array_count;
6877 }
6878 
/* Consistency check, compiled in only when UPB_DEBUG_TABLE is defined and
 * NDEBUG is not: every key reachable by iteration must be found by lookup,
 * and the number of iterated entries must equal upb_inttable_count().
 * Otherwise this is a no-op. */
static void check(upb_inttable *t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* This check is very expensive (makes inserts/deletes O(N)). */
    size_t seen = 0;
    upb_inttable_iter it;
    upb_inttable_begin(&it, t);
    while (!upb_inttable_done(&it)) {
      UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&it), NULL));
      upb_inttable_next(&it);
      seen++;
    }
    UPB_ASSERT(seen == upb_inttable_count(t));
  }
#endif
}
6894 
/* Initializes |t| with an array part of |asize| slots (at least 1) and a hash
 * part created with |hsize_lg2|.  Every byte of the array part is set to
 * 0xff.  Returns false, with nothing left allocated, if either allocation
 * fails. */
bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
                            size_t asize, int hsize_lg2, upb_alloc *a) {
  size_t nbytes;

  if (!init(&t->t, ctype, hsize_lg2, a)) return false;

  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_size = UPB_MAX(1, asize);
  t->array_count = 0;
  nbytes = t->array_size * sizeof(upb_value);
  t->array = upb_malloc(a, nbytes);
  if (t->array == NULL) {
    /* Undo the hash-part allocation before reporting failure. */
    uninit(&t->t, a);
    return false;
  }

  memset(mutable_array(t), 0xff, nbytes);
  check(t);
  return true;
}
6914 
/* Initializes |t| with default sizing: a requested array size of 0 (which
 * upb_inttable_sizedinit() raises to 1) and a hash size_lg2 of 4. */
bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
  const size_t array_slots = 0;
  const int hash_size_lg2 = 4;
  return upb_inttable_sizedinit(t, ctype, array_slots, hash_size_lg2, a);
}
6918 
/* Frees both parts of the table: the hash part's entries, then the array. */
void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
  upb_tabval *array_part;

  uninit(&t->t, a);
  array_part = mutable_array(t);
  upb_free(a, array_part);
}
6923 
/* Inserts |key| -> |val|.  The key must not already be present.  Keys below
 * array_size go into the array part; all others go into the hash part, which
 * is rebuilt with size_lg2 + 1 first if it is full.  Returns false if that
 * rebuild cannot allocate. */
bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
                          upb_alloc *a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_ASSERT(upb_arrhas(tabval));  /* This will reject (uint64_t)-1.  Fix this. */

  upb_check_alloc(&t->t, a);

  if (key < t->array_size) {
    /* Array part: the slot must currently be vacant. */
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
    check(t);
    return true;
  }

  if (isfull(&t->t)) {
    /* Need to resize the hash part, but we re-use the array part. */
    upb_table grown;
    size_t slot;

    if (!init(&grown, t->t.ctype, t->t.size_lg2 + 1, a)) {
      return false;
    }

    slot = begin(&t->t);
    while (slot < upb_table_size(&t->t)) {
      const upb_tabent *old = &t->t.entries[slot];
      upb_value moved;
      uint32_t hash;

      _upb_value_setval(&moved, old->val.val, t->t.ctype);
      hash = upb_inthash(old->key);
      insert(&grown, intkey(old->key), old->key, moved, hash, &inthash,
             &inteql);
      slot = next(&t->t, slot);
    }

    UPB_ASSERT(t->t.count == grown.count);

    uninit(&t->t, a);
    t->t = grown;
  }

  insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  check(t);
  return true;
}
6966 
/* Returns true if |key| is present.  If it is and |v| is non-NULL, the stored
 * value is written to |*v|. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
  const upb_tabval *stored = inttable_val_const(t, key);
  if (stored == NULL) {
    return false;
  }
  if (v != NULL) {
    _upb_value_setval(v, stored->val, t->t.ctype);
  }
  return true;
}
6973 
/* Overwrites the value stored for an existing |key|.  Returns false, changing
 * nothing, if the key is not present. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
  upb_tabval *stored = inttable_val(t, key);
  if (stored == NULL) {
    return false;
  }
  stored->val = val.val;
  return true;
}
6980 
/* Removes |key|.  Returns true if it was present; the removed value is then
 * written to |*val| when |val| is non-NULL. */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
  bool removed = false;

  if (key >= t->array_size) {
    /* Hash part. */
    removed = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  } else if (upb_arrhas(t->array[key])) {
    /* Array part: hand back the value, then mark the slot as vacant. */
    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
    t->array_count--;
    if (val) {
      _upb_value_setval(val, t->array[key].val, t->t.ctype);
    }
    mutable_array(t)[key] = empty;
    removed = true;
  }

  check(t);
  return removed;
}
7001 
/* Inserts |val| under the key equal to the table's current entry count. */
bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
  uintptr_t next_key;

  upb_check_alloc(&t->t, a);
  next_key = upb_inttable_count(t);
  return upb_inttable_insert2(t, next_key, val, a);
}
7006 
upb_inttable_pop(upb_inttable * t)7007 upb_value upb_inttable_pop(upb_inttable *t) {
7008   upb_value val;
7009   bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
7010   UPB_ASSERT(ok);
7011   return val;
7012 }
7013 
/* Pointer-keyed insert: the pointer's integer value is used as the key. */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
                             upb_alloc *a) {
  uintptr_t numeric_key;

  upb_check_alloc(&t->t, a);
  numeric_key = (uintptr_t)key;
  return upb_inttable_insert2(t, numeric_key, val, a);
}
7019 
/* Pointer-keyed lookup: the pointer's integer value is used as the key. */
bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
                            upb_value *v) {
  const uintptr_t numeric_key = (uintptr_t)key;
  return upb_inttable_lookup(t, numeric_key, v);
}
7024 
/* Pointer-keyed remove: the pointer's integer value is used as the key. */
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  const uintptr_t numeric_key = (uintptr_t)key;
  return upb_inttable_remove(t, numeric_key, val);
}
7028 
/* Rebuilds |t| so that the densest low range of keys lives in the array part
 * and only the remaining keys are hashed, with both parts sized to fit.
 *
 * The function cannot report failure.  If the replacement table cannot be
 * allocated or filled, |t| is left exactly as it was, which is still a valid
 * (merely un-compacted) table. */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  upb_check_alloc(&t->t, a);

  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1;  /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    size_t hashsize_lg2 = log2ceil(hash_size);

    if (!upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2,
                                a)) {
      /* Nothing was allocated for new_t; keep using the old table. */
      return;
    }
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      if (!upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a)) {
        /* Never swap in a table that is missing entries. */
        upb_inttable_uninit2(&new_t, a);
        return;
      }
    }
    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  upb_inttable_uninit2(t, a);
  *t = new_t;
}
7087 
7088 /* Iteration. */
7089 
int_tabent(const upb_inttable_iter * i)7090 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
7091   UPB_ASSERT(!i->array_part);
7092   return &i->t->t.entries[i->index];
7093 }
7094 
int_arrent(const upb_inttable_iter * i)7095 static upb_tabval int_arrent(const upb_inttable_iter *i) {
7096   UPB_ASSERT(i->array_part);
7097   return i->t->array[i->index];
7098 }
7099 
/* Positions |i| on the first entry of |t|.  The iterator starts one step
 * before array slot 0 and is then advanced once, so the array part is visited
 * before the hash part. */
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  i->array_part = true;
  i->index = -1;
  i->t = t;
  upb_inttable_next(i);
}
7106 
/* Advances |iter| to the next entry.  While in the array part it skips vacant
 * slots; when the array is exhausted it switches to the hash part and lands
 * on that part's first occupied slot. */
void upb_inttable_next(upb_inttable_iter *iter) {
  const upb_inttable *t = iter->t;

  if (!iter->array_part) {
    iter->index = next(&t->t, iter->index);
    return;
  }

  while (++iter->index < t->array_size) {
    if (upb_arrhas(int_arrent(iter))) {
      return;
    }
  }

  /* Array part exhausted: continue with the hash part. */
  iter->array_part = false;
  iter->index = begin(&t->t);
}
7121 
upb_inttable_done(const upb_inttable_iter * i)7122 bool upb_inttable_done(const upb_inttable_iter *i) {
7123   if (i->array_part) {
7124     return i->index >= i->t->array_size ||
7125            !upb_arrhas(int_arrent(i));
7126   } else {
7127     return i->index >= upb_table_size(&i->t->t) ||
7128            upb_tabent_isempty(int_tabent(i));
7129   }
7130 }
7131 
upb_inttable_iter_key(const upb_inttable_iter * i)7132 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
7133   UPB_ASSERT(!upb_inttable_done(i));
7134   return i->array_part ? i->index : int_tabent(i)->key;
7135 }
7136 
upb_inttable_iter_value(const upb_inttable_iter * i)7137 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
7138   UPB_ASSERT(!upb_inttable_done(i));
7139   return _upb_value_val(
7140       i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
7141       i->t->t.ctype);
7142 }
7143 
/* Forces |i| into the "done" state: hash part, index past the end of any
 * table. */
void upb_inttable_iter_setdone(upb_inttable_iter *i) {
  i->array_part = false;
  i->index = SIZE_MAX;
}
7148 
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)7149 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
7150                                           const upb_inttable_iter *i2) {
7151   if (upb_inttable_done(i1) && upb_inttable_done(i2))
7152     return true;
7153   return i1->t == i2->t && i1->index == i2->index &&
7154          i1->array_part == i2->array_part;
7155 }
7156 
7157 #ifdef UPB_UNALIGNED_READS_OK
/* -----------------------------------------------------------------------------
 * MurmurHash2, by Austin Appleby (released as public domain).
 * Reformatted and C99-ified by Joshua Haberman.
 * Note - This code makes a few assumptions about how your machine behaves -
 *   1. We can read a 4-byte value from any address without crashing
 *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
 * And it has a few limitations -
 *   1. It will not work incrementally.
 *   2. It will not produce the same results on little-endian and big-endian
 *      machines.
 *
 * Hashes the |len| bytes at |key|, starting from |seed|. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  /* 'm' and 'r' are mixing constants generated offline.
   * They're not really 'magic', they just happen to work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  /* Initialize the hash to a 'random' value */
  uint32_t h = seed ^ len;

  /* Mix 4 bytes at a time into the hash */
  const uint8_t * data = (const uint8_t *)key;
  while(len >= 4) {
    /* Load the word with memcpy() rather than through a casted pointer: the
     * value read is the same, but this neither discards const nor reads the
     * buffer through an incompatible lvalue type. */
    uint32_t k;
    memcpy(&k, data, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Handle the last few bytes of the input array */
  switch(len) {
    case 3: h ^= data[2] << 16;  /* fallthrough */
    case 2: h ^= data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m;
  }

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
7208 
7209 #else /* !UPB_UNALIGNED_READS_OK */
7210 
/* -----------------------------------------------------------------------------
 * MurmurHashAligned2, by Austin Appleby
 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
 * on certain platforms.
 * Performance will be lower than MurmurHash2 */

/* One mixing round: scrambles word k and folds it into h.  Uses the local
 * constant r of the enclosing function. */
#define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)

/* Hashes the |len| bytes at |key|, starting from |seed|.  When |key| is not
 * 4-byte aligned (and there are at least 4 bytes), the leading bytes are
 * gathered one at a time and every following word is read from an aligned
 * address and stitched together with the bytes carried over from the
 * previous read. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = seed ^ len;
  uint8_t align = (uintptr_t)data & 3;

  if (align != 0 && len >= 4) {
    uint32_t carry = 0;  /* Bytes already read but not yet mixed. */
    uint32_t word = 0;   /* Most recent aligned load / gathered tail. */
    int32_t shl;
    int32_t shr;

    /* Pre-load the bytes in front of the first aligned address. */
    switch (align) {
      case 1: carry |= data[2] << 16;  /* fallthrough */
      case 2: carry |= data[1] << 8;   /* fallthrough */
      case 3: carry |= data[0];
    }

    carry <<= (8 * align);

    data += 4 - align;
    len -= 4 - align;

    shl = 8 * (4 - align);
    shr = 8 * align;

    /* Mix: each round combines the carried bytes with the low bytes of the
     * next aligned word. */
    while (len >= 4) {
      uint32_t k;

      word = *(uint32_t *)data;
      carry = (carry >> shr) | (word << shl);

      k = carry;

      MIX(h,k,m);

      carry = word;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in the carry. */
    word = 0;

    if (len >= align) {
      /* Enough bytes remain to complete one more full word. */
      uint32_t k;

      switch (align) {
        case 3: word |= data[2] << 16;  /* fallthrough */
        case 2: word |= data[1] << 8;   /* fallthrough */
        case 1: word |= data[0];
      }

      k = (carry >> shr) | (word << shl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */
      switch (len) {
        case 3: h ^= data[2] << 16;  /* fallthrough */
        case 2: h ^= data[1] << 8;   /* fallthrough */
        case 1: h ^= data[0]; h *= m;
      }
    } else {
      /* The carry plus the remaining bytes form the (partial) tail. */
      switch (len) {
        case 3: word |= data[2] << 16;  /* fallthrough */
        case 2: word |= data[1] << 8;   /* fallthrough */
        case 1: word |= data[0];        /* fallthrough */
        case 0: h ^= (carry >> shr) | (word << shl); h *= m;
      }
    }
  } else {
    /* Either the input is already aligned or it is shorter than one word. */
    while (len >= 4) {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail bytes */
    switch (len) {
      case 3: h ^= data[2] << 16;  /* fallthrough */
      case 2: h ^= data[1] << 8;   /* fallthrough */
      case 1: h ^= data[0]; h *= m;
    }
  }

  /* Final avalanche, shared by both paths. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
#undef MIX
7332 
7333 #endif /* UPB_UNALIGNED_READS_OK */
7334 
7335 #include <errno.h>
7336 #include <stdarg.h>
7337 #include <stddef.h>
7338 #include <stdint.h>
7339 #include <stdio.h>
7340 #include <stdlib.h>
7341 #include <string.h>
7342 
/* Error callback that prints the status message, followed by a newline, to
 * stderr.  |closure| is ignored.  Always returns false. */
bool upb_dumptostderr(void *closure, const upb_status* status) {
  const char *text;

  UPB_UNUSED(closure);
  text = upb_status_errmsg(status);
  fprintf(stderr, "%s\n", text);
  return false;
}
7348 
7349 /* Guarantee null-termination and provide ellipsis truncation.
7350  * It may be tempting to "optimize" this by initializing these final
7351  * four bytes up-front and then being careful never to overwrite them,
7352  * this is safer and simpler. */
nullz(upb_status * status)7353 static void nullz(upb_status *status) {
7354   const char *ellipsis = "...";
7355   size_t len = strlen(ellipsis);
7356   UPB_ASSERT(sizeof(status->msg) > len);
7357   memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
7358 }
7359 
7360 
7361 /* upb_upberr *****************************************************************/
7362 
/* The error space that upb_upberr_setoom() attaches to a status. */
upb_errorspace upb_upberr = {"upb error"};
7364 
/* Marks |status| as failed with the message "Out of memory" in the upb_upberr
 * error space.  |status| must not be NULL: it is dereferenced before the
 * NULL-tolerant upb_status_seterrmsg() is reached. */
void upb_upberr_setoom(upb_status *status) {
  static const char *const oom_text = "Out of memory";

  status->error_space_ = &upb_upberr;
  upb_status_seterrmsg(status, oom_text);
}
7369 
7370 
7371 /* upb_status *****************************************************************/
7372 
/* Resets |status| to "ok": success flag set, error code 0, empty message.
 * The error space is left as it is.  A NULL |status| is ignored. */
void upb_status_clear(upb_status *status) {
  if (status == NULL) {
    return;
  }
  status->msg[0] = '\0';
  status->code_ = 0;
  status->ok_ = true;
}
7379 
upb_ok(const upb_status * status)7380 bool upb_ok(const upb_status *status) { return status->ok_; }
7381 
upb_status_errspace(const upb_status * status)7382 upb_errorspace *upb_status_errspace(const upb_status *status) {
7383   return status->error_space_;
7384 }
7385 
upb_status_errcode(const upb_status * status)7386 int upb_status_errcode(const upb_status *status) { return status->code_; }
7387 
upb_status_errmsg(const upb_status * status)7388 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
7389 
/* Marks |status| as failed and copies |msg| into its message buffer (at most
 * sizeof(status->msg) bytes), then lets nullz() finish the end of the buffer.
 * A NULL |status| is ignored. */
void upb_status_seterrmsg(upb_status *status, const char *msg) {
  if (status == NULL) {
    return;
  }
  status->ok_ = false;
  strncpy(status->msg, msg, sizeof(status->msg));
  nullz(status);
}
7396 
/* printf-style front end for upb_status_vseterrf(): packages the variadic
 * arguments into a va_list and forwards them. */
void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  upb_status_vseterrf(status, fmt, ap);
  va_end(ap);
}
7403 
/* Marks |status| as failed and formats |fmt| with |args| into its message
 * buffer via _upb_vsnprintf(), then lets nullz() finish the end of the
 * buffer.  A NULL |status| is ignored. */
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
  if (status == NULL) {
    return;
  }
  status->ok_ = false;
  _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
  nullz(status);
}
7410 
/* Overwrites |*to| with a full copy of |*from|.  A NULL destination is
 * ignored; |from| is dereferenced whenever |to| is non-NULL. */
void upb_status_copy(upb_status *to, const upb_status *from) {
  if (to) {
    *to = *from;
  }
}
7415 
7416 
7417 /* upb_alloc ******************************************************************/
7418 
upb_global_allocfunc(upb_alloc * alloc,void * ptr,size_t oldsize,size_t size)7419 static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
7420                                   size_t size) {
7421   UPB_UNUSED(alloc);
7422   UPB_UNUSED(oldsize);
7423   if (size == 0) {
7424     free(ptr);
7425     return NULL;
7426   } else {
7427     return realloc(ptr, size);
7428   }
7429 }
7430 
7431 upb_alloc upb_alloc_global = {&upb_global_allocfunc};
7432 
7433 
7434 /* upb_arena ******************************************************************/
7435 
/* Alignment applied to arena allocations.  Be conservative and choose 16 in
 * case anyone is using SSE. */
static const size_t maxalign = 16;

/* Rounds |size| up to the nearest multiple of maxalign (a multiple stays
 * unchanged, zero stays zero). */
static size_t align_up_max(size_t size) {
  const size_t mask = maxalign - 1;  /* Valid because maxalign is 2^k. */
  return (size + mask) & ~mask;
}
7442 
/* Header stored at the very start of every block of arena memory.  Blocks
 * form a singly linked list headed by the arena's block_head. */
struct mem_block {
  struct mem_block *next;  /* Block that was the list head before this one. */
  size_t size;             /* Size recorded for the block when it was added. */
  size_t used;             /* Offset from the block start of the next free byte. */
  bool owned;              /* Only owned blocks are freed on arena uninit. */
  /* Data follows. */
};

typedef struct mem_block mem_block;
7450 
7451 typedef struct cleanup_ent {
7452   struct cleanup_ent *next;
7453   upb_cleanup_func *cleanup;
7454   void *ud;
7455 } cleanup_ent;
7456 
/* Turns the memory at |ptr| into an arena block and pushes it onto the front
 * of the arena's block list.
 *
 * A mem_block header is written at |ptr|; the block starts out with only the
 * (alignment-rounded) header counted as used.  |size| is recorded as the
 * block size and |owned| says whether upb_arena_uninit() should free it. */
static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size,
                               bool owned) {
  mem_block *hdr = ptr;

  hdr->owned = owned;
  hdr->size = size;
  hdr->used = align_up_max(sizeof(mem_block));

  /* Link in as the new list head. */
  hdr->next = a->block_head;
  a->block_head = hdr;

  /* TODO(haberman): ASAN poison. */
}
7470 
7471 
upb_arena_allocblock(upb_arena * a,size_t size)7472 static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) {
7473   size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block);
7474   mem_block *block = upb_malloc(a->block_alloc, block_size);
7475 
7476   if (!block) {
7477     return NULL;
7478   }
7479 
7480   upb_arena_addblock(a, block, block_size, true);
7481   a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size);
7482 
7483   return block;
7484 }
7485 
/* Allocation callback installed in every arena (see upb_arena_init()).
 *
 *   alloc    the arena itself, viewed through its initial upb_alloc member.
 *   ptr      existing data to carry over, used only when oldsize > 0.
 *   oldsize  number of valid bytes at |ptr|.
 *   size     number of bytes requested; 0 means "free", which is a no-op.
 *
 * Memory is carved from the head block, rounded up to maxalign; a new block
 * is allocated when the head block lacks room.  The old region is never
 * released or reused, so a "realloc" always returns fresh memory.
 *
 * Returns the new region, or NULL when size is 0, when the rounded size does
 * not fit in size_t, or when a new block cannot be obtained. */
static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
                               size_t size) {
  upb_arena *a = (upb_arena*)alloc;  /* upb_alloc is initial member. */
  mem_block *block = a->block_head;
  const size_t requested = size;
  void *ret;

  if (size == 0) {
    return NULL;  /* We are an arena, don't need individual frees. */
  }

  size = align_up_max(size);

  if (size < requested) {
    /* Rounding up wrapped around; without this check a huge request would be
     * "satisfied" with a tiny region. */
    return NULL;
  }

  /* TODO(haberman): special-case if this is a realloc of the last alloc? */

  if (!block || block->size - block->used < size) {
    /* Slow path: have to allocate a new block. */
    block = upb_arena_allocblock(a, size);

    if (!block) {
      return NULL;  /* Out of memory. */
    }
  }

  ret = (char*)block + block->used;
  block->used += size;

  if (oldsize > 0) {
    /* Preserve existing data.  A shrinking realloc must copy no more than the
     * caller asked for, otherwise the copy overruns the new region. */
    memcpy(ret, ptr, UPB_MIN(oldsize, requested));
  }

  /* TODO(haberman): ASAN unpoison. */

  a->bytes_allocated += size;
  return ret;
}
7521 
7522 /* Public Arena API ***********************************************************/
7523 
/* Initializes |a| as an empty arena.
 *
 * The arena starts with no blocks and no cleanup entries, draws its blocks
 * from upb_alloc_global, and sizes its first block at 256 bytes with a cap of
 * 16384 bytes on the block-size growth. */
void upb_arena_init(upb_arena *a) {
  /* Allocator plumbing. */
  a->alloc.func = &upb_arena_doalloc;
  a->block_alloc = &upb_alloc_global;

  /* Empty lists. */
  a->block_head = NULL;
  a->cleanup_head = NULL;

  /* Sizing policy and statistics. */
  a->next_block_size = 256;
  a->max_block_size = 16384;
  a->bytes_allocated = 0;
}
7533 
/* Initializes |a| with an optional caller-supplied first block and an
 * optional block allocator.
 *
 *   mem, size  memory to use as the first block.  It is added as a non-owned
 *              block, so upb_arena_uninit() never frees it.  It is ignored
 *              when |mem| is NULL or when |size| does not exceed the header
 *              space that upb_arena_addblock() reserves.
 *   alloc      allocator for further blocks; NULL keeps the default set by
 *              upb_arena_init().
 *
 * The size check uses align_up_max(sizeof(mem_block)) because that is the
 * amount upb_arena_addblock() marks as used.  Accepting a buffer smaller than
 * that would make "size - used" underflow in the allocation path. */
void upb_arena_init2(upb_arena *a, void *mem, size_t size, upb_alloc *alloc) {
  upb_arena_init(a);

  if (mem && size > align_up_max(sizeof(mem_block))) {
    upb_arena_addblock(a, mem, size, false);
  }

  if (alloc) {
    a->block_alloc = alloc;
  }
}
7545 
/* Tears down |a|: runs every registered cleanup callback, then releases all
 * blocks the arena owns through its block allocator.  Both list heads are
 * reset afterwards, so calling this again is harmless. */
void upb_arena_uninit(upb_arena *a) {
  cleanup_ent *ent = a->cleanup_head;
  mem_block *block = a->block_head;
  mem_block *following;

  for (; ent; ent = ent->next) {
    ent->cleanup(ent->ud);
  }

  /* Blocks go last: the cleanup entries walked above are stored inside this
   * very memory. */
  for (; block; block = following) {
    following = block->next;  /* Read before the block may be freed. */

    if (block->owned) {
      upb_free(a->block_alloc, block);
    }
  }

  /* Protect against multiple-uninit. */
  a->block_head = NULL;
  a->cleanup_head = NULL;
}
7571 
/* Registers |func| to be called with |ud| when the arena is uninitialized.
 *
 * The bookkeeping entry is allocated from the arena itself and pushed onto
 * the front of the cleanup list.  Returns false if that allocation fails,
 * true otherwise. */
bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud) {
  cleanup_ent *node = upb_malloc(&a->alloc, sizeof(cleanup_ent));

  if (node) {
    node->ud = ud;
    node->cleanup = func;
    node->next = a->cleanup_head;
    a->cleanup_head = node;
    return true;
  }

  return false;  /* Out of memory. */
}
7585 
upb_arena_bytesallocated(const upb_arena * a)7586 size_t upb_arena_bytesallocated(const upb_arena *a) {
7587   return a->bytes_allocated;
7588 }
7589 
7590 
7591 /* Standard error functions ***************************************************/
7592 
/* Error callback installed by upb_env_initonly(): ignores both arguments and
 * always returns false. */
static bool default_err(void *ud, const upb_status *status) {
  UPB_UNUSED(status);
  UPB_UNUSED(ud);
  return false;
}
7598 
write_err_to(void * ud,const upb_status * status)7599 static bool write_err_to(void *ud, const upb_status *status) {
7600   upb_status *copy_to = ud;
7601   upb_status_copy(copy_to, status);
7602   return false;
7603 }
7604 
7605 
7606 /* upb_env ********************************************************************/
7607 
/* Initializes only the error-handling state of |e|: default_err becomes the
 * error callback (with no user data) and the ok flag is set.  The embedded
 * arena is not touched. */
void upb_env_initonly(upb_env *e) {
  e->error_ud_ = NULL;
  e->error_func_ = &default_err;
  e->ok_ = true;
}
7613 
/* Fully initializes |e|: first its embedded arena with upb_arena_init(), then
 * its error-handling state with upb_env_initonly(). */
void upb_env_init(upb_env *e) {
  upb_arena *arena = &e->arena_;

  upb_arena_init(arena);
  upb_env_initonly(e);
}
7618 
/* Like upb_env_init(), but sets up the embedded arena with upb_arena_init2(),
 * forwarding the initial memory region |mem| of |n| bytes and the block
 * allocator |alloc| unchanged. */
void upb_env_init2(upb_env *e, void *mem, size_t n, upb_alloc *alloc) {
  upb_arena *arena = &e->arena_;

  upb_arena_init2(arena, mem, n, alloc);
  upb_env_initonly(e);
}
7623 
/* Tears down |e| by uninitializing its embedded arena. */
void upb_env_uninit(upb_env *e) {
  upb_arena *arena = &e->arena_;
  upb_arena_uninit(arena);
}
7627 
/* Installs |func| as the error callback of |e|; |ud| is stored alongside it
 * and handed to |func| on every report. */
void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud) {
  e->error_ud_ = ud;
  e->error_func_ = func;
}
7632 
/* Makes |e| report errors into |s|: write_err_to becomes the error callback
 * with |s| as its user data, so each reported status is copied into |s|. */
void upb_env_reporterrorsto(upb_env *e, upb_status *s) {
  e->error_ud_ = s;
  e->error_func_ = &write_err_to;
}
7637 
/* Reports |status| through the environment: clears the ok flag of |e|, then
 * invokes the installed error callback with its user data and returns
 * whatever the callback returns. */
bool upb_env_reporterror(upb_env *e, const upb_status *status) {
  bool handled;

  e->ok_ = false;
  handled = e->error_func_(e->error_ud_, status);
  return handled;
}
7642 
/* Allocates |size| bytes from the arena embedded in |e| and returns the
 * result of upb_malloc() on that arena's allocator. */
void *upb_env_malloc(upb_env *e, size_t size) {
  void *mem = upb_malloc(&e->arena_.alloc, size);
  return mem;
}
7646 
/* Resizes an allocation through the arena embedded in |e|, forwarding |ptr|,
 * |oldsize| and |size| to upb_realloc() and returning its result. */
void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
  void *mem = upb_realloc(&e->arena_.alloc, ptr, oldsize, size);
  return mem;
}
7650 
/* Hands |ptr| to upb_free() on the allocator of the arena embedded in |e|. */
void upb_env_free(upb_env *e, void *ptr) {
  upb_arena *arena = &e->arena_;
  upb_free(&arena->alloc, ptr);
}
7654 
/* Registers a cleanup callback on the arena embedded in |e|; returns the
 * result of upb_arena_addcleanup(). */
bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
  upb_arena *arena = &e->arena_;
  return upb_arena_addcleanup(arena, func, ud);
}
7658 
upb_env_bytesallocated(const upb_env * e)7659 size_t upb_env_bytesallocated(const upb_env *e) {
7660   return upb_arena_bytesallocated(&e->arena_);
7661 }
7662 /* This file was generated by upbc (the upb compiler) from the input
7663  * file:
7664  *
7665  *     upb/descriptor/descriptor.proto
7666  *
7667  * Do not edit -- your changes will be discarded when the file is
7668  * regenerated. */
7669 
/* Forward declarations of the static descriptor tables.  The tables refer to
 * one another (msgs[] takes addresses inside arrays[], intentries[] and
 * strentries[]; fields[] takes addresses inside msgs[] and enums[]), so each
 * one is declared here before the first initializer that names it. */
7670 static const upb_msgdef msgs[22];
7671 static const upb_fielddef fields[107];
7672 static const upb_enumdef enums[5];
7673 static const upb_tabent strentries[236];
7674 static const upb_tabent intentries[18];
7675 static const upb_tabval arrays[187];
7676 
/* reftables[] is declared only when UPB_DEBUG_REFS is defined.
 * NOTE(review): the initializers below name &reftables[...] unconditionally,
 * so the *_INIT macros presumably drop those arguments in other builds --
 * confirm against the macro definitions. */
7677 #ifdef UPB_DEBUG_REFS
7678 static upb_inttable reftables[268];
7679 #endif
7680 
/* Static message definitions, one per message type of descriptor.proto
 * (22 entries, sorted by fully-qualified name).
 *
 * Each entry is built by UPB_MSGDEF_INIT from the message's full name, two
 * integers, an integer-keyed table (UPB_INTTABLE_INIT, backed by arrays[] and
 * optionally intentries[]), a string-keyed table (UPB_STRTABLE_INIT, backed by
 * strentries[]), a boolean, the syntax and well-known-type tags, and a pair of
 * reftables[] slots.  The exact meaning of each macro argument is defined by
 * the *_INIT macros, which are not visible here.
 *
 * Entry order is significant: fields[] refers to these entries by index
 * (e.g. &msgs[20]). */
7681 static const upb_msgdef msgs[22] = {
7682   UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 41, 8, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[0]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[0], &reftables[1]),
7683   UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 5, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[2], &reftables[3]),
7684   UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ReservedRange", 5, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[14], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[20]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[4], &reftables[5]),
7685   UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 12, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[17], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[24]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[6], &reftables[7]),
7686   UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 9, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[21], 4, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[8], &reftables[9]),
7687   UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 9, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[25], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[32]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[10], &reftables[11]),
7688   UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[29], 2, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[36]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[12], &reftables[13]),
7689   UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 24, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[40]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[14], &reftables[15]),
7690   UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 13, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[42], 11, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[56]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[16], &reftables[17]),
7691   UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 43, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[53], 13, 12), UPB_STRTABLE_INIT(12, 15, UPB_CTYPE_PTR, 4, &strentries[72]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[18], &reftables[19]),
7692   UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 7, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[66], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[88]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[20], &reftables[21]),
7693   UPB_MSGDEF_INIT("google.protobuf.FileOptions", 38, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[68], 42, 17), UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_PTR, 5, &strentries[92]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[22], &reftables[23]),
7694   UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 11, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[110], 8, 4), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[124]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[24], &reftables[25]),
7695   UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 16, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[118], 7, 6), UPB_STRTABLE_INIT(6, 7, UPB_CTYPE_PTR, 3, &strentries[132]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[26], &reftables[27]),
7696   UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 8, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[10], &arrays[125], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[140]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[28], &reftables[29]),
7697   UPB_MSGDEF_INIT("google.protobuf.OneofDescriptorProto", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[126], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[144]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[30], &reftables[31]),
7698   UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 12, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[128], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[148]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[32], &reftables[33]),
7699   UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 8, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[14], &arrays[132], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[152]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[34], &reftables[35]),
7700   UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 7, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[133], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[156]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[36], &reftables[37]),
7701   UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 20, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[135], 7, 5), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[160]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[38], &reftables[39]),
7702   UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 19, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[142], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[168]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[40], &reftables[41]),
7703   UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 7, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[151], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[184]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[42], &reftables[43]),
7704 };
7705 
7706 static const upb_fielddef fields[107] = {
7707   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[20], NULL, 16, 6, {0},&reftables[44], &reftables[45]),
7708   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[4], NULL, 7, 1, {0},&reftables[46], &reftables[47]),
7709   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_enable_arenas", 31, &msgs[11], NULL, 24, 12, {0},&reftables[48], &reftables[49]),
7710   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[11], NULL, 18, 6, {0},&reftables[50], &reftables[51]),
7711   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "client_streaming", 5, &msgs[13], NULL, 14, 4, {0},&reftables[52], &reftables[53]),
7712   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "csharp_namespace", 37, &msgs[11], NULL, 28, 14, {0},&reftables[54], &reftables[55]),
7713   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[8], (const upb_def*)(&enums[2]), 7, 1, {0},&reftables[56], &reftables[57]),
7714   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[7], NULL, 17, 7, {0},&reftables[58], &reftables[59]),
7715   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[9], NULL, 31, 8, {0},&reftables[60], &reftables[61]),
7716   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[8], NULL, 9, 3, {0},&reftables[62], &reftables[63]),
7717   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[14], NULL, 7, 1, {0},&reftables[64], &reftables[65]),
7718   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[12], NULL, 9, 3, {0},&reftables[66], &reftables[67]),
7719   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 23, &msgs[11], NULL, 22, 10, {0},&reftables[68], &reftables[69]),
7720   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 1, &msgs[6], NULL, 7, 1, {0},&reftables[70], &reftables[71]),
7721   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[4], NULL, 8, 2, {0},&reftables[72], &reftables[73]),
7722   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[17], NULL, 7, 1, {0},&reftables[74], &reftables[75]),
7723   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[20], NULL, 12, 4, {0},&reftables[76], &reftables[77]),
7724   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[2], NULL, 4, 1, {0},&reftables[78], &reftables[79]),
7725   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 4, 1, {0},&reftables[80], &reftables[81]),
7726   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[9], (const upb_def*)(&msgs[3]), 14, 1, {0},&reftables[82], &reftables[83]),
7727   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[3]), 19, 2, {0},&reftables[84], &reftables[85]),
7728   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[7], NULL, 8, 2, {0},&reftables[86], &reftables[87]),
7729   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[7]), 25, 4, {0},&reftables[88], &reftables[89]),
7730   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[9], (const upb_def*)(&msgs[7]), 20, 3, {0},&reftables[90], &reftables[91]),
7731   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 22, 3, {0},&reftables[92], &reftables[93]),
7732   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[7]), 13, 0, {0},&reftables[94], &reftables[95]),
7733   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[10], (const upb_def*)(&msgs[9]), 6, 0, {0},&reftables[96], &reftables[97]),
7734   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[11], NULL, 15, 5, {0},&reftables[98], &reftables[99]),
7735   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[20], NULL, 7, 1, {0},&reftables[100], &reftables[101]),
7736   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[13], NULL, 8, 2, {0},&reftables[102], &reftables[103]),
7737   UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[21], NULL, 6, 1, {0},&reftables[104], &reftables[105]),
7738   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[11], NULL, 21, 9, {0},&reftables[106], &reftables[107]),
7739   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[11], NULL, 19, 7, {0},&reftables[108], &reftables[109]),
7740   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[11], NULL, 14, 4, {0},&reftables[110], &reftables[111]),
7741   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[11], NULL, 10, 2, {0},&reftables[112], &reftables[113]),
7742   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[11], NULL, 7, 1, {0},&reftables[114], &reftables[115]),
7743   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_string_check_utf8", 27, &msgs[11], NULL, 23, 11, {0},&reftables[116], &reftables[117]),
7744   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "javanano_use_deprecated_package", 38, &msgs[11], NULL, 31, 15, {0},&reftables[118], &reftables[119]),
7745   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "json_name", 10, &msgs[7], NULL, 21, 9, {0},&reftables[120], &reftables[121]),
7746   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "jstype", 6, &msgs[8], (const upb_def*)(&enums[3]), 11, 5, {0},&reftables[122], &reftables[123]),
7747   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[7], (const upb_def*)(&enums[0]), 12, 4, {0},&reftables[124], &reftables[125]),
7748   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[8], NULL, 10, 4, {0},&reftables[126], &reftables[127]),
7749   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[19], NULL, 9, 2, {0},&reftables[128], &reftables[129]),
7750   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "leading_detached_comments", 6, &msgs[19], NULL, 17, 4, {0},&reftables[130], &reftables[131]),
7751   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[18], (const upb_def*)(&msgs[19]), 6, 0, {0},&reftables[132], &reftables[133]),
7752   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "map_entry", 7, &msgs[12], NULL, 10, 4, {0},&reftables[134], &reftables[135]),
7753   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[12], NULL, 7, 1, {0},&reftables[136], &reftables[137]),
7754   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[9], (const upb_def*)(&msgs[0]), 11, 0, {0},&reftables[138], &reftables[139]),
7755   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[16], (const upb_def*)(&msgs[13]), 7, 0, {0},&reftables[140], &reftables[141]),
7756   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[20], (const upb_def*)(&msgs[21]), 6, 0, {0},&reftables[142], &reftables[143]),
7757   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[5], NULL, 5, 1, {0},&reftables[144], &reftables[145]),
7758   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[9], NULL, 23, 6, {0},&reftables[146], &reftables[147]),
7759   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[3], NULL, 9, 2, {0},&reftables[148], &reftables[149]),
7760   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[16], NULL, 9, 2, {0},&reftables[150], &reftables[151]),
7761   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[15], NULL, 3, 0, {0},&reftables[152], &reftables[153]),
7762   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[13], NULL, 5, 1, {0},&reftables[154], &reftables[155]),
7763   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[7], NULL, 5, 1, {0},&reftables[156], &reftables[157]),
7764   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 33, 8, {0},&reftables[158], &reftables[159]),
7765   UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[21], NULL, 3, 0, {0},&reftables[160], &reftables[161]),
7766   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[20], NULL, 11, 3, {0},&reftables[162], &reftables[163]),
7767   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 16, 1, {0},&reftables[164], &reftables[165]),
7768   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[12], NULL, 8, 2, {0},&reftables[166], &reftables[167]),
7769   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[7], NULL, 11, 3, {0},&reftables[168], &reftables[169]),
7770   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[5], NULL, 8, 2, {0},&reftables[170], &reftables[171]),
7771   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "objc_class_prefix", 36, &msgs[11], NULL, 25, 13, {0},&reftables[172], &reftables[173]),
7772   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "oneof_decl", 8, &msgs[0], (const upb_def*)(&msgs[15]), 29, 6, {0},&reftables[174], &reftables[175]),
7773   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "oneof_index", 9, &msgs[7], NULL, 20, 8, {0},&reftables[176], &reftables[177]),
7774   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[11], (const upb_def*)(&enums[4]), 13, 3, {0},&reftables[178], &reftables[179]),
7775   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[12]), 26, 5, {0},&reftables[180], &reftables[181]),
7776   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[9], (const upb_def*)(&msgs[11]), 21, 4, {0},&reftables[182], &reftables[183]),
7777   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[7], (const upb_def*)(&msgs[8]), 4, 0, {0},&reftables[184], &reftables[185]),
7778   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[13], (const upb_def*)(&msgs[14]), 4, 0, {0},&reftables[186], &reftables[187]),
7779   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[16], (const upb_def*)(&msgs[17]), 8, 1, {0},&reftables[188], &reftables[189]),
7780   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[3], (const upb_def*)(&msgs[4]), 8, 1, {0},&reftables[190], &reftables[191]),
7781   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[5], (const upb_def*)(&msgs[6]), 4, 0, {0},&reftables[192], &reftables[193]),
7782   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[13], NULL, 11, 3, {0},&reftables[194], &reftables[195]),
7783   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[9], NULL, 26, 7, {0},&reftables[196], &reftables[197]),
7784   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[8], NULL, 8, 2, {0},&reftables[198], &reftables[199]),
7785   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[19], NULL, 5, 0, {0},&reftables[200], &reftables[201]),
7786   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "php_class_prefix", 40, &msgs[11], NULL, 32, 16, {0},&reftables[202], &reftables[203]),
7787   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "php_namespace", 41, &msgs[11], NULL, 35, 17, {0},&reftables[204], &reftables[205]),
7788   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[20], NULL, 10, 2, {0},&reftables[206], &reftables[207]),
7789   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[9], NULL, 36, 9, {0},&reftables[208], &reftables[209]),
7790   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[11], NULL, 20, 8, {0},&reftables[210], &reftables[211]),
7791   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "reserved_name", 10, &msgs[0], NULL, 38, 9, {0},&reftables[212], &reftables[213]),
7792   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "reserved_range", 9, &msgs[0], (const upb_def*)(&msgs[2]), 32, 7, {0},&reftables[214], &reftables[215]),
7793   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "server_streaming", 6, &msgs[13], NULL, 15, 5, {0},&reftables[216], &reftables[217]),
7794   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[9], (const upb_def*)(&msgs[16]), 17, 2, {0},&reftables[218], &reftables[219]),
7795   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[9], (const upb_def*)(&msgs[18]), 22, 5, {0},&reftables[220], &reftables[221]),
7796   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[19], NULL, 8, 1, {0},&reftables[222], &reftables[223]),
7797   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[2], NULL, 3, 0, {0},&reftables[224], &reftables[225]),
7798   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 3, 0, {0},&reftables[226], &reftables[227]),
7799   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[20], NULL, 13, 5, {0},&reftables[228], &reftables[229]),
7800   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "syntax", 12, &msgs[9], NULL, 40, 11, {0},&reftables[230], &reftables[231]),
7801   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[19], NULL, 12, 3, {0},&reftables[232], &reftables[233]),
7802   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[7], (const upb_def*)(&enums[1]), 13, 5, {0},&reftables[234], &reftables[235]),
7803   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[7], NULL, 14, 6, {0},&reftables[236], &reftables[237]),
7804   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[12], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[238], &reftables[239]),
7805   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[17], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[240], &reftables[241]),
7806   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[242], &reftables[243]),
7807   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[14], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[244], &reftables[245]),
7808   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[8], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[246], &reftables[247]),
7809   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[6], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[248], &reftables[249]),
7810   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[4], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[250], &reftables[251]),
7811   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[3], (const upb_def*)(&msgs[5]), 7, 0, {0},&reftables[252], &reftables[253]),
7812   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[8], NULL, 12, 6, {0},&reftables[254], &reftables[255]),
7813   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[9], NULL, 39, 10, {0},&reftables[256], &reftables[257]),
7814 };
7815 
/* Statically initialised definitions for five enum types, each identified by
 * its fully-qualified "google.protobuf.*" name.
 *
 * Every initializer wires together two lookup tables:
 *   - UPB_STRTABLE_INIT(...) points at a slice of strentries[]; the entries in
 *     those slices are keyed by enumerator name and carry the enumerator's
 *     integer value (name -> number).
 *   - UPB_INTTABLE_INIT(...) has a NULL entries pointer and points at a slice
 *     of arrays[]; for the slices visible in this file the element at index N
 *     is the name string of the enumerator whose value is N (number -> name).
 *
 * NOTE(review): the numeric arguments look like (count, mask, ctype, log2 of
 * slot count) for the string table and (..., array size, array count) for the
 * int table -- e.g. Label uses 3 names in 4 strentries slots and a 4-element
 * arrays[] slice.  Confirm against the macro definitions before relying on it.
 *
 * The offsets into strentries[], arrays[] and reftables[] are hard-coded, so
 * this table must stay in sync with the layout of those arrays. */
7816 static const upb_enumdef enums[5] = {
7817   UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[188]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[154], 4, 3), 0, &reftables[258], &reftables[259]),
7818   UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[192]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[158], 19, 18), 0, &reftables[260], &reftables[261]),
7819   UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[224]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[177], 3, 3), 0, &reftables[262], &reftables[263]),
7820   UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.JSType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[228]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[180], 3, 3), 0, &reftables[264], &reftables[265]),
7821   UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[232]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[183], 4, 3), 0, &reftables[266], &reftables[267]),
7822 };
7823 
/* Backing storage for the string-keyed lookup tables used by the definitions
 * in this file.  Each element is a {key, value, link} triple:
 *
 *   - key:   UPB_TABKEY_NONE for an unused slot, otherwise UPB_TABKEY_STR
 *            with four one-byte literals followed by the key text.  In every
 *            entry below the first byte equals the length of the key text
 *            (e.g. "\011" == 9 for "extension") and the other three are zero;
 *            presumably a 32-bit little-endian length prefix -- confirm
 *            against the macro definition.
 *   - value: entries 0..187 hold a pointer into fields[] (field name ->
 *            field definition); entries 188..235 hold an integer (enumerator
 *            name -> enumerator value).
 *   - link:  NULL, or the address of another strentries[] element; presumably
 *            the next entry in a collision chain (TODO confirm).
 *
 * Slices of this array are addressed by hard-coded index (see enums[], and
 * the link members below), so entries must not be inserted, removed or
 * reordered by hand. */
7824 static const upb_tabent strentries[236] = {
7825   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
7826   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7827   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "reserved_name"), UPB_TABVALUE_PTR_INIT(&fields[84]), NULL},
7828   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL},
7829   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7830   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7831   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7832   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[25]), &strentries[12]},
7833   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[24]), &strentries[14]},
7834   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7835   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
7836   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7837   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "reserved_range"), UPB_TABVALUE_PTR_INIT(&fields[85]), NULL},
7838   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
7839   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "oneof_decl"), UPB_TABVALUE_PTR_INIT(&fields[65]), NULL},
7840   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), &strentries[13]},
7841   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[91]), NULL},
7842   {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
7843   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7844   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7845   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[90]), NULL},
7846   {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
7847   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7848   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7849   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7850   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[104]), NULL},
7851   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
7852   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[52]), &strentries[26]},
7853   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[103]), NULL},
7854   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
7855   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
7856   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7857   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL},
7858   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7859   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
7860   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[50]), &strentries[34]},
7861   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[102]), NULL},
7862   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
7863   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7864   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7865   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "oneof_index"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
7866   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[40]), NULL},
7867   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7868   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
7869   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7870   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7871   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7872   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7873   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[62]), &strentries[53]},
7874   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7875   {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
7876   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[96]), NULL},
7877   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "json_name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
7878   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[95]), &strentries[50]},
7879   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
7880   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
7881   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
7882   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7883   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[105]), NULL},
7884   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7885   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7886   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7887   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7888   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
7889   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
7890   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7891   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
7892   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7893   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "jstype"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
7894   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[9]), NULL},
7895   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7896   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7897   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[23]), NULL},
7898   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[106]), NULL},
7899   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7900   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
7901   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[87]), NULL},
7902   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7903   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[88]), NULL},
7904   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7905   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7906   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "syntax"), UPB_TABVALUE_PTR_INIT(&fields[93]), NULL},
7907   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
7908   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
7909   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
7910   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[86]},
7911   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL},
7912   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[82]), &strentries[85]},
7913   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7914   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
7915   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7916   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7917   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7918   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7919   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
7920   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "csharp_namespace"), UPB_TABVALUE_PTR_INIT(&fields[5]), &strentries[116]},
7921   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7922   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7923   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7924   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7925   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7926   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7927   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7928   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
7929   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[120]},
7930   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7931   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7932   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
7933   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "php_namespace"), UPB_TABVALUE_PTR_INIT(&fields[80]), &strentries[113]},
7934   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7935   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7936   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7937   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[33]), &strentries[117]},
7938   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
7939   {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[32]), &strentries[118]},
7940   {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[31]), NULL},
7941   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "php_class_prefix"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
7942   {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "javanano_use_deprecated_package"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[123]},
7943   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[83]), NULL},
7944   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[67]), NULL},
7945   {UPB_TABKEY_STR("\026", "\000", "\000", "\000", "java_string_check_utf8"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
7946   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[12]), &strentries[119]},
7947   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "objc_class_prefix"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
7948   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "cc_enable_arenas"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
7949   {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[128]},
7950   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7951   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7952   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7953   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
7954   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[11]), NULL},
7955   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "map_entry"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
7956   {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[61]), NULL},
7957   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7958   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "client_streaming"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
7959   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "server_streaming"), UPB_TABVALUE_PTR_INIT(&fields[86]), NULL},
7960   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
7961   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[29]), NULL},
7962   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7963   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
7964   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
7965   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
7966   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
7967   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7968   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7969   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7970   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7971   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7972   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[54]), NULL},
7973   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7974   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[72]), &strentries[150]},
7975   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
7976   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[149]},
7977   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
7978   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
7979   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7980   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7981   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7982   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7983   {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
7984   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7985   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7986   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7987   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7988   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[89]), &strentries[167]},
7989   {UPB_TABKEY_STR("\031", "\000", "\000", "\000", "leading_detached_comments"), UPB_TABVALUE_PTR_INIT(&fields[43]), &strentries[165]},
7990   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[94]), NULL},
7991   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[42]), &strentries[164]},
7992   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
7993   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
7994   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7995   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7996   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL},
7997   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7998   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7999   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8000   {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
8001   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
8002   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8003   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8004   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8005   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8006   {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[81]), NULL},
8007   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
8008   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[92]), &strentries[182]},
8009   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8010   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8011   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
8012   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
/* [188..191] enumerator names of google.protobuf.FieldDescriptorProto.Label (enums[0]). */
8013   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[190]},
8014   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8015   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
8016   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
/* [192..223] enumerator names of google.protobuf.FieldDescriptorProto.Type (enums[1]). */
8017   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
8018   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8019   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8020   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8021   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8022   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
8023   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[221]},
8024   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
8025   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8026   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
8027   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
8028   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
8029   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8030   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[222]},
8031   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8032   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8033   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[219]},
8034   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8035   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8036   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8037   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8038   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
8039   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
8040   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8041   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[218]},
8042   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8043   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
8044   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
8045   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
8046   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
8047   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
8048   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
/* [224..227] enumerator names of google.protobuf.FieldOptions.CType (enums[2]). */
8049   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8050   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
8051   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[225]},
8052   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
/* [228..231] enumerator names of google.protobuf.FieldOptions.JSType (enums[3]). */
8053   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8054   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NORMAL"), UPB_TABVALUE_INT_INIT(0), NULL},
8055   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NUMBER"), UPB_TABVALUE_INT_INIT(2), NULL},
8056   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_STRING"), UPB_TABVALUE_INT_INIT(1), NULL},
/* [232..235] enumerator names of google.protobuf.FileOptions.OptimizeMode (enums[4]). */
8057   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
8058   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[235]},
8059   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8060   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
8061 };
8062 
/* Backing storage for integer-keyed table entries, in the same
 * {key, value, link} layout as strentries[].
 *
 * The array is nine pairs of slots: each even index is empty and each odd
 * index maps a number (UPB_TABKEY_NUM) to a pointer into fields[].  Only two
 * keys occur:
 *   - 999 -> fields[97]..fields[103], the seven "uninterpreted_option" field
 *            definitions (each initialised with the number 999 in fields[]).
 *   - 33  -> fields[10] and fields[15], which strentries[] registers under
 *            the name "deprecated".
 *
 * NOTE(review): presumably these are the field numbers that do not fit in the
 * dense, index-by-number slices of arrays[], with one two-slot slice per
 * owning message -- confirm against the msgs[] initializers.  All link
 * members are NULL here. */
8063 static const upb_tabent intentries[18] = {
8064   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8065   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[103]), NULL},
8066   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8067   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[102]), NULL},
8068   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8069   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
8070   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8071   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
8072   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8073   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
8074   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8075   {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
8076   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8077   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
8078   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8079   {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
8080   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8081   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
8082 };
8083 
8084 static const upb_tabval arrays[187] = {
8085   UPB_TABVALUE_EMPTY_INIT,
8086   UPB_TABVALUE_PTR_INIT(&fields[57]),
8087   UPB_TABVALUE_PTR_INIT(&fields[25]),
8088   UPB_TABVALUE_PTR_INIT(&fields[60]),
8089   UPB_TABVALUE_PTR_INIT(&fields[20]),
8090   UPB_TABVALUE_PTR_INIT(&fields[24]),
8091   UPB_TABVALUE_PTR_INIT(&fields[22]),
8092   UPB_TABVALUE_PTR_INIT(&fields[68]),
8093   UPB_TABVALUE_PTR_INIT(&fields[65]),
8094   UPB_TABVALUE_PTR_INIT(&fields[85]),
8095   UPB_TABVALUE_PTR_INIT(&fields[84]),
8096   UPB_TABVALUE_EMPTY_INIT,
8097   UPB_TABVALUE_PTR_INIT(&fields[91]),
8098   UPB_TABVALUE_PTR_INIT(&fields[18]),
8099   UPB_TABVALUE_EMPTY_INIT,
8100   UPB_TABVALUE_PTR_INIT(&fields[90]),
8101   UPB_TABVALUE_PTR_INIT(&fields[17]),
8102   UPB_TABVALUE_EMPTY_INIT,
8103   UPB_TABVALUE_PTR_INIT(&fields[52]),
8104   UPB_TABVALUE_PTR_INIT(&fields[104]),
8105   UPB_TABVALUE_PTR_INIT(&fields[73]),
8106   UPB_TABVALUE_EMPTY_INIT,
8107   UPB_TABVALUE_EMPTY_INIT,
8108   UPB_TABVALUE_PTR_INIT(&fields[1]),
8109   UPB_TABVALUE_PTR_INIT(&fields[14]),
8110   UPB_TABVALUE_EMPTY_INIT,
8111   UPB_TABVALUE_PTR_INIT(&fields[50]),
8112   UPB_TABVALUE_PTR_INIT(&fields[63]),
8113   UPB_TABVALUE_PTR_INIT(&fields[74]),
8114   UPB_TABVALUE_EMPTY_INIT,
8115   UPB_TABVALUE_PTR_INIT(&fields[13]),
8116   UPB_TABVALUE_EMPTY_INIT,
8117   UPB_TABVALUE_PTR_INIT(&fields[56]),
8118   UPB_TABVALUE_PTR_INIT(&fields[21]),
8119   UPB_TABVALUE_PTR_INIT(&fields[62]),
8120   UPB_TABVALUE_PTR_INIT(&fields[40]),
8121   UPB_TABVALUE_PTR_INIT(&fields[95]),
8122   UPB_TABVALUE_PTR_INIT(&fields[96]),
8123   UPB_TABVALUE_PTR_INIT(&fields[7]),
8124   UPB_TABVALUE_PTR_INIT(&fields[70]),
8125   UPB_TABVALUE_PTR_INIT(&fields[66]),
8126   UPB_TABVALUE_PTR_INIT(&fields[38]),
8127   UPB_TABVALUE_EMPTY_INIT,
8128   UPB_TABVALUE_PTR_INIT(&fields[6]),
8129   UPB_TABVALUE_PTR_INIT(&fields[77]),
8130   UPB_TABVALUE_PTR_INIT(&fields[9]),
8131   UPB_TABVALUE_EMPTY_INIT,
8132   UPB_TABVALUE_PTR_INIT(&fields[41]),
8133   UPB_TABVALUE_PTR_INIT(&fields[39]),
8134   UPB_TABVALUE_EMPTY_INIT,
8135   UPB_TABVALUE_EMPTY_INIT,
8136   UPB_TABVALUE_EMPTY_INIT,
8137   UPB_TABVALUE_PTR_INIT(&fields[105]),
8138   UPB_TABVALUE_EMPTY_INIT,
8139   UPB_TABVALUE_PTR_INIT(&fields[51]),
8140   UPB_TABVALUE_PTR_INIT(&fields[76]),
8141   UPB_TABVALUE_PTR_INIT(&fields[8]),
8142   UPB_TABVALUE_PTR_INIT(&fields[47]),
8143   UPB_TABVALUE_PTR_INIT(&fields[19]),
8144   UPB_TABVALUE_PTR_INIT(&fields[87]),
8145   UPB_TABVALUE_PTR_INIT(&fields[23]),
8146   UPB_TABVALUE_PTR_INIT(&fields[69]),
8147   UPB_TABVALUE_PTR_INIT(&fields[88]),
8148   UPB_TABVALUE_PTR_INIT(&fields[82]),
8149   UPB_TABVALUE_PTR_INIT(&fields[106]),
8150   UPB_TABVALUE_PTR_INIT(&fields[93]),
8151   UPB_TABVALUE_EMPTY_INIT,
8152   UPB_TABVALUE_PTR_INIT(&fields[26]),
8153   UPB_TABVALUE_EMPTY_INIT,
8154   UPB_TABVALUE_PTR_INIT(&fields[35]),
8155   UPB_TABVALUE_EMPTY_INIT,
8156   UPB_TABVALUE_EMPTY_INIT,
8157   UPB_TABVALUE_EMPTY_INIT,
8158   UPB_TABVALUE_EMPTY_INIT,
8159   UPB_TABVALUE_EMPTY_INIT,
8160   UPB_TABVALUE_EMPTY_INIT,
8161   UPB_TABVALUE_PTR_INIT(&fields[34]),
8162   UPB_TABVALUE_PTR_INIT(&fields[67]),
8163   UPB_TABVALUE_PTR_INIT(&fields[33]),
8164   UPB_TABVALUE_PTR_INIT(&fields[27]),
8165   UPB_TABVALUE_EMPTY_INIT,
8166   UPB_TABVALUE_EMPTY_INIT,
8167   UPB_TABVALUE_EMPTY_INIT,
8168   UPB_TABVALUE_EMPTY_INIT,
8169   UPB_TABVALUE_PTR_INIT(&fields[3]),
8170   UPB_TABVALUE_PTR_INIT(&fields[32]),
8171   UPB_TABVALUE_PTR_INIT(&fields[83]),
8172   UPB_TABVALUE_EMPTY_INIT,
8173   UPB_TABVALUE_PTR_INIT(&fields[31]),
8174   UPB_TABVALUE_EMPTY_INIT,
8175   UPB_TABVALUE_EMPTY_INIT,
8176   UPB_TABVALUE_PTR_INIT(&fields[12]),
8177   UPB_TABVALUE_EMPTY_INIT,
8178   UPB_TABVALUE_EMPTY_INIT,
8179   UPB_TABVALUE_EMPTY_INIT,
8180   UPB_TABVALUE_PTR_INIT(&fields[36]),
8181   UPB_TABVALUE_EMPTY_INIT,
8182   UPB_TABVALUE_EMPTY_INIT,
8183   UPB_TABVALUE_EMPTY_INIT,
8184   UPB_TABVALUE_PTR_INIT(&fields[2]),
8185   UPB_TABVALUE_EMPTY_INIT,
8186   UPB_TABVALUE_EMPTY_INIT,
8187   UPB_TABVALUE_EMPTY_INIT,
8188   UPB_TABVALUE_EMPTY_INIT,
8189   UPB_TABVALUE_PTR_INIT(&fields[64]),
8190   UPB_TABVALUE_PTR_INIT(&fields[5]),
8191   UPB_TABVALUE_PTR_INIT(&fields[37]),
8192   UPB_TABVALUE_EMPTY_INIT,
8193   UPB_TABVALUE_PTR_INIT(&fields[79]),
8194   UPB_TABVALUE_PTR_INIT(&fields[80]),
8195   UPB_TABVALUE_EMPTY_INIT,
8196   UPB_TABVALUE_PTR_INIT(&fields[46]),
8197   UPB_TABVALUE_PTR_INIT(&fields[61]),
8198   UPB_TABVALUE_PTR_INIT(&fields[11]),
8199   UPB_TABVALUE_EMPTY_INIT,
8200   UPB_TABVALUE_EMPTY_INIT,
8201   UPB_TABVALUE_EMPTY_INIT,
8202   UPB_TABVALUE_PTR_INIT(&fields[45]),
8203   UPB_TABVALUE_EMPTY_INIT,
8204   UPB_TABVALUE_PTR_INIT(&fields[55]),
8205   UPB_TABVALUE_PTR_INIT(&fields[29]),
8206   UPB_TABVALUE_PTR_INIT(&fields[75]),
8207   UPB_TABVALUE_PTR_INIT(&fields[71]),
8208   UPB_TABVALUE_PTR_INIT(&fields[4]),
8209   UPB_TABVALUE_PTR_INIT(&fields[86]),
8210   UPB_TABVALUE_EMPTY_INIT,
8211   UPB_TABVALUE_EMPTY_INIT,
8212   UPB_TABVALUE_PTR_INIT(&fields[54]),
8213   UPB_TABVALUE_EMPTY_INIT,
8214   UPB_TABVALUE_PTR_INIT(&fields[53]),
8215   UPB_TABVALUE_PTR_INIT(&fields[48]),
8216   UPB_TABVALUE_PTR_INIT(&fields[72]),
8217   UPB_TABVALUE_EMPTY_INIT,
8218   UPB_TABVALUE_EMPTY_INIT,
8219   UPB_TABVALUE_PTR_INIT(&fields[44]),
8220   UPB_TABVALUE_EMPTY_INIT,
8221   UPB_TABVALUE_PTR_INIT(&fields[78]),
8222   UPB_TABVALUE_PTR_INIT(&fields[89]),
8223   UPB_TABVALUE_PTR_INIT(&fields[42]),
8224   UPB_TABVALUE_PTR_INIT(&fields[94]),
8225   UPB_TABVALUE_EMPTY_INIT,
8226   UPB_TABVALUE_PTR_INIT(&fields[43]),
8227   UPB_TABVALUE_EMPTY_INIT,
8228   UPB_TABVALUE_EMPTY_INIT,
8229   UPB_TABVALUE_PTR_INIT(&fields[49]),
8230   UPB_TABVALUE_PTR_INIT(&fields[28]),
8231   UPB_TABVALUE_PTR_INIT(&fields[81]),
8232   UPB_TABVALUE_PTR_INIT(&fields[59]),
8233   UPB_TABVALUE_PTR_INIT(&fields[16]),
8234   UPB_TABVALUE_PTR_INIT(&fields[92]),
8235   UPB_TABVALUE_PTR_INIT(&fields[0]),
8236   UPB_TABVALUE_EMPTY_INIT,
8237   UPB_TABVALUE_PTR_INIT(&fields[58]),
8238   UPB_TABVALUE_PTR_INIT(&fields[30]),
8239   UPB_TABVALUE_EMPTY_INIT,
8240   UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
8241   UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
8242   UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
8243   UPB_TABVALUE_EMPTY_INIT,
8244   UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
8245   UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
8246   UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
8247   UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
8248   UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
8249   UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
8250   UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
8251   UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
8252   UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
8253   UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
8254   UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
8255   UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
8256   UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
8257   UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
8258   UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
8259   UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
8260   UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
8261   UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
8262   UPB_TABVALUE_PTR_INIT("STRING"),
8263   UPB_TABVALUE_PTR_INIT("CORD"),
8264   UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
8265   UPB_TABVALUE_PTR_INIT("JS_NORMAL"),
8266   UPB_TABVALUE_PTR_INIT("JS_STRING"),
8267   UPB_TABVALUE_PTR_INIT("JS_NUMBER"),
8268   UPB_TABVALUE_EMPTY_INIT,
8269   UPB_TABVALUE_PTR_INIT("SPEED"),
8270   UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
8271   UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
8272 };
8273 
#ifdef UPB_DEBUG_REFS
/* Array of 268 empty pointer-valued inttables, compiled in only when
 * UPB_DEBUG_REFS is defined.  Every slot uses the same initializer, so the
 * list is assembled from repetition helpers: 4 * 64 + 3 * 4 == 268. */
#define UPB_REFTABLE_X1 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR)
#define UPB_REFTABLE_X4 \
  UPB_REFTABLE_X1, UPB_REFTABLE_X1, UPB_REFTABLE_X1, UPB_REFTABLE_X1
#define UPB_REFTABLE_X16 \
  UPB_REFTABLE_X4, UPB_REFTABLE_X4, UPB_REFTABLE_X4, UPB_REFTABLE_X4
#define UPB_REFTABLE_X64 \
  UPB_REFTABLE_X16, UPB_REFTABLE_X16, UPB_REFTABLE_X16, UPB_REFTABLE_X16

static upb_inttable reftables[268] = {
  UPB_REFTABLE_X64, UPB_REFTABLE_X64, UPB_REFTABLE_X64, UPB_REFTABLE_X64,
  UPB_REFTABLE_X4, UPB_REFTABLE_X4, UPB_REFTABLE_X4,
};

#undef UPB_REFTABLE_X64
#undef UPB_REFTABLE_X16
#undef UPB_REFTABLE_X4
#undef UPB_REFTABLE_X1
#endif
8546 
/* Takes a ref on msgdef "m" on behalf of "owner" and hands "m" back, so the
 * call can be used directly as a return expression. */
static const upb_msgdef *refm(const upb_msgdef *m, const void *owner)
{
  upb_msgdef_ref(m, owner);

  return m;
}
8551 
/* Takes a ref on enumdef "e" on behalf of "owner" and hands "e" back, so the
 * call can be used directly as a return expression. */
static const upb_enumdef *refe(const upb_enumdef *e, const void *owner)
{
  upb_enumdef_ref(e, owner);

  return e;
}
8556 
8557 /* Public API. */
upbdefs_google_protobuf_DescriptorProto_get(const void * owner)8558 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_get(const void *owner) { return refm(&msgs[0], owner); }
upbdefs_google_protobuf_DescriptorProto_ExtensionRange_get(const void * owner)8559 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ExtensionRange_get(const void *owner) { return refm(&msgs[1], owner); }
upbdefs_google_protobuf_DescriptorProto_ReservedRange_get(const void * owner)8560 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ReservedRange_get(const void *owner) { return refm(&msgs[2], owner); }
upbdefs_google_protobuf_EnumDescriptorProto_get(const void * owner)8561 const upb_msgdef *upbdefs_google_protobuf_EnumDescriptorProto_get(const void *owner) { return refm(&msgs[3], owner); }
upbdefs_google_protobuf_EnumOptions_get(const void * owner)8562 const upb_msgdef *upbdefs_google_protobuf_EnumOptions_get(const void *owner) { return refm(&msgs[4], owner); }
upbdefs_google_protobuf_EnumValueDescriptorProto_get(const void * owner)8563 const upb_msgdef *upbdefs_google_protobuf_EnumValueDescriptorProto_get(const void *owner) { return refm(&msgs[5], owner); }
upbdefs_google_protobuf_EnumValueOptions_get(const void * owner)8564 const upb_msgdef *upbdefs_google_protobuf_EnumValueOptions_get(const void *owner) { return refm(&msgs[6], owner); }
upbdefs_google_protobuf_FieldDescriptorProto_get(const void * owner)8565 const upb_msgdef *upbdefs_google_protobuf_FieldDescriptorProto_get(const void *owner) { return refm(&msgs[7], owner); }
upbdefs_google_protobuf_FieldOptions_get(const void * owner)8566 const upb_msgdef *upbdefs_google_protobuf_FieldOptions_get(const void *owner) { return refm(&msgs[8], owner); }
upbdefs_google_protobuf_FileDescriptorProto_get(const void * owner)8567 const upb_msgdef *upbdefs_google_protobuf_FileDescriptorProto_get(const void *owner) { return refm(&msgs[9], owner); }
upbdefs_google_protobuf_FileDescriptorSet_get(const void * owner)8568 const upb_msgdef *upbdefs_google_protobuf_FileDescriptorSet_get(const void *owner) { return refm(&msgs[10], owner); }
upbdefs_google_protobuf_FileOptions_get(const void * owner)8569 const upb_msgdef *upbdefs_google_protobuf_FileOptions_get(const void *owner) { return refm(&msgs[11], owner); }
upbdefs_google_protobuf_MessageOptions_get(const void * owner)8570 const upb_msgdef *upbdefs_google_protobuf_MessageOptions_get(const void *owner) { return refm(&msgs[12], owner); }
upbdefs_google_protobuf_MethodDescriptorProto_get(const void * owner)8571 const upb_msgdef *upbdefs_google_protobuf_MethodDescriptorProto_get(const void *owner) { return refm(&msgs[13], owner); }
upbdefs_google_protobuf_MethodOptions_get(const void * owner)8572 const upb_msgdef *upbdefs_google_protobuf_MethodOptions_get(const void *owner) { return refm(&msgs[14], owner); }
upbdefs_google_protobuf_OneofDescriptorProto_get(const void * owner)8573 const upb_msgdef *upbdefs_google_protobuf_OneofDescriptorProto_get(const void *owner) { return refm(&msgs[15], owner); }
upbdefs_google_protobuf_ServiceDescriptorProto_get(const void * owner)8574 const upb_msgdef *upbdefs_google_protobuf_ServiceDescriptorProto_get(const void *owner) { return refm(&msgs[16], owner); }
upbdefs_google_protobuf_ServiceOptions_get(const void * owner)8575 const upb_msgdef *upbdefs_google_protobuf_ServiceOptions_get(const void *owner) { return refm(&msgs[17], owner); }
upbdefs_google_protobuf_SourceCodeInfo_get(const void * owner)8576 const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_get(const void *owner) { return refm(&msgs[18], owner); }
upbdefs_google_protobuf_SourceCodeInfo_Location_get(const void * owner)8577 const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_Location_get(const void *owner) { return refm(&msgs[19], owner); }
upbdefs_google_protobuf_UninterpretedOption_get(const void * owner)8578 const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_get(const void *owner) { return refm(&msgs[20], owner); }
upbdefs_google_protobuf_UninterpretedOption_NamePart_get(const void * owner)8579 const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_NamePart_get(const void *owner) { return refm(&msgs[21], owner); }
8580 
upbdefs_google_protobuf_FieldDescriptorProto_Label_get(const void * owner)8581 const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Label_get(const void *owner) { return refe(&enums[0], owner); }
upbdefs_google_protobuf_FieldDescriptorProto_Type_get(const void * owner)8582 const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Type_get(const void *owner) { return refe(&enums[1], owner); }
upbdefs_google_protobuf_FieldOptions_CType_get(const void * owner)8583 const upb_enumdef *upbdefs_google_protobuf_FieldOptions_CType_get(const void *owner) { return refe(&enums[2], owner); }
upbdefs_google_protobuf_FieldOptions_JSType_get(const void * owner)8584 const upb_enumdef *upbdefs_google_protobuf_FieldOptions_JSType_get(const void *owner) { return refe(&enums[3], owner); }
upbdefs_google_protobuf_FileOptions_OptimizeMode_get(const void * owner)8585 const upb_enumdef *upbdefs_google_protobuf_FileOptions_OptimizeMode_get(const void *owner) { return refe(&enums[4], owner); }
8586 /*
8587 ** XXX: The routines in this file that consume a string do not currently
8588 ** support having the string span buffers.  In the future, as upb_sink and
8589 ** its buffering/sharing functionality evolve there should be an easy and
8590 ** idiomatic way of correctly handling this case.  For now, we accept this
8591 ** limitation since we currently only parse descriptors from single strings.
8592 */
8593 
8594 
8595 #include <errno.h>
8596 #include <stdlib.h>
8597 #include <string.h>
8598 
/* Returns true iff the NUL-terminated string "str" has exactly "n" bytes and
 * those bytes equal the first "n" bytes of "buf" (which need not be
 * NUL-terminated). */
static bool upb_streq(const char *str, const char *buf, size_t n) {
  size_t str_len = strlen(str);

  if (str_len != n) {
    return false;
  }
  return memcmp(str, buf, n) == 0;
}
8603 
/* One entry of the reader's scope stack.
 *
 * The reader tracks every message scope it is currently inside, plus the
 * top-level file scope, so that the definitions found inside can be given
 * fully-qualified names once the scope closes. */
typedef struct {
  /* Bare (unqualified) name of the message or package for this scope;
   * NULL until one has been set. */
  char *name;

  /* Index of the first def that belongs to this scope.  For a message scope
   * the msgdef itself sits at start-1. */
  int start;

  /* Number of entries that were in the reader's oneof table when this scope
   * was opened; oneofs of this scope live at or above this index. */
  uint32_t oneof_start;

  /* Set to 0 when the scope is opened. */
  uint32_t oneof_index;
} upb_descreader_frame;
8616 
8617 /* The maximum number of nested declarations that are allowed, ie.
8618  * message Foo {
8619  *   message Bar {
8620  *     message Baz {
8621  *     }
8622  *   }
8623  * }
8624  *
8625  * This is a resource limit that affects how big our runtime stack can grow.
8626  * TODO: make this a runtime-settable property of the Reader instance. */
8627 #define UPB_MAX_MESSAGE_NESTING 64
8628 
8629 struct upb_descreader {
8630   upb_sink sink;
8631   upb_inttable files;
8632   upb_strtable files_by_name;
8633   upb_filedef *file;  /* The last file in files. */
8634   upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
8635   int stack_len;
8636   upb_inttable oneofs;
8637 
8638   uint32_t number;
8639   char *name;
8640   bool saw_number;
8641   bool saw_name;
8642 
8643   char *default_string;
8644 
8645   upb_fielddef *f;
8646 };
8647 
/* Returns a newly allocated, NUL-terminated copy of the first "n" bytes of
 * "buf", or NULL if allocation fails.  The result comes from upb_gmalloc(). */
static char *upb_gstrndup(const char *buf, size_t n) {
  char *copy = upb_gmalloc(n + 1);

  if (copy) {
    memcpy(copy, buf, n);
    copy[n] = '\0';
  }
  return copy;
}
8655 
/* Builds "<base>.<name>" in a newly allocated string, or just a copy of
 * "name" when "base" is NULL or empty:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz") -> "Baz"
 * Returns NULL if allocation fails.  The caller owns the returned string. */
static char *upb_join(const char *base, const char *name) {
  size_t base_len;
  size_t name_len;
  char *joined;

  if (!base || base[0] == '\0') {
    return upb_gstrdup(name);
  }

  base_len = strlen(base);
  name_len = strlen(name);

  /* Room for base, the '.' separator, name, and the terminating NUL. */
  joined = upb_gmalloc(base_len + name_len + 2);
  if (!joined) {
    return NULL;
  }

  memcpy(joined, base, base_len);
  joined[base_len] = '.';
  memcpy(joined + base_len + 1, name, name_len + 1);  /* Includes the NUL. */
  return joined;
}
8676 
8677 /* Qualify the defname for all defs starting with offset "start" with "str". */
upb_descreader_qualify(upb_filedef * f,char * str,int32_t start)8678 static bool upb_descreader_qualify(upb_filedef *f, char *str, int32_t start) {
8679   size_t i;
8680   for (i = start; i < upb_filedef_defcount(f); i++) {
8681     upb_def *def = upb_filedef_mutabledef(f, i);
8682     char *name = upb_join(str, upb_def_fullname(def));
8683     if (!name) {
8684       /* Need better logic here; at this point we've qualified some names but
8685        * not others. */
8686       return false;
8687     }
8688     upb_def_setfullname(def, name, NULL);
8689     upb_gfree(name);
8690   }
8691   return true;
8692 }
8693 
8694 
8695 /* upb_descreader  ************************************************************/
8696 
upb_descreader_top(upb_descreader * r)8697 static upb_msgdef *upb_descreader_top(upb_descreader *r) {
8698   int index;
8699   UPB_ASSERT(r->stack_len > 1);
8700   index = r->stack[r->stack_len-1].start - 1;
8701   UPB_ASSERT(index >= 0);
8702   return upb_downcast_msgdef_mutable(upb_filedef_mutabledef(r->file, index));
8703 }
8704 
upb_descreader_last(upb_descreader * r)8705 static upb_def *upb_descreader_last(upb_descreader *r) {
8706   return upb_filedef_mutabledef(r->file, upb_filedef_defcount(r->file) - 1);
8707 }
8708 
8709 /* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
8710  * entities that have names and can contain sub-definitions. */
upb_descreader_startcontainer(upb_descreader * r)8711 void upb_descreader_startcontainer(upb_descreader *r) {
8712   upb_descreader_frame *f = &r->stack[r->stack_len++];
8713   f->start = upb_filedef_defcount(r->file);
8714   f->oneof_start = upb_inttable_count(&r->oneofs);
8715   f->oneof_index = 0;
8716   f->name = NULL;
8717 }
8718 
/* Closes the innermost scope.
 *
 * Moves every oneof created since the scope was opened onto the scope's
 * message, qualifies the names of all defs in the scope with the scope name,
 * then frees the name and pops the frame.
 *
 * Returns false (leaving the frame on the stack, name intact) if the defs
 * could not be qualified. */
bool upb_descreader_endcontainer(upb_descreader *r) {
  upb_descreader_frame *frame = &r->stack[r->stack_len - 1];

  while (upb_inttable_count(&r->oneofs) > frame->oneof_start) {
    upb_oneofdef *oneof = upb_value_getptr(upb_inttable_pop(&r->oneofs));
    bool added =
        upb_msgdef_addoneof(upb_descreader_top(r), oneof, &r->oneofs, NULL);
    UPB_ASSERT(added);
  }

  if (upb_descreader_qualify(r->file, frame->name, frame->start)) {
    upb_gfree(frame->name);
    frame->name = NULL;
    r->stack_len--;
    return true;
  }

  return false;
}
8737 
/* Sets the name of the innermost scope to "str", freeing any name it had
 * before.  The frame stores the pointer itself, so ownership of "str" passes
 * to the reader. */
void upb_descreader_setscopename(upb_descreader *r, char *str) {
  upb_descreader_frame *frame = &r->stack[r->stack_len - 1];
  char *previous = frame->name;

  frame->name = str;
  upb_gfree(previous);
}
8743 
upb_descreader_getoneof(upb_descreader * r,uint32_t index)8744 static upb_oneofdef *upb_descreader_getoneof(upb_descreader *r,
8745                                              uint32_t index) {
8746   bool found;
8747   upb_value val;
8748   upb_descreader_frame *f = &r->stack[r->stack_len-1];
8749 
8750   /* DescriptorProto messages can be nested, so we will see the nested messages
8751    * between when we see the FieldDescriptorProto and the OneofDescriptorProto.
8752    * We need to preserve the oneofs in between these two things. */
8753   index += f->oneof_start;
8754 
8755   while (upb_inttable_count(&r->oneofs) <= index) {
8756     upb_inttable_push(&r->oneofs, upb_value_ptr(upb_oneofdef_new(&r->oneofs)));
8757   }
8758 
8759   found = upb_inttable_lookup(&r->oneofs, index, &val);
8760   UPB_ASSERT(found);
8761   return upb_value_getptr(val);
8762 }
8763 
8764 /** Handlers for google.protobuf.FileDescriptorSet. ***************************/
8765 
fileset_startfile(void * closure,const void * hd)8766 static void *fileset_startfile(void *closure, const void *hd) {
8767   upb_descreader *r = closure;
8768   UPB_UNUSED(hd);
8769   r->file = upb_filedef_new(&r->files);
8770   upb_inttable_push(&r->files, upb_value_ptr(r->file));
8771   return r;
8772 }
8773 
8774 /** Handlers for google.protobuf.FileDescriptorProto. *************************/
8775 
file_start(void * closure,const void * hd)8776 static bool file_start(void *closure, const void *hd) {
8777   upb_descreader *r = closure;
8778   UPB_UNUSED(hd);
8779   upb_descreader_startcontainer(r);
8780   return true;
8781 }
8782 
/* End-of-message handler for a FileDescriptorProto: closes the file-level
 * scope and returns whether that succeeded.  "status" is not written. */
static bool file_end(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  bool closed;
  UPB_UNUSED(hd);
  UPB_UNUSED(status);

  closed = upb_descreader_endcontainer(reader);
  return closed;
}
8789 
file_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8790 static size_t file_onname(void *closure, const void *hd, const char *buf,
8791                           size_t n, const upb_bufhandle *handle) {
8792   upb_descreader *r = closure;
8793   char *name;
8794   bool ok;
8795   UPB_UNUSED(hd);
8796   UPB_UNUSED(handle);
8797 
8798   name = upb_gstrndup(buf, n);
8799   upb_strtable_insert(&r->files_by_name, name, upb_value_ptr(r->file));
8800   /* XXX: see comment at the top of the file. */
8801   ok = upb_filedef_setname(r->file, name, NULL);
8802   upb_gfree(name);
8803   UPB_ASSERT(ok);
8804   return n;
8805 }
8806 
file_onpackage(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8807 static size_t file_onpackage(void *closure, const void *hd, const char *buf,
8808                              size_t n, const upb_bufhandle *handle) {
8809   upb_descreader *r = closure;
8810   char *package;
8811   bool ok;
8812   UPB_UNUSED(hd);
8813   UPB_UNUSED(handle);
8814 
8815   package = upb_gstrndup(buf, n);
8816   /* XXX: see comment at the top of the file. */
8817   upb_descreader_setscopename(r, package);
8818   ok = upb_filedef_setpackage(r->file, package, NULL);
8819   UPB_ASSERT(ok);
8820   return n;
8821 }
8822 
file_startphpnamespace(void * closure,const void * hd,size_t size_hint)8823 static void *file_startphpnamespace(void *closure, const void *hd,
8824                                     size_t size_hint) {
8825   upb_descreader *r = closure;
8826   bool ok;
8827   UPB_UNUSED(hd);
8828   UPB_UNUSED(size_hint);
8829 
8830   ok = upb_filedef_setphpnamespace(r->file, "", NULL);
8831   UPB_ASSERT(ok);
8832   return closure;
8833 }
8834 
file_onphpnamespace(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8835 static size_t file_onphpnamespace(void *closure, const void *hd,
8836                                   const char *buf, size_t n,
8837                                   const upb_bufhandle *handle) {
8838   upb_descreader *r = closure;
8839   char *php_namespace;
8840   bool ok;
8841   UPB_UNUSED(hd);
8842   UPB_UNUSED(handle);
8843 
8844   php_namespace = upb_gstrndup(buf, n);
8845   ok = upb_filedef_setphpnamespace(r->file, php_namespace, NULL);
8846   upb_gfree(php_namespace);
8847   UPB_ASSERT(ok);
8848   return n;
8849 }
8850 
file_onphpprefix(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8851 static size_t file_onphpprefix(void *closure, const void *hd, const char *buf,
8852                              size_t n, const upb_bufhandle *handle) {
8853   upb_descreader *r = closure;
8854   char *prefix;
8855   bool ok;
8856   UPB_UNUSED(hd);
8857   UPB_UNUSED(handle);
8858 
8859   prefix = upb_gstrndup(buf, n);
8860   ok = upb_filedef_setphpprefix(r->file, prefix, NULL);
8861   upb_gfree(prefix);
8862   UPB_ASSERT(ok);
8863   return n;
8864 }
8865 
file_onsyntax(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8866 static size_t file_onsyntax(void *closure, const void *hd, const char *buf,
8867                             size_t n, const upb_bufhandle *handle) {
8868   upb_descreader *r = closure;
8869   bool ok;
8870   UPB_UNUSED(hd);
8871   UPB_UNUSED(handle);
8872   /* XXX: see comment at the top of the file. */
8873   if (upb_streq("proto2", buf, n)) {
8874     ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO2, NULL);
8875   } else if (upb_streq("proto3", buf, n)) {
8876     ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO3, NULL);
8877   } else {
8878     ok = false;
8879   }
8880 
8881   UPB_ASSERT(ok);
8882   return n;
8883 }
8884 
file_startmsg(void * closure,const void * hd)8885 static void *file_startmsg(void *closure, const void *hd) {
8886   upb_descreader *r = closure;
8887   upb_msgdef *m = upb_msgdef_new(&m);
8888   bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
8889   UPB_UNUSED(hd);
8890   UPB_ASSERT(ok);
8891   return r;
8892 }
8893 
file_startenum(void * closure,const void * hd)8894 static void *file_startenum(void *closure, const void *hd) {
8895   upb_descreader *r = closure;
8896   upb_enumdef *e = upb_enumdef_new(&e);
8897   bool ok = upb_filedef_addenum(r->file, e, &e, NULL);
8898   UPB_UNUSED(hd);
8899   UPB_ASSERT(ok);
8900   return r;
8901 }
8902 
file_startext(void * closure,const void * hd)8903 static void *file_startext(void *closure, const void *hd) {
8904   upb_descreader *r = closure;
8905   r->f = upb_fielddef_new(r);
8906   UPB_UNUSED(hd);
8907   return r;
8908 }
8909 
file_endext(void * closure,const void * hd)8910 static bool file_endext(void *closure, const void *hd) {
8911   /* The current symtab code can't handle extensions, so we just discard
8912    * them for now. */
8913   upb_descreader *r = closure;
8914   upb_fielddef_unref(r->f, r);
8915   UPB_UNUSED(hd);
8916   r->f = NULL;
8917   return true;
8918 }
8919 
file_ondep(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8920 static size_t file_ondep(void *closure, const void *hd, const char *buf,
8921                          size_t n, const upb_bufhandle *handle) {
8922   upb_descreader *r = closure;
8923   upb_value val;
8924   if (upb_strtable_lookup2(&r->files_by_name, buf, n, &val)) {
8925     upb_filedef_adddep(r->file, upb_value_getptr(val));
8926   }
8927   UPB_UNUSED(hd);
8928   UPB_UNUSED(handle);
8929   return n;
8930 }
8931 
8932 /** Handlers for google.protobuf.EnumValueDescriptorProto. *********************/
8933 
enumval_startmsg(void * closure,const void * hd)8934 static bool enumval_startmsg(void *closure, const void *hd) {
8935   upb_descreader *r = closure;
8936   UPB_UNUSED(hd);
8937   r->saw_number = false;
8938   r->saw_name = false;
8939   return true;
8940 }
8941 
enumval_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8942 static size_t enumval_onname(void *closure, const void *hd, const char *buf,
8943                              size_t n, const upb_bufhandle *handle) {
8944   upb_descreader *r = closure;
8945   UPB_UNUSED(hd);
8946   UPB_UNUSED(handle);
8947   /* XXX: see comment at the top of the file. */
8948   upb_gfree(r->name);
8949   r->name = upb_gstrndup(buf, n);
8950   r->saw_name = true;
8951   return n;
8952 }
8953 
enumval_onnumber(void * closure,const void * hd,int32_t val)8954 static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
8955   upb_descreader *r = closure;
8956   UPB_UNUSED(hd);
8957   r->number = val;
8958   r->saw_number = true;
8959   return true;
8960 }
8961 
/* End-of-message handler for EnumValueDescriptorProto.
 *
 * Adds the stashed (name, number) pair to the enum most recently added to the
 * reader.  Fails with an error in `status` if either the name or the number
 * was missing, and propagates a failure of upb_enumdef_addval() instead of
 * reporting success regardless.  The stashed name is released whenever the
 * add was attempted. */
static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_enumdef *e;
  bool ok;
  UPB_UNUSED(hd);

  if(!r->saw_number || !r->saw_name) {
    upb_status_seterrmsg(status, "Enum value missing name or number.");
    return false;
  }
  e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
  ok = upb_enumdef_addval(e, r->name, r->number, status);
  upb_gfree(r->name);
  r->name = NULL;
  return ok;
}
8977 
8978 /** Handlers for google.protobuf.EnumDescriptorProto. *************************/
8979 
/* End-of-message handler for EnumDescriptorProto.  Rejects (with an error in
 * `status`) an enum that ended up without a name or without any values. */
static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  upb_enumdef *enumdef =
      upb_downcast_enumdef_mutable(upb_descreader_last(reader));
  UPB_UNUSED(hd);

  if (upb_def_fullname(upb_descreader_last(reader)) == NULL) {
    upb_status_seterrmsg(status, "Enum had no name.");
    return false;
  }

  if (upb_enumdef_numvals(enumdef) == 0) {
    upb_status_seterrmsg(status, "Enum had no values.");
    return false;
  }

  return true;
}
8996 
enum_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8997 static size_t enum_onname(void *closure, const void *hd, const char *buf,
8998                           size_t n, const upb_bufhandle *handle) {
8999   upb_descreader *r = closure;
9000   char *fullname = upb_gstrndup(buf, n);
9001   UPB_UNUSED(hd);
9002   UPB_UNUSED(handle);
9003   /* XXX: see comment at the top of the file. */
9004   upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
9005   upb_gfree(fullname);
9006   return n;
9007 }
9008 
9009 /** Handlers for google.protobuf.FieldDescriptorProto *************************/
9010 
field_startmsg(void * closure,const void * hd)9011 static bool field_startmsg(void *closure, const void *hd) {
9012   upb_descreader *r = closure;
9013   UPB_UNUSED(hd);
9014   UPB_ASSERT(r->f);
9015   upb_gfree(r->default_string);
9016   r->default_string = NULL;
9017 
9018   /* fielddefs default to packed, but descriptors default to non-packed. */
9019   upb_fielddef_setpacked(r->f, false);
9020   return true;
9021 }
9022 
9023 /* Converts the default value in string "str" into "d".  Passes a ref on str.
9024  * Returns true on success. */
parse_default(char * str,upb_fielddef * f)9025 static bool parse_default(char *str, upb_fielddef *f) {
9026   bool success = true;
9027   char *end;
9028   switch (upb_fielddef_type(f)) {
9029     case UPB_TYPE_INT32: {
9030       long val = strtol(str, &end, 0);
9031       if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
9032         success = false;
9033       else
9034         upb_fielddef_setdefaultint32(f, val);
9035       break;
9036     }
9037     case UPB_TYPE_INT64: {
9038       /* XXX: Need to write our own strtoll, since it's not available in c89. */
9039       long long val = strtol(str, &end, 0);
9040       if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
9041         success = false;
9042       else
9043         upb_fielddef_setdefaultint64(f, val);
9044       break;
9045     }
9046     case UPB_TYPE_UINT32: {
9047       unsigned long val = strtoul(str, &end, 0);
9048       if (val > UINT32_MAX || errno == ERANGE || *end)
9049         success = false;
9050       else
9051         upb_fielddef_setdefaultuint32(f, val);
9052       break;
9053     }
9054     case UPB_TYPE_UINT64: {
9055       /* XXX: Need to write our own strtoull, since it's not available in c89. */
9056       unsigned long long val = strtoul(str, &end, 0);
9057       if (val > UINT64_MAX || errno == ERANGE || *end)
9058         success = false;
9059       else
9060         upb_fielddef_setdefaultuint64(f, val);
9061       break;
9062     }
9063     case UPB_TYPE_DOUBLE: {
9064       double val = strtod(str, &end);
9065       if (errno == ERANGE || *end)
9066         success = false;
9067       else
9068         upb_fielddef_setdefaultdouble(f, val);
9069       break;
9070     }
9071     case UPB_TYPE_FLOAT: {
9072       /* XXX: Need to write our own strtof, since it's not available in c89. */
9073       float val = strtod(str, &end);
9074       if (errno == ERANGE || *end)
9075         success = false;
9076       else
9077         upb_fielddef_setdefaultfloat(f, val);
9078       break;
9079     }
9080     case UPB_TYPE_BOOL: {
9081       if (strcmp(str, "false") == 0)
9082         upb_fielddef_setdefaultbool(f, false);
9083       else if (strcmp(str, "true") == 0)
9084         upb_fielddef_setdefaultbool(f, true);
9085       else
9086         success = false;
9087       break;
9088     }
9089     default: abort();
9090   }
9091   return success;
9092 }
9093 
/* End-of-message handler for FieldDescriptorProto.
 *
 * Now that the field type is known, converts the default value that
 * field_ondefaultval() stashed as a string (if any):
 *   - submessage fields may not have a default (error),
 *   - string and enum fields take the text verbatim,
 *   - everything else goes through parse_default().
 * Returns false with an error in `status` on failure. */
static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  upb_fielddef *field = reader->f;
  UPB_UNUSED(hd);

  /* TODO: verify that all required fields were present. */
  UPB_ASSERT(upb_fielddef_number(field) != 0);
  UPB_ASSERT(upb_fielddef_name(field) != NULL);
  UPB_ASSERT((upb_fielddef_subdefname(field) != NULL) ==
             upb_fielddef_hassubdef(field));

  if (!reader->default_string) {
    return true;  /* No explicit default: nothing to convert. */
  }

  if (upb_fielddef_issubmsg(field)) {
    upb_status_seterrmsg(status, "Submessages cannot have defaults.");
    return false;
  }

  if (upb_fielddef_isstring(field) ||
      upb_fielddef_type(field) == UPB_TYPE_ENUM) {
    upb_fielddef_setdefaultcstr(field, reader->default_string, NULL);
    return true;
  }

  if (!parse_default(reader->default_string, field)) {
    /* We don't worry too much about giving a great error message since the
     * compiler should have ensured this was correct. */
    upb_status_seterrmsg(status, "Error converting default value.");
    return false;
  }

  return true;
}
9122 
field_onlazy(void * closure,const void * hd,bool val)9123 static bool field_onlazy(void *closure, const void *hd, bool val) {
9124   upb_descreader *r = closure;
9125   UPB_UNUSED(hd);
9126 
9127   upb_fielddef_setlazy(r->f, val);
9128   return true;
9129 }
9130 
field_onpacked(void * closure,const void * hd,bool val)9131 static bool field_onpacked(void *closure, const void *hd, bool val) {
9132   upb_descreader *r = closure;
9133   UPB_UNUSED(hd);
9134 
9135   upb_fielddef_setpacked(r->f, val);
9136   return true;
9137 }
9138 
field_ontype(void * closure,const void * hd,int32_t val)9139 static bool field_ontype(void *closure, const void *hd, int32_t val) {
9140   upb_descreader *r = closure;
9141   UPB_UNUSED(hd);
9142 
9143   upb_fielddef_setdescriptortype(r->f, val);
9144   return true;
9145 }
9146 
field_onlabel(void * closure,const void * hd,int32_t val)9147 static bool field_onlabel(void *closure, const void *hd, int32_t val) {
9148   upb_descreader *r = closure;
9149   UPB_UNUSED(hd);
9150 
9151   upb_fielddef_setlabel(r->f, val);
9152   return true;
9153 }
9154 
field_onnumber(void * closure,const void * hd,int32_t val)9155 static bool field_onnumber(void *closure, const void *hd, int32_t val) {
9156   upb_descreader *r = closure;
9157   bool ok;
9158   UPB_UNUSED(hd);
9159 
9160   ok = upb_fielddef_setnumber(r->f, val, NULL);
9161   UPB_ASSERT(ok);
9162   return true;
9163 }
9164 
field_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9165 static size_t field_onname(void *closure, const void *hd, const char *buf,
9166                            size_t n, const upb_bufhandle *handle) {
9167   upb_descreader *r = closure;
9168   char *name = upb_gstrndup(buf, n);
9169   UPB_UNUSED(hd);
9170   UPB_UNUSED(handle);
9171 
9172   /* XXX: see comment at the top of the file. */
9173   upb_fielddef_setname(r->f, name, NULL);
9174   upb_gfree(name);
9175   return n;
9176 }
9177 
field_ontypename(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9178 static size_t field_ontypename(void *closure, const void *hd, const char *buf,
9179                                size_t n, const upb_bufhandle *handle) {
9180   upb_descreader *r = closure;
9181   char *name = upb_gstrndup(buf, n);
9182   UPB_UNUSED(hd);
9183   UPB_UNUSED(handle);
9184 
9185   /* XXX: see comment at the top of the file. */
9186   upb_fielddef_setsubdefname(r->f, name, NULL);
9187   upb_gfree(name);
9188   return n;
9189 }
9190 
field_onextendee(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9191 static size_t field_onextendee(void *closure, const void *hd, const char *buf,
9192                                size_t n, const upb_bufhandle *handle) {
9193   upb_descreader *r = closure;
9194   char *name = upb_gstrndup(buf, n);
9195   UPB_UNUSED(hd);
9196   UPB_UNUSED(handle);
9197 
9198   /* XXX: see comment at the top of the file. */
9199   upb_fielddef_setcontainingtypename(r->f, name, NULL);
9200   upb_gfree(name);
9201   return n;
9202 }
9203 
field_ondefaultval(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9204 static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
9205                                  size_t n, const upb_bufhandle *handle) {
9206   upb_descreader *r = closure;
9207   UPB_UNUSED(hd);
9208   UPB_UNUSED(handle);
9209 
9210   /* Have to convert from string to the correct type, but we might not know the
9211    * type yet, so we save it as a string until the end of the field.
9212    * XXX: see comment at the top of the file. */
9213   upb_gfree(r->default_string);
9214   r->default_string = upb_gstrndup(buf, n);
9215   return n;
9216 }
9217 
field_ononeofindex(void * closure,const void * hd,int32_t index)9218 static bool field_ononeofindex(void *closure, const void *hd, int32_t index) {
9219   upb_descreader *r = closure;
9220   upb_oneofdef *o = upb_descreader_getoneof(r, index);
9221   bool ok = upb_oneofdef_addfield(o, r->f, &r->f, NULL);
9222   UPB_UNUSED(hd);
9223 
9224   UPB_ASSERT(ok);
9225   return true;
9226 }
9227 
9228 /** Handlers for google.protobuf.OneofDescriptorProto. ************************/
9229 
oneof_name(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9230 static size_t oneof_name(void *closure, const void *hd, const char *buf,
9231                          size_t n, const upb_bufhandle *handle) {
9232   upb_descreader *r = closure;
9233   upb_descreader_frame *f = &r->stack[r->stack_len-1];
9234   upb_oneofdef *o = upb_descreader_getoneof(r, f->oneof_index++);
9235   char *name_null_terminated = upb_gstrndup(buf, n);
9236   bool ok = upb_oneofdef_setname(o, name_null_terminated, NULL);
9237   UPB_UNUSED(hd);
9238   UPB_UNUSED(handle);
9239 
9240   UPB_ASSERT(ok);
9241   free(name_null_terminated);
9242   return n;
9243 }
9244 
9245 /** Handlers for google.protobuf.DescriptorProto ******************************/
9246 
msg_start(void * closure,const void * hd)9247 static bool msg_start(void *closure, const void *hd) {
9248   upb_descreader *r = closure;
9249   UPB_UNUSED(hd);
9250 
9251   upb_descreader_startcontainer(r);
9252   return true;
9253 }
9254 
/* End-of-message handler for DescriptorProto.  A message without a name is an
 * error; otherwise the result of closing the container scope is returned. */
static bool msg_end(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  upb_msgdef *msg = upb_descreader_top(reader);
  UPB_UNUSED(hd);

  if (upb_def_fullname(upb_msgdef_upcast_mutable(msg)) == NULL) {
    upb_status_seterrmsg(status, "Encountered message with no name.");
    return false;
  }

  return upb_descreader_endcontainer(reader);
}
9266 
msg_name(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9267 static size_t msg_name(void *closure, const void *hd, const char *buf,
9268                        size_t n, const upb_bufhandle *handle) {
9269   upb_descreader *r = closure;
9270   upb_msgdef *m = upb_descreader_top(r);
9271   /* XXX: see comment at the top of the file. */
9272   char *name = upb_gstrndup(buf, n);
9273   UPB_UNUSED(hd);
9274   UPB_UNUSED(handle);
9275 
9276   upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
9277   upb_descreader_setscopename(r, name);  /* Passes ownership of name. */
9278 
9279   return n;
9280 }
9281 
msg_startmsg(void * closure,const void * hd)9282 static void *msg_startmsg(void *closure, const void *hd) {
9283   upb_descreader *r = closure;
9284   upb_msgdef *m = upb_msgdef_new(&m);
9285   bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
9286   UPB_UNUSED(hd);
9287   UPB_ASSERT(ok);
9288   return r;
9289 }
9290 
msg_startext(void * closure,const void * hd)9291 static void *msg_startext(void *closure, const void *hd) {
9292   upb_descreader *r = closure;
9293   r->f = upb_fielddef_new(r);
9294   UPB_UNUSED(hd);
9295   return r;
9296 }
9297 
msg_endext(void * closure,const void * hd)9298 static bool msg_endext(void *closure, const void *hd) {
9299   /* The current symtab code can't handle extensions, so we just discard
9300    * them for now. */
9301   upb_descreader *r = closure;
9302   upb_fielddef_unref(r->f, r);
9303   UPB_UNUSED(hd);
9304   r->f = NULL;
9305   return true;
9306 }
9307 
msg_startfield(void * closure,const void * hd)9308 static void *msg_startfield(void *closure, const void *hd) {
9309   upb_descreader *r = closure;
9310   r->f = upb_fielddef_new(&r->f);
9311   /* We can't add the new field to the message until its name/number are
9312    * filled in. */
9313   UPB_UNUSED(hd);
9314   return r;
9315 }
9316 
msg_endfield(void * closure,const void * hd)9317 static bool msg_endfield(void *closure, const void *hd) {
9318   upb_descreader *r = closure;
9319   upb_msgdef *m = upb_descreader_top(r);
9320   bool ok;
9321   UPB_UNUSED(hd);
9322 
9323   /* Oneof fields are added to the msgdef through their oneof, so don't need to
9324    * be added here. */
9325   if (upb_fielddef_containingoneof(r->f) == NULL) {
9326     ok = upb_msgdef_addfield(m, r->f, &r->f, NULL);
9327     UPB_ASSERT(ok);
9328   }
9329   r->f = NULL;
9330   return true;
9331 }
9332 
msg_onmapentry(void * closure,const void * hd,bool mapentry)9333 static bool msg_onmapentry(void *closure, const void *hd, bool mapentry) {
9334   upb_descreader *r = closure;
9335   upb_msgdef *m = upb_descreader_top(r);
9336   UPB_UNUSED(hd);
9337 
9338   upb_msgdef_setmapentry(m, mapentry);
9339   r->f = NULL;
9340   return true;
9341 }
9342 
9343 
9344 
9345 /** Code to register handlers *************************************************/
9346 
9347 #define F(msg, field) upbdefs_google_protobuf_ ## msg ## _f_ ## field(m)
9348 
/* Registration callback handed to upb_handlers_newfrozen().
 *
 * Looks at which descriptor.proto message the handlers object `h` is for and
 * installs the matching callbacks from this file; message types not listed
 * here get no handlers.  Finally asserts that every registration succeeded.
 *
 * Note: the local must be called `m`, because the F() macro expands to an
 * expression that references it. */
static void reghandlers(const void *closure, upb_handlers *h) {
  const upb_msgdef *m = upb_handlers_msgdef(h);
  UPB_UNUSED(closure);

  if (upbdefs_google_protobuf_FileDescriptorSet_is(m)) {
    upb_handlers_setstartsubmsg(
        h, F(FileDescriptorSet, file), &fileset_startfile, NULL);

  } else if (upbdefs_google_protobuf_DescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &msg_start, NULL);
    upb_handlers_setendmsg(h, &msg_end, NULL);
    upb_handlers_setstring(h, F(DescriptorProto, name), &msg_name, NULL);
    upb_handlers_setstartsubmsg(
        h, F(DescriptorProto, extension), &msg_startext, NULL);
    upb_handlers_setendsubmsg(
        h, F(DescriptorProto, extension), &msg_endext, NULL);
    upb_handlers_setstartsubmsg(
        h, F(DescriptorProto, nested_type), &msg_startmsg, NULL);
    upb_handlers_setstartsubmsg(
        h, F(DescriptorProto, field), &msg_startfield, NULL);
    upb_handlers_setendsubmsg(
        h, F(DescriptorProto, field), &msg_endfield, NULL);
    /* Nested enums share the file-level enum start handler. */
    upb_handlers_setstartsubmsg(
        h, F(DescriptorProto, enum_type), &file_startenum, NULL);

  } else if (upbdefs_google_protobuf_FileDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &file_start, NULL);
    upb_handlers_setendmsg(h, &file_end, NULL);
    upb_handlers_setstring(
        h, F(FileDescriptorProto, name), &file_onname, NULL);
    upb_handlers_setstring(
        h, F(FileDescriptorProto, package), &file_onpackage, NULL);
    upb_handlers_setstring(
        h, F(FileDescriptorProto, syntax), &file_onsyntax, NULL);
    upb_handlers_setstartsubmsg(
        h, F(FileDescriptorProto, message_type), &file_startmsg, NULL);
    upb_handlers_setstartsubmsg(
        h, F(FileDescriptorProto, enum_type), &file_startenum, NULL);
    upb_handlers_setstartsubmsg(
        h, F(FileDescriptorProto, extension), &file_startext, NULL);
    upb_handlers_setendsubmsg(
        h, F(FileDescriptorProto, extension), &file_endext, NULL);
    upb_handlers_setstring(
        h, F(FileDescriptorProto, dependency), &file_ondep, NULL);

  } else if (upbdefs_google_protobuf_EnumValueDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
    upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
    upb_handlers_setstring(
        h, F(EnumValueDescriptorProto, name), &enumval_onname, NULL);
    upb_handlers_setint32(
        h, F(EnumValueDescriptorProto, number), &enumval_onnumber, NULL);

  } else if (upbdefs_google_protobuf_EnumDescriptorProto_is(m)) {
    upb_handlers_setendmsg(h, &enum_endmsg, NULL);
    upb_handlers_setstring(h, F(EnumDescriptorProto, name), &enum_onname, NULL);

  } else if (upbdefs_google_protobuf_FieldDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &field_startmsg, NULL);
    upb_handlers_setendmsg(h, &field_endmsg, NULL);
    upb_handlers_setint32(
        h, F(FieldDescriptorProto, type), &field_ontype, NULL);
    upb_handlers_setint32(
        h, F(FieldDescriptorProto, label), &field_onlabel, NULL);
    upb_handlers_setint32(
        h, F(FieldDescriptorProto, number), &field_onnumber, NULL);
    upb_handlers_setstring(
        h, F(FieldDescriptorProto, name), &field_onname, NULL);
    upb_handlers_setstring(
        h, F(FieldDescriptorProto, type_name), &field_ontypename, NULL);
    upb_handlers_setstring(
        h, F(FieldDescriptorProto, extendee), &field_onextendee, NULL);
    upb_handlers_setstring(
        h, F(FieldDescriptorProto, default_value), &field_ondefaultval, NULL);
    upb_handlers_setint32(
        h, F(FieldDescriptorProto, oneof_index), &field_ononeofindex, NULL);

  } else if (upbdefs_google_protobuf_OneofDescriptorProto_is(m)) {
    upb_handlers_setstring(h, F(OneofDescriptorProto, name), &oneof_name, NULL);

  } else if (upbdefs_google_protobuf_FieldOptions_is(m)) {
    upb_handlers_setbool(h, F(FieldOptions, lazy), &field_onlazy, NULL);
    upb_handlers_setbool(h, F(FieldOptions, packed), &field_onpacked, NULL);

  } else if (upbdefs_google_protobuf_MessageOptions_is(m)) {
    upb_handlers_setbool(
        h, F(MessageOptions, map_entry), &msg_onmapentry, NULL);

  } else if (upbdefs_google_protobuf_FileOptions_is(m)) {
    upb_handlers_setstring(
        h, F(FileOptions, php_class_prefix), &file_onphpprefix, NULL);
    upb_handlers_setstartstr(
        h, F(FileOptions, php_namespace), &file_startphpnamespace, NULL);
    upb_handlers_setstring(
        h, F(FileOptions, php_namespace), &file_onphpnamespace, NULL);
  }

  UPB_ASSERT(upb_ok(upb_handlers_status(h)));
}
9437 
9438 #undef F
9439 
descreader_cleanup(void * _r)9440 void descreader_cleanup(void *_r) {
9441   upb_descreader *r = _r;
9442   size_t i;
9443 
9444   for (i = 0; i < upb_descreader_filecount(r); i++) {
9445     upb_filedef_unref(upb_descreader_file(r, i), &r->files);
9446   }
9447 
9448   upb_gfree(r->name);
9449   upb_inttable_uninit(&r->files);
9450   upb_strtable_uninit(&r->files_by_name);
9451   upb_inttable_uninit(&r->oneofs);
9452   upb_gfree(r->default_string);
9453   while (r->stack_len > 0) {
9454     upb_descreader_frame *f = &r->stack[--r->stack_len];
9455     upb_gfree(f->name);
9456   }
9457 }
9458 
9459 
9460 /* Public API  ****************************************************************/
9461 
upb_descreader_create(upb_env * e,const upb_handlers * h)9462 upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
9463   upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
9464   if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
9465     return NULL;
9466   }
9467 
9468   upb_inttable_init(&r->files, UPB_CTYPE_PTR);
9469   upb_strtable_init(&r->files_by_name, UPB_CTYPE_PTR);
9470   upb_inttable_init(&r->oneofs, UPB_CTYPE_PTR);
9471   upb_sink_reset(upb_descreader_input(r), h, r);
9472   r->stack_len = 0;
9473   r->name = NULL;
9474   r->default_string = NULL;
9475 
9476   return r;
9477 }
9478 
upb_descreader_filecount(const upb_descreader * r)9479 size_t upb_descreader_filecount(const upb_descreader *r) {
9480   return upb_inttable_count(&r->files);
9481 }
9482 
upb_descreader_file(const upb_descreader * r,size_t i)9483 upb_filedef *upb_descreader_file(const upb_descreader *r, size_t i) {
9484   upb_value v;
9485   if (upb_inttable_lookup(&r->files, i, &v)) {
9486     return upb_value_getptr(v);
9487   } else {
9488     return NULL;
9489   }
9490 }
9491 
upb_descreader_input(upb_descreader * r)9492 upb_sink *upb_descreader_input(upb_descreader *r) {
9493   return &r->sink;
9494 }
9495 
upb_descreader_newhandlers(const void * owner)9496 const upb_handlers *upb_descreader_newhandlers(const void *owner) {
9497   const upb_msgdef *m = upbdefs_google_protobuf_FileDescriptorSet_get(&m);
9498   const upb_handlers *h = upb_handlers_newfrozen(m, owner, reghandlers, NULL);
9499   upb_msgdef_unref(m, &m);
9500   return h;
9501 }
9502 /*
9503 ** protobuf decoder bytecode compiler
9504 **
9505 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
9506 ** according to that specific schema and destination handlers.
9507 **
9508 ** Compiling to bytecode is always the first step.  If we are using the
9509 ** interpreted decoder we leave it as bytecode and interpret that.  If we are
9510 ** using a JIT decoder we use a code generator to turn the bytecode into native
9511 ** code, LLVM IR, etc.
9512 **
9513 ** Bytecode definition is in decoder.int.h.
9514 */
9515 
9516 #include <stdarg.h>
9517 
9518 #ifdef UPB_DUMP_BYTECODE
9519 #include <stdio.h>
9520 #endif
9521 
9522 #define MAXLABEL 5
9523 #define EMPTYLABEL -1
9524 
9525 /* mgroup *********************************************************************/
9526 
/* Refcounted "free" callback for an mgroup: releases the method table, any
 * JIT state (when built with UPB_USE_JIT_X64), the bytecode buffer and
 * finally the group itself. */
static void freegroup(upb_refcounted *r) {
  mgroup *group = (mgroup*)r;

  upb_inttable_uninit(&group->methods);
#ifdef UPB_USE_JIT_X64
  upb_pbdecoder_freejit(group);
#endif
  upb_gfree(group->bytecode);
  upb_gfree(group);
}
9536 
/* Refcounted "visit" callback for an mgroup: reports every decoder method in
 * the group's method table as being referenced by the group. */
static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const mgroup *group = (const mgroup*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &group->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    visit(r, upb_pbdecodermethod_upcast(method), closure);
    upb_inttable_next(&it);
  }
}
9547 
newgroup(const void * owner)9548 mgroup *newgroup(const void *owner) {
9549   mgroup *g = upb_gmalloc(sizeof(*g));
9550   static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
9551   upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
9552   upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
9553   g->bytecode = NULL;
9554   g->bytecode_end = NULL;
9555   return g;
9556 }
9557 
9558 
9559 /* upb_pbdecodermethod ********************************************************/
9560 
/* Refcounted "free" callback for a decoder method: drops the ref on the
 * destination handlers (if any), releases the dispatch table and frees the
 * method. */
static void freemethod(upb_refcounted *r) {
  upb_pbdecodermethod *m = (upb_pbdecodermethod*)r;

  if (m->dest_handlers_ != NULL) {
    upb_handlers_unref(m->dest_handlers_, m);
  }
  upb_inttable_uninit(&m->dispatch);
  upb_gfree(m);
}
9571 
/* Refcounted "visit" callback for a decoder method: its only outgoing
 * reference reported here is the group it belongs to. */
static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
                        void *closure) {
  const upb_pbdecodermethod *method = (const upb_pbdecodermethod*)r;

  visit(r, method->group, closure);
}
9577 
newmethod(const upb_handlers * dest_handlers,mgroup * group)9578 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
9579                                       mgroup *group) {
9580   static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
9581   upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
9582   upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
9583   upb_byteshandler_init(&ret->input_handler_);
9584 
9585   /* The method references the group and vice-versa, in a circular reference. */
9586   upb_ref2(ret, group);
9587   upb_ref2(group, ret);
9588   upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
9589   upb_pbdecodermethod_unref(ret, &ret);
9590 
9591   ret->group = mgroup_upcast_mutable(group);
9592   ret->dest_handlers_ = dest_handlers;
9593   ret->is_native_ = false;  /* If we JIT, it will update this later. */
9594   upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
9595 
9596   if (ret->dest_handlers_) {
9597     upb_handlers_ref(ret->dest_handlers_, ret);
9598   }
9599   return ret;
9600 }
9601 
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)9602 const upb_handlers *upb_pbdecodermethod_desthandlers(
9603     const upb_pbdecodermethod *m) {
9604   return m->dest_handlers_;
9605 }
9606 
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)9607 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
9608     const upb_pbdecodermethod *m) {
9609   return &m->input_handler_;
9610 }
9611 
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)9612 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
9613   return m->is_native_;
9614 }
9615 
upb_pbdecodermethod_new(const upb_pbdecodermethodopts * opts,const void * owner)9616 const upb_pbdecodermethod *upb_pbdecodermethod_new(
9617     const upb_pbdecodermethodopts *opts, const void *owner) {
9618   const upb_pbdecodermethod *ret;
9619   upb_pbcodecache cache;
9620 
9621   upb_pbcodecache_init(&cache);
9622   ret = upb_pbcodecache_getdecodermethod(&cache, opts);
9623   upb_pbdecodermethod_ref(ret, owner);
9624   upb_pbcodecache_uninit(&cache);
9625   return ret;
9626 }
9627 
9628 
9629 /* bytecode compiler **********************************************************/
9630 
/* Data used only at compilation time. */
typedef struct {
  /* Method group being compiled; pcofs() measures offsets from its bytecode
   * buffer. */
  mgroup *group;

  /* Current position inside group->bytecode (see pcofs()). */
  uint32_t *pc;
  /* Per-label bookkeeping for local labels 0..MAXLABEL-1.  Entries start out
   * as EMPTYLABEL; label() interprets a non-empty fwd_labels entry as an
   * offset into group->bytecode. */
  int fwd_labels[MAXLABEL];
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
9642 
newcompiler(mgroup * group,bool lazy)9643 static compiler *newcompiler(mgroup *group, bool lazy) {
9644   compiler *ret = upb_gmalloc(sizeof(*ret));
9645   int i;
9646 
9647   ret->group = group;
9648   ret->lazy = lazy;
9649   for (i = 0; i < MAXLABEL; i++) {
9650     ret->fwd_labels[i] = EMPTYLABEL;
9651     ret->back_labels[i] = EMPTYLABEL;
9652   }
9653   return ret;
9654 }
9655 
/* Releases a compiler obtained from newcompiler().  The group it points at is
 * not touched. */
static void freecompiler(compiler *c) { upb_gfree(c); }
9659 
/* Number of 32-bit bytecode words occupied by one pointer; instruction_len()
 * uses it for the operand of OP_SETDISPATCH. */
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
9661 
9662 /* How many words an instruction is. */
instruction_len(uint32_t instr)9663 static int instruction_len(uint32_t instr) {
9664   switch (getop(instr)) {
9665     case OP_SETDISPATCH: return 1 + ptr_words;
9666     case OP_TAGN: return 3;
9667     case OP_SETBIGGROUPNUM: return 2;
9668     default: return 1;
9669   }
9670 }
9671 
op_has_longofs(int32_t instruction)9672 bool op_has_longofs(int32_t instruction) {
9673   switch (getop(instruction)) {
9674     case OP_CALL:
9675     case OP_BRANCH:
9676     case OP_CHECKDELIM:
9677       return true;
9678     /* The "tag" instructions only have 8 bytes available for the jump target,
9679      * but that is ok because these opcodes only require short jumps. */
9680     case OP_TAG1:
9681     case OP_TAG2:
9682     case OP_TAGN:
9683       return false;
9684     default:
9685       UPB_ASSERT(false);
9686       return false;
9687   }
9688 }
9689 
/* Extracts the signed jump offset stored in "instruction".  Short-offset
 * opcodes keep it in bits 8..15 (sign-extended from 8 bits); long-offset
 * opcodes keep it in everything above the low byte. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
9697 
/* Stores the signed jump offset "ofs" into *instruction, preserving the
 * opcode (and, for short-offset opcodes, every bit outside bits 8..15).
 *
 * The shift is performed on an unsigned value: backward jumps have a negative
 * "ofs", and left-shifting a negative signed integer is undefined behavior
 * in C.  The resulting bit pattern is the usual two's-complement one. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  if (op_has_longofs(*instruction)) {
    *instruction = getop(*instruction) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (*instruction & ~0xff00) | (((uint32_t)ofs & 0xff) << 8);
  }
  UPB_ASSERT(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
9706 
/* Current PC expressed as a word offset from the start of the group's
 * bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return c->pc - base;
}
9708 
9709 /* Defines a local label at the current PC location.  All previous forward
9710  * references are updated to point to this location.  The location is noted
9711  * for any future backward references. */
label(compiler * c,unsigned int label)9712 static void label(compiler *c, unsigned int label) {
9713   int val;
9714   uint32_t *codep;
9715 
9716   UPB_ASSERT(label < MAXLABEL);
9717   val = c->fwd_labels[label];
9718   codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
9719   while (codep) {
9720     int ofs = getofs(*codep);
9721     setofs(codep, c->pc - codep - instruction_len(*codep));
9722     codep = ofs ? codep + ofs : NULL;
9723   }
9724   c->fwd_labels[label] = EMPTYLABEL;
9725   c->back_labels[label] = pcofs(c);
9726 }
9727 
9728 /* Creates a reference to a numbered label; either a forward reference
9729  * (positive arg) or backward reference (negative arg).  For forward references
9730  * the value returned now is actually a "next" pointer into a linked list of all
9731  * instructions that use this label and will be patched later when the label is
9732  * defined with label().
9733  *
9734  * The returned value is the offset that should be written into the instruction.
9735  */
labelref(compiler * c,int label)9736 static int32_t labelref(compiler *c, int label) {
9737   UPB_ASSERT(label < MAXLABEL);
9738   if (label == LABEL_DISPATCH) {
9739     /* No resolving required. */
9740     return 0;
9741   } else if (label < 0) {
9742     /* Backward local label.  Relative to the next instruction. */
9743     uint32_t from = (c->pc + 1) - c->group->bytecode;
9744     return c->back_labels[-label] - from;
9745   } else {
9746     /* Forward local label: prepend to (possibly-empty) linked list. */
9747     int *lptr = &c->fwd_labels[label];
9748     int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
9749     *lptr = pcofs(c);
9750     return ret;
9751   }
9752 }
9753 
/* Appends the word "v" at the current PC and advances the PC by one word.
 * When the PC has reached the end of the buffer, the buffer is first
 * reallocated to twice its size (at least 64 words) and the PC is re-based
 * onto the new buffer. */
static void put32(compiler *c, uint32_t v) {
  mgroup *grp = c->group;

  if (c->pc == grp->bytecode_end) {
    const size_t cur_words = grp->bytecode_end - grp->bytecode;
    const size_t new_words = UPB_MAX(cur_words * 2, 64);
    const int pc_index = pcofs(c);

    /* TODO(haberman): handle OOM. */
    grp->bytecode = upb_grealloc(grp->bytecode, cur_words * sizeof(uint32_t),
                                 new_words * sizeof(uint32_t));
    grp->bytecode_end = grp->bytecode + new_words;
    c->pc = grp->bytecode + pc_index;
  }

  *c->pc = v;
  c->pc++;
}
9768 
/* Appends one instruction for opcode "op" at the current PC.  The variadic
 * arguments that must follow "op" depend on the opcode:
 *
 *   OP_SETDISPATCH                  void*: pointer stored in the next word(s)
 *   OP_PARSE_*, OP_STARTSEQ, OP_ENDSEQ, OP_STARTSUBMSG, OP_ENDSUBMSG,
 *   OP_STARTSTR, OP_STRING, OP_ENDSTR, OP_PUSHTAGDELIM
 *                                   upb_selector_t: stored above the low byte
 *   OP_SETBIGGROUPNUM               int: written as a second word
 *   OP_CALL                         const upb_pbdecodermethod*: callee; the
 *                                   offset is relative to the next instruction
 *   OP_CHECKDELIM, OP_BRANCH        int: label (see labelref())
 *   OP_TAG1, OP_TAG2, OP_TAGN       int: label, then uint64_t: encoded tag
 *   all other handled opcodes       no arguments
 *
 * Passing an opcode that is not handled here is a programming error and trips
 * an assertion; nothing is emitted for it. */
static void putop(compiler *c, int op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, ptr);
      /* On 64-bit targets the upper half goes into an extra word. */
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      /* Callers pass a pointer-to-const; read it back with the same type. */
      const upb_pbdecodermethod *method =
          va_arg(ap, const upb_pbdecodermethod *);
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (tag << 16);
      UPB_ASSERT(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      /* The full 64-bit tag follows as two words, low half first. */
      put32(c, tag);
      put32(c, tag >> 32);
      break;
    }
    default:
      /* Unknown opcode: do not silently emit nothing. */
      UPB_ASSERT(false);
      break;
  }

  va_end(ap);
}
9856 
9857 #if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
9858 
upb_pbdecoder_getopname(unsigned int op)9859 const char *upb_pbdecoder_getopname(unsigned int op) {
9860 #define QUOTE(x) #x
9861 #define EXPAND_AND_QUOTE(x) QUOTE(x)
9862 #define OPNAME(x) OP_##x
9863 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
9864 #define T(x) OP(PARSE_##x)
9865   /* Keep in sync with list in decoder.int.h. */
9866   switch ((opcode)op) {
9867     T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
9868     T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
9869     OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
9870     OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
9871     OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
9872     OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
9873     OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
9874   }
9875   return "<unknown op>";
9876 #undef OP
9877 #undef T
9878 }
9879 
9880 #endif
9881 
9882 #ifdef UPB_DUMP_BYTECODE
9883 
dumpbc(uint32_t * p,uint32_t * end,FILE * f)9884 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
9885 
9886   uint32_t *begin = p;
9887 
9888   while (p < end) {
9889     fprintf(f, "%p  %8tx", p, p - begin);
9890     uint32_t instr = *p++;
9891     uint8_t op = getop(instr);
9892     fprintf(f, " %s", upb_pbdecoder_getopname(op));
9893     switch ((opcode)op) {
9894       case OP_SETDISPATCH: {
9895         const upb_inttable *dispatch;
9896         memcpy(&dispatch, p, sizeof(void*));
9897         p += ptr_words;
9898         const upb_pbdecodermethod *method =
9899             (void *)((char *)dispatch -
9900                      offsetof(upb_pbdecodermethod, dispatch));
9901         fprintf(f, " %s", upb_msgdef_fullname(
9902                               upb_handlers_msgdef(method->dest_handlers_)));
9903         break;
9904       }
9905       case OP_DISPATCH:
9906       case OP_STARTMSG:
9907       case OP_ENDMSG:
9908       case OP_PUSHLENDELIM:
9909       case OP_POP:
9910       case OP_SETDELIM:
9911       case OP_HALT:
9912       case OP_RET:
9913         break;
9914       case OP_PARSE_DOUBLE:
9915       case OP_PARSE_FLOAT:
9916       case OP_PARSE_INT64:
9917       case OP_PARSE_UINT64:
9918       case OP_PARSE_INT32:
9919       case OP_PARSE_FIXED64:
9920       case OP_PARSE_FIXED32:
9921       case OP_PARSE_BOOL:
9922       case OP_PARSE_UINT32:
9923       case OP_PARSE_SFIXED32:
9924       case OP_PARSE_SFIXED64:
9925       case OP_PARSE_SINT32:
9926       case OP_PARSE_SINT64:
9927       case OP_STARTSEQ:
9928       case OP_ENDSEQ:
9929       case OP_STARTSUBMSG:
9930       case OP_ENDSUBMSG:
9931       case OP_STARTSTR:
9932       case OP_STRING:
9933       case OP_ENDSTR:
9934       case OP_PUSHTAGDELIM:
9935         fprintf(f, " %d", instr >> 8);
9936         break;
9937       case OP_SETBIGGROUPNUM:
9938         fprintf(f, " %d", *p++);
9939         break;
9940       case OP_CHECKDELIM:
9941       case OP_CALL:
9942       case OP_BRANCH:
9943         fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
9944         break;
9945       case OP_TAG1:
9946       case OP_TAG2: {
9947         fprintf(f, " tag:0x%x", instr >> 16);
9948         if (getofs(instr)) {
9949           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
9950         }
9951         break;
9952       }
9953       case OP_TAGN: {
9954         uint64_t tag = *p++;
9955         tag |= (uint64_t)*p++ << 32;
9956         fprintf(f, " tag:0x%llx", (long long)tag);
9957         fprintf(f, " n:%d", instr >> 16);
9958         if (getofs(instr)) {
9959           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
9960         }
9961         break;
9962       }
9963     }
9964     fputs("\n", f);
9965   }
9966 }
9967 
9968 #endif
9969 
/* Builds the tag for field "f" with the given wire type (field number shifted
 * left by 3, OR'ed with the wire type) and returns the result of passing it
 * through upb_vencode32(). */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  const uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
  const uint64_t encoded = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  UPB_ASSERT(encoded <= 0xffffffffff);
  return encoded;
}
9977 
/* Emits a tag-check instruction for field "f" / "wire_type" whose jump label
 * is "dest".  The opcode is chosen from upb_value_size() of the encoded tag:
 * OP_TAG1 for 1, OP_TAG2 for 2, OP_TAGN for anything else. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  uint64_t tag = get_encoded_tag(f, wire_type);
  int tag_op;

  switch (upb_value_size(tag)) {
    case 1:
      tag_op = OP_TAG1;
      break;
    case 2:
      tag_op = OP_TAG2;
      break;
    default:
      tag_op = OP_TAGN;
      break;
  }
  putop(c, tag_op, dest, tag);
}
9993 
getsel(const upb_fielddef * f,upb_handlertype_t type)9994 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
9995   upb_selector_t selector;
9996   bool ok = upb_handlers_getselector(f, type, &selector);
9997   UPB_ASSERT(ok);
9998   return selector;
9999 }
10000 
10001 /* Takes an existing, primary dispatch table entry and repacks it with a
10002  * different alternate wire type.  Called when we are inserting a secondary
10003  * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)10004 static uint64_t repack(uint64_t dispatch, int new_wt2) {
10005   uint64_t ofs;
10006   uint8_t wt1;
10007   uint8_t old_wt2;
10008   upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
10009   UPB_ASSERT(old_wt2 == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
10010   return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
10011 }
10012 
10013 /* Marks the current bytecode position as the dispatch target for this message,
10014  * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)10015 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
10016                            const upb_fielddef *f, int wire_type) {
10017   /* Offset is relative to msg base. */
10018   uint64_t ofs = pcofs(c) - method->code_base.ofs;
10019   uint32_t fn = upb_fielddef_number(f);
10020   upb_inttable *d = &method->dispatch;
10021   upb_value v;
10022   if (upb_inttable_remove(d, fn, &v)) {
10023     /* TODO: prioritize based on packed setting in .proto file. */
10024     uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
10025     upb_inttable_insert(d, fn, upb_value_uint64(repacked));
10026     upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
10027   } else {
10028     uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
10029     upb_inttable_insert(d, fn, upb_value_uint64(val));
10030   }
10031 }
10032 
/* Emits the "push" instruction(s) that open a submessage frame for "f".
 * Fields of descriptor type MESSAGE get OP_PUSHLENDELIM.  All others get
 * OP_PUSHTAGDELIM carrying the field number, except that numbers of 1 << 24
 * or more are emitted as OP_PUSHTAGDELIM 0 followed by OP_SETBIGGROUPNUM. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fieldnum;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fieldnum = upb_fielddef_number(f);
  if (fieldnum < 1 << 24) {
    putop(c, OP_PUSHTAGDELIM, fieldnum);
    return;
  }

  /* Number does not fit next to the opcode; emit it as a separate word. */
  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fieldnum);
}
10046 
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)10047 static upb_pbdecodermethod *find_submethod(const compiler *c,
10048                                            const upb_pbdecodermethod *method,
10049                                            const upb_fielddef *f) {
10050   const upb_handlers *sub =
10051       upb_handlers_getsubhandlers(method->dest_handlers_, f);
10052   upb_value v;
10053   return upb_inttable_lookupptr(&c->group->methods, sub, &v)
10054              ? upb_value_getptr(v)
10055              : NULL;
10056 }
10057 
/* Emits "op" with selector "sel", but only when "h" has a handler registered
 * for that selector; otherwise emits nothing. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel)) {
    return;
  }
  putop(c, op, sel);
}
10064 
10065 /* Puts an opcode to call a callback, but only if a callback actually exists for
10066  * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)10067 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
10068                      const upb_fielddef *f, upb_handlertype_t type) {
10069   putsel(c, op, getsel(f, type), h);
10070 }
10071 
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)10072 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
10073   if (!upb_fielddef_lazy(f))
10074     return false;
10075 
10076   return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
10077          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
10078          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
10079 }
10080 
10081 
10082 /* bytecode compiler code generation ******************************************/
10083 
/* Local label numbers used by the code generators below. */
#define LABEL_LOOPSTART 1  /* Start of the loop body of a repeated field. */
#define LABEL_LOOPBREAK 2  /* Exit point of a repeated-field loop. */
#define LABEL_FIELD     3  /* Start of the most recently generated field. */
#define LABEL_ENDMSG    4  /* The OP_ENDMSG sequence of the current message. */
10089 
10090 /* Generates bytecode to parse a single non-lazy message field. */
generate_msgfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)10091 static void generate_msgfield(compiler *c, const upb_fielddef *f,
10092                               upb_pbdecodermethod *method) {
10093   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
10094   const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
10095   int wire_type;
10096 
10097   if (!sub_m) {
10098     /* Don't emit any code for this field at all; it will be parsed as an
10099      * unknown field.
10100      *
10101      * TODO(haberman): we should change this to parse it as a string field
10102      * instead.  It will probably be faster, but more importantly, once we
10103      * start vending unknown fields, a field shouldn't be treated as unknown
10104      * just because it doesn't have subhandlers registered. */
10105     return;
10106   }
10107 
10108   label(c, LABEL_FIELD);
10109 
10110   wire_type =
10111       (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
10112           ? UPB_WIRE_TYPE_DELIMITED
10113           : UPB_WIRE_TYPE_START_GROUP;
10114 
10115   if (upb_fielddef_isseq(f)) {
10116     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10117     putchecktag(c, f, wire_type, LABEL_DISPATCH);
10118    dispatchtarget(c, method, f, wire_type);
10119     putop(c, OP_PUSHTAGDELIM, 0);
10120     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
10121    label(c, LABEL_LOOPSTART);
10122     putpush(c, f);
10123     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
10124     putop(c, OP_CALL, sub_m);
10125     putop(c, OP_POP);
10126     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
10127     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
10128       putop(c, OP_SETDELIM);
10129     }
10130     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
10131     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
10132     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
10133    label(c, LABEL_LOOPBREAK);
10134     putop(c, OP_POP);
10135     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
10136   } else {
10137     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10138     putchecktag(c, f, wire_type, LABEL_DISPATCH);
10139    dispatchtarget(c, method, f, wire_type);
10140     putpush(c, f);
10141     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
10142     putop(c, OP_CALL, sub_m);
10143     putop(c, OP_POP);
10144     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
10145     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
10146       putop(c, OP_SETDELIM);
10147     }
10148   }
10149 }
10150 
10151 /* Generates bytecode to parse a single string or lazy submessage field. */
generate_delimfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)10152 static void generate_delimfield(compiler *c, const upb_fielddef *f,
10153                                 upb_pbdecodermethod *method) {
10154   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
10155 
10156   label(c, LABEL_FIELD);
10157   if (upb_fielddef_isseq(f)) {
10158     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10159     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
10160    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
10161     putop(c, OP_PUSHTAGDELIM, 0);
10162     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
10163    label(c, LABEL_LOOPSTART);
10164     putop(c, OP_PUSHLENDELIM);
10165     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
10166     /* Need to emit even if no handler to skip past the string. */
10167     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
10168     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
10169     putop(c, OP_POP);
10170     putop(c, OP_SETDELIM);
10171     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
10172     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
10173     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
10174    label(c, LABEL_LOOPBREAK);
10175     putop(c, OP_POP);
10176     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
10177   } else {
10178     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10179     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
10180    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
10181     putop(c, OP_PUSHLENDELIM);
10182     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
10183     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
10184     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
10185     putop(c, OP_POP);
10186     putop(c, OP_SETDELIM);
10187   }
10188 }
10189 
10190 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)10191 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
10192                                     upb_pbdecodermethod *method) {
10193   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
10194   upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
10195   opcode parse_type;
10196   upb_selector_t sel;
10197   int wire_type;
10198 
10199   label(c, LABEL_FIELD);
10200 
10201   /* From a decoding perspective, ENUM is the same as INT32. */
10202   if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
10203     descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
10204 
10205   parse_type = (opcode)descriptor_type;
10206 
10207   /* TODO(haberman): generate packed or non-packed first depending on "packed"
10208    * setting in the fielddef.  This will favor (in speed) whichever was
10209    * specified. */
10210 
10211   UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
10212   sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
10213   wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
10214   if (upb_fielddef_isseq(f)) {
10215     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10216     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
10217    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
10218     putop(c, OP_PUSHLENDELIM);
10219     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
10220    label(c, LABEL_LOOPSTART);
10221     putop(c, parse_type, sel);
10222     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
10223     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
10224    dispatchtarget(c, method, f, wire_type);
10225     putop(c, OP_PUSHTAGDELIM, 0);
10226     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
10227    label(c, LABEL_LOOPSTART);
10228     putop(c, parse_type, sel);
10229     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
10230     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
10231     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
10232    label(c, LABEL_LOOPBREAK);
10233     putop(c, OP_POP);  /* Packed and non-packed join. */
10234     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
10235     putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
10236   } else {
10237     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10238     putchecktag(c, f, wire_type, LABEL_DISPATCH);
10239    dispatchtarget(c, method, f, wire_type);
10240     putop(c, parse_type, sel);
10241   }
10242 }
10243 
10244 /* Adds bytecode for parsing the given message to the given decoderplan,
10245  * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)10246 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
10247   const upb_handlers *h;
10248   const upb_msgdef *md;
10249   uint32_t* start_pc;
10250   upb_msg_field_iter i;
10251   upb_value val;
10252 
10253   UPB_ASSERT(method);
10254 
10255   /* Clear all entries in the dispatch table. */
10256   upb_inttable_uninit(&method->dispatch);
10257   upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
10258 
10259   h = upb_pbdecodermethod_desthandlers(method);
10260   md = upb_handlers_msgdef(h);
10261 
10262  method->code_base.ofs = pcofs(c);
10263   putop(c, OP_SETDISPATCH, &method->dispatch);
10264   putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
10265  label(c, LABEL_FIELD);
10266   start_pc = c->pc;
10267   for(upb_msg_field_begin(&i, md);
10268       !upb_msg_field_done(&i);
10269       upb_msg_field_next(&i)) {
10270     const upb_fielddef *f = upb_msg_iter_field(&i);
10271     upb_fieldtype_t type = upb_fielddef_type(f);
10272 
10273     if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
10274       generate_msgfield(c, f, method);
10275     } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
10276                type == UPB_TYPE_MESSAGE) {
10277       generate_delimfield(c, f, method);
10278     } else {
10279       generate_primitivefield(c, f, method);
10280     }
10281   }
10282 
10283   /* If there were no fields, or if no handlers were defined, we need to
10284    * generate a non-empty loop body so that we can at least dispatch for unknown
10285    * fields and check for the end of the message. */
10286   if (c->pc == start_pc) {
10287     /* Check for end-of-message. */
10288     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10289     /* Unconditionally dispatch. */
10290     putop(c, OP_DISPATCH, 0);
10291   }
10292 
10293   /* For now we just loop back to the last field of the message (or if none,
10294    * the DISPATCH opcode for the message). */
10295   putop(c, OP_BRANCH, -LABEL_FIELD);
10296 
10297   /* Insert both a label and a dispatch table entry for this end-of-msg. */
10298  label(c, LABEL_ENDMSG);
10299   val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
10300   upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
10301 
10302   putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
10303   putop(c, OP_RET);
10304 
10305   upb_inttable_compact(&method->dispatch);
10306 }
10307 
10308 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
10309  * Returns the method for these handlers.
10310  *
10311  * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)10312 static void find_methods(compiler *c, const upb_handlers *h) {
10313   upb_value v;
10314   upb_msg_field_iter i;
10315   const upb_msgdef *md;
10316 
10317   if (upb_inttable_lookupptr(&c->group->methods, h, &v))
10318     return;
10319   newmethod(h, c->group);
10320 
10321   /* Find submethods. */
10322   md = upb_handlers_msgdef(h);
10323   for(upb_msg_field_begin(&i, md);
10324       !upb_msg_field_done(&i);
10325       upb_msg_field_next(&i)) {
10326     const upb_fielddef *f = upb_msg_iter_field(&i);
10327     const upb_handlers *sub_h;
10328     if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
10329         (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
10330       /* We only generate a decoder method for submessages with handlers.
10331        * Others will be parsed as unknown fields. */
10332       find_methods(c, sub_h);
10333     }
10334   }
10335 }
10336 
10337 /* (Re-)compile bytecode for all messages in "msgs."
10338  * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)10339 static void compile_methods(compiler *c) {
10340   upb_inttable_iter i;
10341 
10342   /* Start over at the beginning of the bytecode. */
10343   c->pc = c->group->bytecode;
10344 
10345   upb_inttable_begin(&i, &c->group->methods);
10346   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
10347     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
10348     compile_method(c, method);
10349   }
10350 }
10351 
/* For every method in "g": turns the method's code offset into an absolute
 * pointer into g->bytecode and installs the bytecode decoder entry points on
 * the method's input bytes handler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &g->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    method->code_base.ptr = g->bytecode + method->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);

    upb_inttable_next(&it);
  }
}
10366 
10367 
10368 /* JIT setup. *****************************************************************/
10369 
10370 #ifdef UPB_USE_JIT_X64
10371 
sethandlers(mgroup * g,bool allowjit)10372 static void sethandlers(mgroup *g, bool allowjit) {
10373   g->jit_code = NULL;
10374   if (allowjit) {
10375     /* Compile byte-code into machine code, create handlers. */
10376     upb_pbdecoder_jit(g);
10377   } else {
10378     set_bytecode_handlers(g);
10379   }
10380 }
10381 
10382 #else  /* UPB_USE_JIT_X64 */
10383 
sethandlers(mgroup * g,bool allowjit)10384 static void sethandlers(mgroup *g, bool allowjit) {
10385   /* No JIT compiled in; use bytecode handlers unconditionally. */
10386   UPB_UNUSED(allowjit);
10387   set_bytecode_handlers(g);
10388 }
10389 
10390 #endif  /* UPB_USE_JIT_X64 */
10391 
10392 
10393 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
10394  * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool allowjit,bool lazy,const void * owner)10395 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
10396                          const void *owner) {
10397   mgroup *g;
10398   compiler *c;
10399 
10400   UPB_UNUSED(allowjit);
10401   UPB_ASSERT(upb_handlers_isfrozen(dest));
10402 
10403   g = newgroup(owner);
10404   c = newcompiler(g, lazy);
10405   find_methods(c, dest);
10406 
10407   /* We compile in two passes:
10408    * 1. all messages are assigned relative offsets from the beginning of the
10409    *    bytecode (saved in method->code_base).
10410    * 2. forwards OP_CALL instructions can be correctly linked since message
10411    *    offsets have been previously assigned.
10412    *
10413    * Could avoid the second pass by linking OP_CALL instructions somehow. */
10414   compile_methods(c);
10415   compile_methods(c);
10416   g->bytecode_end = c->pc;
10417   freecompiler(c);
10418 
10419 #ifdef UPB_DUMP_BYTECODE
10420   {
10421     FILE *f = fopen("/tmp/upb-bytecode", "w");
10422     UPB_ASSERT(f);
10423     dumpbc(g->bytecode, g->bytecode_end, stderr);
10424     dumpbc(g->bytecode, g->bytecode_end, f);
10425     fclose(f);
10426 
10427     f = fopen("/tmp/upb-bytecode.bin", "wb");
10428     UPB_ASSERT(f);
10429     fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
10430     fclose(f);
10431   }
10432 #endif
10433 
10434   sethandlers(g, allowjit);
10435   return g;
10436 }
10437 
10438 
10439 /* upb_pbcodecache ************************************************************/
10440 
/* Initializes "c" with an empty table of groups; JIT use is allowed by
 * default. */
void upb_pbcodecache_init(upb_pbcodecache *c) {
  c->allow_jit_ = true;
  upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
}
10445 
/* Drops the cache's ref on every group it holds (the cache itself is the ref
 * owner) and then tears down the group table. */
void upb_pbcodecache_uninit(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    const mgroup *g = upb_value_getconstptr(upb_inttable_iter_value(&it));
    mgroup_unref(g, c);
    upb_inttable_next(&it);
  }
  upb_inttable_uninit(&c->groups);
}
10455 
upb_pbcodecache_allowjit(const upb_pbcodecache * c)10456 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
10457   return c->allow_jit_;
10458 }
10459 
/* Changes the cache's allow_jit_ setting.  This is only permitted while the
 * cache holds no groups; returns true if the setting was stored and false if
 * it was rejected (in which case nothing changes). */
bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
  const bool has_groups = upb_inttable_count(&c->groups) > 0;

  if (!has_groups) {
    c->allow_jit_ = allow;
  }
  return !has_groups;
}
10466 
upb_pbcodecache_getdecodermethod(upb_pbcodecache * c,const upb_pbdecodermethodopts * opts)10467 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
10468     upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
10469   upb_value v;
10470   bool ok;
10471 
10472   /* Right now we build a new DecoderMethod every time.
10473    * TODO(haberman): properly cache methods by their true key. */
10474   const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
10475   upb_inttable_push(&c->groups, upb_value_constptr(g));
10476 
10477   ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
10478   UPB_ASSERT(ok);
10479   return upb_value_getptr(v);
10480 }
10481 
10482 
10483 /* upb_pbdecodermethodopts ****************************************************/
10484 
/* Fills in decoder-method options for handlers "h"; lazy starts out off. */
void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
                                  const upb_handlers *h) {
  opts->lazy = false;
  opts->handlers = h;
}
10490 
/* Stores the requested "lazy" flag in the options. */
void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts,
                                     bool lazy) {
  opts->lazy = lazy;
}
10494 /*
10495 ** upb::Decoder (Bytecode Decoder VM)
10496 **
10497 ** Bytecode must previously have been generated using the bytecode compiler in
10498 ** compile_decoder.c.  This decoder then walks through the bytecode op-by-op to
10499 ** parse the input.
10500 **
10501 ** Decoding is fully resumable; we just keep a pointer to the current bytecode
10502 ** instruction and resume from there.  A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be
** capable of suspending/resuming from any byte in the stream.  This
10505 ** sometimes requires keeping a few trailing bytes from the last buffer around
10506 ** in the "residual" buffer.
10507 */
10508 
10509 #include <inttypes.h>
10510 #include <stddef.h>
10511 
10512 #ifdef UPB_DUMP_BYTECODE
10513 #include <stdio.h>
10514 #endif
10515 
/* Evaluates "x"; if it is false, returns upb_pbdecoder_suspend(d) from the
 * enclosing function.  Expects a decoder pointer named "d" to be in scope.
 * Note that this expands to a bare "if" statement that already ends in a
 * semicolon. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
10517 
/* Error messages that are shared between the bytecode and JIT decoders. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
    "Submessage end extends past enclosing submessage.";

/* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";

/* upb_pbdecoder **************************************************************/

/* A single OP_HALT instruction with static storage.
 * NOTE(review): its users are outside this section; presumably d->pc is
 * pointed here to stop the VM -- confirm. */
static opcode halt = OP_HALT;

/* A dummy character we can point to when the user passes us a NULL buffer.
 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
 * behavior, which would invalidate functions like curbufleft(). */
static const char dummy_char;
10534 
10535 /* Whether an op consumes any of the input buffer. */
consumes_input(opcode op)10536 static bool consumes_input(opcode op) {
10537   switch (op) {
10538     case OP_SETDISPATCH:
10539     case OP_STARTMSG:
10540     case OP_ENDMSG:
10541     case OP_STARTSEQ:
10542     case OP_ENDSEQ:
10543     case OP_STARTSUBMSG:
10544     case OP_ENDSUBMSG:
10545     case OP_STARTSTR:
10546     case OP_ENDSTR:
10547     case OP_PUSHTAGDELIM:
10548     case OP_POP:
10549     case OP_SETDELIM:
10550     case OP_SETBIGGROUPNUM:
10551     case OP_CHECKDELIM:
10552     case OP_CALL:
10553     case OP_RET:
10554     case OP_BRANCH:
10555       return false;
10556     default:
10557       return true;
10558   }
10559 }
10560 
/* Number of bytes occupied by "entries" decoder stack frames.  The decoder
 * argument is not consulted. */
static size_t stacksize(upb_pbdecoder *d, size_t entries) {
  const size_t frame_bytes = sizeof(upb_pbdecoder_frame);
  UPB_UNUSED(d);
  return entries * frame_bytes;
}
10565 
/* Number of bytes needed for a call stack that is "entries" deep.  Without
 * the x64 JIT (or for non-native methods) each entry is one bytecode
 * pointer. */
static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
  UPB_UNUSED(d);

#ifdef UPB_USE_JIT_X64
  if (d->method_->is_native_) {
    /* Each native stack frame needs two pointers, plus we need a few frames
     * for the enter/exit trampolines. */
    const size_t per_frame = 2 * sizeof(void*);
    const size_t trampolines = 10 * sizeof(void*);
    return entries * per_frame + trampolines;
  }
#endif

  return sizeof(uint32_t*) * entries;
}
10581 
10582 
10583 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
10584 
10585 /* It's unfortunate that we have to micro-manage the compiler with
10586  * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
10587  * specific to one hardware configuration.  But empirically on a Core i7,
10588  * performance increases 30-50% with these annotations.  Every instance where
10589  * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
10590  * benchmarks. */
10591 
/* Wraps "msg" in a freshly initialized upb_status and reports it through the
 * decoder's environment. */
static void seterr(upb_pbdecoder *d, const char *msg) {
  upb_status err = UPB_STATUS_INIT;

  upb_status_seterrmsg(&err, msg);
  upb_env_reporterror(d->env, &err);
}
10597 
/* Externally visible wrapper that reports "msg" as a decoder error by
 * forwarding to seterr(). */
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
  seterr(d, msg);
}
10601 
10602 
10603 /* Buffering ******************************************************************/
10604 
10605 /* We operate on one buffer at a time, which is either the user's buffer passed
10606  * to our "decode" callback or some residual bytes from the previous buffer. */
10607 
10608 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
10609  * or past the current delimited end. */
curbufleft(const upb_pbdecoder * d)10610 static size_t curbufleft(const upb_pbdecoder *d) {
10611   UPB_ASSERT(d->data_end >= d->ptr);
10612   return d->data_end - d->ptr;
10613 }
10614 
10615 /* How many bytes are available before end-of-buffer. */
bufleft(const upb_pbdecoder * d)10616 static size_t bufleft(const upb_pbdecoder *d) {
10617   return d->end - d->ptr;
10618 }
10619 
10620 /* Overall stream offset of d->ptr. */
offset(const upb_pbdecoder * d)10621 uint64_t offset(const upb_pbdecoder *d) {
10622   return d->bufstart_ofs + (d->ptr - d->buf);
10623 }
10624 
10625 /* How many bytes are available before the end of this delimited region. */
delim_remaining(const upb_pbdecoder * d)10626 size_t delim_remaining(const upb_pbdecoder *d) {
10627   return d->top->end_ofs - offset(d);
10628 }
10629 
10630 /* Advances d->ptr. */
advance(upb_pbdecoder * d,size_t len)10631 static void advance(upb_pbdecoder *d, size_t len) {
10632   UPB_ASSERT(curbufleft(d) >= len);
10633   d->ptr += len;
10634 }
10635 
/* Returns true when "p" lies in the closed range [buf, end].  Note that "end"
 * itself counts as inside. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) {
    return false;
  }
  return !(p > end);
}
10639 
in_residual_buf(const upb_pbdecoder * d,const char * p)10640 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
10641   return in_buf(p, d->residual, d->residual_end);
10642 }
10643 
10644 /* Calculates the delim_end value, which is affected by both the current buffer
10645  * and the parsing stack, so must be called whenever either is updated. */
set_delim_end(upb_pbdecoder * d)10646 static void set_delim_end(upb_pbdecoder *d) {
10647   size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
10648   if (delim_ofs <= (size_t)(d->end - d->buf)) {
10649     d->delim_end = d->buf + delim_ofs;
10650     d->data_end = d->delim_end;
10651   } else {
10652     d->data_end = d->end;
10653     d->delim_end = NULL;
10654   }
10655 }
10656 
/* Makes [buf, end) the decoder's current buffer, positions d->ptr at its
 * start, and refreshes the delimited-end bookkeeping. */
static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
  d->buf = buf;
  d->end = end;
  d->ptr = buf;
  set_delim_end(d);
}
10663 
/* Moves on from a fully consumed current buffer to the buffer of "len" bytes
 * at "buf": the stream offset of the buffer start is advanced by the length
 * of the old buffer before switching. */
static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
  const char *new_end = buf + len;

  UPB_ASSERT(curbufleft(d) == 0);
  d->bufstart_ofs += (d->end - d->buf);
  switchtobuf(d, buf, new_end);
}
10669 
/* Records the current read position as the decoder's checkpoint. */
static void checkpoint(upb_pbdecoder *d) {
  const char *here = d->ptr;

  /* This assertion is about efficiency rather than correctness: it catches
   * callers that checkpoint() again without having moved, i.e. more often
   * than necessary. */
  UPB_ASSERT(d->checkpoint != here);
  d->checkpoint = here;
}
10677 
10678 /* Skips "bytes" bytes in the stream, which may be more than available.  If we
10679  * skip more bytes than are available, we return a long read count to the caller
10680  * indicating how many bytes can be skipped over before passing actual data
10681  * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
10682  * won't actually be read.
10683  */
skip(upb_pbdecoder * d,size_t bytes)10684 static int32_t skip(upb_pbdecoder *d, size_t bytes) {
10685   UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0);
10686   UPB_ASSERT(d->skip == 0);
10687   if (bytes > delim_remaining(d)) {
10688     seterr(d, "Skipped value extended beyond enclosing submessage.");
10689     return upb_pbdecoder_suspend(d);
10690   } else if (bufleft(d) >= bytes) {
10691     /* Skipped data is all in current buffer, and more is still available. */
10692     advance(d, bytes);
10693     d->skip = 0;
10694     return DECODE_OK;
10695   } else {
10696     /* Skipped data extends beyond currently available buffers. */
10697     d->pc = d->last;
10698     d->skip = bytes - curbufleft(d);
10699     d->bufstart_ofs += (d->end - d->buf);
10700     d->residual_end = d->residual;
10701     switchtobuf(d, d->residual, d->residual_end);
10702     return d->size_param + d->skip;
10703   }
10704 }
10705 
10706 
/* Resumes the decoder from an initial state or from a previous suspend.
 *
 * "buf" and "size" describe the caller's next buffer; "handle" is stored in
 * the decoder.  "p" is ignored.  A pending d->skip is applied before anything
 * else, and buf may be NULL only when that pending skip covers the entire
 * buffer.
 *
 * Returns DECODE_OK when this function completes normally.  The NULL-buffer
 * error path returns the result of upb_pbdecoder_suspend().
 * NOTE(review): CHECK_RETURN is defined outside this section; it presumably
 * returns early with the callee's value when that value is not a DECODE_*
 * status -- confirm. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                             size_t size, const upb_bufhandle *handle) {
  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */

  /* d->skip and d->residual_end could probably elegantly be represented
   * as a single variable, to more easily represent this invariant. */
  UPB_ASSERT(!(d->skip && d->residual_end > d->residual));

  /* We need to remember the original size_param, so that the value we return
   * is relative to it, even if we do some skipping first. */
  d->size_param = size;
  d->handle = handle;

  /* Have to handle this case specially (ie. not with skip()) because the user
   * is allowed to pass a NULL buffer here, which won't allow us to safely
   * calculate a d->end or use our normal functions like curbufleft(). */
  if (d->skip && d->skip >= size) {
    d->skip -= size;
    d->bufstart_ofs += size;
    buf = &dummy_char;
    size = 0;

    /* We can't just return now, because we might need to execute some ops
     * like CHECKDELIM, which could call some callbacks and pop the stack. */
  }

  /* We need to pretend that this was the actual buffer param, since some of the
   * calculations assume that d->ptr/d->buf is relative to this. */
  d->buf_param = buf;

  if (!buf) {
    /* NULL buf is ok if its entire span is covered by the "skip" above, but
     * by this point we know that "skip" doesn't cover the buffer. */
    seterr(d, "Passed NULL buffer over non-skippable region.");
    return upb_pbdecoder_suspend(d);
  }

  if (d->residual_end > d->residual) {
    /* We have residual bytes from the last buffer. */
    UPB_ASSERT(d->ptr == d->residual);
  } else {
    switchtobuf(d, buf, buf + size);
  }

  /* Start this call with the checkpoint at the current read position. */
  d->checkpoint = d->ptr;

  /* Handle skips that don't cover the whole buffer (as above). */
  if (d->skip) {
    size_t skip_bytes = d->skip;
    /* skip() asserts that d->skip is zero on entry, so clear it first. */
    d->skip = 0;
    CHECK_RETURN(skip(d, skip_bytes));
    checkpoint(d);
  }

  /* If we're inside an unknown group, continue to parse unknown values. */
  if (d->top->groupnum < 0) {
    CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
    checkpoint(d);
  }

  return DECODE_OK;
}
10770 
10771 /* Suspends the decoder at the last checkpoint, without saving any residual
10772  * bytes.  If there are any unconsumed bytes, returns a short byte count. */
upb_pbdecoder_suspend(upb_pbdecoder * d)10773 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
10774   d->pc = d->last;
10775   if (d->checkpoint == d->residual) {
10776     /* Checkpoint was in residual buf; no user bytes were consumed. */
10777     d->ptr = d->residual;
10778     return 0;
10779   } else {
10780     size_t ret = d->size_param - (d->end - d->checkpoint);
10781     UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
10782     UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char);
10783 
10784     d->bufstart_ofs += (d->checkpoint - d->buf);
10785     d->residual_end = d->residual;
10786     switchtobuf(d, d->residual, d->residual_end);
10787     return ret;
10788   }
10789 }
10790 
10791 /* Suspends the decoder at the last checkpoint, and saves any unconsumed
10792  * bytes in our residual buffer.  This is necessary if we need more user
10793  * bytes to form a complete value, which might not be contiguous in the
10794  * user's buffers.  Always consumes all user bytes. */
suspend_save(upb_pbdecoder * d)10795 static size_t suspend_save(upb_pbdecoder *d) {
10796   /* We hit end-of-buffer before we could parse a full value.
10797    * Save any unconsumed bytes (if any) to the residual buffer. */
10798   d->pc = d->last;
10799 
10800   if (d->checkpoint == d->residual) {
10801     /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
10802     UPB_ASSERT((d->residual_end - d->residual) + d->size_param <=
10803            sizeof(d->residual));
10804     if (!in_residual_buf(d, d->ptr)) {
10805       d->bufstart_ofs -= (d->residual_end - d->residual);
10806     }
10807     memcpy(d->residual_end, d->buf_param, d->size_param);
10808     d->residual_end += d->size_param;
10809   } else {
10810     /* Checkpoint was in user buf; old residual bytes not needed. */
10811     size_t save;
10812     UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
10813 
10814     d->ptr = d->checkpoint;
10815     save = curbufleft(d);
10816     UPB_ASSERT(save <= sizeof(d->residual));
10817     memcpy(d->residual, d->ptr, save);
10818     d->residual_end = d->residual + save;
10819     d->bufstart_ofs = offset(d);
10820   }
10821 
10822   switchtobuf(d, d->residual, d->residual_end);
10823   return d->size_param;
10824 }
10825 
10826 /* Copies the next "bytes" bytes into "buf" and advances the stream.
10827  * Requires that this many bytes are available in the current buffer. */
consumebytes(upb_pbdecoder * d,void * buf,size_t bytes)10828 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
10829                                          size_t bytes) {
10830   UPB_ASSERT(bytes <= curbufleft(d));
10831   memcpy(buf, d->ptr, bytes);
10832   advance(d, bytes);
10833 }
10834 
10835 /* Slow path for getting the next "bytes" bytes, regardless of whether they are
10836  * available in the current buffer or not.  Returns a status code as described
10837  * in decoder.int.h. */
getbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)10838 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
10839                                           size_t bytes) {
10840   const size_t avail = curbufleft(d);
10841   consumebytes(d, buf, avail);
10842   bytes -= avail;
10843   UPB_ASSERT(bytes > 0);
10844   if (in_residual_buf(d, d->ptr)) {
10845     advancetobuf(d, d->buf_param, d->size_param);
10846   }
10847   if (curbufleft(d) >= bytes) {
10848     consumebytes(d, (char *)buf + avail, bytes);
10849     return DECODE_OK;
10850   } else if (d->data_end == d->delim_end) {
10851     seterr(d, "Submessage ended in the middle of a value or group");
10852     return upb_pbdecoder_suspend(d);
10853   } else {
10854     return suspend_save(d);
10855   }
10856 }
10857 
10858 /* Gets the next "bytes" bytes, regardless of whether they are available in the
10859  * current buffer or not.  Returns a status code as described in decoder.int.h.
10860  */
getbytes(upb_pbdecoder * d,void * buf,size_t bytes)10861 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
10862                                         size_t bytes) {
10863   if (curbufleft(d) >= bytes) {
10864     /* Buffer has enough data to satisfy. */
10865     consumebytes(d, buf, bytes);
10866     return DECODE_OK;
10867   } else {
10868     return getbytes_slow(d, buf, bytes);
10869   }
10870 }
10871 
peekbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)10872 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
10873                                           size_t bytes) {
10874   size_t ret = curbufleft(d);
10875   memcpy(buf, d->ptr, ret);
10876   if (in_residual_buf(d, d->ptr)) {
10877     size_t copy = UPB_MIN(bytes - ret, d->size_param);
10878     memcpy((char *)buf + ret, d->buf_param, copy);
10879     ret += copy;
10880   }
10881   return ret;
10882 }
10883 
peekbytes(upb_pbdecoder * d,void * buf,size_t bytes)10884 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
10885                                         size_t bytes) {
10886   if (curbufleft(d) >= bytes) {
10887     memcpy(buf, d->ptr, bytes);
10888     return bytes;
10889   } else {
10890     return peekbytes_slow(d, buf, bytes);
10891   }
10892 }
10893 
10894 
10895 /* Decoding of wire types *****************************************************/
10896 
10897 /* Slow path for decoding a varint from the current buffer position.
10898  * Returns a status code as described in decoder.int.h. */
upb_pbdecoder_decode_varint_slow(upb_pbdecoder * d,uint64_t * u64)10899 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
10900                                                       uint64_t *u64) {
10901   uint8_t byte = 0x80;
10902   int bitpos;
10903   *u64 = 0;
10904   for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
10905     CHECK_RETURN(getbytes(d, &byte, 1));
10906     *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
10907   }
10908   if(bitpos == 70 && (byte & 0x80)) {
10909     seterr(d, kUnterminatedVarint);
10910     return upb_pbdecoder_suspend(d);
10911   }
10912   return DECODE_OK;
10913 }
10914 
10915 /* Decodes a varint from the current buffer position.
10916  * Returns a status code as described in decoder.int.h. */
decode_varint(upb_pbdecoder * d,uint64_t * u64)10917 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
10918   if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
10919     *u64 = *d->ptr;
10920     advance(d, 1);
10921     return DECODE_OK;
10922   } else if (curbufleft(d) >= 10) {
10923     /* Fast case. */
10924     upb_decoderet r = upb_vdecode_fast(d->ptr);
10925     if (r.p == NULL) {
10926       seterr(d, kUnterminatedVarint);
10927       return upb_pbdecoder_suspend(d);
10928     }
10929     advance(d, r.p - d->ptr);
10930     *u64 = r.val;
10931     return DECODE_OK;
10932   } else {
10933     /* Slow case -- varint spans buffer seam. */
10934     return upb_pbdecoder_decode_varint_slow(d, u64);
10935   }
10936 }
10937 
10938 /* Decodes a 32-bit varint from the current buffer position.
10939  * Returns a status code as described in decoder.int.h. */
decode_v32(upb_pbdecoder * d,uint32_t * u32)10940 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
10941   uint64_t u64;
10942   int32_t ret = decode_varint(d, &u64);
10943   if (ret >= 0) return ret;
10944   if (u64 > UINT32_MAX) {
10945     seterr(d, "Unterminated 32-bit varint");
10946     /* TODO(haberman) guarantee that this function return is >= 0 somehow,
10947      * so we know this path will always be treated as error by our caller.
10948      * Right now the size_t -> int32_t can overflow and produce negative values.
10949      */
10950     *u32 = 0;
10951     return upb_pbdecoder_suspend(d);
10952   }
10953   *u32 = u64;
10954   return DECODE_OK;
10955 }
10956 
10957 /* Decodes a fixed32 from the current buffer position.
10958  * Returns a status code as described in decoder.int.h.
10959  * TODO: proper byte swapping for big-endian machines. */
decode_fixed32(upb_pbdecoder * d,uint32_t * u32)10960 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
10961   return getbytes(d, u32, 4);
10962 }
10963 
10964 /* Decodes a fixed64 from the current buffer position.
10965  * Returns a status code as described in decoder.int.h.
10966  * TODO: proper byte swapping for big-endian machines. */
decode_fixed64(upb_pbdecoder * d,uint64_t * u64)10967 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
10968   return getbytes(d, u64, 8);
10969 }
10970 
10971 /* Non-static versions of the above functions.
10972  * These are called by the JIT for fallback paths. */
upb_pbdecoder_decode_f32(upb_pbdecoder * d,uint32_t * u32)10973 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
10974   return decode_fixed32(d, u32);
10975 }
10976 
upb_pbdecoder_decode_f64(upb_pbdecoder * d,uint64_t * u64)10977 int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
10978   return decode_fixed64(d, u64);
10979 }
10980 
/* Reinterprets the bits of "n" as a double (byte copy, no numeric
 * conversion). */
static double as_double(uint64_t n) {
  double result;
  memcpy(&result, &n, 8);
  return result;
}

/* Reinterprets the bits of "n" as a float (byte copy, no numeric
 * conversion). */
static float as_float(uint32_t n) {
  float result;
  memcpy(&result, &n, 4);
  return result;
}
10983 
10984 /* Pushes a frame onto the decoder stack. */
decoder_push(upb_pbdecoder * d,uint64_t end)10985 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
10986   upb_pbdecoder_frame *fr = d->top;
10987 
10988   if (end > fr->end_ofs) {
10989     seterr(d, kPbDecoderSubmessageTooLong);
10990     return false;
10991   } else if (fr == d->limit) {
10992     seterr(d, kPbDecoderStackOverflow);
10993     return false;
10994   }
10995 
10996   fr++;
10997   fr->end_ofs = end;
10998   fr->dispatch = NULL;
10999   fr->groupnum = 0;
11000   d->top = fr;
11001   return true;
11002 }
11003 
/* Pushes a frame for a tag-delimited region and stores "arg" as its group
 * number.  Returns false if the push failed.
 *
 * While we expect to see an "end" tag (either ENDGROUP or a non-sequence
 * field number) before reaching any enclosing submessage end, the new frame
 * inherits the current delimited end so that a corrupt proto that never
 * supplies an END tag cannot make us parse past it. */
static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
  const bool pushed = decoder_push(d, d->top->end_ofs);

  if (pushed) {
    d->top->groupnum = arg;
  }
  return pushed;
}
11014 
11015 /* Pops a frame from the decoder stack. */
decoder_pop(upb_pbdecoder * d)11016 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
11017 
upb_pbdecoder_checktag_slow(upb_pbdecoder * d,uint64_t expected)11018 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
11019                                                  uint64_t expected) {
11020   uint64_t data = 0;
11021   size_t bytes = upb_value_size(expected);
11022   size_t read = peekbytes(d, &data, bytes);
11023   if (read == bytes && data == expected) {
11024     /* Advance past matched bytes. */
11025     int32_t ok = getbytes(d, &data, read);
11026     UPB_ASSERT(ok < 0);
11027     return DECODE_OK;
11028   } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
11029     return suspend_save(d);
11030   } else {
11031     return DECODE_MISMATCH;
11032   }
11033 }
11034 
/* Skips over unknown data: one unknown field, or a whole unknown group.
 *
 * If fieldnum >= 0 the caller has already parsed the tag and supplies its
 * field number and wire type, so the first iteration jumps straight to
 * have_tag.  Otherwise (fieldnum < 0) a tag is decoded from the stream first.
 *
 * A START_GROUP tag pushes a frame whose groupnum is the negated field
 * number; while the top frame's groupnum is negative the loop keeps consuming
 * fields, checkpointing after each one.  Once the top frame's groupnum is
 * >= 0, the bytes since the last checkpoint are handed to
 * upb_sink_putunknown() and DECODE_OK is returned.
 *
 * Returns DECODE_ENDGROUP when an END_GROUP tag matches the top frame's
 * (non-negated) group number.  Error paths report a message and return the
 * result of upb_pbdecoder_suspend(). */
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type) {
  if (fieldnum >= 0)
    goto have_tag;

  while (true) {
    uint32_t tag;
    CHECK_RETURN(decode_v32(d, &tag));
    /* Low 3 bits of the tag are the wire type; the rest is the field number. */
    wire_type = tag & 0x7;
    fieldnum = tag >> 3;

have_tag:
    if (fieldnum == 0) {
      seterr(d, "Saw invalid field number (0)");
      return upb_pbdecoder_suspend(d);
    }

    switch (wire_type) {
      case UPB_WIRE_TYPE_32BIT:
        CHECK_RETURN(skip(d, 4));
        break;
      case UPB_WIRE_TYPE_64BIT:
        CHECK_RETURN(skip(d, 8));
        break;
      case UPB_WIRE_TYPE_VARINT: {
        /* The value is decoded only to move past it; it is discarded. */
        uint64_t u64;
        CHECK_RETURN(decode_varint(d, &u64));
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_RETURN(skip(d, len));
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        /* A negated group number marks the new frame as an unknown group. */
        CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum == -d->top->groupnum) {
          /* Closes the unknown group opened by a START_GROUP above. */
          decoder_pop(d);
        } else if (fieldnum == d->top->groupnum) {
          /* Closes the group of the current top frame; let the caller
           * handle it. */
          return DECODE_ENDGROUP;
        } else {
          seterr(d, "Unmatched ENDGROUP tag.");
          return upb_pbdecoder_suspend(d);
        }
        break;
      default:
        seterr(d, "Invalid wire type");
        return upb_pbdecoder_suspend(d);
    }

    if (d->top->groupnum >= 0) {
      /* TODO: More code needed for handling unknown groups. */
      upb_sink_putunknown(&d->top->sink, d->checkpoint, d->ptr - d->checkpoint);
      return DECODE_OK;
    }

    /* Unknown group -- continue looping over unknown fields. */
    checkpoint(d);
  }
}
11098 
/* Points the program counter at the end-of-message bytecode of the current
 * frame: the DISPATCH_ENDMSG entry of the frame's dispatch table holds the
 * target as an offset from the frame's base. */
static void goto_endmsg(upb_pbdecoder *d) {
  upb_value target;
  bool found;

  found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &target);
  UPB_ASSERT(found);
  d->pc = d->top->base + upb_value_getuint64(target);
}
11105 
11106 /* Parses a tag and jumps to the corresponding bytecode instruction for this
11107  * field.
11108  *
11109  * If the tag is unknown (or the wire type doesn't match), parses the field as
11110  * unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
11111  * instruction for the end of message. */
dispatch(upb_pbdecoder * d)11112 static int32_t dispatch(upb_pbdecoder *d) {
11113   upb_inttable *dispatch = d->top->dispatch;
11114   uint32_t tag;
11115   uint8_t wire_type;
11116   uint32_t fieldnum;
11117   upb_value val;
11118   int32_t retval;
11119 
11120   /* Decode tag. */
11121   CHECK_RETURN(decode_v32(d, &tag));
11122   wire_type = tag & 0x7;
11123   fieldnum = tag >> 3;
11124 
11125   /* Lookup tag.  Because of packed/non-packed compatibility, we have to
11126    * check the wire type against two possibilities. */
11127   if (fieldnum != DISPATCH_ENDMSG &&
11128       upb_inttable_lookup32(dispatch, fieldnum, &val)) {
11129     uint64_t v = upb_value_getuint64(val);
11130     if (wire_type == (v & 0xff)) {
11131       d->pc = d->top->base + (v >> 16);
11132       return DECODE_OK;
11133     } else if (wire_type == ((v >> 8) & 0xff)) {
11134       bool found =
11135           upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
11136       UPB_ASSERT(found);
11137       d->pc = d->top->base + upb_value_getuint64(val);
11138       return DECODE_OK;
11139     }
11140   }
11141 
11142   /* We have some unknown fields (or ENDGROUP) to parse.  The DISPATCH or TAG
11143    * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
11144    * we need to back up to, so that when we're done skipping unknown data we
11145    * can re-check the delimited end. */
11146   d->last--;  /* Necessary if we get suspended */
11147   d->pc = d->last;
11148   UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM);
11149 
11150   /* Unknown field or ENDGROUP. */
11151   retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
11152 
11153   CHECK_RETURN(retval);
11154 
11155   if (retval == DECODE_ENDGROUP) {
11156     goto_endmsg(d);
11157     return DECODE_OK;
11158   }
11159 
11160   return DECODE_OK;
11161 }
11162 
11163 /* Callers know that the stack is more than one deep because the opcodes that
11164  * call this only occur after PUSH operations. */
outer_frame(upb_pbdecoder * d)11165 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
11166   UPB_ASSERT(d->top != d->stack);
11167   return d->top - 1;
11168 }
11169 
11170 
11171 /* The main decoding loop *****************************************************/
11172 
11173 /* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
11174  * switch() statement. */
run_decoder_vm(upb_pbdecoder * d,const mgroup * group,const upb_bufhandle * handle)11175 size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
11176                       const upb_bufhandle* handle) {
11177 
11178 #define VMCASE(op, code) \
11179   case op: { code; if (consumes_input(op)) checkpoint(d); break; }
11180 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
11181   VMCASE(OP_PARSE_ ## type, { \
11182     ctype val; \
11183     CHECK_RETURN(decode_ ## wt(d, &val)); \
11184     upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
11185   })
11186 
11187   while(1) {
11188     int32_t instruction;
11189     opcode op;
11190     uint32_t arg;
11191     int32_t longofs;
11192 
11193     d->last = d->pc;
11194     instruction = *d->pc++;
11195     op = getop(instruction);
11196     arg = instruction >> 8;
11197     longofs = arg;
11198     UPB_ASSERT(d->ptr != d->residual_end);
11199     UPB_UNUSED(group);
11200 #ifdef UPB_DUMP_BYTECODE
11201     fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
11202                     "%x %s (%d)\n",
11203             (int)offset(d),
11204             (int)(d->ptr - d->buf),
11205             (int)(d->data_end - d->ptr),
11206             (int)(d->end - d->ptr),
11207             (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
11208             (int)(d->pc - 1 - group->bytecode),
11209             upb_pbdecoder_getopname(op),
11210             arg);
11211 #endif
11212     switch (op) {
11213       /* Technically, we are losing data if we see a 32-bit varint that is not
11214        * properly sign-extended.  We could detect this and error about the data
11215        * loss, but proto2 does not do this, so we pass. */
11216       PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
11217       PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
11218       PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
11219       PRIMITIVE_OP(UINT64,   varint,  uint64, uint64_t,     uint64_t)
11220       PRIMITIVE_OP(FIXED32,  fixed32, uint32, uint32_t,     uint32_t)
11221       PRIMITIVE_OP(FIXED64,  fixed64, uint64, uint64_t,     uint64_t)
11222       PRIMITIVE_OP(SFIXED32, fixed32, int32,  int32_t,      uint32_t)
11223       PRIMITIVE_OP(SFIXED64, fixed64, int64,  int64_t,      uint64_t)
11224       PRIMITIVE_OP(BOOL,     varint,  bool,   bool,         uint64_t)
11225       PRIMITIVE_OP(DOUBLE,   fixed64, double, as_double,    uint64_t)
11226       PRIMITIVE_OP(FLOAT,    fixed32, float,  as_float,     uint32_t)
11227       PRIMITIVE_OP(SINT32,   varint,  int32,  upb_zzdec_32, uint64_t)
11228       PRIMITIVE_OP(SINT64,   varint,  int64,  upb_zzdec_64, uint64_t)
11229 
11230       VMCASE(OP_SETDISPATCH,
11231         d->top->base = d->pc - 1;
11232         memcpy(&d->top->dispatch, d->pc, sizeof(void*));
11233         d->pc += sizeof(void*) / sizeof(uint32_t);
11234       )
11235       VMCASE(OP_STARTMSG,
11236         CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
11237       )
11238       VMCASE(OP_ENDMSG,
11239         CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
11240       )
11241       VMCASE(OP_STARTSEQ,
11242         upb_pbdecoder_frame *outer = outer_frame(d);
11243         CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
11244       )
11245       VMCASE(OP_ENDSEQ,
11246         CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
11247       )
11248       VMCASE(OP_STARTSUBMSG,
11249         upb_pbdecoder_frame *outer = outer_frame(d);
11250         CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
11251       )
11252       VMCASE(OP_ENDSUBMSG,
11253         CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
11254       )
11255       VMCASE(OP_STARTSTR,
11256         uint32_t len = delim_remaining(d);
11257         upb_pbdecoder_frame *outer = outer_frame(d);
11258         CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
11259         if (len == 0) {
11260           d->pc++;  /* Skip OP_STRING. */
11261         }
11262       )
11263       VMCASE(OP_STRING,
11264         uint32_t len = curbufleft(d);
11265         size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
11266         if (n > len) {
11267           if (n > delim_remaining(d)) {
11268             seterr(d, "Tried to skip past end of string.");
11269             return upb_pbdecoder_suspend(d);
11270           } else {
11271             int32_t ret = skip(d, n);
11272             /* This shouldn't return DECODE_OK, because n > len. */
11273             UPB_ASSERT(ret >= 0);
11274             return ret;
11275           }
11276         }
11277         advance(d, n);
11278         if (n < len || d->delim_end == NULL) {
11279           /* We aren't finished with this string yet. */
11280           d->pc--;  /* Repeat OP_STRING. */
11281           if (n > 0) checkpoint(d);
11282           return upb_pbdecoder_suspend(d);
11283         }
11284       )
11285       VMCASE(OP_ENDSTR,
11286         CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
11287       )
11288       VMCASE(OP_PUSHTAGDELIM,
11289         CHECK_SUSPEND(pushtagdelim(d, arg));
11290       )
11291       VMCASE(OP_SETBIGGROUPNUM,
11292         d->top->groupnum = *d->pc++;
11293       )
11294       VMCASE(OP_POP,
11295         UPB_ASSERT(d->top > d->stack);
11296         decoder_pop(d);
11297       )
11298       VMCASE(OP_PUSHLENDELIM,
11299         uint32_t len;
11300         CHECK_RETURN(decode_v32(d, &len));
11301         CHECK_SUSPEND(decoder_push(d, offset(d) + len));
11302         set_delim_end(d);
11303       )
11304       VMCASE(OP_SETDELIM,
11305         set_delim_end(d);
11306       )
11307       VMCASE(OP_CHECKDELIM,
11308         /* We are guaranteed of this assert because we never allow ourselves to
11309          * consume bytes beyond data_end, which covers delim_end when non-NULL.
11310          */
11311         UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end));
11312         if (d->ptr == d->delim_end)
11313           d->pc += longofs;
11314       )
11315       VMCASE(OP_CALL,
11316         d->callstack[d->call_len++] = d->pc;
11317         d->pc += longofs;
11318       )
11319       VMCASE(OP_RET,
11320         UPB_ASSERT(d->call_len > 0);
11321         d->pc = d->callstack[--d->call_len];
11322       )
11323       VMCASE(OP_BRANCH,
11324         d->pc += longofs;
11325       )
11326       VMCASE(OP_TAG1,
11327         uint8_t expected;
11328         CHECK_SUSPEND(curbufleft(d) > 0);
11329         expected = (arg >> 8) & 0xff;
11330         if (*d->ptr == expected) {
11331           advance(d, 1);
11332         } else {
11333           int8_t shortofs;
11334          badtag:
11335           shortofs = arg;
11336           if (shortofs == LABEL_DISPATCH) {
11337             CHECK_RETURN(dispatch(d));
11338           } else {
11339             d->pc += shortofs;
11340             break; /* Avoid checkpoint(). */
11341           }
11342         }
11343       )
11344       VMCASE(OP_TAG2,
11345         uint16_t expected;
11346         CHECK_SUSPEND(curbufleft(d) > 0);
11347         expected = (arg >> 8) & 0xffff;
11348         if (curbufleft(d) >= 2) {
11349           uint16_t actual;
11350           memcpy(&actual, d->ptr, 2);
11351           if (expected == actual) {
11352             advance(d, 2);
11353           } else {
11354             goto badtag;
11355           }
11356         } else {
11357           int32_t result = upb_pbdecoder_checktag_slow(d, expected);
11358           if (result == DECODE_MISMATCH) goto badtag;
11359           if (result >= 0) return result;
11360         }
11361       )
11362       VMCASE(OP_TAGN, {
11363         uint64_t expected;
11364         int32_t result;
11365         memcpy(&expected, d->pc, 8);
11366         d->pc += 2;
11367         result = upb_pbdecoder_checktag_slow(d, expected);
11368         if (result == DECODE_MISMATCH) goto badtag;
11369         if (result >= 0) return result;
11370       })
11371       VMCASE(OP_DISPATCH, {
11372         CHECK_RETURN(dispatch(d));
11373       })
11374       VMCASE(OP_HALT, {
11375         return d->size_param;
11376       })
11377     }
11378   }
11379 }
11380 
11381 
11382 /* BytesHandler handlers ******************************************************/
11383 
upb_pbdecoder_startbc(void * closure,const void * pc,size_t size_hint)11384 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
11385   upb_pbdecoder *d = closure;
11386   UPB_UNUSED(size_hint);
11387   d->top->end_ofs = UINT64_MAX;
11388   d->bufstart_ofs = 0;
11389   d->call_len = 1;
11390   d->callstack[0] = &halt;
11391   d->pc = pc;
11392   d->skip = 0;
11393   return d;
11394 }
11395 
upb_pbdecoder_startjit(void * closure,const void * hd,size_t size_hint)11396 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
11397   upb_pbdecoder *d = closure;
11398   UPB_UNUSED(hd);
11399   UPB_UNUSED(size_hint);
11400   d->top->end_ofs = UINT64_MAX;
11401   d->bufstart_ofs = 0;
11402   d->call_len = 0;
11403   d->skip = 0;
11404   return d;
11405 }
11406 
upb_pbdecoder_end(void * closure,const void * handler_data)11407 bool upb_pbdecoder_end(void *closure, const void *handler_data) {
11408   upb_pbdecoder *d = closure;
11409   const upb_pbdecodermethod *method = handler_data;
11410   uint64_t end;
11411   char dummy;
11412 
11413   if (d->residual_end > d->residual) {
11414     seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
11415     return false;
11416   }
11417 
11418   if (d->skip) {
11419     seterr(d, "Unexpected EOF inside skipped data");
11420     return false;
11421   }
11422 
11423   if (d->top->end_ofs != UINT64_MAX) {
11424     seterr(d, "Unexpected EOF inside delimited string");
11425     return false;
11426   }
11427 
11428   /* The user's end() call indicates that the message ends here. */
11429   end = offset(d);
11430   d->top->end_ofs = end;
11431 
11432 #ifdef UPB_USE_JIT_X64
11433   if (method->is_native_) {
11434     const mgroup *group = (const mgroup*)method->group;
11435     if (d->top != d->stack)
11436       d->stack->end_ofs = 0;
11437     group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
11438   } else
11439 #endif
11440   {
11441     const uint32_t *p = d->pc;
11442     d->stack->end_ofs = end;
11443     /* Check the previous bytecode, but guard against beginning. */
11444     if (p != method->code_base.ptr) p--;
11445     if (getop(*p) == OP_CHECKDELIM) {
11446       /* Rewind from OP_TAG* to OP_CHECKDELIM. */
11447       UPB_ASSERT(getop(*d->pc) == OP_TAG1 ||
11448              getop(*d->pc) == OP_TAG2 ||
11449              getop(*d->pc) == OP_TAGN ||
11450              getop(*d->pc) == OP_DISPATCH);
11451       d->pc = p;
11452     }
11453     upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
11454   }
11455 
11456   if (d->call_len != 0) {
11457     seterr(d, "Unexpected EOF inside submessage or group");
11458     return false;
11459   }
11460 
11461   return true;
11462 }
11463 
upb_pbdecoder_decode(void * decoder,const void * group,const char * buf,size_t size,const upb_bufhandle * handle)11464 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
11465                             size_t size, const upb_bufhandle *handle) {
11466   int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
11467 
11468   if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
11469   CHECK_RETURN(result);
11470 
11471   return run_decoder_vm(decoder, group, handle);
11472 }
11473 
11474 
11475 /* Public API *****************************************************************/
11476 
/* Returns the decoder to its initial state: the frame stack holds only its
 * bottom frame (with group number 0), and every buffer pointer refers to the
 * start of the residual buffer, which is marked empty. */
void upb_pbdecoder_reset(upb_pbdecoder *d) {
  /* Empty residual buffer, with the current buffer window placed on it. */
  d->residual_end = d->residual;
  d->buf = d->residual;
  d->ptr = d->residual;
  d->end = d->residual;

  /* Back to the bottom frame. */
  d->stack->groupnum = 0;
  d->top = d->stack;
}
11485 
upb_pbdecoder_create(upb_env * e,const upb_pbdecodermethod * m,upb_sink * sink)11486 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
11487                                     upb_sink *sink) {
11488   const size_t default_max_nesting = 64;
11489 #ifndef NDEBUG
11490   size_t size_before = upb_env_bytesallocated(e);
11491 #endif
11492 
11493   upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
11494   if (!d) return NULL;
11495 
11496   d->method_ = m;
11497   d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
11498   d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
11499   if (!d->stack || !d->callstack) {
11500     return NULL;
11501   }
11502 
11503   d->env = e;
11504   d->limit = d->stack + default_max_nesting - 1;
11505   d->stack_size = default_max_nesting;
11506   d->status = NULL;
11507 
11508   upb_pbdecoder_reset(d);
11509   upb_bytessink_reset(&d->input_, &m->input_handler_, d);
11510 
11511   UPB_ASSERT(sink);
11512   if (d->method_->dest_handlers_) {
11513     if (sink->handlers != d->method_->dest_handlers_)
11514       return NULL;
11515   }
11516   upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
11517 
11518   /* If this fails, increase the value in decoder.h. */
11519   UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(e) - size_before <=
11520                       UPB_PB_DECODER_SIZE);
11521   return d;
11522 }
11523 
/* Returns the decoder's current offset, as computed by offset(). */
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
  uint64_t parsed = offset(d);
  return parsed;
}
11527 
upb_pbdecoder_method(const upb_pbdecoder * d)11528 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
11529   return d->method_;
11530 }
11531 
upb_pbdecoder_input(upb_pbdecoder * d)11532 upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
11533   return &d->input_;
11534 }
11535 
upb_pbdecoder_maxnesting(const upb_pbdecoder * d)11536 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
11537   return d->stack_size;
11538 }
11539 
/* Sets the maximum nesting depth to "max" frames, growing the frame stack and
 * the call stack when "max" exceeds the current capacity.
 *
 * Returns false, leaving the nesting limit as it was, if "max" is smaller than
 * the current nesting depth or if memory could not be allocated. */
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
  UPB_ASSERT(d->top >= d->stack);

  if (max < (size_t)(d->top - d->stack)) {
    /* Can't set a limit smaller than what we are currently at. */
    return false;
  }

  if (max > d->stack_size) {
    /* Need to reallocate stack and callstack to accommodate. */
    size_t old_size = stacksize(d, d->stack_size);
    size_t new_size = stacksize(d, max);
    /* d->top and d->limit point into the frame stack.  Record their positions
     * relative to its base now, because the block may move when it grows. */
    ptrdiff_t top_ofs = d->top - d->stack;
    ptrdiff_t limit_ofs = d->limit - d->stack;
    void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->stack = p;
    /* Re-point both into the (possibly relocated) stack so that they stay
     * valid even if the call stack allocation below fails. */
    d->top = d->stack + top_ofs;
    d->limit = d->stack + limit_ofs;

    old_size = callstacksize(d, d->stack_size);
    new_size = callstacksize(d, max);
    p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->callstack = p;

    d->stack_size = max;
  }

  d->limit = d->stack + max - 1;
  return true;
}
11572 /*
11573 ** upb::Encoder
11574 **
11575 ** Since we are implementing pure handlers (ie. without any out-of-band access
11576 ** to pre-computed lengths), we have to buffer all submessages before we can
11577 ** emit even their first byte.
11578 **
11579 ** Not knowing the size of submessages also means we can't write a perfect
11580 ** zero-copy implementation, even with buffering.  Lengths are stored as
11581 ** varints, which means that we don't know how many bytes to reserve for the
11582 ** length until we know what the length is.
11583 **
11584 ** This leaves us with three main choices:
11585 **
11586 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
11587 **    once into the output buffer.
11588 **
11589 ** 2. attempt to buffer data directly into the output buffer, estimating how
11590 **    many bytes each length will take.  When our guesses are wrong, use
11591 **    memmove() to grow or shrink the allotted space.
11592 **
11593 ** 3. buffer directly into the output buffer, allocating a max length
11594 **    ahead-of-time for each submessage length.  If we overallocated, we waste
11595 **    space, but no memcpy() or memmove() is required.  This approach requires
11596 **    defining a maximum size for submessages and rejecting submessages that
11597 **    exceed that size.
11598 **
11599 ** (2) and (3) have the potential to have better performance, but they are more
11600 ** complicated and subtle to implement:
11601 **
11602 **   (3) requires making an arbitrary choice of the maximum message size; it
11603 **       wastes space when submessages are shorter than this and fails
11604 **       completely when they are longer.  This makes it more finicky and
11605 **       requires configuration based on the input.  It also makes it impossible
11606 **       to perfectly match the output of reference encoders that always use the
11607 **       optimal amount of space for each length.
11608 **
11609 **   (2) requires guessing the size upfront, and if multiple lengths are
11610 **       guessed wrong the minimum required number of memmove() operations may
11611 **       be complicated to compute correctly.  Implemented properly, it may have
11612 **       a useful amortized or average cost, but more investigation is required
11613 **       to determine this and what the optimal algorithm is to achieve it.
11614 **
11615 **   (1) makes you always pay for exactly one copy, but its implementation is
11616 **       the simplest and its performance is predictable.
11617 **
11618 ** So for now, we implement (1) only.  If we wish to optimize later, we should
11619 ** be able to do it without affecting users.
11620 **
11621 ** The strategy is to buffer the segments of data that do *not* depend on
11622 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
11623 ** and lengths.  When the top-level submessage ends, we can go beginning to end,
11624 ** alternating the writing of lengths with memcpy() of the rest of the data.
11625 ** At the top level though, no buffering is required.
11626 */
11627 
11628 
11629 
11630 /* The output buffer is divided into segments; a segment is a string of data
11631  * that is "ready to go" -- it does not need any varint lengths inserted into
11632  * the middle.  The seams between segments are where varints will be inserted
11633  * once they are known.
11634  *
11635  * We also use the concept of a "run", which is a range of encoded bytes that
11636  * occur at a single submessage level.  Every segment contains one or more runs.
11637  *
11638  * A segment can span messages.  Consider:
11639  *
11640  *                  .--Submessage lengths---------.
11641  *                  |       |                     |
11642  *                  |       V                     V
11643  *                  V      | |---------------    | |-----------------
11644  * Submessages:    | |-----------------------------------------------
11645  * Top-level msg: ------------------------------------------------------------
11646  *
11647  * Segments:          -----   -------------------   -----------------
11648  * Runs:              *----   *--------------*---   *----------------
11649  * (* marks the start)
11650  *
11651  * Note that the top-level message is not in any segment because it does not
11652  * have any length preceding it.
11653  *
11654  * A segment is only interrupted when another length needs to be inserted.  So
11655  * observe how the second segment spans both the inner submessage and part of
11656  * the next enclosing message. */
/* Bookkeeping for one segment of buffered output. */
typedef struct {
  /* Length that is varint-encoded in front of this segment. */
  uint32_t msglen;

  /* Number of buffered bytes that make up this segment. */
  uint32_t seglen;
} upb_pb_encoder_segment;
11661 
11662 struct upb_pb_encoder {
11663   upb_env *env;
11664 
11665   /* Our input and output. */
11666   upb_sink input_;
11667   upb_bytessink *output_;
11668 
11669   /* The "subclosure" -- used as the inner closure as part of the bytessink
11670    * protocol. */
11671   void *subc;
11672 
11673   /* The output buffer and limit, and our current write position.  "buf"
11674    * initially points to "initbuf", but is dynamically allocated if we need to
11675    * grow beyond the initial size. */
11676   char *buf, *ptr, *limit;
11677 
11678   /* The beginning of the current run, or undefined if we are at the top
11679    * level. */
11680   char *runbegin;
11681 
11682   /* The list of segments we are accumulating. */
11683   upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
11684 
11685   /* The stack of enclosing submessages.  Each entry in the stack points to the
11686    * segment where this submessage's length is being accumulated. */
11687   int *stack, *top, *stacklimit;
11688 
11689   /* Depth of startmsg/endmsg calls. */
11690   int depth;
11691 };
11692 
11693 /* low-level buffering ********************************************************/
11694 
11695 /* Low-level functions for interacting with the output buffer. */
11696 
11697 /* TODO(haberman): handle pushback */
putbuf(upb_pb_encoder * e,const char * buf,size_t len)11698 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
11699   size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
11700   UPB_ASSERT(n == len);
11701 }
11702 
top(upb_pb_encoder * e)11703 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
11704   return &e->segbuf[*e->top];
11705 }
11706 
11707 /* Call to ensure that at least "bytes" bytes are available for writing at
11708  * e->ptr.  Returns false if the bytes could not be allocated. */
reserve(upb_pb_encoder * e,size_t bytes)11709 static bool reserve(upb_pb_encoder *e, size_t bytes) {
11710   if ((size_t)(e->limit - e->ptr) < bytes) {
11711     /* Grow buffer. */
11712     char *new_buf;
11713     size_t needed = bytes + (e->ptr - e->buf);
11714     size_t old_size = e->limit - e->buf;
11715 
11716     size_t new_size = old_size;
11717 
11718     while (new_size < needed) {
11719       new_size *= 2;
11720     }
11721 
11722     new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
11723 
11724     if (new_buf == NULL) {
11725       return false;
11726     }
11727 
11728     e->ptr = new_buf + (e->ptr - e->buf);
11729     e->runbegin = new_buf + (e->runbegin - e->buf);
11730     e->limit = new_buf + new_size;
11731     e->buf = new_buf;
11732   }
11733 
11734   return true;
11735 }
11736 
11737 /* Call when "bytes" bytes have been writte at e->ptr.  The caller *must* have
11738  * previously called reserve() with at least this many bytes. */
encoder_advance(upb_pb_encoder * e,size_t bytes)11739 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
11740   UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);
11741   e->ptr += bytes;
11742 }
11743 
11744 /* Call when all of the bytes for a handler have been written.  Flushes the
11745  * bytes if possible and necessary, returning false if this failed. */
commit(upb_pb_encoder * e)11746 static bool commit(upb_pb_encoder *e) {
11747   if (!e->top) {
11748     /* We aren't inside a delimited region.  Flush our accumulated bytes to
11749      * the output.
11750      *
11751      * TODO(haberman): in the future we may want to delay flushing for
11752      * efficiency reasons. */
11753     putbuf(e, e->buf, e->ptr - e->buf);
11754     e->ptr = e->buf;
11755   }
11756 
11757   return true;
11758 }
11759 
11760 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_pb_encoder * e,const void * data,size_t len)11761 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
11762   if (!reserve(e, len)) {
11763     return false;
11764   }
11765 
11766   memcpy(e->ptr, data, len);
11767   encoder_advance(e, len);
11768   return true;
11769 }
11770 
11771 /* Finish the current run by adding the run totals to the segment and message
11772  * length. */
accumulate(upb_pb_encoder * e)11773 static void accumulate(upb_pb_encoder *e) {
11774   size_t run_len;
11775   UPB_ASSERT(e->ptr >= e->runbegin);
11776   run_len = e->ptr - e->runbegin;
11777   e->segptr->seglen += run_len;
11778   top(e)->msglen += run_len;
11779   e->runbegin = e->ptr;
11780 }
11781 
11782 /* Call to indicate the start of delimited region for which the full length is
11783  * not yet known.  All data will be buffered until the length is known.
11784  * Delimited regions may be nested; their lengths will all be tracked properly. */
start_delim(upb_pb_encoder * e)11785 static bool start_delim(upb_pb_encoder *e) {
11786   if (e->top) {
11787     /* We are already buffering, advance to the next segment and push it on the
11788      * stack. */
11789     accumulate(e);
11790 
11791     if (++e->top == e->stacklimit) {
11792       /* TODO(haberman): grow stack? */
11793       return false;
11794     }
11795 
11796     if (++e->segptr == e->seglimit) {
11797       /* Grow segment buffer. */
11798       size_t old_size =
11799           (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
11800       size_t new_size = old_size * 2;
11801       upb_pb_encoder_segment *new_buf =
11802           upb_env_realloc(e->env, e->segbuf, old_size, new_size);
11803 
11804       if (new_buf == NULL) {
11805         return false;
11806       }
11807 
11808       e->segptr = new_buf + (e->segptr - e->segbuf);
11809       e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
11810       e->segbuf = new_buf;
11811     }
11812   } else {
11813     /* We were previously at the top level, start buffering. */
11814     e->segptr = e->segbuf;
11815     e->top = e->stack;
11816     e->runbegin = e->ptr;
11817   }
11818 
11819   *e->top = e->segptr - e->segbuf;
11820   e->segptr->seglen = 0;
11821   e->segptr->msglen = 0;
11822 
11823   return true;
11824 }
11825 
11826 /* Call to indicate the end of a delimited region.  We now know the length of
11827  * the delimited region.  If we are not nested inside any other delimited
11828  * regions, we can now emit all of the buffered data we accumulated. */
end_delim(upb_pb_encoder * e)11829 static bool end_delim(upb_pb_encoder *e) {
11830   size_t msglen;
11831   accumulate(e);
11832   msglen = top(e)->msglen;
11833 
11834   if (e->top == e->stack) {
11835     /* All lengths are now available, emit all buffered data. */
11836     char buf[UPB_PB_VARINT_MAX_LEN];
11837     upb_pb_encoder_segment *s;
11838     const char *ptr = e->buf;
11839     for (s = e->segbuf; s <= e->segptr; s++) {
11840       size_t lenbytes = upb_vencode64(s->msglen, buf);
11841       putbuf(e, buf, lenbytes);
11842       putbuf(e, ptr, s->seglen);
11843       ptr += s->seglen;
11844     }
11845 
11846     e->ptr = e->buf;
11847     e->top = NULL;
11848   } else {
11849     /* Need to keep buffering; propagate length info into enclosing
11850      * submessages. */
11851     --e->top;
11852     top(e)->msglen += msglen + upb_varint_size(msglen);
11853   }
11854 
11855   return true;
11856 }
11857 
11858 
11859 /* tag_t **********************************************************************/
11860 
/* A tag that has been varint-encoded ahead of time, together with the number
 * of bytes the encoding occupies. */

typedef struct {
  /* Number of valid bytes in "tag". */
  uint8_t bytes;

  /* The encoded tag bytes. */
  char tag[7];
} tag_t;
11867 
11868 /* Allocates a new tag for this field, and sets it in these handlerattr. */
new_tag(upb_handlers * h,const upb_fielddef * f,upb_wiretype_t wt,upb_handlerattr * attr)11869 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
11870                     upb_handlerattr *attr) {
11871   uint32_t n = upb_fielddef_number(f);
11872 
11873   tag_t *tag = upb_gmalloc(sizeof(tag_t));
11874   tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
11875 
11876   upb_handlerattr_init(attr);
11877   upb_handlerattr_sethandlerdata(attr, tag);
11878   upb_handlers_addcleanup(h, tag, upb_gfree);
11879 }
11880 
/* Appends the pre-encoded bytes of "tag" to the buffer.  Returns false if the
 * bytes could not be written. */
static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
  const char *tag_bytes = tag->tag;
  size_t tag_len = tag->bytes;

  return encode_bytes(e, tag_bytes, tag_len);
}
11884 
11885 
11886 /* encoding of wire types *****************************************************/
11887 
/* Appends the eight bytes of "val" to the buffer exactly as they are laid out
 * in memory.  Returns false if the bytes could not be written. */
static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  return encode_bytes(e, &val, sizeof val);
}
11892 
/* Appends the four bytes of "val" to the buffer exactly as they are laid out
 * in memory.  Returns false if the bytes could not be written. */
static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  return encode_bytes(e, &val, sizeof val);
}
11897 
/* Appends "val" to the buffer as a varint.  Space for the longest possible
 * varint is reserved up front; the write position then advances by the number
 * of bytes actually produced.  Returns false if the space could not be
 * reserved. */
static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
  size_t written;

  if (reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    written = upb_vencode64(val, e->ptr);
    encoder_advance(e, written);
    return true;
  }

  return false;
}
11906 
/* Returns the bytes of "d" reinterpreted as a 64-bit unsigned integer.  The
 * copy goes through memcpy() so that no pointer cast is involved. */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;

  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
11912 
/* Returns the bytes of "d" reinterpreted as a 32-bit unsigned integer.  The
 * copy goes through memcpy() so that no pointer cast is involved. */
static uint32_t flt2uint32(float d) {
  uint32_t bits;

  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
11918 
11919 
11920 /* encoding of proto types ****************************************************/
11921 
startmsg(void * c,const void * hd)11922 static bool startmsg(void *c, const void *hd) {
11923   upb_pb_encoder *e = c;
11924   UPB_UNUSED(hd);
11925   if (e->depth++ == 0) {
11926     upb_bytessink_start(e->output_, 0, &e->subc);
11927   }
11928   return true;
11929 }
11930 
/* endmsg handler: decrements the message nesting depth and, when leaving the
 * outermost message, ends the output bytessink.  "status" is not used.
 * Always returns true. */
static bool endmsg(void *c, const void *hd, upb_status *status) {
  upb_pb_encoder *e = c;

  UPB_UNUSED(status);
  UPB_UNUSED(hd);

  e->depth--;
  if (e->depth == 0) {
    upb_bytessink_end(e->output_);
  }
  return true;
}
11940 
encode_startdelimfield(void * c,const void * hd)11941 static void *encode_startdelimfield(void *c, const void *hd) {
11942   bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
11943   return ok ? c : UPB_BREAK;
11944 }
11945 
/* Unknown-field handler: copies the "len" bytes at "buf" to the output
 * unchanged and commits them.  Returns false if either step fails. */
static bool encode_unknown(void *c, const void *hd, const char *buf,
                           size_t len) {
  UPB_UNUSED(hd);

  if (!encode_bytes(c, buf, len)) {
    return false;
  }
  return commit(c);
}
11951 
/* End handler for a length-delimited field: closes the delimited region that
 * the matching start handler opened.  The handler data is not used. */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool closed;

  UPB_UNUSED(hd);

  closed = end_delim(c);
  return closed;
}
11956 
encode_startgroup(void * c,const void * hd)11957 static void *encode_startgroup(void *c, const void *hd) {
11958   return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
11959 }
11960 
/* End handler for a group: writes the tag held in the handler data and
 * commits it.  Returns false if either step fails. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
11964 
/* Start handler for a string: behaves exactly like the start handler for any
 * other length-delimited field.  "size_hint" is ignored. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure = encode_startdelimfield(c, hd);

  UPB_UNUSED(size_hint);
  return subclosure;
}
11969 
/* String data handler: appends the "len" bytes at "buf" to the buffer.
 * Returns "len" when the bytes were written and 0 when they were not.  The
 * handler data and the buffer handle are not used. */
static size_t encode_strbuf(void *c, const void *hd, const char *buf,
                            size_t len, const upb_bufhandle *h) {
  UPB_UNUSED(h);
  UPB_UNUSED(hd);

  if (!encode_bytes(c, buf, len)) {
    return 0;
  }
  return len;
}
11976 
11977 #define T(type, ctype, convert, encode)                                  \
11978   static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
11979     return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
11980   }                                                                      \
11981   static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
11982     UPB_UNUSED(hd);                                                      \
11983     return encode(e, (convert)(val));                                    \
11984   }
11985 
T(double,double,dbl2uint64,encode_fixed64)11986 T(double,   double,   dbl2uint64,   encode_fixed64)
11987 T(float,    float,    flt2uint32,   encode_fixed32)
11988 T(int64,    int64_t,  uint64_t,     encode_varint)
11989 T(int32,    int32_t,  int64_t,      encode_varint)
11990 T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
11991 T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
11992 T(bool,     bool,     bool,         encode_varint)
11993 T(uint32,   uint32_t, uint32_t,     encode_varint)
11994 T(uint64,   uint64_t, uint64_t,     encode_varint)
11995 T(enum,     int32_t,  uint32_t,     encode_varint)
11996 T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
11997 T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
11998 T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
11999 T(sint64,   int64_t,  upb_zzenc_64, encode_varint)
12000 
12001 #undef T
12002 
12003 
12004 /* code to build the handlers *************************************************/
12005 
12006 static void newhandlers_callback(const void *closure, upb_handlers *h) {
12007   const upb_msgdef *m;
12008   upb_msg_field_iter i;
12009 
12010   UPB_UNUSED(closure);
12011 
12012   upb_handlers_setstartmsg(h, startmsg, NULL);
12013   upb_handlers_setendmsg(h, endmsg, NULL);
12014   upb_handlers_setunknown(h, encode_unknown, NULL);
12015 
12016   m = upb_handlers_msgdef(h);
12017   for(upb_msg_field_begin(&i, m);
12018       !upb_msg_field_done(&i);
12019       upb_msg_field_next(&i)) {
12020     const upb_fielddef *f = upb_msg_iter_field(&i);
12021     bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
12022                   upb_fielddef_packed(f);
12023     upb_handlerattr attr;
12024     upb_wiretype_t wt =
12025         packed ? UPB_WIRE_TYPE_DELIMITED
12026                : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
12027 
12028     /* Pre-encode the tag for this field. */
12029     new_tag(h, f, wt, &attr);
12030 
12031     if (packed) {
12032       upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
12033       upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
12034     }
12035 
12036 #define T(upper, lower, upbtype)                                     \
12037   case UPB_DESCRIPTOR_TYPE_##upper:                                  \
12038     if (packed) {                                                    \
12039       upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
12040     } else {                                                         \
12041       upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
12042     }                                                                \
12043     break;
12044 
12045     switch (upb_fielddef_descriptortype(f)) {
12046       T(DOUBLE,   double,   double);
12047       T(FLOAT,    float,    float);
12048       T(INT64,    int64,    int64);
12049       T(INT32,    int32,    int32);
12050       T(FIXED64,  fixed64,  uint64);
12051       T(FIXED32,  fixed32,  uint32);
12052       T(BOOL,     bool,     bool);
12053       T(UINT32,   uint32,   uint32);
12054       T(UINT64,   uint64,   uint64);
12055       T(ENUM,     enum,     int32);
12056       T(SFIXED32, sfixed32, int32);
12057       T(SFIXED64, sfixed64, int64);
12058       T(SINT32,   sint32,   int32);
12059       T(SINT64,   sint64,   int64);
12060       case UPB_DESCRIPTOR_TYPE_STRING:
12061       case UPB_DESCRIPTOR_TYPE_BYTES:
12062         upb_handlers_setstartstr(h, f, encode_startstr, &attr);
12063         upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
12064         upb_handlers_setstring(h, f, encode_strbuf, &attr);
12065         break;
12066       case UPB_DESCRIPTOR_TYPE_MESSAGE:
12067         upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
12068         upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
12069         break;
12070       case UPB_DESCRIPTOR_TYPE_GROUP: {
12071         /* Endgroup takes a different tag (wire_type = END_GROUP). */
12072         upb_handlerattr attr2;
12073         new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
12074 
12075         upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
12076         upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
12077 
12078         upb_handlerattr_uninit(&attr2);
12079         break;
12080       }
12081     }
12082 
12083 #undef T
12084 
12085     upb_handlerattr_uninit(&attr);
12086   }
12087 }
12088 
/* Puts the encoder back into its starting state by clearing the current
 * segment pointer, the stack top and the nesting depth. */
void upb_pb_encoder_reset(upb_pb_encoder *e) {
  e->depth = 0;
  e->top = NULL;
  e->segptr = NULL;
}
12094 
12095 
12096 /* public API *****************************************************************/
12097 
upb_pb_encoder_newhandlers(const upb_msgdef * m,const void * owner)12098 const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
12099                                                const void *owner) {
12100   return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
12101 }
12102 
upb_pb_encoder_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)12103 upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
12104                                       upb_bytessink *output) {
12105   const size_t initial_bufsize = 256;
12106   const size_t initial_segbufsize = 16;
12107   /* TODO(haberman): make this configurable. */
12108   const size_t stack_size = 64;
12109 #ifndef NDEBUG
12110   const size_t size_before = upb_env_bytesallocated(env);
12111 #endif
12112 
12113   upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
12114   if (!e) return NULL;
12115 
12116   e->buf = upb_env_malloc(env, initial_bufsize);
12117   e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
12118   e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
12119 
12120   if (!e->buf || !e->segbuf || !e->stack) {
12121     return NULL;
12122   }
12123 
12124   e->limit = e->buf + initial_bufsize;
12125   e->seglimit = e->segbuf + initial_segbufsize;
12126   e->stacklimit = e->stack + stack_size;
12127 
12128   upb_pb_encoder_reset(e);
12129   upb_sink_reset(&e->input_, h, e);
12130 
12131   e->env = env;
12132   e->output_ = output;
12133   e->subc = output->closure;
12134   e->ptr = e->buf;
12135 
12136   /* If this fails, increase the value in encoder.h. */
12137   UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <=
12138                       UPB_PB_ENCODER_SIZE);
12139   return e;
12140 }
12141 
upb_pb_encoder_input(upb_pb_encoder * e)12142 upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
12143 
12144 
12145 
upb_loaddescriptor(const char * buf,size_t n,const void * owner,upb_status * status)12146 upb_filedef **upb_loaddescriptor(const char *buf, size_t n, const void *owner,
12147                                  upb_status *status) {
12148   /* Create handlers. */
12149   const upb_pbdecodermethod *decoder_m;
12150   const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
12151   upb_env env;
12152   upb_pbdecodermethodopts opts;
12153   upb_pbdecoder *decoder;
12154   upb_descreader *reader;
12155   bool ok;
12156   size_t i;
12157   upb_filedef **ret = NULL;
12158 
12159   upb_pbdecodermethodopts_init(&opts, reader_h);
12160   decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
12161 
12162   upb_env_init(&env);
12163   upb_env_reporterrorsto(&env, status);
12164 
12165   reader = upb_descreader_create(&env, reader_h);
12166   decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
12167 
12168   /* Push input data. */
12169   ok = upb_bufsrc_putbuf(buf, n, upb_pbdecoder_input(decoder));
12170 
12171   if (!ok) {
12172     goto cleanup;
12173   }
12174 
12175   ret = upb_gmalloc(sizeof (*ret) * (upb_descreader_filecount(reader) + 1));
12176 
12177   if (!ret) {
12178     goto cleanup;
12179   }
12180 
12181   for (i = 0; i < upb_descreader_filecount(reader); i++) {
12182     ret[i] = upb_descreader_file(reader, i);
12183     upb_filedef_ref(ret[i], owner);
12184   }
12185 
12186   ret[i] = NULL;
12187 
12188 cleanup:
12189   upb_env_uninit(&env);
12190   upb_handlers_unref(reader_h, &reader_h);
12191   upb_pbdecodermethod_unref(decoder_m, &decoder_m);
12192   return ret;
12193 }
12194 /*
12195  * upb::pb::TextPrinter
12196  *
12197  * OPT: This is not optimized at all.  It uses printf() which parses the format
12198  * string every time, and it allocates memory for every put.
12199  */
12200 
12201 
12202 #include <ctype.h>
12203 #include <float.h>
12204 #include <inttypes.h>
12205 #include <stdarg.h>
12206 #include <stdio.h>
12207 #include <string.h>
12208 
12209 
/* State of one text-format printer. */
struct upb_textprinter {
  /* Sink through which callers deliver message events to the printer. */
  upb_sink input_;
  /* Byte sink that receives the generated text. */
  upb_bytessink *output_;
  /* Current nesting level; indent() emits two spaces per level. */
  int indent_depth_;
  /* When true, fields are separated by ' ' instead of '\n' and no indentation
   * is written. */
  bool single_line_;
  /* Sub-closure filled in by upb_bytessink_start() and passed to every
   * upb_bytessink_putbuf() call. */
  void *subc;
};
12217 
/* Jumps to the enclosing function's `err` label when |x| evaluates to a
 * negative value.  Expands to a bare `if` statement, so use it only as a
 * standalone statement. */
#define CHECK(x) if ((x) < 0) goto err;
12219 
/* Returns the part of |longname| that follows its last '.', or |longname|
 * itself when it contains no '.'.  The result points into |longname|. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
12224 
/* Writes two spaces per nesting level to the output, unless the printer is in
 * single-line mode.  Always returns 0. */
static int indent(upb_textprinter *p) {
  int level;

  if (p->single_line_) {
    return 0;
  }

  for (level = 0; level < p->indent_depth_; level++) {
    upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
  }
  return 0;
}
12232 
/* Writes the field terminator: a space in single-line mode, a newline
 * otherwise.  Always returns 0. */
static int endfield(upb_textprinter *p) {
  char terminator = '\n';
  if (p->single_line_) {
    terminator = ' ';
  }
  upb_bytessink_putbuf(p->output_, p->subc, &terminator, 1, NULL);
  return 0;
}
12238 
/* Writes |len| bytes of |buf| to the printer's output with C-style escaping.
 *
 * Newline, carriage return, tab, both quote characters and backslash are
 * written as two-character escapes.  Other bytes are written as a
 * three-digit octal escape when they are not printable; when |preserve_utf8|
 * is true, bytes >= 0x80 are passed through unescaped instead.  Output is
 * staged in a stack buffer that is flushed to the sink whenever it fills up.
 * Always returns 0. */
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                      bool preserve_utf8) {
  /* Based on CEscapeInternal() from Google's protobuf release. */
  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
  const char *end = buf + len;

  /* I think hex is prettier and more useful, but proto2 uses octal; should
   * investigate whether it can parse hex also. */
  const bool use_hex = false;
  bool last_hex_escape = false; /* true if last output char was \xNN */

  for (; buf < end; buf++) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behavior. */
    const unsigned char ch = (unsigned char)*buf;
    bool is_hex_escape = false;

    /* The longest thing written per input byte is a 4-character escape, and
     * sprintf() also stores a terminating NUL, so 5 bytes must be free. */
    if (dstend - dst < 5) {
      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
      dst = dstbuf;
    }

    switch (ch) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
        /* Note that if we emit \xNN and the buf character after that is a hex
         * digit then that digit must be escaped too to prevent it being
         * interpreted as part of the character code by C. */
        if ((!preserve_utf8 || ch < 0x80) &&
            (!isprint(ch) || (last_hex_escape && isxdigit(ch)))) {
          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (unsigned int)ch);
          is_hex_escape = use_hex;
          dst += 4;
        } else {
          *(dst++) = *buf;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }
  /* Flush remaining data. */
  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
  return 0;
}
12285 
/* printf-style output: formats |fmt| and its arguments into a temporary heap
 * buffer and writes the result to the printer's output sink.
 *
 * Returns false if the length computation fails or the buffer cannot be
 * allocated; otherwise returns the result of upb_bytessink_putbuf() converted
 * to bool. */
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_list args_copy;
  char *str = NULL;
  int written;
  int len;
  bool ok = false;

  va_start(args, fmt);

  /* Run once to get the length of the string. */
  _upb_va_copy(args_copy, args);
  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  /* A negative length signals a formatting error; it must not reach the
   * allocation size below. */
  if (len < 0) {
    goto done;
  }

  /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
  str = upb_gmalloc((size_t)len + 1);
  if (!str) {
    goto done;
  }

  written = vsprintf(str, fmt, args);
  UPB_ASSERT(written == len);

  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
  upb_gfree(str);

done:
  /* Every va_start() needs a matching va_end(), including on error paths. */
  va_end(args);
  return ok;
}
12312 
12313 
12314 /* handlers *******************************************************************/
12315 
textprinter_startmsg(void * c,const void * hd)12316 static bool textprinter_startmsg(void *c, const void *hd) {
12317   upb_textprinter *p = c;
12318   UPB_UNUSED(hd);
12319   if (p->indent_depth_ == 0) {
12320     upb_bytessink_start(p->output_, 0, &p->subc);
12321   }
12322   return true;
12323 }
12324 
/* End-of-message handler.  At nesting depth zero it ends the output byte
 * sink.  |s| is not used.  Always returns true. */
static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
  upb_textprinter *printer = c;
  const bool at_top_level = (printer->indent_depth_ == 0);

  UPB_UNUSED(hd);
  UPB_UNUSED(s);

  if (at_top_level) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
12334 
/* Defines the value handler textprinter_put<name>() for a scalar of C type
 * |ctype|.  The handler writes the indentation, then "<field name>: <value>"
 * using printf conversion |fmt|, then the field terminator.  The handler data
 * is the field's upb_fielddef.  The handler returns false only when indent()
 * or endfield() reports a negative value; the result of putf() is not
 * checked. */
#define TYPE(name, ctype, fmt) \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *p = closure;                                              \
    const upb_fielddef *f = handler_data;                                      \
    CHECK(indent(p));                                                          \
    putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
    CHECK(endfield(p));                                                        \
    return true;                                                               \
  err:                                                                         \
    return false;                                                              \
}
12347 
textprinter_putbool(void * closure,const void * handler_data,bool val)12348 static bool textprinter_putbool(void *closure, const void *handler_data,
12349                                 bool val) {
12350   upb_textprinter *p = closure;
12351   const upb_fielddef *f = handler_data;
12352   CHECK(indent(p));
12353   putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
12354   CHECK(endfield(p));
12355   return true;
12356 err:
12357   return false;
12358 }
12359 
/* Two-level stringification so that a macro argument such as FLT_DIG is
 * expanded to its value before being turned into a string literal. */
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)

/* Scalar value handlers.  Floating-point values are printed with "%g" using
 * FLT_DIG / DBL_DIG significant digits respectively. */
TYPE(int32,  int32_t,  "%" PRId32)
TYPE(int64,  int64_t,  "%" PRId64)
TYPE(uint32, uint32_t, "%" PRIu32)
TYPE(uint64, uint64_t, "%" PRIu64)
TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")

#undef TYPE
12371 
12372 /* Output a symbolic value from the enum if found, else just print as int32. */
12373 static bool textprinter_putenum(void *closure, const void *handler_data,
12374                                 int32_t val) {
12375   upb_textprinter *p = closure;
12376   const upb_fielddef *f = handler_data;
12377   const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
12378   const char *label = upb_enumdef_iton(enum_def, val);
12379   if (label) {
12380     indent(p);
12381     putf(p, "%s: %s", upb_fielddef_name(f), label);
12382     endfield(p);
12383   } else {
12384     if (!textprinter_putint32(closure, handler_data, val))
12385       return false;
12386   }
12387   return true;
12388 }
12389 
textprinter_startstr(void * closure,const void * handler_data,size_t size_hint)12390 static void *textprinter_startstr(void *closure, const void *handler_data,
12391                       size_t size_hint) {
12392   upb_textprinter *p = closure;
12393   const upb_fielddef *f = handler_data;
12394   UPB_UNUSED(size_hint);
12395   indent(p);
12396   putf(p, "%s: \"", upb_fielddef_name(f));
12397   return p;
12398 }
12399 
textprinter_endstr(void * closure,const void * handler_data)12400 static bool textprinter_endstr(void *closure, const void *handler_data) {
12401   upb_textprinter *p = closure;
12402   UPB_UNUSED(handler_data);
12403   putf(p, "\"");
12404   endfield(p);
12405   return true;
12406 }
12407 
textprinter_putstr(void * closure,const void * hd,const char * buf,size_t len,const upb_bufhandle * handle)12408 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
12409                                  size_t len, const upb_bufhandle *handle) {
12410   upb_textprinter *p = closure;
12411   const upb_fielddef *f = hd;
12412   UPB_UNUSED(handle);
12413   CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
12414   return len;
12415 err:
12416   return 0;
12417 }
12418 
textprinter_startsubmsg(void * closure,const void * handler_data)12419 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
12420   upb_textprinter *p = closure;
12421   const char *name = handler_data;
12422   CHECK(indent(p));
12423   putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
12424   p->indent_depth_++;
12425   return p;
12426 err:
12427   return UPB_BREAK;
12428 }
12429 
textprinter_endsubmsg(void * closure,const void * handler_data)12430 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
12431   upb_textprinter *p = closure;
12432   UPB_UNUSED(handler_data);
12433   p->indent_depth_--;
12434   CHECK(indent(p));
12435   upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
12436   CHECK(endfield(p));
12437   return true;
12438 err:
12439   return false;
12440 }
12441 
/* Handlers-construction callback for the text printer.
 *
 * Registers the start/end-of-message handlers on |h| and then one handler
 * set per field of the message, selected by the field's type.  The handler
 * data is the field definition, except for submessage fields where it is the
 * name string to print.  |c| is unused. */
static void onmreg(const void *c, upb_handlers *h) {
  const upb_msgdef *msg = upb_handlers_msgdef(h);
  upb_msg_field_iter it;
  UPB_UNUSED(c);

  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);

  upb_msg_field_begin(&it, msg);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *field = upb_msg_iter_field(&it);
    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&attr, field);

    switch (upb_fielddef_type(field)) {
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, field, textprinter_putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, field, textprinter_putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, field, textprinter_putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, field, textprinter_putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, field, textprinter_putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, field, textprinter_putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, field, textprinter_putbool, &attr);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        upb_handlers_setstartstr(h, field, textprinter_startstr, &attr);
        upb_handlers_setstring(h, field, textprinter_putstr, &attr);
        upb_handlers_setendstr(h, field, textprinter_endstr, &attr);
        break;
      case UPB_TYPE_MESSAGE: {
        /* Tag-delimited fields are labelled with the short name of their
         * message type; all other submessages use the field name. */
        const char *label;
        if (upb_fielddef_istagdelim(field)) {
          label = shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(field)));
        } else {
          label = upb_fielddef_name(field);
        }
        upb_handlerattr_sethandlerdata(&attr, label);
        upb_handlers_setstartsubmsg(h, field, textprinter_startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, field, textprinter_endsubmsg, &attr);
        break;
      }
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, field, textprinter_putenum, &attr);
        break;
    }

    upb_msg_field_next(&it);
  }
}
12500 
/* Resets the nesting depth to zero and selects single-line or multi-line
 * output. */
static void textprinter_reset(upb_textprinter *p, bool single_line) {
  p->indent_depth_ = 0;
  p->single_line_ = single_line;
}
12505 
12506 
12507 /* Public API *****************************************************************/
12508 
upb_textprinter_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)12509 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
12510                                         upb_bytessink *output) {
12511   upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
12512   if (!p) return NULL;
12513 
12514   p->output_ = output;
12515   upb_sink_reset(&p->input_, h, p);
12516   textprinter_reset(p, false);
12517 
12518   return p;
12519 }
12520 
upb_textprinter_newhandlers(const upb_msgdef * m,const void * owner)12521 const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
12522                                                 const void *owner) {
12523   return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
12524 }
12525 
upb_textprinter_input(upb_textprinter * p)12526 upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
12527 
/* Switches printer |p| between single-line output (true) and multi-line
 * output (false).  The nesting depth is left unchanged. */
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  upb_textprinter *printer = p;
  printer->single_line_ = single_line;
}
12531 
12532 
/* Wire type used on the wire for each descriptor type when the field is not
 * packed.  Index is descriptor type; the trailing comment on each entry names
 * the descriptor type that the slot corresponds to.  Slot 0 does not
 * correspond to a field type and holds END_GROUP. */
const uint8_t upb_pb_native_wire_types[] = {
  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
  UPB_WIRE_TYPE_VARINT,        /* INT64 */
  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
  UPB_WIRE_TYPE_VARINT,        /* INT32 */
  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
  UPB_WIRE_TYPE_VARINT,        /* BOOL */
  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
  UPB_WIRE_TYPE_VARINT,        /* ENUM */
  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
};
12555 
12556 /* A basic branch-based decoder, uses 32-bit values to get good performance
12557  * on 32-bit architectures (but performs well on 64-bits also).
12558  * This scheme comes from the original Google Protobuf implementation
12559  * (proto2). */
upb_vdecode_max8_branch32(upb_decoderet r)12560 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
12561   upb_decoderet err = {NULL, 0};
12562   const char *p = r.p;
12563   uint32_t low = (uint32_t)r.val;
12564   uint32_t high = 0;
12565   uint32_t b;
12566   b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
12567   b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
12568   b = *(p++); low  |= (b & 0x7fU) << 28;
12569               high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
12570   b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
12571   b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
12572   b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
12573   b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
12574   b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
12575   return err;
12576 
12577 done:
12578   r.val = ((uint64_t)high << 32) | low;
12579   r.p = p;
12580   return r;
12581 }
12582 
12583 /* Like the previous, but uses 64-bit values. */
upb_vdecode_max8_branch64(upb_decoderet r)12584 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
12585   const char *p = r.p;
12586   uint64_t val = r.val;
12587   uint64_t b;
12588   upb_decoderet err = {NULL, 0};
12589   b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
12590   b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
12591   b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
12592   b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
12593   b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
12594   b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
12595   b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
12596   b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
12597   return err;
12598 
12599 done:
12600   r.val = val;
12601   r.p = p;
12602   return r;
12603 }
12604 
12605 #line 1 "upb/json/parser.rl"
12606 /*
12607 ** upb::json::Parser (upb_json_parser)
12608 **
12609 ** A parser that uses the Ragel State Machine Compiler to generate
12610 ** the finite automata.
12611 **
12612 ** Ragel only natively handles regular languages, but we can manually
12613 ** program it a bit to handle context-free languages like JSON, by using
12614 ** the "fcall" and "fret" constructs.
12615 **
12616 ** This parser can handle the basics, but needs several things to be fleshed
12617 ** out:
12618 **
12619 ** - handling of unicode escape sequences (including high surrogate pairs).
12620 ** - properly check and report errors for unknown fields, stack overflow,
12621 **   improper array nesting (or lack of nesting).
12622 ** - handling of base64 sequences with padding characters.
12623 ** - handling of push-back (non-success returns from sink functions).
12624 ** - handling of keys/escape-sequences/etc that span input buffers.
12625 */
12626 
12627 #include <ctype.h>
12628 #include <errno.h>
12629 #include <float.h>
12630 #include <math.h>
12631 #include <stdint.h>
12632 #include <stdio.h>
12633 #include <stdlib.h>
12634 #include <string.h>
12635 
12636 #include <time.h>
12637 
12638 
12639 #define UPB_JSON_MAX_DEPTH 64
12640 
/* Type of value message.  These constants are passed as the |value_type|
 * argument of start_value_object(). */
enum {
  VALUE_NULLVALUE   = 0,
  VALUE_NUMBERVALUE = 1,
  VALUE_STRINGVALUE = 2,
  VALUE_BOOLVALUE   = 3,
  VALUE_STRUCTVALUE = 4,
  VALUE_LISTVALUE   = 5
};
12650 
12651 /* Forward declare */
12652 static bool is_top_level(upb_json_parser *p);
12653 static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type);
12654 static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type);
12655 
12656 static bool is_number_wrapper_object(upb_json_parser *p);
12657 static bool does_number_wrapper_start(upb_json_parser *p);
12658 static bool does_number_wrapper_end(upb_json_parser *p);
12659 
12660 static bool is_string_wrapper_object(upb_json_parser *p);
12661 static bool does_string_wrapper_start(upb_json_parser *p);
12662 static bool does_string_wrapper_end(upb_json_parser *p);
12663 
12664 static bool is_fieldmask_object(upb_json_parser *p);
12665 static bool does_fieldmask_start(upb_json_parser *p);
12666 static bool does_fieldmask_end(upb_json_parser *p);
12667 static void start_fieldmask_object(upb_json_parser *p);
12668 static void end_fieldmask_object(upb_json_parser *p);
12669 
12670 static void start_wrapper_object(upb_json_parser *p);
12671 static void end_wrapper_object(upb_json_parser *p);
12672 
12673 static void start_value_object(upb_json_parser *p, int value_type);
12674 static void end_value_object(upb_json_parser *p);
12675 
12676 static void start_listvalue_object(upb_json_parser *p);
12677 static void end_listvalue_object(upb_json_parser *p);
12678 
12679 static void start_structvalue_object(upb_json_parser *p);
12680 static void end_structvalue_object(upb_json_parser *p);
12681 
12682 static void start_object(upb_json_parser *p);
12683 static void end_object(upb_json_parser *p);
12684 
12685 static void start_any_object(upb_json_parser *p, const char *ptr);
12686 static bool end_any_object(upb_json_parser *p, const char *ptr);
12687 
12688 static bool start_subobject(upb_json_parser *p);
12689 static void end_subobject(upb_json_parser *p);
12690 
12691 static void start_member(upb_json_parser *p);
12692 static void end_member(upb_json_parser *p);
12693 static bool end_membername(upb_json_parser *p);
12694 
12695 static void start_any_member(upb_json_parser *p, const char *ptr);
12696 static void end_any_member(upb_json_parser *p, const char *ptr);
12697 static bool end_any_membername(upb_json_parser *p);
12698 
12699 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
12700              const upb_bufhandle *handle);
12701 static bool end(void *closure, const void *hd);
12702 
12703 static const char eof_ch = 'e';
12704 
/* stringsink: a bytes sink that accumulates everything written to it in a
 * growable heap buffer. */
typedef struct {
  /* Handler table wired to stringsink_start()/stringsink_string(). */
  upb_byteshandler handler;
  /* The sink that writers push bytes into. */
  upb_bytessink sink;
  /* Heap buffer holding the accumulated bytes. */
  char *ptr;
  /* |len| is the number of bytes stored; |size| is the buffer capacity. */
  size_t len, size;
} upb_stringsink;
12712 
12713 
stringsink_start(void * _sink,const void * hd,size_t size_hint)12714 static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
12715   upb_stringsink *sink = _sink;
12716   sink->len = 0;
12717   UPB_UNUSED(hd);
12718   UPB_UNUSED(size_hint);
12719   return sink;
12720 }
12721 
stringsink_string(void * _sink,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)12722 static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
12723                                 size_t len, const upb_bufhandle *handle) {
12724   upb_stringsink *sink = _sink;
12725   size_t new_size = sink->size;
12726 
12727   UPB_UNUSED(hd);
12728   UPB_UNUSED(handle);
12729 
12730   while (sink->len + len > new_size) {
12731     new_size *= 2;
12732   }
12733 
12734   if (new_size != sink->size) {
12735     sink->ptr = realloc(sink->ptr, new_size);
12736     sink->size = new_size;
12737   }
12738 
12739   memcpy(sink->ptr + sink->len, ptr, len);
12740   sink->len += len;
12741 
12742   return len;
12743 }
12744 
/* Initializes |sink|: wires up its byte handlers, points its bytes sink at
 * them with the sink itself as closure, and allocates an initial 32-byte
 * buffer.  The result of malloc() is not checked here. */
void upb_stringsink_init(upb_stringsink *sink) {
  upb_byteshandler_init(&sink->handler);
  upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
  upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);

  upb_bytessink_reset(&sink->sink, &sink->handler, sink);

  sink->len = 0;
  sink->size = 32;
  sink->ptr = malloc(sink->size);
}
12756 
/* Releases the buffer owned by |sink|.  The sink structure itself is not
 * freed. */
void upb_stringsink_uninit(upb_stringsink *sink) {
  char *buffer = sink->ptr;
  free(buffer);
}
12758 
/* Extra state attached to a parser frame while a google.protobuf.Any message
 * is being parsed (see upb_jsonparser_frame.any_frame). */
typedef struct {
  /* For encoding Any value field in binary format. */
  const upb_handlers *encoder_handlers;
  upb_pb_encoder *encoder;
  upb_stringsink stringsink;

  /* For decoding Any value field in json format. */
  upb_json_parsermethod *parser_method;
  upb_json_parser* parser;
  upb_sink sink;

  /* Mark the range of uninterpreted values in json input before type url. */
  const char *before_type_url_start;
  const char *before_type_url_end;

  /* Mark the range of uninterpreted values in json input after type url. */
  const char *after_type_url_start;
} upb_jsonparser_any_frame;
12777 
/* One entry of the JSON parser's scope stack (upb_json_parser.stack).  A new
 * frame is reset to its defaults by init_frame(). */
typedef struct {
  upb_sink sink;

  /* The current message in which we're parsing, and the field whose value we're
   * expecting next. */
  const upb_msgdef *m;
  const upb_fielddef *f;

  /* The table mapping json name to fielddef for this message. */
  upb_strtable *name_table;

  /* We are in a repeated-field context. We need this flag to decide whether to
   * handle the array as a normal repeated field or a
   * google.protobuf.ListValue/google.protobuf.Value. */
  bool is_repeated;

  /* We are in a repeated-field context, ready to emit mapentries as
   * submessages. This flag alters the start-of-object (open-brace) behavior to
   * begin a sequence of mapentry messages rather than a single submessage. */
  bool is_map;

  /* We are in a map-entry message context. This flag is set when parsing the
   * value field of a single map entry and indicates to all value-field parsers
   * (subobjects, strings, numbers, and bools) that the map-entry submessage
   * should end as soon as the value is parsed. */
  bool is_mapentry;

  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   * message's map field that we're currently parsing. This differs from |f|
   * because |f| is the field in the *current* message (i.e., the map-entry
   * message itself), not the parent's field that leads to this map. */
  const upb_fielddef *mapfield;

  /* We are in an Any message context. This flag is set when parsing the Any
   * message and indicates to all field parsers (subobjects, strings, numbers,
   * and bools) that the parsed field should be serialized as binary data or
   * cached (type url not found yet). */
  bool is_any;

  /* The type of packed message in Any. */
  upb_jsonparser_any_frame *any_frame;

  /* True if the field to be parsed is unknown. */
  bool is_unknown_field;
} upb_jsonparser_frame;
12823 
/* Puts |frame| into its empty state: every pointer member touched here becomes
 * NULL and every context flag becomes false. */
static void init_frame(upb_jsonparser_frame* frame) {
  /* Message/field context. */
  frame->m = NULL;
  frame->f = NULL;
  frame->name_table = NULL;
  frame->mapfield = NULL;
  frame->any_frame = NULL;

  /* Context flags. */
  frame->is_repeated = false;
  frame->is_map = false;
  frame->is_mapentry = false;
  frame->is_any = false;
  frame->is_unknown_field = false;
}
12836 
/* State for one JSON parser instance: the frame stack, the state of the
 * Ragel-generated state machine, and the scratch state used by the accumulate,
 * multipart and capture helpers in this file. */
struct upb_json_parser {
  /* Environment used by this file for allocation (upb_env_realloc) and for
   * reporting errors (upb_env_reporterror). */
  upb_env *env;
  /* Parser method; set_name_table() looks up per-message name tables in
   * method->name_tables. */
  const upb_json_parsermethod *method;
  upb_bytessink input_;

  /* Stack to track the JSON scopes we are in.  |top| is the current frame;
   * check_stack() reports "Nesting too deep" when |top| + 1 equals |limit|. */
  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
  upb_jsonparser_frame *top;
  upb_jsonparser_frame *limit;

  /* Most recent error; filled in before each upb_env_reporterror() call. */
  upb_status status;

  /* Ragel's internal parsing stack for the parsing state machine. */
  int current_state;
  int parser_stack[UPB_JSON_MAX_DEPTH];
  int parser_top;

  /* The handle for the current buffer. */
  const upb_bufhandle *handle;

  /* Accumulate buffer.  See details in parser.rl.
   * |accumulated| points either at aliased input or at |accumulate_buf|;
   * it is NULL (with |accumulated_len| == 0) when nothing is accumulated. */
  const char *accumulated;
  size_t accumulated_len;
  char *accumulate_buf;
  size_t accumulate_buf_size;

  /* Multi-part text data.  See details in parser.rl.
   * |multipart_state| holds one of the MULTIPART_* values. */
  int multipart_state;
  upb_selector_t string_selector;

  /* Input capture.  See details in parser.rl.
   * NULL when no capture is active; &suspend_capture while a capture is
   * suspended across a buffer seam. */
  const char *capture;

  /* Intermediate result of parsing a unicode escape sequence. */
  uint32_t digit;

  /* For resolve type url in Any. */
  const upb_symtab *symtab;

  /* Whether to proceed if unknown field is met. */
  bool ignore_json_unknown;

  /* Cache for parsing timestamp due to base and zone are handled in different
   * handlers. */
  struct tm tm;
};
12883 
start_jsonparser_frame(upb_json_parser * p)12884 static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) {
12885   upb_jsonparser_frame *inner;
12886   inner = p->top + 1;
12887   init_frame(inner);
12888   return inner;
12889 }
12890 
/* Parser method for a message type: the byte handler that feeds input to the
 * parser plus the json-name lookup tables consulted while parsing.
 * NOTE(review): |base| suggests instances are reference counted -- confirm
 * against upb_json_parsermethod_new()/unref(). */
struct upb_json_parsermethod {
  upb_refcounted base;

  upb_byteshandler input_handler_;

  /* Mainly for the purposes of refcounting, so all the fielddefs we point
   * to stay alive. */
  const upb_msgdef *msg;

  /* Keys are upb_msgdef*, values are upb_strtable (json_name -> fielddef) */
  upb_inttable name_tables;
};
12903 
/* Returns false from the enclosing function when |x| evaluates to false.
 *
 * The body is wrapped in do/while(0) so that the macro expands to exactly one
 * statement.  A bare `if` would capture a following `else` when the macro is
 * used as the body of an unbraced if/else.  As before, the invocation must be
 * followed by a semicolon. */
#define PARSER_CHECK_RETURN(x) \
  do {                         \
    if (!(x)) return false;    \
  } while (0)
12905 
/* Clears |frame|: no encoder, no nested parser, and no recorded positions
 * around the type url. */
static void json_parser_any_frame_reset(upb_jsonparser_any_frame *frame) {
  /* Encoder side. */
  frame->encoder = NULL;
  frame->encoder_handlers = NULL;

  /* Nested parser side. */
  frame->parser = NULL;
  frame->parser_method = NULL;

  /* Recorded input positions before/after the type url. */
  frame->after_type_url_start = NULL;
  frame->before_type_url_end = NULL;
  frame->before_type_url_start = NULL;
}
12915 
/* Sets up |frame| for a payload of type |payload_type|: creates a pb encoder
 * that writes into frame->stringsink, and a nested JSON parser whose output
 * sink is that encoder.  The nested parser inherits |p|'s env, symtab and
 * ignore_json_unknown setting.
 *
 * After this call frame->encoder is non-NULL, which is what
 * json_parser_any_frame_has_type_url() tests. */
static void json_parser_any_frame_set_payload_type(
    upb_json_parser *p,
    upb_jsonparser_any_frame *frame,
    const upb_msgdef *payload_type) {
  /* Initialize encoder. */
  /* The address of the member doubles as the owner token passed to
   * newhandlers; json_parser_any_frame_free() passes the same address to
   * the matching unref call. */
  frame->encoder_handlers =
      upb_pb_encoder_newhandlers(payload_type, &frame->encoder_handlers);
  upb_stringsink_init(&frame->stringsink);
  frame->encoder =
      upb_pb_encoder_create(
          p->env, frame->encoder_handlers,
          &frame->stringsink.sink);

  /* Initialize parser. */
  frame->parser_method =
      upb_json_parsermethod_new(payload_type, &frame->parser_method);
  /* The sink must be bound to the encoder before the parser that writes to it
   * is created. */
  upb_sink_reset(&frame->sink, frame->encoder_handlers, frame->encoder);
  frame->parser =
      upb_json_parser_create(p->env, frame->parser_method, p->symtab,
                             &frame->sink, p->ignore_json_unknown);
}
12937 
/* Releases what json_parser_any_frame_set_payload_type() acquired for |frame|:
 * the encoder handlers, the parser method, and the string sink.  The owner
 * tokens passed to the unref calls are the same member addresses that were
 * passed when the references were taken.
 * NOTE(review): frame->encoder and frame->parser are not released here;
 * presumably they are owned by the env they were created in -- confirm. */
static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) {
  upb_handlers_unref(frame->encoder_handlers,
                     &frame->encoder_handlers);
  upb_json_parsermethod_unref(frame->parser_method,
                              &frame->parser_method);
  upb_stringsink_uninit(&frame->stringsink);
}
12945 
/* Returns true once an encoder has been attached to |frame|. */
static bool json_parser_any_frame_has_type_url(
    upb_jsonparser_any_frame *frame) {
  if (frame->encoder == NULL) {
    return false;
  }
  return true;
}
12950 
/* Returns true if the recorded "before type url" range of |frame| is
 * non-empty, i.e. its start and end positions differ. */
static bool json_parser_any_frame_has_value_before_type_url(
    upb_jsonparser_any_frame *frame) {
  if (frame->before_type_url_start == frame->before_type_url_end) {
    return false;
  }
  return true;
}
12955 
/* Returns true if a start position after the type url has been recorded in
 * |frame|. */
static bool json_parser_any_frame_has_value_after_type_url(
    upb_jsonparser_any_frame *frame) {
  if (frame->after_type_url_start == NULL) {
    return false;
  }
  return true;
}
12960 
/* Returns true if |frame| has recorded value text on either side of the type
 * url. */
static bool json_parser_any_frame_has_value(
    upb_jsonparser_any_frame *frame) {
  if (json_parser_any_frame_has_value_before_type_url(frame)) {
    return true;
  }
  return json_parser_any_frame_has_value_after_type_url(frame);
}
12966 
/* Records |ptr| as the end of the "before type url" range, but only while no
 * encoder has been attached to |frame|.  Afterwards the call is a no-op. */
static void json_parser_any_frame_set_before_type_url_end(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (frame->encoder != NULL) {
    return;
  }
  frame->before_type_url_end = ptr;
}
12974 
/* Records |ptr| as the start of the "after type url" range.  Only the first
 * call made after the type url is known has any effect. */
static void json_parser_any_frame_set_after_type_url_start_once(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (!json_parser_any_frame_has_type_url(frame)) {
    return;  /* Type url not seen yet. */
  }
  if (frame->after_type_url_start != NULL) {
    return;  /* Already recorded. */
  }
  frame->after_type_url_start = ptr;
}
12983 
12984 /* Used to signal that a capture has been suspended. */
12985 static char suspend_capture;
12986 
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)12987 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
12988                                              upb_handlertype_t type) {
12989   upb_selector_t sel;
12990   bool ok = upb_handlers_getselector(p->top->f, type, &sel);
12991   UPB_ASSERT(ok);
12992   return sel;
12993 }
12994 
parser_getsel(upb_json_parser * p)12995 static upb_selector_t parser_getsel(upb_json_parser *p) {
12996   return getsel_for_handlertype(
12997       p, upb_handlers_getprimitivehandlertype(p->top->f));
12998 }
12999 
/* Returns true if there is room to push one more frame.  Otherwise reports a
 * "Nesting too deep" error through the env and returns false. */
static bool check_stack(upb_json_parser *p) {
  if ((p->top + 1) != p->limit) {
    return true;
  }

  upb_status_seterrmsg(&p->status, "Nesting too deep");
  upb_env_reporterror(p->env, &p->status);
  return false;
}
13009 
/* Sets frame->name_table to the table registered for frame->m in the parser
 * method's name_tables.  The lookup is asserted to succeed. */
static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
  upb_value entry;
  bool found = upb_inttable_lookupptr(&p->method->name_tables, frame->m,
                                      &entry);
  UPB_ASSERT(found);
  frame->name_table = upb_value_getptr(entry);
}
13016 
13017 /* There are GCC/Clang built-ins for overflow checking which we could start
13018  * using if there was any performance benefit to it. */
13019 
/* Computes a + b.  On success stores the sum in *c and returns true.  If the
 * sum would exceed SIZE_MAX, returns false and leaves *c untouched. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;
  if (b > headroom) {
    return false;
  }
  *c = a + b;
  return true;
}
13025 
/* Returns a * b, clamped to SIZE_MAX if the product does not fit in size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  /* The product overflows exactly when a exceeds SIZE_MAX / b. */
  if (b != 0 && a > SIZE_MAX / b) {
    return SIZE_MAX;
  }
  return a * b;
}
13034 
13035 
13036 /* Base64 decoding ************************************************************/
13037 
13038 /* TODO(haberman): make this streaming. */
13039 
/* Base64 decode table, indexed by unsigned char value (256 entries).  Each
 * entry is the 6-bit value of that character in the standard base64 alphabet
 * (A-Z, a-z, 0-9, '+', '/'), or -1 if the character is not part of the
 * alphabet.  Note that the padding character '=' also maps to -1. */
static const signed char b64table[] = {
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
  -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
  07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
  -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};
13074 
13075 /* Returns the table value sign-extended to 32 bits.  Knowing that the upper
13076  * bits will be 1 for unrecognized characters makes it easier to check for
13077  * this error condition later (see below). */
b64lookup(unsigned char ch)13078 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
13079 
/* Returns true if |ch| is neither a character of the base64 alphabet nor the
 * padding character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
13083 
/* Decodes the |len| bytes of base64 text at |ptr| and pushes the decoded bytes
 * to the current frame's sink with selector |sel|.
 *
 * The input must be a whole number of 4-character groups.  Only the final
 * group may contain '=' padding ("xx==" or "xxx="); a padded group followed by
 * more input is rejected as bad padding rather than silently dropping the
 * remaining data.
 *
 * Returns true on success.  On failure an error naming the current field is
 * stored in p->status and reported through the env, and false is returned.
 * Groups decoded before the error have already been pushed to the sink. */
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
                        size_t len) {
  const char *limit = ptr + len;
  for (; ptr < limit; ptr += 4) {
    uint32_t val;
    char output[3];

    if (limit - ptr < 4) {
      upb_status_seterrf(&p->status,
                         "Base64 input for bytes field not a multiple of 4: %s",
                         upb_fielddef_name(p->top->f));
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    /* Convert to uint32_t before shifting: b64lookup() returns -1 for
     * characters outside the alphabet, and left-shifting a negative signed
     * value is undefined behavior.  As unsigned, -1 is all ones, so the upper
     * bit of |val| still ends up set if any character was unrecognized. */
    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6  |
          (uint32_t)b64lookup(ptr[3]);

    /* Test the upper bit; returns true if any of the characters returned -1. */
    if (val & 0x80000000) {
      goto otherchar;
    }

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    output[2] = val & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
  }
  return true;

otherchar:
  /* At least one of the four characters is not in the alphabet; it is either
   * garbage or '=' padding. */
  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
      nonbase64(ptr[3]) ) {
    upb_status_seterrf(&p->status,
                       "Non-base64 characters in bytes field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Padding may only appear in the last group of the input. */
  if (limit - ptr != 4) {
    goto badpadding;
  }

  if (ptr[2] == '=') {
    uint32_t val;
    char output;

    /* Last group contains only two input bytes, one output byte. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12;

    UPB_ASSERT(!(val & 0x80000000));
    output = val >> 16;
    upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
    return true;
  } else {
    uint32_t val;
    char output[2];

    /* Last group contains only three input bytes, two output bytes.
     * ptr[2] is a real base64 character here, so if neither ptr[0] nor ptr[1]
     * is '=', the padding character must be ptr[3]. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6;

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
    return true;
  }

badpadding:
  upb_status_seterrf(&p->status,
                     "Incorrect base64 padding for field: %s (%.*s)",
                     upb_fielddef_name(p->top->f),
                     4, ptr);
  upb_env_reporterror(p->env, &p->status);
  return false;
}
13167 
13168 
13169 /* Accumulate buffer **********************************************************/
13170 
13171 /* Functionality for accumulating a buffer.
13172  *
13173  * Some parts of the parser need an entire value as a contiguous string.  For
13174  * example, to look up a member name in a hash table, or to turn a string into
13175  * a number, the relevant library routines need the input string to be in
13176  * contiguous memory, even if the value spanned two or more buffers in the
13177  * input.  These routines handle that.
13178  *
13179  * In the common case we can just point to the input buffer to get this
13180  * contiguous string and avoid any actual copy.  So we optimistically begin
13181  * this way.  But there are a few cases where we must instead copy into a
13182  * separate buffer:
13183  *
13184  *   1. The string was not contiguous in the input (it spanned buffers).
13185  *
13186  *   2. The string included escape sequences that need to be interpreted to get
13187  *      the true value in a contiguous buffer. */
13188 
/* Asserts that nothing is currently accumulated: no data pointer and a zero
 * length. */
static void assert_accumulate_empty(upb_json_parser *p) {
  UPB_ASSERT(!p->accumulated);
  UPB_ASSERT(!p->accumulated_len);
}
13193 
/* Forgets any accumulated data.  The accumulate buffer allocation itself is
 * left in place. */
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
13198 
13199 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)13200 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
13201   void *mem;
13202   size_t old_size = p->accumulate_buf_size;
13203   size_t new_size = UPB_MAX(old_size, 128);
13204   while (new_size < need) {
13205     new_size = saturating_multiply(new_size, 2);
13206   }
13207 
13208   mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
13209   if (!mem) {
13210     upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
13211     upb_env_reporterror(p->env, &p->status);
13212     return false;
13213   }
13214 
13215   p->accumulate_buf = mem;
13216   p->accumulate_buf_size = new_size;
13217   return true;
13218 }
13219 
13220 /* Logically appends the given data to the append buffer.
13221  * If "can_alias" is true, we will try to avoid actually copying, but the buffer
13222  * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)13223 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
13224                               bool can_alias) {
13225   size_t need;
13226 
13227   if (!p->accumulated && can_alias) {
13228     p->accumulated = buf;
13229     p->accumulated_len = len;
13230     return true;
13231   }
13232 
13233   if (!checked_add(p->accumulated_len, len, &need)) {
13234     upb_status_seterrmsg(&p->status, "Integer overflow.");
13235     upb_env_reporterror(p->env, &p->status);
13236     return false;
13237   }
13238 
13239   if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
13240     return false;
13241   }
13242 
13243   if (p->accumulated != p->accumulate_buf) {
13244     memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
13245     p->accumulated = p->accumulate_buf;
13246   }
13247 
13248   memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
13249   p->accumulated_len += len;
13250   return true;
13251 }
13252 
13253 /* Returns a pointer to the data accumulated since the last accumulate_clear()
13254  * call, and writes the length to *len.  This with point either to the input
13255  * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)13256 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
13257   UPB_ASSERT(p->accumulated);
13258   *len = p->accumulated_len;
13259   return p->accumulated;
13260 }
13261 
13262 
/* Multi-part text data *******************************************************/
13264 
13265 /* When we have text data in the input, it can often come in multiple segments.
13266  * For example, there may be some raw string data followed by an escape
13267  * sequence.  The two segments are processed with different logic.  Also buffer
13268  * seams in the input can cause multiple segments.
13269  *
13270  * As we see segments, there are two main cases for how we want to process them:
13271  *
13272  *  1. we want to push the captured input directly to string handlers.
13273  *
13274  *  2. we need to accumulate all the parts into a contiguous buffer for further
13275  *     processing (field name lookup, string->number conversion, etc). */
13276 
/* This is the set of states for p->multipart_state.  multipart_text() switches
 * on this value to decide what to do with each incoming text segment. */
enum {
  /* We are not currently processing multipart data.  Receiving text in this
   * state is reported as an internal error by multipart_text(). */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
13290 
13291 /* Start a multi-part text value where we accumulate the data for processing at
13292  * the end. */
multipart_startaccum(upb_json_parser * p)13293 static void multipart_startaccum(upb_json_parser *p) {
13294   assert_accumulate_empty(p);
13295   UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
13296   p->multipart_state = MULTIPART_ACCUMULATE;
13297 }
13298 
13299 /* Start a multi-part text value where we immediately push text data to a string
13300  * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)13301 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
13302   assert_accumulate_empty(p);
13303   UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
13304   p->multipart_state = MULTIPART_PUSHEAGERLY;
13305   p->string_selector = sel;
13306 }
13307 
/* Delivers one segment of text for the multi-part value in progress.
 *
 *  - MULTIPART_ACCUMULATE: the segment is appended to the accumulate buffer.
 *  - MULTIPART_PUSHEAGERLY: the segment is pushed to the current string
 *    handler.  The buffer handle is passed along only if |can_alias|.
 *  - MULTIPART_INACTIVE: an internal error is reported.
 *
 * Returns false if an error was reported, true otherwise. */
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  const int state = p->multipart_state;

  if (state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (state == MULTIPART_PUSHEAGERLY) {
    const upb_bufhandle *handle = NULL;
    if (can_alias) {
      handle = p->handle;
    }
    upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
    return true;
  }

  if (state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  return true;
}
13332 
13333 /* Note: this invalidates the accumulate buffer!  Call only after reading its
13334  * contents. */
multipart_end(upb_json_parser * p)13335 static void multipart_end(upb_json_parser *p) {
13336   UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
13337   p->multipart_state = MULTIPART_INACTIVE;
13338   accumulate_clear(p);
13339 }
13340 
13341 
13342 /* Input capture **************************************************************/
13343 
13344 /* Functionality for capturing a region of the input as text.  Gracefully
13345  * handles the case where a buffer seam occurs in the middle of the captured
13346  * region. */
13347 
/* Starts capturing input at |ptr|.  Asserts that a multi-part value is in
 * progress and that no capture is already active. */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
  UPB_ASSERT(!p->capture);
  p->capture = ptr;
}
13353 
/* Ends the active capture at |ptr| and hands the captured region to
 * multipart_text() as aliasable data.  On failure the capture is left active
 * and false is returned. */
static bool capture_end(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->capture);
  if (!multipart_text(p, p->capture, ptr - p->capture, true)) {
    return false;
  }
  p->capture = NULL;
  return true;
}
13363 
13364 /* This is called at the end of each input buffer (ie. when we have hit a
13365  * buffer seam).  If we are in the middle of capturing the input, this
13366  * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)13367 static void capture_suspend(upb_json_parser *p, const char **ptr) {
13368   if (!p->capture) return;
13369 
13370   if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
13371     /* We use this as a signal that we were in the middle of capturing, and
13372      * that capturing should resume at the beginning of the next buffer.
13373      *
13374      * We can't use *ptr here, because we have no guarantee that this pointer
13375      * will be valid when we resume (if the underlying memory is freed, then
13376      * using the pointer at all, even to compare to NULL, is likely undefined
13377      * behavior). */
13378     p->capture = &suspend_capture;
13379   } else {
13380     /* Need to back up the pointer to the beginning of the capture, since
13381      * we were not able to actually preserve it. */
13382     *ptr = p->capture;
13383   }
13384 }
13385 
/* Resumes a suspended capture at |ptr|, the start of the new buffer.  Does
 * nothing if no capture was in progress. */
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (!p->capture) {
    return;
  }
  UPB_ASSERT(p->capture == &suspend_capture);
  p->capture = ptr;
}
13392 
13393 
13394 /* Callbacks from the parser **************************************************/
13395 
13396 /* These are the functions called directly from the parser itself.
13397  * We define these in the same order as their declarations in the parser. */
13398 
/* Maps the character following a backslash in a JSON string to the character
 * it denotes.  Any other input trips an assertion; 'x' is returned if
 * assertions are disabled. */
static char escape_char(char in) {
  char out;

  switch (in) {
    case '"':  out = '"';  break;
    case '\\': out = '\\'; break;
    case '/':  out = '/';  break;
    case 'b':  out = '\b'; break;
    case 'f':  out = '\f'; break;
    case 'n':  out = '\n'; break;
    case 'r':  out = '\r'; break;
    case 't':  out = '\t'; break;
    default:
      UPB_ASSERT(0);
      out = 'x';
      break;
  }

  return out;
}
13414 
/* Handles a single-character escape: |ptr| points at the character after the
 * backslash.  The decoded character is delivered as a one-byte text segment,
 * which cannot be aliased because it lives on the stack. */
static bool escape(upb_json_parser *p, const char *ptr) {
  char decoded = escape_char(*ptr);
  return multipart_text(p, &decoded, 1, false);
}
13419 
/* Begins a unicode escape: clears the value that hexdigit() accumulates into
 * and end_hex() later reads. */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
13423 
/* Folds the hex digit at |ptr| into p->digit: the value accumulated so far
 * moves up four bits and the new digit fills the low nibble.  Accepts 0-9, a-f
 * and A-F; anything else trips an assertion. */
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char c = *ptr;
  uint32_t nibble;

  if (c >= '0' && c <= '9') {
    nibble = (c - '0');
  } else if (c >= 'a' && c <= 'f') {
    nibble = ((c - 'a') + 10);
  } else {
    UPB_ASSERT(c >= 'A' && c <= 'F');
    nibble = ((c - 'A') + 10);
  }

  p->digit = (p->digit << 4) + nibble;
}
13438 
/* Finishes a unicode escape: encodes the code point accumulated in p->digit as
 * UTF-8 and delivers it as a text segment.  Only one-, two- and three-byte
 * encodings are produced (\u0000 -- \uFFFF). */
static bool end_hex(upb_json_parser *p) {
  const uint32_t cp = p->digit;
  char utf8[3]; /* Three bytes are enough for \u0000 -- \uFFFF. */
  int nbytes;

  if (cp <= 0x7F) {
    utf8[0] = cp;
    nbytes = 1;
  } else if (cp <= 0x07FF) {
    utf8[0] = ((cp >> 6) & 0x1F) | 0xC0;
    utf8[1] = (cp & 0x3F) | 0x80;
    nbytes = 2;
  } else /* cp <= 0xFFFF */ {
    utf8[0] = ((cp >> 12) & 0x0F) | 0xE0;
    utf8[1] = ((cp >> 6) & 0x3F) | 0x80;
    utf8[2] = (cp & 0x3F) | 0x80;
    nbytes = 3;
  }
  /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
   * we have to wait for the next escape to get the full code point). */

  return multipart_text(p, utf8, nbytes, false);
}
13466 
/* Parser callback for the start of a run of literal text: begins capturing
 * input at |ptr|. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
13470 
/* Parser callback for the end of a run of literal text: ends the capture at
 * |ptr|.  Returns false if the captured text could not be processed. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr);
}
13474 
/* Parser callback for the start of a number at |ptr|.
 *
 * Before the number text is captured, this opens the enclosing object that
 * some targets require:
 *  - at top level, a number is only accepted if the message is a number
 *    wrapper or a Value; otherwise false is returned.
 *  - below top level, a subobject is opened first if the current field is a
 *    number wrapper or a Value.
 *  - for any other field nothing extra is opened.
 *
 * NOTE(review): the helper predicates and start_* functions are defined
 * elsewhere in the file; the descriptions above follow their names -- confirm
 * against their definitions.
 *
 * Returns false if the number is not acceptable here or a subobject could not
 * be started; otherwise starts accumulating and capturing, and returns true. */
static bool start_number(upb_json_parser *p, const char *ptr) {
  if (is_top_level(p)) {
    if (is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_NUMBERVALUE);
    } else {
      return false;
    }
  } else if (does_number_wrapper_start(p)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_wrapper_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_NUMBERVALUE);
  }

  /* The number text is accumulated so that it can be parsed as a whole once
   * its end is seen. */
  multipart_startaccum(p);
  capture_begin(p, ptr);
  return true;
}
13500 
13501 static bool parse_number(upb_json_parser *p, bool is_quoted);
13502 
/* Ends the capture of a number at |ptr| and parses the accumulated text into
 * the current field.  If there is no current field (p->top->f is NULL), the
 * text is simply discarded and true is returned. */
static bool end_number_nontop(upb_json_parser *p, const char *ptr) {
  if (!capture_end(p, ptr)) {
    return false;
  }

  if (p->top->f != NULL) {
    return parse_number(p, false);
  }

  multipart_end(p);
  return true;
}
13515 
/* Parser callback for the end of a number at |ptr|.  Parses the number and
 * then closes whatever enclosing wrapper or Value object was opened for it,
 * including the subobject when not at top level.  Returns false if the number
 * could not be parsed. */
static bool end_number(upb_json_parser *p, const char *ptr) {
  if (!end_number_nontop(p, ptr)) {
    return false;
  }

  if (does_number_wrapper_end(p)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    /* Plain numeric field: nothing to close. */
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
13539 
13540 /* |buf| is NULL-terminated. |buf| itself will never include quotes;
13541  * |is_quoted| tells us whether this text originally appeared inside quotes. */
parse_number_from_buffer(upb_json_parser * p,const char * buf,bool is_quoted)13542 static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
13543                                      bool is_quoted) {
13544   size_t len = strlen(buf);
13545   const char *bufend = buf + len;
13546   char *end;
13547   upb_fieldtype_t type = upb_fielddef_type(p->top->f);
13548   double val;
13549   double dummy;
13550   double inf = 1.0 / 0.0;  /* C89 does not have an INFINITY macro. */
13551 
13552   errno = 0;
13553 
13554   if (len == 0 || buf[0] == ' ') {
13555     return false;
13556   }
13557 
13558   /* For integer types, first try parsing with integer-specific routines.
13559    * If these succeed, they will be more accurate for int64/uint64 than
13560    * strtod().
13561    */
13562   switch (type) {
13563     case UPB_TYPE_ENUM:
13564     case UPB_TYPE_INT32: {
13565       long val = strtol(buf, &end, 0);
13566       if (errno == ERANGE || end != bufend) {
13567         break;
13568       } else if (val > INT32_MAX || val < INT32_MIN) {
13569         return false;
13570       } else {
13571         upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
13572         return true;
13573       }
13574     }
13575     case UPB_TYPE_UINT32: {
13576       unsigned long val = strtoul(buf, &end, 0);
13577       if (end != bufend) {
13578         break;
13579       } else if (val > UINT32_MAX || errno == ERANGE) {
13580         return false;
13581       } else {
13582         upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
13583         return true;
13584       }
13585     }
13586     /* XXX: We can't handle [u]int64 properly on 32-bit machines because
13587      * strto[u]ll isn't in C89. */
13588     case UPB_TYPE_INT64: {
13589       long val = strtol(buf, &end, 0);
13590       if (errno == ERANGE || end != bufend) {
13591         break;
13592       } else {
13593         upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
13594         return true;
13595       }
13596     }
13597     case UPB_TYPE_UINT64: {
13598       unsigned long val = strtoul(p->accumulated, &end, 0);
13599       if (end != bufend) {
13600         break;
13601       } else if (errno == ERANGE) {
13602         return false;
13603       } else {
13604         upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
13605         return true;
13606       }
13607     }
13608     default:
13609       break;
13610   }
13611 
13612   if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
13613     /* Quoted numbers for integer types are not allowed to be in double form. */
13614     return false;
13615   }
13616 
13617   if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
13618     /* C89 does not have an INFINITY macro. */
13619     val = inf;
13620   } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
13621     val = -inf;
13622   } else {
13623     val = strtod(buf, &end);
13624     if (errno == ERANGE || end != bufend) {
13625       return false;
13626     }
13627   }
13628 
13629   switch (type) {
13630 #define CASE(capitaltype, smalltype, ctype, min, max)                     \
13631     case UPB_TYPE_ ## capitaltype: {                                      \
13632       if (modf(val, &dummy) != 0 || val > max || val < min) {             \
13633         return false;                                                     \
13634       } else {                                                            \
13635         upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p),        \
13636                                   (ctype)val);                            \
13637         return true;                                                      \
13638       }                                                                   \
13639       break;                                                              \
13640     }
13641     case UPB_TYPE_ENUM:
13642     CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
13643     CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
13644     CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
13645     CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
13646 #undef CASE
13647 
13648     case UPB_TYPE_DOUBLE:
13649       upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
13650       return true;
13651     case UPB_TYPE_FLOAT:
13652       if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
13653         return false;
13654       } else {
13655         upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
13656         return true;
13657       }
13658     default:
13659       return false;
13660   }
13661 }
13662 
/* Converts the text held in the accumulate buffer into a number and emits it
 * for the current field.
 *
 * strtol() and friends cannot be told the length of their input, so a NUL
 * byte is appended to the accumulated text first.  When the conversion fails
 * an "error parsing number" status is reported.  The multipart state is ended
 * on both the success and the conversion-failure path.
 *
 * Returns true if the number was parsed and emitted. */
static bool parse_number(upb_json_parser *p, bool is_quoted) {
  size_t len;
  const char *text;
  bool parsed;

  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  text = accumulate_getptr(p, &len);
  parsed = parse_number_from_buffer(p, text, is_quoted);

  if (!parsed) {
    upb_status_seterrf(&p->status, "error parsing number: %s", text);
    upb_env_reporterror(p->env, &p->status);
  }

  multipart_end(p);
  return parsed;
}
13685 
/* Emits the boolean |val| for the field of the current frame.
 *
 * A frame without a field is silently accepted.  If the current field is not
 * of bool type, an error status is reported and false is returned. */
static bool parser_putbool(upb_json_parser *p, bool val) {
  const upb_fielddef *field = p->top->f;
  bool ok;

  if (field == NULL) {
    /* Nothing to write to. */
    return true;
  }

  if (upb_fielddef_type(field) != UPB_TYPE_BOOL) {
    upb_status_seterrf(&p->status,
                       "Boolean value specified for non-bool field: %s",
                       upb_fielddef_name(field));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
  UPB_ASSERT(ok);

  return true;
}
13706 
/* Handles a complete JSON boolean literal.
 *
 * If the target is a BoolValue wrapper or a Value message (either as the
 * top-level message or as the current field), the enclosing wrapper/value
 * object is opened first, the bool is emitted, and the object is closed again.
 * A bool at the top level of any other message type is rejected. */
static bool end_bool(upb_json_parser *p, bool val) {
  /* Open whatever enclosing object the bool needs. */
  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_BOOLVALUE);
    } else {
      return false;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) {
    if (!start_subobject(p)) return false;
    start_wrapper_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_BOOLVALUE);
  }

  /* Values of unknown fields are dropped. */
  if (p->top->is_unknown_field) return true;

  if (!parser_putbool(p, val)) return false;

  /* Close the enclosing object, if one was opened above. */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
13754 
/* Handles a JSON null literal.
 *
 * null is only materialized when the target is a Value message (top-level or
 * as the current field); in that case the null_value field is filled by
 * parsing the literal text "0".  In every other situation null is accepted
 * and ignored. */
static bool end_null(upb_json_parser *p) {
  const char *null_text = "0";

  if (is_top_level(p)) {
    if (!is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      return true;
    }
    start_value_object(p, VALUE_NULLVALUE);
  } else {
    if (!is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      return true;
    }
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_NULLVALUE);
  }

  /* Fill null_value field by feeding "0" through the number parser.  The
   * result of parse_number() is not checked here. */
  multipart_startaccum(p);
  capture_begin(p, null_text);
  capture_end(p, null_text + 1);
  parse_number(p, false);

  end_value_object(p);
  if (!is_top_level(p)) {
    end_subobject(p);
  }

  return true;
}
13786 
/* Begins a string value inside an Any frame: the text is accumulated so it
 * can be processed as a whole when the string ends.  Always returns true. */
static bool start_any_stringval(upb_json_parser *p) {
  multipart_startaccum(p);
  return true;
}
13791 
/* Called when a JSON string value begins.
 *
 * First opens any enclosing object required by a well-known type that is
 * written as a string (string wrappers, FieldMask, Timestamp, Duration,
 * Value), then decides how the string's text is consumed:
 *   - no field / Any frame / numeric or enum field: accumulate the text;
 *   - STRING field: push a string sub-frame and stream the text to it;
 *   - other string-like field: push a sub-frame and accumulate;
 *   - bool or submessage field: report an error.
 * A string at the top level of a message that has no string form is rejected
 * with false. */
static bool start_stringval(upb_json_parser *p) {
  if (is_top_level(p)) {
    if (is_string_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      start_fieldmask_object(p);
      return true;
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
      start_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_STRINGVALUE);
    } else {
      return false;
    }
  } else if (does_string_wrapper_start(p)) {
    if (!start_subobject(p)) return false;
    start_wrapper_object(p);
  } else if (does_fieldmask_start(p)) {
    if (!start_subobject(p)) return false;
    start_fieldmask_object(p);
    return true;
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) ||
             is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) {
    if (!start_subobject(p)) return false;
    start_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_STRINGVALUE);
  }

  /* Without a field there is nowhere to emit to; just collect the text. */
  if (p->top->f == NULL) {
    multipart_startaccum(p);
    return true;
  }

  if (p->top->is_any) {
    return start_any_stringval(p);
  }

  if (!upb_fielddef_isstring(p->top->f)) {
    if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL ||
        upb_fielddef_type(p->top->f) == UPB_TYPE_MESSAGE) {
      upb_status_seterrf(&p->status,
                         "String specified for bool or submessage field: %s",
                         upb_fielddef_name(p->top->f));
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    /* No need to push a frame -- numeric values in quotes remain in the
     * current parser frame.  These values must accumulate so we can convert
     * them all at once at the end. */
    multipart_startaccum(p);
    return true;
  }

  {
    upb_jsonparser_frame *inner;
    upb_selector_t sel;

    if (!check_stack(p)) return false;

    /* Start a new parser frame: parser frames correspond one-to-one with
     * handler frames, and string events occur in a sub-frame. */
    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
    upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
    inner->m = p->top->m;
    inner->f = p->top->f;
    p->top = inner;

    if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
      /* For STRING fields we push data directly to the handlers as it is
       * parsed.  We don't do this yet for BYTES fields, because our base64
       * decoder is not streaming.
       *
       * TODO(haberman): make base64 decoding streaming also. */
      multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
    } else {
      multipart_startaccum(p);
    }
    return true;
  }
}
13882 
/* Finishes the string value of an Any's type URL.
 *
 * The accumulated text is emitted as the string value of the current field,
 * and then resolved to a message type: it must start with
 * "type.googleapis.com/" followed by at least one more character, and the
 * remainder must name a message known to the parser's symbol table.  On
 * success the payload type is recorded on the current Any frame.
 *
 * Returns false (with an error status reported) for a malformed URL or an
 * unknown type, or if check_stack() fails. */
static bool end_any_stringval(upb_json_parser *p) {
  static const char url_prefix[] = "type.googleapis.com/";
  const size_t url_prefix_len = sizeof(url_prefix) - 1;
  size_t len;
  const char *buf = accumulate_getptr(p, &len);

  /* Set type_url */
  upb_selector_t sel;
  upb_jsonparser_frame *inner;
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(&inner->sink, sel, buf, len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(&inner->sink, sel);

  multipart_end(p);

  /* Resolve type url.  The length is tested first so that the prefix
   * comparison never looks at bytes beyond the |len| accumulated ones (the
   * buffer is not known to be NUL-terminated here). */
  if (len > url_prefix_len &&
      strncmp(buf, url_prefix, url_prefix_len) == 0) {
    const upb_msgdef *payload_type = NULL;
    buf += url_prefix_len;
    len -= url_prefix_len;

    payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len);
    if (payload_type == NULL) {
      upb_status_seterrf(
          &p->status, "Cannot find packed type: %.*s\n", (int)len, buf);
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type);

    return true;
  } else {
    upb_status_seterrf(
        &p->status, "Invalid type url: %.*s\n", (int)len, buf);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }
}
13926 
/* Finishes a JSON string value for the current frame (without closing any
 * enclosing well-known-type object; see end_stringval()).
 *
 * Depending on the current field:
 *   - Timestamp/Duration messages and frames without a field: only the
 *     multipart state is ended;
 *   - Any frames: delegated to end_any_stringval();
 *   - BYTES: the accumulated text is pushed through base64_push(), then the
 *     string sub-frame is closed like for STRING;
 *   - STRING: the string sub-frame is ended and popped;
 *   - ENUM: the accumulated name is resolved to its integer value;
 *   - numeric types: the accumulated text is parsed as a quoted number.
 *
 * Returns false if the value could not be converted. */
static bool end_stringval_nontop(upb_json_parser *p) {
  bool ok = true;

  if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
      is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
    multipart_end(p);
    return true;
  }

  if (p->top->f == NULL) {
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_stringval(p);
  }

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      /* Fall through. */

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(&p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      /* Resolve enum symbolic name to integer value. */
      const upb_enumdef *enumdef =
          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(&p->top->sink, sel, int_val);
      } else {
        /* The precision argument of "%.*s" must be an int; passing a size_t
         * directly is undefined behavior where the two types differ in
         * size. */
        upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'",
                           (int)len, buf);
        upb_env_reporterror(p->env, &p->status);
      }

      break;
    }

    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      ok = parse_number(p, true);
      break;

    default:
      UPB_ASSERT(false);
      upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
      upb_env_reporterror(p->env, &p->status);
      ok = false;
      break;
  }

  multipart_end(p);

  return ok;
}
14003 
/* Called when a JSON string value ends.
 *
 * Finishes the string itself via end_stringval_nontop() and then closes the
 * enclosing object that start_stringval() opened for well-known types written
 * as strings (string wrappers, Value, Timestamp, Duration, FieldMask),
 * including the sub-object when not at the top level. */
static bool end_stringval(upb_json_parser *p) {
  /* FieldMask's stringvals have been ended when handling them. Only need to
   * close FieldMask here.*/
  if (does_fieldmask_end(p)) {
    end_fieldmask_object(p);
    if (!is_top_level(p)) {
      end_subobject(p);
    }
    return true;
  }

  if (!end_stringval_nontop(p)) {
    return false;
  }

  /* Close the matching enclosing object; plain fields have none. */
  if (does_string_wrapper_end(p)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
             is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) ||
             is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
    end_object(p);
  } else {
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
14047 
/* Begins capturing the numeric part of a Duration string at |ptr|. */
static void start_duration_base(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14051 
/* Finishes the numeric part of a Duration string (the text captured since
 * start_duration_base()) and emits it as the "seconds" and "nanos" members of
 * the current message.
 *
 * The whole-second part and the fraction are converted separately; see the
 * comment in the body.  Returns false, with an error status reported, if the
 * text is too long for the conversion buffers, is not a valid number, or is
 * outside +/-315576000000 seconds. */
static bool end_duration_base(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char seconds_buf[14];
  char nanos_buf[12];
  char *end;
  int64_t seconds = 0;
  int32_t nanos = 0;
  double val = 0.0;
  const char *seconds_membername = "seconds";
  const char *nanos_membername = "nanos";
  size_t fraction_start;
  size_t fraction_len;

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  memset(seconds_buf, 0, sizeof(seconds_buf));
  memset(nanos_buf, 0, sizeof(nanos_buf));

  /* Find out base end. The maximum duration is 315576000000, which cannot be
   * represented by double without losing precision. Thus, we need to handle
   * fraction and base separately. */
  for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.';
       fraction_start++);
  fraction_len = len - fraction_start;

  /* Both parts are copied into fixed-size, NUL-terminated stack buffers.
   * Reject anything that would not fit (leaving room for the terminator and,
   * for the fraction, the leading '0') instead of writing past the end.
   * "-315576000000" and ".999999999" are the longest valid inputs and both
   * fit. */
  if (fraction_start >= sizeof(seconds_buf) ||
      fraction_len + 1 >= sizeof(nanos_buf)) {
    upb_status_seterrf(&p->status, "error parsing duration: %.*s",
                       (int)len, buf);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Parse base */
  memcpy(seconds_buf, buf, fraction_start);
  errno = 0;  /* Do not mistake a stale ERANGE for a failure of this call. */
  seconds = strtol(seconds_buf, &end, 10);
  if (errno == ERANGE || end != seconds_buf + fraction_start) {
    upb_status_seterrf(&p->status, "error parsing duration: %s",
                       seconds_buf);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (seconds > 315576000000) {
    upb_status_seterrf(&p->status, "error parsing duration: "
                                   "maximum acceptable value is "
                                   "315576000000");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (seconds < -315576000000) {
    upb_status_seterrf(&p->status, "error parsing duration: "
                                   "minimum acceptable value is "
                                   "-315576000000");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Parse fraction.  A leading '0' turns ".5" into "0.5" (and an absent
   * fraction into "0"). */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf + fraction_start, fraction_len);
  errno = 0;
  val = strtod(nanos_buf, &end);
  if (errno == ERANGE || end != nanos_buf + fraction_len + 1) {
    upb_status_seterrf(&p->status, "error parsing duration: %s",
                       nanos_buf);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  nanos = val * 1000000000;
  /* The sign is taken from the text rather than from |seconds|, so that a
   * value such as "-0.5" (whose whole-second part is zero) still yields
   * negative nanos. */
  if (len > 0 && buf[0] == '-') nanos = -nanos;

  /* Clean up buffer */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(&p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(&p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
14144 
/* Converts the currently accumulated text (one numeric component of a
 * timestamp) to an int, then restarts accumulation for the next component.
 *
 * Returns 0 if the text could not be NUL-terminated. */
static int parse_timestamp_number(upb_json_parser *p) {
  size_t len;
  const char *buf;
  int val = 0;

  /* atoi() and friends unfortunately do not support specifying the length of
   * the input string, so we need to force a copy into a NULL-terminated
   * buffer.  If that fails the text must not be handed to atoi(), since it
   * may lack a terminator. */
  if (multipart_text(p, "\0", 1, false)) {
    buf = accumulate_getptr(p, &len);
    val = atoi(buf);
  }

  multipart_end(p);
  multipart_startaccum(p);

  return val;
}
14162 
/* Begins capturing the year component of a timestamp at |ptr|. */
static void start_year(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14166 
/* Ends the year capture at |ptr| and stores the result in p->tm.tm_year
 * (years since 1900).  Returns false if the capture could not be ended. */
static bool end_year(upb_json_parser *p, const char *ptr) {
  bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_year = parse_timestamp_number(p) - 1900;
  }
  return captured;
}
14174 
/* Begins capturing the month component of a timestamp at |ptr|. */
static void start_month(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14178 
/* Ends the month capture at |ptr| and stores the result in p->tm.tm_mon,
 * converted from 1-based to 0-based.  Returns false if the capture could not
 * be ended. */
static bool end_month(upb_json_parser *p, const char *ptr) {
  bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_mon = parse_timestamp_number(p) - 1;
  }
  return captured;
}
14186 
/* Begins capturing the day-of-month component of a timestamp at |ptr|. */
static void start_day(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14190 
/* Ends the day capture at |ptr| and stores the result in p->tm.tm_mday.
 * Returns false if the capture could not be ended. */
static bool end_day(upb_json_parser *p, const char *ptr) {
  bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_mday = parse_timestamp_number(p);
  }
  return captured;
}
14198 
/* Begins capturing the hour component of a timestamp at |ptr|. */
static void start_hour(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14202 
/* Ends the hour capture at |ptr| and stores the result in p->tm.tm_hour.
 * Returns false if the capture could not be ended. */
static bool end_hour(upb_json_parser *p, const char *ptr) {
  bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_hour = parse_timestamp_number(p);
  }
  return captured;
}
14210 
/* Begins capturing the minute component of a timestamp at |ptr|. */
static void start_minute(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14214 
/* Ends the minute capture at |ptr| and stores the result in p->tm.tm_min.
 * Returns false if the capture could not be ended. */
static bool end_minute(upb_json_parser *p, const char *ptr) {
  bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_min = parse_timestamp_number(p);
  }
  return captured;
}
14222 
/* Begins capturing the second component of a timestamp at |ptr|. */
static void start_second(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14226 
/* Ends the second capture at |ptr| and stores the result in p->tm.tm_sec.
 * Returns false if the capture could not be ended. */
static bool end_second(upb_json_parser *p, const char *ptr) {
  bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_sec = parse_timestamp_number(p);
  }
  return captured;
}
14234 
/* Resets the broken-down time in p->tm to all zeroes before the components of
 * a new timestamp are parsed into it. */
static void start_timestamp_base(upb_json_parser *p) {
  memset(&p->tm, 0, sizeof(p->tm));
}
14238 
/* Begins capturing the fractional-second part of a timestamp at |ptr|. */
static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14242 
/* Finishes the fractional-second part of a timestamp (the text captured since
 * start_timestamp_fraction()) and emits it as the "nanos" member of the
 * current message.
 *
 * The captured text may be at most 10 characters long (per the error message,
 * a fraction of at most 9 digits).  Returns false, with an error status
 * reported, if it is longer or is not a valid number. */
static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char nanos_buf[12];
  char *end;
  double val = 0.0;
  int32_t nanos;
  const char *nanos_membername = "nanos";

  memset(nanos_buf, 0, sizeof(nanos_buf));

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  /* This bound also guarantees that '0' + text + NUL fits in nanos_buf. */
  if (len > 10) {
    upb_status_seterrf(&p->status,
        "error parsing timestamp: at most 9-digit fraction.");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Parse nanos.  A leading '0' is prepended to the captured text before it
   * is handed to strtod(). */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf, len);
  errno = 0;  /* Do not mistake a stale ERANGE for a failure of this call. */
  val = strtod(nanos_buf, &end);

  if (errno == ERANGE || end != nanos_buf + len + 1) {
    upb_status_seterrf(&p->status, "error parsing timestamp nanos: %s",
                       nanos_buf);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  nanos = val * 1000000000;

  /* Clean up previous environment */
  multipart_end(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(&p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
14297 
/* Begins capturing the time-zone suffix of a timestamp at |ptr|. */
static void start_timestamp_zone(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14301 
#define EPOCH_YEAR 1970
#define TM_YEAR_BASE 1900

/* Returns true if |year| is a leap year under the Gregorian rule. */
static bool isleap(int year) {
  return (year % 4) == 0 && (year % 100 != 0 || (year % 400) == 0);
}

/* Number of days that precede the first day of each month (index 0-11), plus
 * the length of the year at index 12.  Row 0 is for normal years, row 1 for
 * leap years. */
const unsigned short int __mon_yday[2][13] = {
    /* Normal years.  */
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
    /* Leap years.  */
    { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
};

/* Returns floor(a / b) for b > 0.  Written so that it does not depend on the
 * rounding direction the compiler uses for division of negative values. */
static int64_t epoch_floor_div(int64_t a, int64_t b) {
  int64_t q = a / b;
  if (q * b > a) q--;
  return q;
}

/* Returns the number of Gregorian leap years y with y <= |year|, relative to
 * an arbitrary fixed origin (only differences of this value are meaningful). */
static int64_t epoch_leap_years_through(int64_t year) {
  return epoch_floor_div(year, 4) - epoch_floor_div(year, 100) +
         epoch_floor_div(year, 400);
}

/* Returns the number of seconds between 1970-01-01T00:00:00 and the given
 * moment.
 *
 *   year: calendar year (e.g. 2017).
 *   yday: zero-based day within that year (0 for January 1st).
 *   hour, min, sec: time of day.
 *
 * The number of leap days is the number of leap years in [1970, year), which
 * must be counted on absolute year numbers: dividing the *difference*
 * (year - 1970) by 4/100/400 miscounts, because 1970 is not aligned to the
 * leap-year cycle (it would, for instance, see no leap day between 1970 and
 * 1973 although 1972 was a leap year). */
int64_t epoch(int year, int yday, int hour, int min, int sec) {
  int64_t years = (int64_t)year - EPOCH_YEAR;

  int64_t leap_days = epoch_leap_years_through((int64_t)year - 1) -
                      epoch_leap_years_through(EPOCH_YEAR - 1);

  int64_t days = years * 365 + yday + leap_days;
  int64_t hours = days * 24 + hour;
  int64_t mins = hours * 60 + min;
  int64_t secs = mins * 60 + sec;
  return secs;
}

/* Converts a broken-down time to seconds since 1970-01-01T00:00:00, treating
 * the fields as-is (no time-zone or DST adjustment is applied here).
 *
 * Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec are read.
 * NOTE(review): tm_mon is used unchecked as an index into __mon_yday, so it
 * is assumed to be in 0..11 -- confirm the callers guarantee this. */
static int64_t upb_mktime(const struct tm *tp) {
  int sec = tp->tm_sec;
  int min = tp->tm_min;
  int hour = tp->tm_hour;
  int mday = tp->tm_mday;
  int mon = tp->tm_mon;
  int year = tp->tm_year + TM_YEAR_BASE;

  /* Calculate day of year from year, month, and day of month.  tm_mday is
   * 1-based, hence the -1 to obtain a zero-based day of year. */
  int mon_yday = ((__mon_yday[isleap(year)][mon]) - 1);
  int yday = mon_yday + mday;

  return epoch(year, yday, hour, min, sec);
}
14342 
/* Finishes the time-zone suffix of a timestamp (the text captured since
 * start_timestamp_zone()), combines it with the date/time fields collected in
 * p->tm, and emits the result as the "seconds" member of the current message.
 *
 * The suffix is either "Z" or a sign character followed by a two-digit hour
 * offset.  Returns false, with an error status reported, if the offset cannot
 * be read or the resulting time is before 0001-01-01T00:00:00Z. */
static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *zone;
  int offset_hours = 0;
  int64_t seconds;
  const char *seconds_membername = "seconds";

  if (!capture_end(p, ptr)) {
    return false;
  }

  zone = accumulate_getptr(p, &len);

  if (zone[0] != 'Z') {
    if (sscanf(zone + 1, "%2d:00", &offset_hours) != 1) {
      upb_status_seterrf(&p->status, "error parsing timestamp offset");
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    /* A "+hh" zone is ahead of UTC, so the hours are subtracted to get UTC. */
    if (zone[0] == '+') {
      offset_hours = -offset_hours;
    }
  }

  /* Normalize tm */
  seconds = upb_mktime(&p->tm) + 3600 * offset_hours;

  /* Check timestamp boundary */
  if (seconds < -62135596800) {
    upb_status_seterrf(&p->status, "error parsing timestamp: "
                                   "minimum acceptable value is "
                                   "0001-01-01T00:00:00Z");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Clean up previous environment */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(&p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
14397 
/* Begins capturing the text of one FieldMask path at |ptr|. */
static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14401 
/* Ends the capture of one FieldMask path's text at |ptr|.
 *
 * Returns false if the capture could not be ended, true otherwise.  (The
 * success path previously fell off the end of this non-void function, which
 * is undefined behavior as soon as a caller uses the result.) */
static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  if (!capture_end(p, ptr)) {
    return false;
  }
  return true;
}
14407 
/* Begins one path of a FieldMask: pushes a string sub-frame for the current
 * field and starts accumulating the path's text.  Returns false if
 * check_stack() fails. */
static bool start_fieldmask_path(upb_json_parser *p) {
  upb_jsonparser_frame *str_frame;
  upb_selector_t startstr_sel;

  if (!check_stack(p)) {
    return false;
  }

  /* Start a new parser frame: parser frames correspond one-to-one with
   * handler frames, and string events occur in a sub-frame. */
  str_frame = start_jsonparser_frame(p);
  startstr_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, startstr_sel, 0, &str_frame->sink);

  /* The sub-frame refers to the same message and field as its parent. */
  str_frame->m = p->top->m;
  str_frame->f = p->top->f;
  p->top = str_frame;

  multipart_startaccum(p);
  return true;
}
14426 
/* Pushes the |len| bytes at |ptr| to the current sink one character at a
 * time, rewriting every ASCII uppercase letter except the very first
 * character as '_' followed by its lowercase form (e.g. "fooBar" is pushed as
 * "foo_bar").  All other characters are pushed unchanged.
 *
 * Always returns true; the results of the individual pushes are not
 * checked. */
static bool lower_camel_push(
    upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) {
  size_t i;

  for (i = 0; i < len; i++) {
    const char *cur = ptr + i;
    bool is_upper = *cur >= 'A' && *cur <= 'Z';

    if (is_upper && i > 0) {
      char lower = tolower(*cur);
      upb_sink_putstring(&p->top->sink, sel, "_", 1, NULL);
      upb_sink_putstring(&p->top->sink, sel, &lower, 1, NULL);
    } else {
      upb_sink_putstring(&p->top->sink, sel, cur, 1, NULL);
    }
  }

  return true;
}
14443 
/* Ends one path of a FieldMask: pushes the accumulated path text through
 * lower_camel_push(), ends the string, pops the string sub-frame and ends the
 * multipart state.  Returns false if the push fails. */
static bool end_fieldmask_path(upb_json_parser *p) {
  upb_selector_t string_sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_selector_t endstr_sel;

  if (!lower_camel_push(p, string_sel, p->accumulated, p->accumulated_len)) {
    return false;
  }

  endstr_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(&p->top->sink, endstr_sel);
  p->top--;

  multipart_end(p);
  return true;
}
14460 
/* Begins a new object member: asserts that the current frame has no field
 * selected yet and starts accumulating text. */
static void start_member(upb_json_parser *p) {
  UPB_ASSERT(!p->top->f);
  multipart_startaccum(p);
}
14465 
14466 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
14467  * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)14468 static bool parse_mapentry_key(upb_json_parser *p) {
14469 
14470   size_t len;
14471   const char *buf = accumulate_getptr(p, &len);
14472 
14473   /* Emit the key field. We do a bit of ad-hoc parsing here because the
14474    * parser state machine has already decided that this is a string field
14475    * name, and we are reinterpreting it as some arbitrary key type. In
14476    * particular, integer and bool keys are quoted, so we need to parse the
14477    * quoted string contents here. */
14478 
14479   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
14480   if (p->top->f == NULL) {
14481     upb_status_seterrmsg(&p->status, "mapentry message has no key");
14482     upb_env_reporterror(p->env, &p->status);
14483     return false;
14484   }
14485   switch (upb_fielddef_type(p->top->f)) {
14486     case UPB_TYPE_INT32:
14487     case UPB_TYPE_INT64:
14488     case UPB_TYPE_UINT32:
14489     case UPB_TYPE_UINT64:
14490       /* Invoke end_number. The accum buffer has the number's text already. */
14491       if (!parse_number(p, true)) {
14492         return false;
14493       }
14494       break;
14495     case UPB_TYPE_BOOL:
14496       if (len == 4 && !strncmp(buf, "true", 4)) {
14497         if (!parser_putbool(p, true)) {
14498           return false;
14499         }
14500       } else if (len == 5 && !strncmp(buf, "false", 5)) {
14501         if (!parser_putbool(p, false)) {
14502           return false;
14503         }
14504       } else {
14505         upb_status_seterrmsg(&p->status,
14506                              "Map bool key not 'true' or 'false'");
14507         upb_env_reporterror(p->env, &p->status);
14508         return false;
14509       }
14510       multipart_end(p);
14511       break;
14512     case UPB_TYPE_STRING:
14513     case UPB_TYPE_BYTES: {
14514       upb_sink subsink;
14515       upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
14516       upb_sink_startstr(&p->top->sink, sel, len, &subsink);
14517       sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
14518       upb_sink_putstring(&subsink, sel, buf, len, NULL);
14519       sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
14520       upb_sink_endstr(&subsink, sel);
14521       multipart_end(p);
14522       break;
14523     }
14524     default:
14525       upb_status_seterrmsg(&p->status, "Invalid field type for map key");
14526       upb_env_reporterror(p->env, &p->status);
14527       return false;
14528   }
14529 
14530   return true;
14531 }
14532 
14533 /* Helper: emit one map entry (as a submessage in the map field sequence). This
14534  * is invoked from end_membername(), at the end of the map entry's key string,
14535  * with the map key in the accumulate buffer. It parses the key from that
14536  * buffer, emits the handler calls to start the mapentry submessage (setting up
14537  * its subframe in the process), and sets up state in the subframe so that the
14538  * value parser (invoked next) will emit the mapentry's value field and then
14539  * end the mapentry message. */
14540 
handle_mapentry(upb_json_parser * p)14541 static bool handle_mapentry(upb_json_parser *p) {
14542   const upb_fielddef *mapfield;
14543   const upb_msgdef *mapentrymsg;
14544   upb_jsonparser_frame *inner;
14545   upb_selector_t sel;
14546 
14547   /* Map entry: p->top->sink is the seq frame, so we need to start a frame
14548    * for the mapentry itself, and then set |f| in that frame so that the map
14549    * value field is parsed, and also set a flag to end the frame after the
14550    * map-entry value is parsed. */
14551   if (!check_stack(p)) return false;
14552 
14553   mapfield = p->top->mapfield;
14554   mapentrymsg = upb_fielddef_msgsubdef(mapfield);
14555 
14556   inner = start_jsonparser_frame(p);
14557   p->top->f = mapfield;
14558   sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
14559   upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
14560   inner->m = mapentrymsg;
14561   inner->mapfield = mapfield;
14562 
14563   /* Don't set this to true *yet* -- we reuse parsing handlers below to push
14564    * the key field value to the sink, and these handlers will pop the frame
14565    * if they see is_mapentry (when invoked by the parser state machine, they
14566    * would have just seen the map-entry value, not key). */
14567   inner->is_mapentry = false;
14568   p->top = inner;
14569 
14570   /* send STARTMSG in submsg frame. */
14571   upb_sink_startmsg(&p->top->sink);
14572 
14573   parse_mapentry_key(p);
14574 
14575   /* Set up the value field to receive the map-entry value. */
14576   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
14577   p->top->is_mapentry = true;  /* set up to pop frame after value is parsed. */
14578   p->top->mapfield = mapfield;
14579   if (p->top->f == NULL) {
14580     upb_status_seterrmsg(&p->status, "mapentry message has no value");
14581     upb_env_reporterror(p->env, &p->status);
14582     return false;
14583   }
14584 
14585   return true;
14586 }
14587 
/* Called once a member name has been fully accumulated.  Decides which field
 * (if any) the upcoming value belongs to:
 *   - frame without a message def: the member is flagged as unknown;
 *   - Any frame: delegated to end_any_membername();
 *   - map frame: the name is a map key, delegated to handle_mapentry();
 *   - otherwise the name is looked up in the frame's name table.
 *
 * Returns false only when the name is not found and unknown fields are not
 * being ignored (the error is reported first), or when the delegate fails. */
static bool end_membername(upb_json_parser *p) {
  size_t name_len;
  const char *name;
  upb_value entry;

  UPB_ASSERT(!p->top->f);

  if (!p->top->m) {
    p->top->is_unknown_field = true;
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) return end_any_membername(p);
  if (p->top->is_map) return handle_mapentry(p);

  name = accumulate_getptr(p, &name_len);

  if (!upb_strtable_lookup2(p->top->name_table, name, name_len, &entry)) {
    if (!p->ignore_json_unknown) {
      upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)name_len,
                         name);
      upb_env_reporterror(p->env, &p->status);
      return false;
    }
    /* Unknown name, but the caller asked us to skip such members. */
    p->top->is_unknown_field = true;
  } else {
    p->top->f = upb_value_getconstptr(entry);
  }

  multipart_end(p);
  return true;
}
14622 
/* Member-name handler used while inside an Any frame.
 *
 * The name "@type" selects the "type_url" field of the current message; every
 * other member name is flagged as an unknown field at this level.
 *
 * Returns false (after reporting an error) only if the frame's name table has
 * no "type_url" entry; previously that case read an uninitialized upb_value. */
static bool end_any_membername(upb_json_parser *p) {
  size_t len;
  const char *buf = accumulate_getptr(p, &len);
  upb_value v;

  if (len == 5 && strncmp(buf, "@type", len) == 0) {
    if (!upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v)) {
      upb_status_seterrmsg(&p->status, "Any message has no type_url field");
      upb_env_reporterror(p->env, &p->status);
      return false;
    }
    p->top->f = upb_value_getconstptr(v);
  } else {
    p->top->is_unknown_field = true;
  }

  multipart_end(p);
  return true;
}
14639 
/* Called after a member's value has been parsed.
 *
 * If the current frame is a map-entry frame (set up by handle_mapentry()),
 * this sends ENDMSG on it, pops it, and sends ENDSUBMSG on the enclosing
 * frame.  In all cases the (possibly new) top frame's field pointer and
 * unknown-field flag are cleared, ready for the next member.
 *
 * A failing ENDMSG status is reported through the env, matching what
 * end_object() does; it used to be dropped silently. */
static void end_member(upb_json_parser *p) {
  /* If we just parsed a map-entry value, end that frame too. */
  if (p->top->is_mapentry) {
    upb_status s = UPB_STATUS_INIT;
    upb_selector_t sel;
    bool ok;
    const upb_fielddef *mapfield;

    UPB_ASSERT(p->top > p->stack);
    /* send ENDMSG on submsg. */
    upb_sink_endmsg(&p->top->sink, &s);
    if (!upb_ok(&s)) {
      upb_env_reporterror(p->env, &s);
    }
    /* Read the map field before popping the frame that holds it. */
    mapfield = p->top->mapfield;

    /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
    p->top--;
    ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
    UPB_ASSERT(ok);
    upb_sink_endsubmsg(&p->top->sink, sel);
  }

  p->top->f = NULL;
  p->top->is_unknown_field = false;
}
14663 
/* Member-start hook for Any objects: performs the regular start_member() and
 * then hands |ptr| to the current frame's any_frame via
 * json_parser_any_frame_set_after_type_url_start_once(). */
static void start_any_member(upb_json_parser *p, const char *ptr) {
  upb_jsonparser_any_frame *any;

  start_member(p);

  /* Read the frame only after start_member() has run. */
  any = p->top->any_frame;
  json_parser_any_frame_set_after_type_url_start_once(any, ptr);
}
14668 
/* Member-end hook for Any objects: hands |ptr| to the current frame's
 * any_frame via json_parser_any_frame_set_before_type_url_end(), then performs
 * the regular end_member(). */
static void end_any_member(upb_json_parser *p, const char *ptr) {
  upb_jsonparser_any_frame *any = p->top->any_frame;

  json_parser_any_frame_set_before_type_url_end(any, ptr);
  end_member(p);
}
14673 
/* Pushes a new parser frame for a JSON object that is the value of the
 * current member.
 *
 *   - Unknown field: a bare frame is pushed and no handlers are called.
 *   - Map field: STARTSEQ is emitted and the new frame is marked is_map, with
 *     |m| set to the field's sub-message def and |mapfield| to the field.
 *   - Sub-message field: STARTSUBMSG is emitted and the new frame gets the
 *     sub-message def and its name table.  If that message is the well-known
 *     Any type, an any_frame is allocated and reset as well.
 *   - Any other field type: an error is reported.
 *
 * Returns false if check_stack() fails, if the any_frame cannot be allocated,
 * or if the field cannot hold an object. */
static bool start_subobject(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (p->top->is_unknown_field) {
    if (!check_stack(p)) return false;

    p->top = start_jsonparser_frame(p);
    return true;
  }

  if (upb_fielddef_ismap(p->top->f)) {
    /* Beginning of a map. Start a new parser frame in a repeated-field
     * context. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(&p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(p->top->f);
    inner->mapfield = p->top->f;
    inner->is_map = true;
    p->top = inner;

    return true;
  } else if (upb_fielddef_issubmsg(p->top->f)) {
    /* Beginning of a subobject. Start a new parser frame in the submsg
     * context. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
    upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(p->top->f);
    set_name_table(p, inner);
    p->top = inner;

    if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
      upb_jsonparser_any_frame *any_frame =
          upb_env_malloc(p->env, sizeof(upb_jsonparser_any_frame));

      if (any_frame == NULL) {
        /* Allocation failed: leave the frame in the non-Any state and fail
         * the parse rather than dereferencing NULL below. */
        p->top->is_any = false;
        p->top->any_frame = NULL;
        upb_status_seterrmsg(&p->status, "Out of memory");
        upb_env_reporterror(p->env, &p->status);
        return false;
      }

      p->top->is_any = true;
      p->top->any_frame = any_frame;
      json_parser_any_frame_reset(p->top->any_frame);
    } else {
      p->top->is_any = false;
      p->top->any_frame = NULL;
    }

    return true;
  } else {
    upb_status_seterrf(&p->status,
                       "Object specified for non-message/group field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }
}
14734 
/* Handles the start of a JSON object, adding the implicit wrapper levels that
 * the well-known Struct and Value types need before the regular
 * start_subobject() call:
 *
 *   top level, Value message : "struct_value" member + sub-object + "fields"
 *   top level, Struct message: "fields" member
 *   top level, anything else : nothing to do, returns true immediately
 *   Struct-typed field       : sub-object + "fields" member
 *   Value-typed field        : sub-object + "struct_value" + sub-object +
 *                              "fields" member
 *
 * Returns false as soon as any start_subobject() call fails. */
static bool start_subobject_full(upb_json_parser *p) {
  bool wrap_in_struct_fields = true;

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_STRUCTVALUE);
      if (!start_subobject(p)) return false;
    } else if (!is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
      /* Ordinary top-level message: no frame to push. */
      return true;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) {
    if (!start_subobject(p)) return false;
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_STRUCTVALUE);
    if (!start_subobject(p)) return false;
  } else {
    wrap_in_struct_fields = false;
  }

  if (wrap_in_struct_fields) {
    start_structvalue_object(p);
  }

  return start_subobject(p);
}
14758 
/* Pops the frame of a finished JSON object and notifies the parent frame's
 * sink: ENDSEQ if the popped frame was a map, ENDSUBMSG if it was a known
 * sub-message, nothing if it had no message def (unknown field).  Does
 * nothing at all when the parser is at the top level. */
static void end_subobject(upb_json_parser *p) {
  bool closing_map;
  bool closing_unknown;
  upb_selector_t sel;

  if (is_top_level(p)) {
    return;
  }

  /* Capture what kind of frame is being closed before it is popped. */
  closing_map = p->top->is_map;
  closing_unknown = (p->top->m == NULL);
  p->top--;

  if (closing_map) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(&p->top->sink, sel);
  } else if (!closing_unknown) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
    upb_sink_endsubmsg(&p->top->sink, sel);
  }
}
14779 
/* Counterpart of start_subobject_full(): ends the object itself and then
 * unwinds the implicit Struct and Value wrapper levels, if the frame that is
 * now on top belongs to one of those well-known types. */
static void end_subobject_full(upb_json_parser *p) {
  end_subobject(p);

  /* Implicit Struct level ("fields" member). */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
    end_structvalue_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }

  /* Implicit Value level; checked after the Struct level has been unwound. */
  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }
}
14797 
/* Handles the start of a JSON array.
 *
 * First adds the implicit wrapper levels needed by the well-known ListValue
 * and Value types (at top level, or for a field of those types that is either
 * not a sequence or whose array has already been opened).  A top-level array
 * for any other message type is rejected by returning false.
 *
 * Then pushes the frame for the array itself:
 *   - under an unknown field, a bare frame flagged is_unknown_field;
 *   - otherwise the current field must be a sequence; STARTSEQ is emitted and
 *     the new frame inherits |m| and |f| and is flagged is_repeated.
 *
 * Returns false on any failure.  check_stack() is consulted before every
 * frame push, including the unknown-field one, which used to skip it and so
 * let deeply nested arrays in an unknown field run past the frame stack. */
static bool start_array(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_LISTVALUE);
      if (!start_subobject(p)) return false;
      start_listvalue_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      start_listvalue_object(p);
    } else {
      return false;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_LISTVALUE);
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  }

  if (p->top->is_unknown_field) {
    /* Same guard as the unknown-field branch of start_subobject(). */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    inner->is_unknown_field = true;
    p->top = inner;

    return true;
  }

  if (!upb_fielddef_isseq(p->top->f)) {
    upb_status_seterrf(&p->status,
                       "Array specified for non-repeated field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (!check_stack(p)) return false;

  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
  upb_sink_startseq(&p->top->sink, sel, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  inner->is_repeated = true;
  p->top = inner;

  return true;
}
14854 
/* Handles the end of a JSON array: pops the array frame and, unless the
 * parent is in an unknown field, emits ENDSEQ and unwinds the implicit
 * ListValue and Value wrapper levels if the frame now on top belongs to one
 * of those well-known types. */
static void end_array(upb_json_parser *p) {
  upb_selector_t sel;

  UPB_ASSERT(p->top > p->stack);
  p->top--;

  if (!p->top->is_unknown_field) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(&p->top->sink, sel);

    /* Implicit ListValue level ("values" member). */
    if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      end_listvalue_object(p);
      if (!is_top_level(p)) end_subobject(p);
    }

    /* Implicit Value level, checked after the ListValue level is unwound. */
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      end_value_object(p);
      if (!is_top_level(p)) end_subobject(p);
    }
  }
}
14883 
/* Emits STARTMSG on the current frame's sink.  Map frames and frames without
 * a message def are skipped. */
static void start_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) {
    return;
  }

  upb_sink_startmsg(&p->top->sink);
}
14889 
/* Emits ENDMSG on the current frame's sink and reports a failing status
 * through the env.  Map frames and frames without a message def are
 * skipped. */
static void end_object(upb_json_parser *p) {
  upb_status status;

  if (p->top->is_map || p->top->m == NULL) {
    return;
  }

  upb_status_clear(&status);
  upb_sink_endmsg(&p->top->sink, &status);

  if (!upb_ok(&status)) {
    upb_env_reporterror(p->env, &status);
  }
}
14900 
/* Object-start hook for Any objects: performs the regular start_object() and
 * initializes both ends of the any_frame's "before type url" range to |ptr|,
 * i.e. an empty range starting here. */
static void start_any_object(upb_json_parser *p, const char *ptr) {
  upb_jsonparser_any_frame *any;

  start_object(p);

  any = p->top->any_frame;
  any->before_type_url_start = ptr;
  any->before_type_url_end = ptr;
}
14906 
/* Object-end hook for Any objects.
 *
 * While the Any object was being scanned, the text ranges before and after
 * its type-url member were recorded in the frame's any_frame.  This function
 * replays those ranges through the nested parser (any_frame->parser) and then
 * emits the bytes collected in any_frame->stringsink as the string value of
 * the Any's "value" field.
 *
 *   - A value without a type url is rejected.
 *   - If the nested parser's message is a well-known type, each recorded range
 *     is narrowed to the text after its first ':' and replayed without the
 *     synthetic braces; otherwise missing leading/trailing ranges are replaced
 *     by a synthetic "{" or "}".
 *   - |ptr| is the position of the end of the object; NOTE(review): presumably
 *     it points at the closing '}', confirm against the Ragel action.
 *
 * Returns false if any replay step fails, if the "value" member cannot be
 * resolved, or if check_stack() fails.  On success the any_frame is freed and
 * the frame's pointer to it is cleared. */
static bool end_any_object(upb_json_parser *p, const char *ptr) {
  const char *value_membername = "value";
  bool is_well_known_packed = false;
  const char *packed_end = ptr + 1;
  upb_selector_t sel;
  upb_jsonparser_frame *inner;

  if (json_parser_any_frame_has_value(p->top->any_frame) &&
      !json_parser_any_frame_has_type_url(p->top->any_frame)) {
    upb_status_seterrmsg(&p->status, "No valid type url");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Well known types data is represented as value field. */
  if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) !=
          UPB_WELLKNOWN_UNSPECIFIED) {
    is_well_known_packed = true;

    if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
      /* Skip past the member name: keep only what follows the first ':'. */
      p->top->any_frame->before_type_url_start =
          memchr(p->top->any_frame->before_type_url_start, ':',
                 p->top->any_frame->before_type_url_end -
                 p->top->any_frame->before_type_url_start);
      if (p->top->any_frame->before_type_url_start == NULL) {
        upb_status_seterrmsg(&p->status, "invalid data for well known type.");
        upb_env_reporterror(p->env, &p->status);
        return false;
      }
      p->top->any_frame->before_type_url_start++;
    }

    if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
      /* Same narrowing for the range after the type url. */
      p->top->any_frame->after_type_url_start =
          memchr(p->top->any_frame->after_type_url_start, ':',
                 (ptr + 1) -
                 p->top->any_frame->after_type_url_start);
      if (p->top->any_frame->after_type_url_start == NULL) {
        upb_status_seterrmsg(&p->status, "Invalid data for well known type.");
        upb_env_reporterror(p->env, &p->status);
        return false;
      }
      p->top->any_frame->after_type_url_start++;
      /* For packed well-known types the trailing range stops at |ptr|
       * rather than one past it. */
      packed_end = ptr;
    }
  }

  /* Replay the leading range, or open a synthetic object. */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->before_type_url_start,
               p->top->any_frame->before_type_url_end -
               p->top->any_frame->before_type_url_start, NULL)) {
      return false;
    }
  } else {
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) {
        return false;
      }
    }
  }

  /* Join the two ranges when both are present. */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) &&
      json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) {
      return false;
    }
  }

  /* Replay the trailing range, or close the synthetic object. */
  if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->after_type_url_start,
               packed_end - p->top->any_frame->after_type_url_start, NULL)) {
      return false;
    }
  } else {
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) {
        return false;
      }
    }
  }

  if (!end(p->top->any_frame->parser, NULL)) {
    return false;
  }

  /* From here on the frame is handled like an ordinary message, so that
   * end_membername() resolves "value" through the name table. */
  p->top->is_any = false;

  /* Set value */
  start_member(p);
  capture_begin(p, value_membername);
  capture_end(p, value_membername + 5);
  /* Without a resolved field the selector lookups below would have no field
   * to work with, so a failure must stop here. */
  if (!end_membername(p)) return false;

  if (!check_stack(p)) return false;
  /* The slot above the top frame is used only for its sink; the frame stack
   * pointer itself is not advanced. */
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(&inner->sink, sel, p->top->any_frame->stringsink.ptr,
                     p->top->any_frame->stringsink.len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(&inner->sink, sel);

  end_member(p);

  end_object(p);

  /* Deallocate any parse frame, and do not leave a dangling pointer behind. */
  json_parser_any_frame_free(p->top->any_frame);
  upb_env_free(p->env, p->top->any_frame);
  p->top->any_frame = NULL;

  return true;
}
15023 
is_string_wrapper(const upb_msgdef * m)15024 static bool is_string_wrapper(const upb_msgdef *m) {
15025   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
15026   return type == UPB_WELLKNOWN_STRINGVALUE ||
15027          type == UPB_WELLKNOWN_BYTESVALUE;
15028 }
15029 
is_fieldmask(const upb_msgdef * m)15030 static bool is_fieldmask(const upb_msgdef *m) {
15031   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
15032   return type == UPB_WELLKNOWN_FIELDMASK;
15033 }
15034 
/* Opens the implicit message structure of a FieldMask: starts the object,
 * selects its "paths" member as if that name had been read from the input,
 * and opens the array that will receive the individual paths.  The results of
 * end_membername() and start_array() are not checked here. */
static void start_fieldmask_object(upb_json_parser *p) {
  static const char membername[] = "paths";

  start_object(p);

  /* Feed the synthetic member name through the normal capture path. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + (sizeof(membername) - 1));
  end_membername(p);

  start_array(p);
}
15048 
/* Closes what start_fieldmask_object() opened, innermost first: the array,
 * then the member, then the object. */
static void end_fieldmask_object(upb_json_parser *p) {
  end_array(p);   /* array of paths */
  end_member(p);  /* "paths" member */
  end_object(p);  /* the FieldMask message */
}
15054 
/* Opens the implicit message structure of a wrapper type: starts the object
 * and selects its "value" member as if that name had been read from the
 * input, so that the upcoming scalar is stored there. */
static void start_wrapper_object(upb_json_parser *p) {
  static const char membername[] = "value";

  start_object(p);

  /* Feed the synthetic member name through the normal capture path. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + (sizeof(membername) - 1));
  end_membername(p);
}
15066 
/* Closes what start_wrapper_object() opened: the member, then the object. */
static void end_wrapper_object(upb_json_parser *p) {
  end_member(p);  /* "value" member */
  end_object(p);  /* the wrapper message */
}
15071 
/* Opens the implicit message structure of a Value: starts the object and
 * selects the member that corresponds to |value_type| (one of the VALUE_*
 * constants) as if its name had been read from the input.  An unrecognized
 * |value_type| selects the empty member name. */
static void start_value_object(upb_json_parser *p, int value_type) {
  const char *membername;

  switch (value_type) {
    case VALUE_NULLVALUE:   membername = "null_value";   break;
    case VALUE_NUMBERVALUE: membername = "number_value"; break;
    case VALUE_STRINGVALUE: membername = "string_value"; break;
    case VALUE_BOOLVALUE:   membername = "bool_value";   break;
    case VALUE_STRUCTVALUE: membername = "struct_value"; break;
    case VALUE_LISTVALUE:   membername = "list_value";   break;
    default:                membername = "";             break;
  }

  start_object(p);

  /* Feed the synthetic member name through the normal capture path. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + strlen(membername));
  end_membername(p);
}
15110 
/* Closes what start_value_object() opened: the member, then the object. */
static void end_value_object(upb_json_parser *p) {
  end_member(p);  /* the selected *_value member */
  end_object(p);  /* the Value message */
}
15115 
/* Opens the implicit message structure of a ListValue: starts the object and
 * selects its "values" member as if that name had been read from the
 * input. */
static void start_listvalue_object(upb_json_parser *p) {
  static const char membername[] = "values";

  start_object(p);

  /* Feed the synthetic member name through the normal capture path. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + (sizeof(membername) - 1));
  end_membername(p);
}
15127 
/* Closes what start_listvalue_object() opened: the member, then the object. */
static void end_listvalue_object(upb_json_parser *p) {
  end_member(p);  /* "values" member */
  end_object(p);  /* the ListValue message */
}
15132 
/* Opens the implicit message structure of a Struct: starts the object and
 * selects its "fields" member as if that name had been read from the
 * input. */
static void start_structvalue_object(upb_json_parser *p) {
  static const char membername[] = "fields";

  start_object(p);

  /* Feed the synthetic member name through the normal capture path. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + (sizeof(membername) - 1));
  end_membername(p);
}
15144 
/* Closes what start_structvalue_object() opened: the member, then the
 * object. */
static void end_structvalue_object(upb_json_parser *p) {
  end_member(p);  /* "fields" member */
  end_object(p);  /* the Struct message */
}
15149 
/* True when the parser is on the bottom stack frame with no field selected
 * and is not inside an unknown field. */
static bool is_top_level(upb_json_parser *p) {
  if (p->top != p->stack) return false;
  if (p->top->f != NULL) return false;
  return !p->top->is_unknown_field;
}
15153 
/* True when the current frame has a message def and that message is the
 * well-known type |type|. */
static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return upb_msgdef_wellknowntype(m) == type;
}
15157 
/* True when the current frame has a field selected, that field is a
 * sub-message, and the sub-message is the well-known type |type|. */
static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) return false;
  return upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) == type;
}
15164 
/* True when the currently selected field is a sub-message whose type is a
 * number wrapper (per upb_msgdef_isnumberwrapper()). */
static bool does_number_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) return false;
  return upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(f));
}
15170 
/* True when the current frame's message is a number wrapper (per
 * upb_msgdef_isnumberwrapper()). */
static bool does_number_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return upb_msgdef_isnumberwrapper(m);
}
15174 
/* True when the current frame's message is a number wrapper (per
 * upb_msgdef_isnumberwrapper()).  Same test as does_number_wrapper_end(). */
static bool is_number_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return upb_msgdef_isnumberwrapper(m);
}
15178 
/* True when the currently selected field is a sub-message whose type is a
 * string wrapper (see is_string_wrapper()). */
static bool does_string_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) return false;
  return is_string_wrapper(upb_fielddef_msgsubdef(f));
}
15184 
/* True when the current frame's message is a string wrapper (see
 * is_string_wrapper()). */
static bool does_string_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_string_wrapper(m);
}
15188 
/* True when the current frame's message is a string wrapper (see
 * is_string_wrapper()).  Same test as does_string_wrapper_end(). */
static bool is_string_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_string_wrapper(m);
}
15192 
/* True when the currently selected field is a sub-message whose type is the
 * well-known FieldMask. */
static bool does_fieldmask_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) return false;
  return is_fieldmask(upb_fielddef_msgsubdef(f));
}
15198 
/* True when the current frame's message is the well-known FieldMask. */
static bool does_fieldmask_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_fieldmask(m);
}
15202 
/* True when the current frame's message is the well-known FieldMask.  Same
 * test as does_fieldmask_end(). */
static bool is_fieldmask_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_fieldmask(m);
}
15206 
/* Jumps to the enclosing function's `error` label when |x| evaluates to
 * false.  Wrapped in do/while(0) so the macro behaves as a single statement:
 * a bare `if` would capture a following `else` when used as the body of
 * another if/else.  Use with a trailing semicolon. */
#define CHECK_RETURN_TOP(x) \
  do {                      \
    if (!(x)) goto error;   \
  } while (0)
15208 
15209 
15210 /* The actual parser **********************************************************/
15211 
15212 /* What follows is the Ragel parser itself.  The language is specified in Ragel
15213  * and the actions call our C functions above.
15214  *
15215  * Ragel has an extensive set of functionality, and we use only a small part of
15216  * it.  There are many action types but we only use a few:
15217  *
15218  *   ">" -- transition into a machine
15219  *   "%" -- transition out of a machine
15220  *   "@" -- transition into a final state of a machine.
15221  *
15222  * "@" transitions are tricky because a machine can transition into a final
15223  * state repeatedly.  But in some cases we know this can't happen, for example
15224  * a string which is delimited by a final '"' can only transition into its
15225  * final state once, when the closing '"' is seen. */
15226 
15227 
15228 #line 2789 "upb/json/parser.rl"
15229 
15230 
15231 
15232 #line 2592 "upb/json/parser.c"
/* Ragel-generated table for the JSON state machine; regenerate from
 * upb/json/parser.rl rather than editing by hand.
 * NOTE(review): in Ragel's table-driven output this array normally holds the
 * action lists, each stored as a count followed by that many action ids;
 * confirm against the Ragel version in use. */
15233 static const char _json_actions[] = {
15234 	0, 1, 0, 1, 1, 1, 3, 1,
15235 	4, 1, 6, 1, 7, 1, 8, 1,
15236 	9, 1, 11, 1, 12, 1, 13, 1,
15237 	14, 1, 15, 1, 16, 1, 17, 1,
15238 	18, 1, 19, 1, 20, 1, 22, 1,
15239 	23, 1, 24, 1, 35, 1, 37, 1,
15240 	39, 1, 40, 1, 42, 1, 43, 1,
15241 	44, 1, 46, 1, 48, 1, 49, 1,
15242 	50, 1, 51, 1, 53, 1, 54, 2,
15243 	4, 9, 2, 5, 6, 2, 7, 3,
15244 	2, 7, 9, 2, 21, 26, 2, 25,
15245 	10, 2, 27, 28, 2, 29, 30, 2,
15246 	32, 34, 2, 33, 31, 2, 38, 36,
15247 	2, 40, 42, 2, 45, 2, 2, 46,
15248 	54, 2, 47, 36, 2, 49, 54, 2,
15249 	50, 54, 2, 51, 54, 2, 52, 41,
15250 	2, 53, 54, 3, 32, 34, 35, 4,
15251 	21, 26, 27, 28
15252 };
15253 
/* Ragel-generated table for the JSON state machine; regenerate from
 * upb/json/parser.rl rather than editing by hand.
 * NOTE(review): presumably one entry per state giving the offset of that
 * state's keys in _json_trans_keys; confirm against the Ragel docs. */
15254 static const short _json_key_offsets[] = {
15255 	0, 0, 12, 13, 18, 23, 28, 29,
15256 	30, 31, 32, 33, 34, 35, 36, 37,
15257 	38, 43, 44, 48, 53, 58, 63, 67,
15258 	71, 74, 77, 79, 83, 87, 89, 91,
15259 	96, 98, 100, 109, 115, 121, 127, 133,
15260 	135, 139, 142, 144, 146, 149, 150, 154,
15261 	156, 158, 160, 162, 163, 165, 167, 168,
15262 	170, 172, 173, 175, 177, 178, 180, 182,
15263 	183, 185, 187, 191, 193, 195, 196, 197,
15264 	198, 199, 201, 206, 208, 210, 212, 221,
15265 	222, 222, 222, 227, 232, 237, 238, 239,
15266 	240, 241, 241, 242, 243, 244, 244, 245,
15267 	246, 247, 247, 252, 253, 257, 262, 267,
15268 	272, 276, 276, 279, 282, 285, 288, 291,
15269 	294, 294, 294, 294, 294, 294
15270 };
15271 
/* Ragel-generated table for the JSON state machine; regenerate from
 * upb/json/parser.rl rather than editing by hand.
 * NOTE(review): presumably the input byte values each state matches on
 * (single keys followed by low/high range pairs), e.g. 34 is '"' and 123 is
 * '{'; confirm against the Ragel docs. */
15272 static const char _json_trans_keys[] = {
15273 	32, 34, 45, 91, 102, 110, 116, 123,
15274 	9, 13, 48, 57, 34, 32, 93, 125,
15275 	9, 13, 32, 44, 93, 9, 13, 32,
15276 	93, 125, 9, 13, 97, 108, 115, 101,
15277 	117, 108, 108, 114, 117, 101, 32, 34,
15278 	125, 9, 13, 34, 32, 58, 9, 13,
15279 	32, 93, 125, 9, 13, 32, 44, 125,
15280 	9, 13, 32, 44, 125, 9, 13, 32,
15281 	34, 9, 13, 45, 48, 49, 57, 48,
15282 	49, 57, 46, 69, 101, 48, 57, 69,
15283 	101, 48, 57, 43, 45, 48, 57, 48,
15284 	57, 48, 57, 46, 69, 101, 48, 57,
15285 	34, 92, 34, 92, 34, 47, 92, 98,
15286 	102, 110, 114, 116, 117, 48, 57, 65,
15287 	70, 97, 102, 48, 57, 65, 70, 97,
15288 	102, 48, 57, 65, 70, 97, 102, 48,
15289 	57, 65, 70, 97, 102, 34, 92, 45,
15290 	48, 49, 57, 48, 49, 57, 46, 115,
15291 	48, 57, 115, 48, 57, 34, 46, 115,
15292 	48, 57, 48, 57, 48, 57, 48, 57,
15293 	48, 57, 45, 48, 57, 48, 57, 45,
15294 	48, 57, 48, 57, 84, 48, 57, 48,
15295 	57, 58, 48, 57, 48, 57, 58, 48,
15296 	57, 48, 57, 43, 45, 46, 90, 48,
15297 	57, 48, 57, 58, 48, 48, 34, 48,
15298 	57, 43, 45, 90, 48, 57, 34, 44,
15299 	34, 44, 34, 44, 34, 45, 91, 102,
15300 	110, 116, 123, 48, 57, 34, 32, 93,
15301 	125, 9, 13, 32, 44, 93, 9, 13,
15302 	32, 93, 125, 9, 13, 97, 108, 115,
15303 	101, 117, 108, 108, 114, 117, 101, 32,
15304 	34, 125, 9, 13, 34, 32, 58, 9,
15305 	13, 32, 93, 125, 9, 13, 32, 44,
15306 	125, 9, 13, 32, 44, 125, 9, 13,
15307 	32, 34, 9, 13, 32, 9, 13, 32,
15308 	9, 13, 32, 9, 13, 32, 9, 13,
15309 	32, 9, 13, 32, 9, 13, 0
15310 };
15311 
/* Ragel-generated table for the JSON state machine; regenerate from
 * upb/json/parser.rl rather than editing by hand.
 * NOTE(review): presumably the number of single-character keys per state;
 * confirm against the Ragel docs. */
15312 static const char _json_single_lengths[] = {
15313 	0, 8, 1, 3, 3, 3, 1, 1,
15314 	1, 1, 1, 1, 1, 1, 1, 1,
15315 	3, 1, 2, 3, 3, 3, 2, 2,
15316 	1, 3, 0, 2, 2, 0, 0, 3,
15317 	2, 2, 9, 0, 0, 0, 0, 2,
15318 	2, 1, 2, 0, 1, 1, 2, 0,
15319 	0, 0, 0, 1, 0, 0, 1, 0,
15320 	0, 1, 0, 0, 1, 0, 0, 1,
15321 	0, 0, 4, 0, 0, 1, 1, 1,
15322 	1, 0, 3, 2, 2, 2, 7, 1,
15323 	0, 0, 3, 3, 3, 1, 1, 1,
15324 	1, 0, 1, 1, 1, 0, 1, 1,
15325 	1, 0, 3, 1, 2, 3, 3, 3,
15326 	2, 0, 1, 1, 1, 1, 1, 1,
15327 	0, 0, 0, 0, 0, 0
15328 };
15329 
/* Ragel-generated table for the JSON state machine; regenerate from
 * upb/json/parser.rl rather than editing by hand.
 * NOTE(review): presumably the number of key ranges (low/high pairs) per
 * state; confirm against the Ragel docs. */
15330 static const char _json_range_lengths[] = {
15331 	0, 2, 0, 1, 1, 1, 0, 0,
15332 	0, 0, 0, 0, 0, 0, 0, 0,
15333 	1, 0, 1, 1, 1, 1, 1, 1,
15334 	1, 0, 1, 1, 1, 1, 1, 1,
15335 	0, 0, 0, 3, 3, 3, 3, 0,
15336 	1, 1, 0, 1, 1, 0, 1, 1,
15337 	1, 1, 1, 0, 1, 1, 0, 1,
15338 	1, 0, 1, 1, 0, 1, 1, 0,
15339 	1, 1, 0, 1, 1, 0, 0, 0,
15340 	0, 1, 1, 0, 0, 0, 1, 0,
15341 	0, 0, 1, 1, 1, 0, 0, 0,
15342 	0, 0, 0, 0, 0, 0, 0, 0,
15343 	0, 0, 1, 0, 1, 1, 1, 1,
15344 	1, 0, 1, 1, 1, 1, 1, 1,
15345 	0, 0, 0, 0, 0, 0
15346 };
15347 
/* Ragel-generated table for the JSON state machine; regenerate from
 * upb/json/parser.rl rather than editing by hand.
 * NOTE(review): presumably one entry per state giving the offset of that
 * state's slots in _json_indicies; confirm against the Ragel docs. */
15348 static const short _json_index_offsets[] = {
15349 	0, 0, 11, 13, 18, 23, 28, 30,
15350 	32, 34, 36, 38, 40, 42, 44, 46,
15351 	48, 53, 55, 59, 64, 69, 74, 78,
15352 	82, 85, 89, 91, 95, 99, 101, 103,
15353 	108, 111, 114, 124, 128, 132, 136, 140,
15354 	143, 147, 150, 153, 155, 158, 160, 164,
15355 	166, 168, 170, 172, 174, 176, 178, 180,
15356 	182, 184, 186, 188, 190, 192, 194, 196,
15357 	198, 200, 202, 207, 209, 211, 213, 215,
15358 	217, 219, 221, 226, 229, 232, 235, 244,
15359 	246, 247, 248, 253, 258, 263, 265, 267,
15360 	269, 271, 272, 274, 276, 278, 279, 281,
15361 	283, 285, 286, 291, 293, 297, 302, 307,
15362 	312, 316, 317, 320, 323, 326, 329, 332,
15363 	335, 336, 337, 338, 339, 340
15364 };
15365 
/* Ragel-generated table for the JSON state machine; regenerate from
 * upb/json/parser.rl rather than editing by hand.  (The spelling "indicies"
 * is the identifier as generated.)
 * NOTE(review): presumably maps each key slot to a transition index used with
 * _json_trans_targs / _json_trans_actions; confirm against the Ragel docs. */
15366 static const unsigned char _json_indicies[] = {
15367 	0, 2, 3, 4, 5, 6, 7, 8,
15368 	0, 3, 1, 9, 1, 11, 12, 1,
15369 	11, 10, 13, 14, 12, 13, 1, 14,
15370 	1, 1, 14, 10, 15, 1, 16, 1,
15371 	17, 1, 18, 1, 19, 1, 20, 1,
15372 	21, 1, 22, 1, 23, 1, 24, 1,
15373 	25, 26, 27, 25, 1, 28, 1, 29,
15374 	30, 29, 1, 30, 1, 1, 30, 31,
15375 	32, 33, 34, 32, 1, 35, 36, 27,
15376 	35, 1, 36, 26, 36, 1, 37, 38,
15377 	39, 1, 38, 39, 1, 41, 42, 42,
15378 	40, 43, 1, 42, 42, 43, 40, 44,
15379 	44, 45, 1, 45, 1, 45, 40, 41,
15380 	42, 42, 39, 40, 47, 48, 46, 50,
15381 	51, 49, 52, 52, 52, 52, 52, 52,
15382 	52, 52, 53, 1, 54, 54, 54, 1,
15383 	55, 55, 55, 1, 56, 56, 56, 1,
15384 	57, 57, 57, 1, 59, 60, 58, 61,
15385 	62, 63, 1, 64, 65, 1, 66, 67,
15386 	1, 68, 1, 67, 68, 1, 69, 1,
15387 	66, 67, 65, 1, 70, 1, 71, 1,
15388 	72, 1, 73, 1, 74, 1, 75, 1,
15389 	76, 1, 77, 1, 78, 1, 79, 1,
15390 	80, 1, 81, 1, 82, 1, 83, 1,
15391 	84, 1, 85, 1, 86, 1, 87, 1,
15392 	88, 1, 89, 89, 90, 91, 1, 92,
15393 	1, 93, 1, 94, 1, 95, 1, 96,
15394 	1, 97, 1, 98, 1, 99, 99, 100,
15395 	98, 1, 102, 1, 101, 104, 105, 103,
15396 	1, 1, 101, 106, 107, 108, 109, 110,
15397 	111, 112, 107, 1, 113, 1, 114, 115,
15398 	117, 118, 1, 117, 116, 119, 120, 118,
15399 	119, 1, 120, 1, 1, 120, 116, 121,
15400 	1, 122, 1, 123, 1, 124, 1, 125,
15401 	126, 1, 127, 1, 128, 1, 129, 130,
15402 	1, 131, 1, 132, 1, 133, 134, 135,
15403 	136, 134, 1, 137, 1, 138, 139, 138,
15404 	1, 139, 1, 1, 139, 140, 141, 142,
15405 	143, 141, 1, 144, 145, 136, 144, 1,
15406 	145, 135, 145, 1, 146, 147, 147, 1,
15407 	148, 148, 1, 149, 149, 1, 150, 150,
15408 	1, 151, 151, 1, 152, 152, 1, 1,
15409 	1, 1, 1, 1, 1, 0
15410 };
15411 
/* Ragel-generated table of target states, indexed by transition number.
 * parse() assigns `cs = _json_trans_targs[_trans]` after each match; a
 * resulting state of 0 makes parse() leave the machine loop. */
15412 static const char _json_trans_targs[] = {
15413 	1, 0, 2, 107, 3, 6, 10, 13,
15414 	16, 106, 4, 3, 106, 4, 5, 7,
15415 	8, 9, 108, 11, 12, 109, 14, 15,
15416 	110, 16, 17, 111, 18, 18, 19, 20,
15417 	21, 22, 111, 21, 22, 24, 25, 31,
15418 	112, 26, 28, 27, 29, 30, 33, 113,
15419 	34, 33, 113, 34, 32, 35, 36, 37,
15420 	38, 39, 33, 113, 34, 41, 42, 46,
15421 	42, 46, 43, 45, 44, 114, 48, 49,
15422 	50, 51, 52, 53, 54, 55, 56, 57,
15423 	58, 59, 60, 61, 62, 63, 64, 65,
15424 	66, 67, 73, 72, 68, 69, 70, 71,
15425 	72, 115, 74, 67, 72, 76, 116, 76,
15426 	116, 77, 79, 81, 82, 85, 90, 94,
15427 	98, 80, 117, 117, 83, 82, 80, 83,
15428 	84, 86, 87, 88, 89, 117, 91, 92,
15429 	93, 117, 95, 96, 97, 117, 98, 99,
15430 	105, 100, 100, 101, 102, 103, 104, 105,
15431 	103, 104, 117, 106, 106, 106, 106, 106,
15432 	106
15433 };
15434 
/* Ragel-generated table, indexed by transition number.  Each entry is an
 * offset into _json_actions at which the list of action ids for that
 * transition starts; parse() treats an entry of 0 as "no actions" and skips
 * straight to the next input character. */
15435 static const unsigned char _json_trans_actions[] = {
15436 	0, 0, 113, 107, 53, 0, 0, 0,
15437 	125, 59, 45, 0, 55, 0, 0, 0,
15438 	0, 0, 0, 0, 0, 0, 0, 0,
15439 	0, 0, 101, 51, 47, 0, 0, 45,
15440 	49, 49, 104, 0, 0, 0, 0, 0,
15441 	3, 0, 0, 0, 0, 0, 5, 15,
15442 	0, 0, 71, 7, 13, 0, 74, 9,
15443 	9, 9, 77, 80, 11, 37, 37, 37,
15444 	0, 0, 0, 39, 0, 41, 86, 0,
15445 	0, 0, 17, 19, 0, 21, 23, 0,
15446 	25, 27, 0, 29, 31, 0, 33, 35,
15447 	0, 135, 83, 135, 0, 0, 0, 0,
15448 	0, 92, 0, 89, 89, 98, 43, 0,
15449 	131, 95, 113, 107, 53, 0, 0, 0,
15450 	125, 59, 69, 110, 45, 0, 55, 0,
15451 	0, 0, 0, 0, 0, 119, 0, 0,
15452 	0, 122, 0, 0, 0, 116, 0, 101,
15453 	51, 47, 0, 0, 45, 49, 49, 104,
15454 	0, 0, 128, 0, 57, 63, 65, 61,
15455 	67
15456 };
15457 
/* Ragel-generated table, indexed by machine state.  When parse() reaches the
 * end-of-input sentinel (`p == eof`) it reads `_json_eof_actions[cs]` as an
 * offset into _json_actions and runs the action list found there. */
15458 static const unsigned char _json_eof_actions[] = {
15459 	0, 0, 0, 0, 0, 0, 0, 0,
15460 	0, 0, 0, 0, 0, 0, 0, 0,
15461 	0, 0, 0, 0, 0, 0, 0, 0,
15462 	0, 1, 0, 1, 0, 0, 1, 1,
15463 	0, 0, 0, 0, 0, 0, 0, 0,
15464 	0, 0, 0, 0, 0, 0, 0, 0,
15465 	0, 0, 0, 0, 0, 0, 0, 0,
15466 	0, 0, 0, 0, 0, 0, 0, 0,
15467 	0, 0, 0, 0, 0, 0, 0, 0,
15468 	0, 0, 0, 0, 0, 0, 0, 0,
15469 	0, 0, 0, 0, 0, 0, 0, 0,
15470 	0, 0, 0, 0, 0, 0, 0, 0,
15471 	0, 0, 0, 0, 0, 0, 0, 0,
15472 	0, 0, 0, 57, 63, 65, 61, 67,
15473 	0, 0, 0, 0, 0, 0
15474 };
15475 
/* State in which json_parser_reset() starts the machine. */
15476 static const int json_start = 1;
15477 
/* Entry states of the individual machines.  The generated actions in parse()
 * jump to these same numeric values directly (cs = 23, 32, 40, 47, 75, 78),
 * so the constants themselves are only referenced by the UPB_UNUSED() calls
 * in end(), which exist to silence unused-constant warnings. */
15478 static const int json_en_number_machine = 23;
15479 static const int json_en_string_machine = 32;
15480 static const int json_en_duration_machine = 40;
15481 static const int json_en_timestamp_machine = 47;
15482 static const int json_en_fieldmask_machine = 75;
15483 static const int json_en_value_machine = 78;
15484 static const int json_en_main = 1;
15485 
15486 
15487 #line 2792 "upb/json/parser.rl"
15488 
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)15489 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
15490              const upb_bufhandle *handle) {
15491   upb_json_parser *parser = closure;
15492 
15493   /* Variables used by Ragel's generated code. */
15494   int cs = parser->current_state;
15495   int *stack = parser->parser_stack;
15496   int top = parser->parser_top;
15497 
15498   const char *p = buf;
15499   const char *pe = buf + size;
15500   const char *eof = &eof_ch;
15501 
15502   parser->handle = handle;
15503 
15504   UPB_UNUSED(hd);
15505   UPB_UNUSED(handle);
15506 
15507   capture_resume(parser, buf);
15508 
15509 
15510 #line 2870 "upb/json/parser.c"
15511 	{
15512 	int _klen;
15513 	unsigned int _trans;
15514 	const char *_acts;
15515 	unsigned int _nacts;
15516 	const char *_keys;
15517 
15518 	if ( p == pe )
15519 		goto _test_eof;
15520 	if ( cs == 0 )
15521 		goto _out;
15522 _resume:
15523 	_keys = _json_trans_keys + _json_key_offsets[cs];
15524 	_trans = _json_index_offsets[cs];
15525 
15526 	_klen = _json_single_lengths[cs];
15527 	if ( _klen > 0 ) {
15528 		const char *_lower = _keys;
15529 		const char *_mid;
15530 		const char *_upper = _keys + _klen - 1;
15531 		while (1) {
15532 			if ( _upper < _lower )
15533 				break;
15534 
15535 			_mid = _lower + ((_upper-_lower) >> 1);
15536 			if ( (*p) < *_mid )
15537 				_upper = _mid - 1;
15538 			else if ( (*p) > *_mid )
15539 				_lower = _mid + 1;
15540 			else {
15541 				_trans += (unsigned int)(_mid - _keys);
15542 				goto _match;
15543 			}
15544 		}
15545 		_keys += _klen;
15546 		_trans += _klen;
15547 	}
15548 
15549 	_klen = _json_range_lengths[cs];
15550 	if ( _klen > 0 ) {
15551 		const char *_lower = _keys;
15552 		const char *_mid;
15553 		const char *_upper = _keys + (_klen<<1) - 2;
15554 		while (1) {
15555 			if ( _upper < _lower )
15556 				break;
15557 
15558 			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
15559 			if ( (*p) < _mid[0] )
15560 				_upper = _mid - 2;
15561 			else if ( (*p) > _mid[1] )
15562 				_lower = _mid + 2;
15563 			else {
15564 				_trans += (unsigned int)((_mid - _keys)>>1);
15565 				goto _match;
15566 			}
15567 		}
15568 		_trans += _klen;
15569 	}
15570 
15571 _match:
15572 	_trans = _json_indicies[_trans];
15573 	cs = _json_trans_targs[_trans];
15574 
15575 	if ( _json_trans_actions[_trans] == 0 )
15576 		goto _again;
15577 
15578 	_acts = _json_actions + _json_trans_actions[_trans];
15579 	_nacts = (unsigned int) *_acts++;
15580 	while ( _nacts-- > 0 )
15581 	{
15582 		switch ( *_acts++ )
15583 		{
15584 	case 1:
15585 #line 2597 "upb/json/parser.rl"
15586 	{ p--; {cs = stack[--top]; goto _again;} }
15587 	break;
15588 	case 2:
15589 #line 2599 "upb/json/parser.rl"
15590 	{ p--; {stack[top++] = cs; cs = 23;goto _again;} }
15591 	break;
15592 	case 3:
15593 #line 2603 "upb/json/parser.rl"
15594 	{ start_text(parser, p); }
15595 	break;
15596 	case 4:
15597 #line 2604 "upb/json/parser.rl"
15598 	{ CHECK_RETURN_TOP(end_text(parser, p)); }
15599 	break;
15600 	case 5:
15601 #line 2610 "upb/json/parser.rl"
15602 	{ start_hex(parser); }
15603 	break;
15604 	case 6:
15605 #line 2611 "upb/json/parser.rl"
15606 	{ hexdigit(parser, p); }
15607 	break;
15608 	case 7:
15609 #line 2612 "upb/json/parser.rl"
15610 	{ CHECK_RETURN_TOP(end_hex(parser)); }
15611 	break;
15612 	case 8:
15613 #line 2618 "upb/json/parser.rl"
15614 	{ CHECK_RETURN_TOP(escape(parser, p)); }
15615 	break;
15616 	case 9:
15617 #line 2624 "upb/json/parser.rl"
15618 	{ p--; {cs = stack[--top]; goto _again;} }
15619 	break;
15620 	case 10:
15621 #line 2629 "upb/json/parser.rl"
15622 	{ start_year(parser, p); }
15623 	break;
15624 	case 11:
15625 #line 2630 "upb/json/parser.rl"
15626 	{ CHECK_RETURN_TOP(end_year(parser, p)); }
15627 	break;
15628 	case 12:
15629 #line 2634 "upb/json/parser.rl"
15630 	{ start_month(parser, p); }
15631 	break;
15632 	case 13:
15633 #line 2635 "upb/json/parser.rl"
15634 	{ CHECK_RETURN_TOP(end_month(parser, p)); }
15635 	break;
15636 	case 14:
15637 #line 2639 "upb/json/parser.rl"
15638 	{ start_day(parser, p); }
15639 	break;
15640 	case 15:
15641 #line 2640 "upb/json/parser.rl"
15642 	{ CHECK_RETURN_TOP(end_day(parser, p)); }
15643 	break;
15644 	case 16:
15645 #line 2644 "upb/json/parser.rl"
15646 	{ start_hour(parser, p); }
15647 	break;
15648 	case 17:
15649 #line 2645 "upb/json/parser.rl"
15650 	{ CHECK_RETURN_TOP(end_hour(parser, p)); }
15651 	break;
15652 	case 18:
15653 #line 2649 "upb/json/parser.rl"
15654 	{ start_minute(parser, p); }
15655 	break;
15656 	case 19:
15657 #line 2650 "upb/json/parser.rl"
15658 	{ CHECK_RETURN_TOP(end_minute(parser, p)); }
15659 	break;
15660 	case 20:
15661 #line 2654 "upb/json/parser.rl"
15662 	{ start_second(parser, p); }
15663 	break;
15664 	case 21:
15665 #line 2655 "upb/json/parser.rl"
15666 	{ CHECK_RETURN_TOP(end_second(parser, p)); }
15667 	break;
15668 	case 22:
15669 #line 2660 "upb/json/parser.rl"
15670 	{ start_duration_base(parser, p); }
15671 	break;
15672 	case 23:
15673 #line 2661 "upb/json/parser.rl"
15674 	{ CHECK_RETURN_TOP(end_duration_base(parser, p)); }
15675 	break;
15676 	case 24:
15677 #line 2663 "upb/json/parser.rl"
15678 	{ p--; {cs = stack[--top]; goto _again;} }
15679 	break;
15680 	case 25:
15681 #line 2668 "upb/json/parser.rl"
15682 	{ start_timestamp_base(parser); }
15683 	break;
15684 	case 26:
15685 #line 2670 "upb/json/parser.rl"
15686 	{ start_timestamp_fraction(parser, p); }
15687 	break;
15688 	case 27:
15689 #line 2671 "upb/json/parser.rl"
15690 	{ CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); }
15691 	break;
15692 	case 28:
15693 #line 2673 "upb/json/parser.rl"
15694 	{ start_timestamp_zone(parser, p); }
15695 	break;
15696 	case 29:
15697 #line 2674 "upb/json/parser.rl"
15698 	{ CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); }
15699 	break;
15700 	case 30:
15701 #line 2676 "upb/json/parser.rl"
15702 	{ p--; {cs = stack[--top]; goto _again;} }
15703 	break;
15704 	case 31:
15705 #line 2681 "upb/json/parser.rl"
15706 	{ start_fieldmask_path_text(parser, p); }
15707 	break;
15708 	case 32:
15709 #line 2682 "upb/json/parser.rl"
15710 	{ end_fieldmask_path_text(parser, p); }
15711 	break;
15712 	case 33:
15713 #line 2687 "upb/json/parser.rl"
15714 	{ start_fieldmask_path(parser); }
15715 	break;
15716 	case 34:
15717 #line 2688 "upb/json/parser.rl"
15718 	{ end_fieldmask_path(parser); }
15719 	break;
15720 	case 35:
15721 #line 2694 "upb/json/parser.rl"
15722 	{ p--; {cs = stack[--top]; goto _again;} }
15723 	break;
15724 	case 36:
15725 #line 2699 "upb/json/parser.rl"
15726 	{
15727         if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) {
15728           {stack[top++] = cs; cs = 47;goto _again;}
15729         } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) {
15730           {stack[top++] = cs; cs = 40;goto _again;}
15731         } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) {
15732           {stack[top++] = cs; cs = 75;goto _again;}
15733         } else {
15734           {stack[top++] = cs; cs = 32;goto _again;}
15735         }
15736       }
15737 	break;
15738 	case 37:
15739 #line 2712 "upb/json/parser.rl"
15740 	{ p--; {stack[top++] = cs; cs = 78;goto _again;} }
15741 	break;
15742 	case 38:
15743 #line 2717 "upb/json/parser.rl"
15744 	{
15745         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
15746           start_any_member(parser, p);
15747         } else {
15748           start_member(parser);
15749         }
15750       }
15751 	break;
15752 	case 39:
15753 #line 2724 "upb/json/parser.rl"
15754 	{ CHECK_RETURN_TOP(end_membername(parser)); }
15755 	break;
15756 	case 40:
15757 #line 2727 "upb/json/parser.rl"
15758 	{
15759         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
15760           end_any_member(parser, p);
15761         } else {
15762           end_member(parser);
15763         }
15764       }
15765 	break;
15766 	case 41:
15767 #line 2738 "upb/json/parser.rl"
15768 	{
15769         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
15770           start_any_object(parser, p);
15771         } else {
15772           start_object(parser);
15773         }
15774       }
15775 	break;
15776 	case 42:
15777 #line 2747 "upb/json/parser.rl"
15778 	{
15779         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
15780           CHECK_RETURN_TOP(end_any_object(parser, p));
15781         } else {
15782           end_object(parser);
15783         }
15784       }
15785 	break;
15786 	case 43:
15787 #line 2759 "upb/json/parser.rl"
15788 	{ CHECK_RETURN_TOP(start_array(parser)); }
15789 	break;
15790 	case 44:
15791 #line 2763 "upb/json/parser.rl"
15792 	{ end_array(parser); }
15793 	break;
15794 	case 45:
15795 #line 2768 "upb/json/parser.rl"
15796 	{ CHECK_RETURN_TOP(start_number(parser, p)); }
15797 	break;
15798 	case 46:
15799 #line 2769 "upb/json/parser.rl"
15800 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
15801 	break;
15802 	case 47:
15803 #line 2771 "upb/json/parser.rl"
15804 	{ CHECK_RETURN_TOP(start_stringval(parser)); }
15805 	break;
15806 	case 48:
15807 #line 2772 "upb/json/parser.rl"
15808 	{ CHECK_RETURN_TOP(end_stringval(parser)); }
15809 	break;
15810 	case 49:
15811 #line 2774 "upb/json/parser.rl"
15812 	{ CHECK_RETURN_TOP(end_bool(parser, true)); }
15813 	break;
15814 	case 50:
15815 #line 2776 "upb/json/parser.rl"
15816 	{ CHECK_RETURN_TOP(end_bool(parser, false)); }
15817 	break;
15818 	case 51:
15819 #line 2778 "upb/json/parser.rl"
15820 	{ CHECK_RETURN_TOP(end_null(parser)); }
15821 	break;
15822 	case 52:
15823 #line 2780 "upb/json/parser.rl"
15824 	{ CHECK_RETURN_TOP(start_subobject_full(parser)); }
15825 	break;
15826 	case 53:
15827 #line 2781 "upb/json/parser.rl"
15828 	{ end_subobject_full(parser); }
15829 	break;
15830 	case 54:
15831 #line 2786 "upb/json/parser.rl"
15832 	{ p--; {cs = stack[--top]; goto _again;} }
15833 	break;
15834 #line 3194 "upb/json/parser.c"
15835 		}
15836 	}
15837 
15838 _again:
15839 	if ( cs == 0 )
15840 		goto _out;
15841 	if ( ++p != pe )
15842 		goto _resume;
15843 	_test_eof: {}
15844 	if ( p == eof )
15845 	{
15846 	const char *__acts = _json_actions + _json_eof_actions[cs];
15847 	unsigned int __nacts = (unsigned int) *__acts++;
15848 	while ( __nacts-- > 0 ) {
15849 		switch ( *__acts++ ) {
15850 	case 0:
15851 #line 2595 "upb/json/parser.rl"
15852 	{ p--; {cs = stack[--top]; 	if ( p == pe )
15853 		goto _test_eof;
15854 goto _again;} }
15855 	break;
15856 	case 46:
15857 #line 2769 "upb/json/parser.rl"
15858 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
15859 	break;
15860 	case 49:
15861 #line 2774 "upb/json/parser.rl"
15862 	{ CHECK_RETURN_TOP(end_bool(parser, true)); }
15863 	break;
15864 	case 50:
15865 #line 2776 "upb/json/parser.rl"
15866 	{ CHECK_RETURN_TOP(end_bool(parser, false)); }
15867 	break;
15868 	case 51:
15869 #line 2778 "upb/json/parser.rl"
15870 	{ CHECK_RETURN_TOP(end_null(parser)); }
15871 	break;
15872 	case 53:
15873 #line 2781 "upb/json/parser.rl"
15874 	{ end_subobject_full(parser); }
15875 	break;
15876 #line 3236 "upb/json/parser.c"
15877 		}
15878 	}
15879 	}
15880 
15881 	_out: {}
15882 	}
15883 
15884 #line 2814 "upb/json/parser.rl"
15885 
15886   if (p != pe) {
15887     upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p);
15888     upb_env_reporterror(parser->env, &parser->status);
15889   } else {
15890     capture_suspend(parser, &p);
15891   }
15892 
15893 error:
15894   /* Save parsing state back to parser. */
15895   parser->current_state = cs;
15896   parser->parser_top = top;
15897 
15898   return p - buf;
15899 }
15900 
end(void * closure,const void * hd)15901 static bool end(void *closure, const void *hd) {
15902   upb_json_parser *parser = closure;
15903 
15904   /* Prevent compile warning on unused static constants. */
15905   UPB_UNUSED(json_start);
15906   UPB_UNUSED(json_en_duration_machine);
15907   UPB_UNUSED(json_en_fieldmask_machine);
15908   UPB_UNUSED(json_en_number_machine);
15909   UPB_UNUSED(json_en_string_machine);
15910   UPB_UNUSED(json_en_timestamp_machine);
15911   UPB_UNUSED(json_en_value_machine);
15912   UPB_UNUSED(json_en_main);
15913 
15914   parse(parser, hd, &eof_ch, 0, NULL);
15915 
15916   return parser->current_state >= 106;
15917 }
15918 
/* Puts parser `p` back into its initial condition: one freshly initialised
 * frame at the base of the frame stack, the state machine at json_start with
 * an empty machine stack, no accumulated or captured text, and a cleared
 * status. */
static void json_parser_reset(upb_json_parser *p) {
  int initial_state;
  int initial_top;

  p->top = p->stack;
  init_frame(p->top);

  /* Emit Ragel initialization of the parser. */

#line 3288 "upb/json/parser.c"
  initial_state = json_start;
  initial_top = 0;

#line 2857 "upb/json/parser.rl"
  p->current_state = initial_state;
  p->parser_top = initial_top;
  accumulate_clear(p);
  p->multipart_state = MULTIPART_INACTIVE;
  p->capture = NULL;
  p->accumulated = NULL;
  upb_status_clear(&p->status);
}
15943 
/* Refcounted "visit" hook for a parser method: reports the single outgoing
 * reference, from the method `r` to the message definition it holds. */
static void visit_json_parsermethod(const upb_refcounted *r,
                                    upb_refcounted_visit *visit,
                                    void *closure) {
  const upb_json_parsermethod *self = (upb_json_parsermethod*)r;

  visit(r, upb_msgdef_upcast2(self->msg), closure);
}
15950 
/* Refcounted "free" hook for a parser method: uninitialises and frees every
 * per-message name table stored in name_tables, then the table of tables,
 * then the method object itself. */
static void free_json_parsermethod(upb_refcounted *r) {
  upb_json_parsermethod *self = (upb_json_parsermethod*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &self->name_tables);
  while (!upb_inttable_done(&it)) {
    upb_strtable *names = upb_value_getptr(upb_inttable_iter_value(&it));
    upb_strtable_uninit(names);
    upb_gfree(names);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&self->name_tables);

  upb_gfree(r);
}
15967 
/* Builds the name -> fielddef lookup table for message `md` and registers it
 * in m->name_tables, then does the same for every sub-message type reachable
 * through md's fields.
 *
 * Each field is entered under its JSON name and, when that differs, under its
 * raw field name as well.  The table is registered before the fields are
 * walked, so the lookup at the top also stops the recursion when a message
 * type is reached a second time. */
static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
  upb_msg_field_iter it;
  upb_strtable *names;

  /* Heap scratch buffer for JSON names: protobuf places no reasonable bound
   * on field-name length, so a fixed stack buffer is not an option. */
  char *scratch = NULL;
  size_t scratch_cap = 0;

  if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) {
    return;
  }

  /* TODO(haberman): handle malloc failure. */
  names = upb_gmalloc(sizeof(*names));
  upb_strtable_init(names, UPB_CTYPE_CONSTPTR);
  upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(names));

  upb_msg_field_begin(&it, md);
  while (!upb_msg_field_done(&it)) {
    const upb_fielddef *f = upb_msg_iter_field(&it);

    /* Ask for the JSON name; grow the scratch buffer and ask again if the
     * reported size did not fit. */
    size_t needed = upb_fielddef_getjsonname(f, scratch, scratch_cap);
    if (needed > scratch_cap) {
      size_t written;
      scratch = upb_grealloc(scratch, 0, needed);
      scratch_cap = needed;
      written = upb_fielddef_getjsonname(f, scratch, scratch_cap);
      UPB_ASSERT(scratch_cap == written);
    }
    upb_strtable_insert(names, scratch, upb_value_constptr(f));

    if (strcmp(scratch, upb_fielddef_name(f)) != 0) {
      /* The JSON name differs from the declared field name; compliant proto3
       * JSON parsers must accept both, so register the raw name too. */
      upb_strtable_insert(names, upb_fielddef_name(f), upb_value_constptr(f));
    }

    if (upb_fielddef_issubmsg(f)) {
      add_jsonname_table(m, upb_fielddef_msgsubdef(f));
    }

    upb_msg_field_next(&it);
  }

  upb_gfree(scratch);
}
16016 
16017 /* Public API *****************************************************************/
16018 
upb_json_parser_create(upb_env * env,const upb_json_parsermethod * method,const upb_symtab * symtab,upb_sink * output,bool ignore_json_unknown)16019 upb_json_parser *upb_json_parser_create(upb_env *env,
16020                                         const upb_json_parsermethod *method,
16021                                         const upb_symtab* symtab,
16022                                         upb_sink *output,
16023                                         bool ignore_json_unknown) {
16024 #ifndef NDEBUG
16025   const size_t size_before = upb_env_bytesallocated(env);
16026 #endif
16027   upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
16028   if (!p) return false;
16029 
16030   p->env = env;
16031   p->method = method;
16032   p->limit = p->stack + UPB_JSON_MAX_DEPTH;
16033   p->accumulate_buf = NULL;
16034   p->accumulate_buf_size = 0;
16035   upb_bytessink_reset(&p->input_, &method->input_handler_, p);
16036 
16037   json_parser_reset(p);
16038   upb_sink_reset(&p->top->sink, output->handlers, output->closure);
16039   p->top->m = upb_handlers_msgdef(output->handlers);
16040   if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
16041     p->top->is_any = true;
16042     p->top->any_frame =
16043         upb_env_malloc(p->env, sizeof(upb_jsonparser_any_frame));
16044     json_parser_any_frame_reset(p->top->any_frame);
16045   } else {
16046     p->top->is_any = false;
16047     p->top->any_frame = NULL;
16048   }
16049   set_name_table(p, p->top);
16050   p->symtab = symtab;
16051 
16052   p->ignore_json_unknown = ignore_json_unknown;
16053 
16054   /* If this fails, uncomment and increase the value in parser.h. */
16055   /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
16056   UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <=
16057                       UPB_JSON_PARSER_SIZE);
16058   return p;
16059 }
16060 
upb_json_parser_input(upb_json_parser * p)16061 upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
16062   return &p->input_;
16063 }
16064 
upb_json_parsermethod_new(const upb_msgdef * md,const void * owner)16065 upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md,
16066                                                  const void* owner) {
16067   static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod,
16068                                                   free_json_parsermethod};
16069   upb_json_parsermethod *ret = upb_gmalloc(sizeof(*ret));
16070   upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner);
16071 
16072   ret->msg = md;
16073   upb_ref2(md, ret);
16074 
16075   upb_byteshandler_init(&ret->input_handler_);
16076   upb_byteshandler_setstring(&ret->input_handler_, parse, ret);
16077   upb_byteshandler_setendstr(&ret->input_handler_, end, ret);
16078 
16079   upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR);
16080 
16081   add_jsonname_table(ret, md);
16082 
16083   return ret;
16084 }
16085 
upb_json_parsermethod_inputhandler(const upb_json_parsermethod * m)16086 const upb_byteshandler *upb_json_parsermethod_inputhandler(
16087     const upb_json_parsermethod *m) {
16088   return &m->input_handler_;
16089 }
16090 /*
16091 ** This currently uses snprintf() to format primitives, and could be optimized
16092 ** further.
16093 */
16094 
16095 
16096 #include <string.h>
16097 #include <stdint.h>
16098 #include <time.h>
16099 
/* State of a JSON printer.  The print helpers in this file write bytes to
 * output_ (using subc_ as the sub-closure) and consult first_elem_[depth_] to
 * decide whether a separating comma is needed. */
16100 struct upb_json_printer {
16101   upb_sink input_;
16102   /* BytesSink closure. */
16103   void *subc_;
  /* Bytes sink that print_data() writes the JSON text to. */
16104   upb_bytessink *output_;
16105 
16106   /* We track the depth so that we know when to emit startstr/endstr on the
16107    * output. */
16108   int depth_;
16109 
16110   /* Have we emitted the first element? This state is necessary to emit commas
16111    * without leaving a trailing comma in arrays/maps. We keep this state per
16112    * frame depth.
16113    *
16114    * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
16115    * We count frames (contexts in which we separate elements by commas) as both
16116    * repeated fields and messages (maps), and the worst case is a
16117    * message->repeated field->submessage->repeated field->... nesting. */
16118   bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
16119 
16120   /* To print timestamp, printer needs to cache its seconds and nanos values
16121    * and convert them when ending timestamp message. See comments of
16122    * printer_sethandlers_timestamp for more detail. */
16123   int64_t seconds;
16124   int32_t nanos;
16125 };
16126 
16127 /* StringPiece: a heap-allocated character buffer (ptr) plus its length in
 * bytes (len), not counting the terminating NUL.  Both the buffer and the
 * struct itself are released by freestrpc(). */
16128 typedef struct {
16129   char *ptr;
16130   size_t len;
16131 } strpc;
16132 
freestrpc(void * ptr)16133 void freestrpc(void *ptr) {
16134   strpc *pc = ptr;
16135   upb_gfree(pc->ptr);
16136   upb_gfree(pc);
16137 }
16138 
16139 /* Convert fielddef name to JSON name and return as a string piece. */
newstrpc(upb_handlers * h,const upb_fielddef * f,bool preserve_fieldnames)16140 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f,
16141                 bool preserve_fieldnames) {
16142   /* TODO(haberman): handle malloc failure. */
16143   strpc *ret = upb_gmalloc(sizeof(*ret));
16144   if (preserve_fieldnames) {
16145     ret->ptr = upb_gstrdup(upb_fielddef_name(f));
16146     ret->len = strlen(ret->ptr);
16147   } else {
16148     size_t len;
16149     ret->len = upb_fielddef_getjsonname(f, NULL, 0);
16150     ret->ptr = upb_gmalloc(ret->len);
16151     len = upb_fielddef_getjsonname(f, ret->ptr, ret->len);
16152     UPB_ASSERT(len == ret->len);
16153     ret->len--;  /* NULL */
16154   }
16155 
16156   upb_handlers_addcleanup(h, ret, freestrpc);
16157   return ret;
16158 }
16159 
16160 /* Convert a null-terminated const char* to a string piece. */
newstrpc_str(upb_handlers * h,const char * str)16161 strpc *newstrpc_str(upb_handlers *h, const char * str) {
16162   strpc * ret = upb_gmalloc(sizeof(*ret));
16163   ret->ptr = upb_gstrdup(str);
16164   ret->len = strlen(str);
16165   upb_handlers_addcleanup(h, ret, freestrpc);
16166   return ret;
16167 }
16168 
16169 /* ------------ JSON string printing: values, maps, arrays ------------------ */
16170 
/* Writes `len` bytes from `buf` to the printer's output sink and asserts that
 * the sink accepted all of them. */
static void print_data(
    upb_json_printer *p, const char *buf, unsigned int len) {
  upb_bytessink *sink = p->output_;
  void *sub = p->subc_;
  /* TODO: Will need to change if we support pushback from the sink. */
  size_t accepted = upb_bytessink_putbuf(sink, sub, buf, len, NULL);
  UPB_ASSERT(accepted == len);
}
16177 
/* Emits the "," that separates elements at the current depth, unless the
 * element about to be printed is the first one at that depth.  Either way the
 * depth is then marked as having had its first element. */
static void print_comma(upb_json_printer *p) {
  const bool is_first = p->first_elem_[p->depth_];

  if (!is_first) {
    print_data(p, ",", 1);
  }
  p->first_elem_[p->depth_] = false;
}
16184 
16185 /* Helpers that print properly formatted elements to the JSON output stream. */
16186 
16187 /* Used for escaping control chars in strings. */
16188 static const char kControlCharLimit = 0x20;
16189 
is_json_escaped(char c)16190 UPB_INLINE bool is_json_escaped(char c) {
16191   /* See RFC 4627. */
16192   unsigned char uc = (unsigned char)c;
16193   return uc < kControlCharLimit || uc == '"' || uc == '\\';
16194 }
16195 
json_nice_escape(char c)16196 UPB_INLINE const char* json_nice_escape(char c) {
16197   switch (c) {
16198     case '"':  return "\\\"";
16199     case '\\': return "\\\\";
16200     case '\b': return "\\b";
16201     case '\f': return "\\f";
16202     case '\n': return "\\n";
16203     case '\r': return "\\r";
16204     case '\t': return "\\t";
16205     default:   return NULL;
16206   }
16207 }
16208 
16209 /* Write a properly escaped string chunk. The surrounding quotes are *not*
16210  * printed; this is so that the caller has the option of emitting the string
16211  * content in chunks. */
putstring(upb_json_printer * p,const char * buf,unsigned int len)16212 static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
16213   const char* unescaped_run = NULL;
16214   unsigned int i;
16215   for (i = 0; i < len; i++) {
16216     char c = buf[i];
16217     /* Handle escaping. */
16218     if (is_json_escaped(c)) {
16219       /* Use a "nice" escape, like \n, if one exists for this character. */
16220       const char* escape = json_nice_escape(c);
16221       /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
16222        * escape. */
16223       char escape_buf[8];
16224       if (!escape) {
16225         unsigned char byte = (unsigned char)c;
16226         _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
16227         escape = escape_buf;
16228       }
16229 
16230       /* N.B. that we assume that the input encoding is equal to the output
16231        * encoding (both UTF-8 for  now), so for chars >= 0x20 and != \, ", we
16232        * can simply pass the bytes through. */
16233 
16234       /* If there's a current run of unescaped chars, print that run first. */
16235       if (unescaped_run) {
16236         print_data(p, unescaped_run, &buf[i] - unescaped_run);
16237         unescaped_run = NULL;
16238       }
16239       /* Then print the escape code. */
16240       print_data(p, escape, strlen(escape));
16241     } else {
16242       /* Add to the current unescaped run of characters. */
16243       if (unescaped_run == NULL) {
16244         unescaped_run = &buf[i];
16245       }
16246     }
16247   }
16248 
16249   /* If the string ended in a run of unescaped characters, print that last run. */
16250   if (unescaped_run) {
16251     print_data(p, unescaped_run, &buf[len] - unescaped_run);
16252   }
16253 }
16254 
/* Makes the enclosing function return -1 when condition `x` is false.  In the
 * size_t-returning fmt_* helpers that value converts to (size_t)-1, which is
 * what CHKFMT tests for.  The expansion is a bare `if` that already ends in
 * ';', so use it only as a standalone statement. */
16255 #define CHKLENGTH(x) if (!(x)) return -1;
16256 
/* Helpers that format floating point values according to our custom formats.
 * Right now we use %.8g and %.17g for float/double, respectively, to match
 * proto2::util::JsonFormat's defaults.  May want to change this later.
 *
 * Non-finite values have no JSON number representation, so they are written
 * as the quoted strings "NaN", "Infinity" and "-Infinity". */

const char neginf[] = "\"-Infinity\"";
const char inf[] = "\"Infinity\"";

/* Handles the non-finite cases shared by fmt_double() and fmt_float().
 *
 * Returns 0 when `val` is finite (nothing is written), the number of
 * characters written (excluding the NUL) when `val` is NaN or infinite, or
 * (size_t)-1 when the token plus its NUL does not fit in `length` bytes. */
static size_t fmt_nonfinite(double val, char* buf, size_t length) {
  const char *token;
  size_t token_len;

  if (val != val) {
    /* Only NaN compares unequal to itself. */
    token = "\"NaN\"";
  } else if (val == (1.0 / 0.0)) {
    token = inf;
  } else if (val == (-1.0 / 0.0)) {
    token = neginf;
  } else {
    return 0;
  }

  token_len = strlen(token);
  /* Strictly greater: the copy below also writes the terminating NUL. */
  CHKLENGTH(length > token_len);
  memcpy(buf, token, token_len + 1);
  return token_len;
}

/* Formats `val` into `buf` (capacity `length`) using "%.17g", or as a quoted
 * NaN/Infinity token.  Returns the number of characters written, or
 * (size_t)-1 if the result does not fit. */
static size_t fmt_double(double val, char* buf, size_t length) {
  size_t n = fmt_nonfinite(val, buf, length);
  if (n != 0) {
    /* Either the token length or the (size_t)-1 error marker. */
    return n;
  }
  n = _upb_snprintf(buf, length, "%.17g", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}

/* Formats `val` into `buf` (capacity `length`) using "%.8g", or as a quoted
 * NaN/Infinity token.  Returns the number of characters written, or
 * (size_t)-1 if the result does not fit. */
static size_t fmt_float(float val, char* buf, size_t length) {
  size_t n = fmt_nonfinite(val, buf, length);
  if (n != 0) {
    /* Either the token length or the (size_t)-1 error marker. */
    return n;
  }
  n = _upb_snprintf(buf, length, "%.8g", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
16285 
/* Formats `val` as "true" or "false" into `buf` (capacity `length`).  Returns
 * the number of characters written, or (size_t)-1 if nothing was written or
 * the text did not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *text = val ? "true" : "false";
  size_t written = _upb_snprintf(buf, length, "%s", text);

  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
16291 
/* Formats `val` as a bare decimal number into `buf` (capacity `length`).
 * Returns the number of characters written, or (size_t)-1 if nothing was
 * written or the text did not fit. */
static size_t fmt_int64_as_number(long long val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%lld", val);

  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
16297 
/* Formats `val` as a bare unsigned decimal number into `buf` (capacity
 * `length`).  Returns the number of characters written, or (size_t)-1 if
 * nothing was written or the text did not fit. */
static size_t fmt_uint64_as_number(
    unsigned long long val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%llu", val);

  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
16304 
/* Formats `val` as a decimal number wrapped in double quotes into `buf`
 * (capacity `length`).  Returns the number of characters written, or
 * (size_t)-1 if nothing was written or the text did not fit. */
static size_t fmt_int64_as_string(long long val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "\"%lld\"", val);

  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
16310 
/* Formats `val` as an unsigned decimal number wrapped in double quotes into
 * `buf` (capacity `length`).  Returns the number of characters written, or
 * (size_t)-1 if nothing was written or the text did not fit. */
static size_t fmt_uint64_as_string(
    unsigned long long val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "\"%llu\"", val);

  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
16317 
16318 /* Print a map key given a field name. Called by scalar field handlers and by
16319  * startseq for repeated fields. */
putkey(void * closure,const void * handler_data)16320 static bool putkey(void *closure, const void *handler_data) {
16321   upb_json_printer *p = closure;
16322   const strpc *key = handler_data;
16323   print_comma(p);
16324   print_data(p, "\"", 1);
16325   putstring(p, key->ptr, key->len);
16326   print_data(p, "\":", 2);
16327   return true;
16328 }
16329 
/* CHKFMT: makes the enclosing function return false when `val` is the
 * (size_t)-1 error marker produced by the fmt_* helpers.
 * CHK: makes the enclosing function return false when `val` is false.
 * Both expand to a bare `if` that already ends in ';', so use them only as
 * standalone statements. */
16330 #define CHKFMT(val) if ((val) == (size_t)-1) return false;
16331 #define CHK(val)    if (!(val)) return false;
16332 
/* Generates three value handlers for `type`, all built on `fmt_func`:
 *   put<type>       formats the value into a 64-byte stack buffer and prints
 *                   it; returns false if fmt_func reports failure.
 *   scalar_<type>   prints the field key via putkey(), then the value.
 *   repeated_<type> prints a separating comma if needed, then the value. */
16333 #define TYPE_HANDLERS(type, fmt_func)                                        \
16334   static bool put##type(void *closure, const void *handler_data, type val) { \
16335     upb_json_printer *p = closure;                                           \
16336     char data[64];                                                           \
16337     size_t length = fmt_func(val, data, sizeof(data));                       \
16338     UPB_UNUSED(handler_data);                                                \
16339     CHKFMT(length);                                                          \
16340     print_data(p, data, length);                                             \
16341     return true;                                                             \
16342   }                                                                          \
16343   static bool scalar_##type(void *closure, const void *handler_data,         \
16344                             type val) {                                      \
16345     CHK(putkey(closure, handler_data));                                      \
16346     CHK(put##type(closure, handler_data, val));                              \
16347     return true;                                                             \
16348   }                                                                          \
16349   static bool repeated_##type(void *closure, const void *handler_data,       \
16350                               type val) {                                    \
16351     upb_json_printer *p = closure;                                           \
16352     print_comma(p);                                                          \
16353     CHK(put##type(closure, handler_data, val));                              \
16354     return true;                                                             \
16355   }
16356 
/* Generates putmapkey_<type>, which prints |val| as a quoted JSON object key
 * followed by ':' (map keys are always strings in JSON, so the quotes are
 * added here rather than by |fmt_func|).
 *
 * The formatter result is checked with CHKFMT before anything is printed, the
 * same way TYPE_HANDLERS does; otherwise a failure value of (size_t)-1 would
 * be handed to print_data as a length. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                            type val) {                                      \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, "\"", 1);                                                  \
    print_data(p, data, length);                                             \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
16369 
/* Instantiate put<type>/scalar_<type>/repeated_<type> for every scalar type.
 * 32-bit integers are passed to fmt_int64_as_number (widened to long long);
 * 64-bit integers use the *_as_string formatters, which wrap the digits in
 * double quotes. */
TYPE_HANDLERS(double,   fmt_double)
TYPE_HANDLERS(float,    fmt_float)
TYPE_HANDLERS(bool,     fmt_bool)
TYPE_HANDLERS(int32_t,  fmt_int64_as_number)
TYPE_HANDLERS(uint32_t, fmt_int64_as_number)
TYPE_HANDLERS(int64_t,  fmt_int64_as_string)
TYPE_HANDLERS(uint64_t, fmt_uint64_as_string)

/* double and float are not allowed to be map keys. */
/* Map keys use the *_as_number formatters even for 64-bit types because
 * putmapkey_<type> adds the surrounding quotes itself. */
TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64_as_number)

/* The generator macros are only needed for the instantiations above. */
#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
16387 
/* Handler data attached to enum fields (filled in by set_enum_hd). */
typedef struct {
  /* Key data created by newstrpc(); scalar_enum passes it to putkey(). */
  void *keyname;
  /* Enum definition used to map numeric values to symbolic names. */
  const upb_enumdef *enumdef;
} EnumHandlerData;
16392 
scalar_enum(void * closure,const void * handler_data,int32_t val)16393 static bool scalar_enum(void *closure, const void *handler_data,
16394                         int32_t val) {
16395   const EnumHandlerData *hd = handler_data;
16396   upb_json_printer *p = closure;
16397   const char *symbolic_name;
16398 
16399   CHK(putkey(closure, hd->keyname));
16400 
16401   symbolic_name = upb_enumdef_iton(hd->enumdef, val);
16402   if (symbolic_name) {
16403     print_data(p, "\"", 1);
16404     putstring(p, symbolic_name, strlen(symbolic_name));
16405     print_data(p, "\"", 1);
16406   } else {
16407     putint32_t(closure, NULL, val);
16408   }
16409 
16410   return true;
16411 }
16412 
/* Prints enum value |val| of |def|: the quoted symbolic name when
 * upb_enumdef_iton knows one, otherwise the bare integer via putint32_t. */
static void print_enum_symbolic_name(upb_json_printer *p,
                                     const upb_enumdef *def,
                                     int32_t val) {
  const char *name = upb_enumdef_iton(def, val);

  if (!name) {
    putint32_t(p, NULL, val);
    return;
  }

  print_data(p, "\"", 1);
  putstring(p, name, strlen(name));
  print_data(p, "\"", 1);
}
16425 
repeated_enum(void * closure,const void * handler_data,int32_t val)16426 static bool repeated_enum(void *closure, const void *handler_data,
16427                           int32_t val) {
16428   const EnumHandlerData *hd = handler_data;
16429   upb_json_printer *p = closure;
16430   print_comma(p);
16431 
16432   print_enum_symbolic_name(p, hd->enumdef, val);
16433 
16434   return true;
16435 }
16436 
mapvalue_enum(void * closure,const void * handler_data,int32_t val)16437 static bool mapvalue_enum(void *closure, const void *handler_data,
16438                           int32_t val) {
16439   const EnumHandlerData *hd = handler_data;
16440   upb_json_printer *p = closure;
16441 
16442   print_enum_symbolic_name(p, hd->enumdef, val);
16443 
16444   return true;
16445 }
16446 
scalar_startsubmsg(void * closure,const void * handler_data)16447 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
16448   return putkey(closure, handler_data) ? closure : UPB_BREAK;
16449 }
16450 
repeated_startsubmsg(void * closure,const void * handler_data)16451 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
16452   upb_json_printer *p = closure;
16453   UPB_UNUSED(handler_data);
16454   print_comma(p);
16455   return closure;
16456 }
16457 
/* Enters one nesting level: bumps depth_, marks the new level as not having
 * printed an element yet, and prints the opening '{'. */
static void start_frame(upb_json_printer *p) {
  p->first_elem_[++p->depth_] = true;
  print_data(p, "{", 1);
}
16463 
/* Leaves one nesting level: prints the closing '}' and then decrements
 * depth_. */
static void end_frame(upb_json_printer *p) {
  print_data(p, "}", 1);
  --p->depth_;
}
16468 
printer_startmsg(void * closure,const void * handler_data)16469 static bool printer_startmsg(void *closure, const void *handler_data) {
16470   upb_json_printer *p = closure;
16471   UPB_UNUSED(handler_data);
16472   if (p->depth_ == 0) {
16473     upb_bytessink_start(p->output_, 0, &p->subc_);
16474   }
16475   start_frame(p);
16476   return true;
16477 }
16478 
/* End-of-message handler: closes the '}' frame and, once depth_ has dropped
 * back to 0, ends the output bytes sink. |s| is not used. */
static bool printer_endmsg(void *closure, const void *handler_data,
                           upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  end_frame(printer);
  if (printer->depth_ == 0) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
16489 
startseq(void * closure,const void * handler_data)16490 static void *startseq(void *closure, const void *handler_data) {
16491   upb_json_printer *p = closure;
16492   CHK(putkey(closure, handler_data));
16493   p->depth_++;
16494   p->first_elem_[p->depth_] = true;
16495   print_data(p, "[", 1);
16496   return closure;
16497 }
16498 
endseq(void * closure,const void * handler_data)16499 static bool endseq(void *closure, const void *handler_data) {
16500   upb_json_printer *p = closure;
16501   UPB_UNUSED(handler_data);
16502   print_data(p, "]", 1);
16503   p->depth_--;
16504   return true;
16505 }
16506 
startmap(void * closure,const void * handler_data)16507 static void *startmap(void *closure, const void *handler_data) {
16508   upb_json_printer *p = closure;
16509   CHK(putkey(closure, handler_data));
16510   p->depth_++;
16511   p->first_elem_[p->depth_] = true;
16512   print_data(p, "{", 1);
16513   return closure;
16514 }
16515 
endmap(void * closure,const void * handler_data)16516 static bool endmap(void *closure, const void *handler_data) {
16517   upb_json_printer *p = closure;
16518   UPB_UNUSED(handler_data);
16519   print_data(p, "}", 1);
16520   p->depth_--;
16521   return true;
16522 }
16523 
putstr(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16524 static size_t putstr(void *closure, const void *handler_data, const char *str,
16525                      size_t len, const upb_bufhandle *handle) {
16526   upb_json_printer *p = closure;
16527   UPB_UNUSED(handler_data);
16528   UPB_UNUSED(handle);
16529   putstring(p, str, len);
16530   return len;
16531 }
16532 
16533 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
putbytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16534 static size_t putbytes(void *closure, const void *handler_data, const char *str,
16535                        size_t len, const upb_bufhandle *handle) {
16536   upb_json_printer *p = closure;
16537 
16538   /* This is the regular base64, not the "web-safe" version. */
16539   static const char base64[] =
16540       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
16541 
16542   /* Base64-encode. */
16543   char data[16000];
16544   const char *limit = data + sizeof(data);
16545   const unsigned char *from = (const unsigned char*)str;
16546   char *to = data;
16547   size_t remaining = len;
16548   size_t bytes;
16549 
16550   UPB_UNUSED(handler_data);
16551   UPB_UNUSED(handle);
16552 
16553   print_data(p, "\"", 1);
16554 
16555   while (remaining > 2) {
16556     if (limit - to < 4) {
16557       bytes = to - data;
16558       putstring(p, data, bytes);
16559       to = data;
16560     }
16561 
16562     to[0] = base64[from[0] >> 2];
16563     to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
16564     to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
16565     to[3] = base64[from[2] & 0x3f];
16566 
16567     remaining -= 3;
16568     to += 4;
16569     from += 3;
16570   }
16571 
16572   switch (remaining) {
16573     case 2:
16574       to[0] = base64[from[0] >> 2];
16575       to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
16576       to[2] = base64[(from[1] & 0xf) << 2];
16577       to[3] = '=';
16578       to += 4;
16579       from += 2;
16580       break;
16581     case 1:
16582       to[0] = base64[from[0] >> 2];
16583       to[1] = base64[((from[0] & 0x3) << 4)];
16584       to[2] = '=';
16585       to[3] = '=';
16586       to += 4;
16587       from += 1;
16588       break;
16589   }
16590 
16591   bytes = to - data;
16592   putstring(p, data, bytes);
16593   print_data(p, "\"", 1);
16594   return len;
16595 }
16596 
scalar_startstr(void * closure,const void * handler_data,size_t size_hint)16597 static void *scalar_startstr(void *closure, const void *handler_data,
16598                              size_t size_hint) {
16599   upb_json_printer *p = closure;
16600   UPB_UNUSED(handler_data);
16601   UPB_UNUSED(size_hint);
16602   CHK(putkey(closure, handler_data));
16603   print_data(p, "\"", 1);
16604   return p;
16605 }
16606 
/* String-chunk handler for a singular string field: delegates to putstr.
 * Returns 0 when putstr returns 0, otherwise |len|. */
static size_t scalar_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  size_t consumed = putstr(closure, handler_data, str, len, handle);
  if (consumed == 0) {
    return 0;
  }
  return len;
}
16613 
scalar_endstr(void * closure,const void * handler_data)16614 static bool scalar_endstr(void *closure, const void *handler_data) {
16615   upb_json_printer *p = closure;
16616   UPB_UNUSED(handler_data);
16617   print_data(p, "\"", 1);
16618   return true;
16619 }
16620 
repeated_startstr(void * closure,const void * handler_data,size_t size_hint)16621 static void *repeated_startstr(void *closure, const void *handler_data,
16622                                size_t size_hint) {
16623   upb_json_printer *p = closure;
16624   UPB_UNUSED(handler_data);
16625   UPB_UNUSED(size_hint);
16626   print_comma(p);
16627   print_data(p, "\"", 1);
16628   return p;
16629 }
16630 
/* String-chunk handler for an element of a repeated string field: delegates
 * to putstr. Returns 0 when putstr returns 0, otherwise |len|. */
static size_t repeated_str(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  size_t consumed = putstr(closure, handler_data, str, len, handle);
  if (consumed == 0) {
    return 0;
  }
  return len;
}
16637 
repeated_endstr(void * closure,const void * handler_data)16638 static bool repeated_endstr(void *closure, const void *handler_data) {
16639   upb_json_printer *p = closure;
16640   UPB_UNUSED(handler_data);
16641   print_data(p, "\"", 1);
16642   return true;
16643 }
16644 
mapkeyval_startstr(void * closure,const void * handler_data,size_t size_hint)16645 static void *mapkeyval_startstr(void *closure, const void *handler_data,
16646                                 size_t size_hint) {
16647   upb_json_printer *p = closure;
16648   UPB_UNUSED(handler_data);
16649   UPB_UNUSED(size_hint);
16650   print_data(p, "\"", 1);
16651   return p;
16652 }
16653 
/* String-chunk handler for a string map key: delegates to putstr. Returns 0
 * when putstr returns 0, otherwise |len|. */
static size_t mapkey_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  size_t consumed = putstr(closure, handler_data, str, len, handle);
  if (consumed == 0) {
    return 0;
  }
  return len;
}
16660 
mapkey_endstr(void * closure,const void * handler_data)16661 static bool mapkey_endstr(void *closure, const void *handler_data) {
16662   upb_json_printer *p = closure;
16663   UPB_UNUSED(handler_data);
16664   print_data(p, "\":", 2);
16665   return true;
16666 }
16667 
mapvalue_endstr(void * closure,const void * handler_data)16668 static bool mapvalue_endstr(void *closure, const void *handler_data) {
16669   upb_json_printer *p = closure;
16670   UPB_UNUSED(handler_data);
16671   print_data(p, "\"", 1);
16672   return true;
16673 }
16674 
/* Handler for a singular bytes field: prints the key and then the Base64
 * value via putbytes. Returns 0 if putkey fails or putbytes returns 0,
 * otherwise |len|. */
static size_t scalar_bytes(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  if (!putkey(closure, handler_data)) {
    return 0;
  }
  if (!putbytes(closure, handler_data, str, len, handle)) {
    return 0;
  }
  return len;
}
16682 
repeated_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16683 static size_t repeated_bytes(void *closure, const void *handler_data,
16684                              const char *str, size_t len,
16685                              const upb_bufhandle *handle) {
16686   upb_json_printer *p = closure;
16687   print_comma(p);
16688   CHK(putbytes(closure, handler_data, str, len, handle));
16689   return len;
16690 }
16691 
mapkey_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16692 static size_t mapkey_bytes(void *closure, const void *handler_data,
16693                            const char *str, size_t len,
16694                            const upb_bufhandle *handle) {
16695   upb_json_printer *p = closure;
16696   CHK(putbytes(closure, handler_data, str, len, handle));
16697   print_data(p, ":", 1);
16698   return len;
16699 }
16700 
/* Allocates an EnumHandlerData for enum field |f|, fills in its enum
 * definition and key name, registers upb_gfree as its cleanup on |h|, and
 * stores it as the handler data of |attr|.
 * NOTE(review): the upb_gmalloc result is used without a NULL check. */
static void set_enum_hd(upb_handlers *h,
                        const upb_fielddef *f,
                        bool preserve_fieldnames,
                        upb_handlerattr *attr) {
  EnumHandlerData *data = upb_gmalloc(sizeof(EnumHandlerData));

  data->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
  data->keyname = newstrpc(h, f, preserve_fieldnames);

  upb_handlers_addcleanup(h, data, upb_gfree);
  upb_handlerattr_sethandlerdata(attr, data);
}
16711 
16712 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
16713  * in a map).
16714  *
16715  * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
16716  * key or value cases properly. The right way to do this is to allocate a
16717  * temporary structure at the start of a mapentry submessage, store key and
16718  * value data in it as key and value handlers are called, and then print the
16719  * key/value pair once at the end of the submessage. If we don't do this, we
16720  * should at least detect the case and throw an error. However, so far all of
16721  * our sources that emit mapentry messages do so canonically (with one key
16722  * field, and then one value field), so this is not a pressing concern at the
16723  * moment. */
printer_sethandlers_mapentry(const void * closure,bool preserve_fieldnames,upb_handlers * h)16724 void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames,
16725                                   upb_handlers *h) {
16726   const upb_msgdef *md = upb_handlers_msgdef(h);
16727 
16728   /* A mapentry message is printed simply as '"key": value'. Rather than
16729    * special-case key and value for every type below, we just handle both
16730    * fields explicitly here. */
16731   const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
16732   const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
16733 
16734   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
16735 
16736   UPB_UNUSED(closure);
16737 
16738   switch (upb_fielddef_type(key_field)) {
16739     case UPB_TYPE_INT32:
16740       upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
16741       break;
16742     case UPB_TYPE_INT64:
16743       upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
16744       break;
16745     case UPB_TYPE_UINT32:
16746       upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
16747       break;
16748     case UPB_TYPE_UINT64:
16749       upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
16750       break;
16751     case UPB_TYPE_BOOL:
16752       upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
16753       break;
16754     case UPB_TYPE_STRING:
16755       upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
16756       upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
16757       upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
16758       break;
16759     case UPB_TYPE_BYTES:
16760       upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
16761       break;
16762     default:
16763       UPB_ASSERT(false);
16764       break;
16765   }
16766 
16767   switch (upb_fielddef_type(value_field)) {
16768     case UPB_TYPE_INT32:
16769       upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
16770       break;
16771     case UPB_TYPE_INT64:
16772       upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
16773       break;
16774     case UPB_TYPE_UINT32:
16775       upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
16776       break;
16777     case UPB_TYPE_UINT64:
16778       upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
16779       break;
16780     case UPB_TYPE_BOOL:
16781       upb_handlers_setbool(h, value_field, putbool, &empty_attr);
16782       break;
16783     case UPB_TYPE_FLOAT:
16784       upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
16785       break;
16786     case UPB_TYPE_DOUBLE:
16787       upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
16788       break;
16789     case UPB_TYPE_STRING:
16790       upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
16791       upb_handlers_setstring(h, value_field, putstr, &empty_attr);
16792       upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
16793       break;
16794     case UPB_TYPE_BYTES:
16795       upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
16796       break;
16797     case UPB_TYPE_ENUM: {
16798       upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
16799       set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr);
16800       upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
16801       upb_handlerattr_uninit(&enum_attr);
16802       break;
16803     }
16804     case UPB_TYPE_MESSAGE:
16805       /* No handler necessary -- the submsg handlers will print the message
16806        * as appropriate. */
16807       break;
16808   }
16809 
16810   upb_handlerattr_uninit(&empty_attr);
16811 }
16812 
putseconds(void * closure,const void * handler_data,int64_t seconds)16813 static bool putseconds(void *closure, const void *handler_data,
16814                        int64_t seconds) {
16815   upb_json_printer *p = closure;
16816   p->seconds = seconds;
16817   UPB_UNUSED(handler_data);
16818   return true;
16819 }
16820 
putnanos(void * closure,const void * handler_data,int32_t nanos)16821 static bool putnanos(void *closure, const void *handler_data,
16822                      int32_t nanos) {
16823   upb_json_printer *p = closure;
16824   p->nanos = nanos;
16825   UPB_UNUSED(handler_data);
16826   return true;
16827 }
16828 
scalar_startstr_nokey(void * closure,const void * handler_data,size_t size_hint)16829 static void *scalar_startstr_nokey(void *closure, const void *handler_data,
16830                                    size_t size_hint) {
16831   upb_json_printer *p = closure;
16832   UPB_UNUSED(handler_data);
16833   UPB_UNUSED(size_hint);
16834   print_data(p, "\"", 1);
16835   return p;
16836 }
16837 
putstr_nokey(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16838 static size_t putstr_nokey(void *closure, const void *handler_data,
16839                            const char *str, size_t len,
16840                            const upb_bufhandle *handle) {
16841   upb_json_printer *p = closure;
16842   UPB_UNUSED(handler_data);
16843   UPB_UNUSED(handle);
16844   print_data(p, "\"", 1);
16845   putstring(p, str, len);
16846   print_data(p, "\"", 1);
16847   return len + 2;
16848 }
16849 
startseq_nokey(void * closure,const void * handler_data)16850 static void *startseq_nokey(void *closure, const void *handler_data) {
16851   upb_json_printer *p = closure;
16852   UPB_UNUSED(handler_data);
16853   p->depth_++;
16854   p->first_elem_[p->depth_] = true;
16855   print_data(p, "[", 1);
16856   return closure;
16857 }
16858 
startseq_fieldmask(void * closure,const void * handler_data)16859 static void *startseq_fieldmask(void *closure, const void *handler_data) {
16860   upb_json_printer *p = closure;
16861   UPB_UNUSED(handler_data);
16862   p->depth_++;
16863   p->first_elem_[p->depth_] = true;
16864   return closure;
16865 }
16866 
endseq_fieldmask(void * closure,const void * handler_data)16867 static bool endseq_fieldmask(void *closure, const void *handler_data) {
16868   upb_json_printer *p = closure;
16869   UPB_UNUSED(handler_data);
16870   p->depth_--;
16871   return true;
16872 }
16873 
repeated_startstr_fieldmask(void * closure,const void * handler_data,size_t size_hint)16874 static void *repeated_startstr_fieldmask(
16875     void *closure, const void *handler_data,
16876     size_t size_hint) {
16877   upb_json_printer *p = closure;
16878   UPB_UNUSED(handler_data);
16879   UPB_UNUSED(size_hint);
16880   print_comma(p);
16881   return p;
16882 }
16883 
/* String-chunk handler for one field-mask path. Converts snake_case to
 * lowerCamelCase while printing: each '_' is dropped and, if the next
 * character is a lowercase ASCII letter, that letter is upper-cased.
 * Characters are emitted one at a time through putstr.
 *
 * Returns the number of characters emitted, which is smaller than |len| when
 * underscores were dropped; returns 0 if a putstr call returns 0. */
static size_t repeated_str_fieldmask(
    void *closure, const void *handler_data,
    const char *str, size_t len,
    const upb_bufhandle *handle) {
  size_t emitted = 0;
  size_t i;
  bool capitalize_next = false;

  for (i = 0; i < len; i++) {
    char ch = str[i];

    if (ch == '_') {
      capitalize_next = true;
      continue;
    }

    if (capitalize_next && ch >= 'a' && ch <= 'z') {
      ch = toupper(ch);
    }
    if (!putstr(closure, handler_data, &ch, 1, handle)) {
      return 0;
    }

    capitalize_next = false;
    emitted++;
  }

  return emitted;
}
16907 
startmap_nokey(void * closure,const void * handler_data)16908 static void *startmap_nokey(void *closure, const void *handler_data) {
16909   upb_json_printer *p = closure;
16910   UPB_UNUSED(handler_data);
16911   p->depth_++;
16912   p->first_elem_[p->depth_] = true;
16913   print_data(p, "{", 1);
16914   return closure;
16915 }
16916 
putnull(void * closure,const void * handler_data,int32_t null)16917 static bool putnull(void *closure, const void *handler_data,
16918                     int32_t null) {
16919   upb_json_printer *p = closure;
16920   print_data(p, "null", 4);
16921   UPB_UNUSED(handler_data);
16922   UPB_UNUSED(null);
16923   return true;
16924 }
16925 
printer_startdurationmsg(void * closure,const void * handler_data)16926 static bool printer_startdurationmsg(void *closure, const void *handler_data) {
16927   upb_json_printer *p = closure;
16928   UPB_UNUSED(handler_data);
16929   if (p->depth_ == 0) {
16930     upb_bytessink_start(p->output_, 0, &p->subc_);
16931   }
16932   return true;
16933 }
16934 
16935 #define UPB_DURATION_MAX_JSON_LEN 23
16936 #define UPB_DURATION_MAX_NANO_LEN 9
16937 
printer_enddurationmsg(void * closure,const void * handler_data,upb_status * s)16938 static bool printer_enddurationmsg(void *closure, const void *handler_data,
16939                                    upb_status *s) {
16940   upb_json_printer *p = closure;
16941   char buffer[UPB_DURATION_MAX_JSON_LEN];
16942   size_t base_len;
16943   size_t curr;
16944   size_t i;
16945 
16946   memset(buffer, 0, UPB_DURATION_MAX_JSON_LEN);
16947 
16948   if (p->seconds < -315576000000) {
16949     upb_status_seterrf(s, "error parsing duration: "
16950                           "minimum acceptable value is "
16951                           "-315576000000");
16952     return false;
16953   }
16954 
16955   if (p->seconds > 315576000000) {
16956     upb_status_seterrf(s, "error serializing duration: "
16957                           "maximum acceptable value is "
16958                           "315576000000");
16959     return false;
16960   }
16961 
16962   _upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
16963   base_len = strlen(buffer);
16964 
16965   if (p->nanos != 0) {
16966     char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3];
16967     _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
16968                   p->nanos / 1000000000.0);
16969     /* Remove trailing 0. */
16970     for (i = UPB_DURATION_MAX_NANO_LEN + 2;
16971          nanos_buffer[i] == '0'; i--) {
16972       nanos_buffer[i] = 0;
16973     }
16974     strcpy(buffer + base_len, nanos_buffer + 1);
16975   }
16976 
16977   curr = strlen(buffer);
16978   strcpy(buffer + curr, "s");
16979 
16980   p->seconds = 0;
16981   p->nanos = 0;
16982 
16983   print_data(p, "\"", 1);
16984   print_data(p, buffer, strlen(buffer));
16985   print_data(p, "\"", 1);
16986 
16987   if (p->depth_ == 0) {
16988     upb_bytessink_end(p->output_);
16989   }
16990 
16991   UPB_UNUSED(handler_data);
16992   return true;
16993 }
16994 
printer_starttimestampmsg(void * closure,const void * handler_data)16995 static bool printer_starttimestampmsg(void *closure, const void *handler_data) {
16996   upb_json_printer *p = closure;
16997   UPB_UNUSED(handler_data);
16998   if (p->depth_ == 0) {
16999     upb_bytessink_start(p->output_, 0, &p->subc_);
17000   }
17001   return true;
17002 }
17003 
17004 #define UPB_TIMESTAMP_MAX_JSON_LEN 31
17005 #define UPB_TIMESTAMP_BEFORE_NANO_LEN 19
17006 #define UPB_TIMESTAMP_MAX_NANO_LEN 9
17007 
printer_endtimestampmsg(void * closure,const void * handler_data,upb_status * s)17008 static bool printer_endtimestampmsg(void *closure, const void *handler_data,
17009                                     upb_status *s) {
17010   upb_json_printer *p = closure;
17011   char buffer[UPB_TIMESTAMP_MAX_JSON_LEN];
17012   time_t time = p->seconds;
17013   size_t curr;
17014   size_t i;
17015   size_t year_length =
17016       strftime(buffer, UPB_TIMESTAMP_MAX_JSON_LEN, "%Y", gmtime(&time));
17017 
17018   if (p->seconds < -62135596800) {
17019     upb_status_seterrf(s, "error parsing timestamp: "
17020                           "minimum acceptable value is "
17021                           "0001-01-01T00:00:00Z");
17022     return false;
17023   }
17024 
17025   if (p->seconds > 253402300799) {
17026     upb_status_seterrf(s, "error parsing timestamp: "
17027                           "maximum acceptable value is "
17028                           "9999-12-31T23:59:59Z");
17029     return false;
17030   }
17031 
17032   /* strftime doesn't guarantee 4 digits for year. Prepend 0 by ourselves. */
17033   for (i = 0; i < 4 - year_length; i++) {
17034     buffer[i] = '0';
17035   }
17036 
17037   strftime(buffer + (4 - year_length), UPB_TIMESTAMP_MAX_JSON_LEN,
17038            "%Y-%m-%dT%H:%M:%S", gmtime(&time));
17039   if (p->nanos != 0) {
17040     char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3];
17041     _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
17042                   p->nanos / 1000000000.0);
17043     /* Remove trailing 0. */
17044     for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2;
17045          nanos_buffer[i] == '0'; i--) {
17046       nanos_buffer[i] = 0;
17047     }
17048     strcpy(buffer + UPB_TIMESTAMP_BEFORE_NANO_LEN, nanos_buffer + 1);
17049   }
17050 
17051   curr = strlen(buffer);
17052   strcpy(buffer + curr, "Z");
17053 
17054   p->seconds = 0;
17055   p->nanos = 0;
17056 
17057   print_data(p, "\"", 1);
17058   print_data(p, buffer, strlen(buffer));
17059   print_data(p, "\"", 1);
17060 
17061   if (p->depth_ == 0) {
17062     upb_bytessink_end(p->output_);
17063   }
17064 
17065   UPB_UNUSED(handler_data);
17066   UPB_UNUSED(s);
17067   return true;
17068 }
17069 
printer_startmsg_noframe(void * closure,const void * handler_data)17070 static bool printer_startmsg_noframe(void *closure, const void *handler_data) {
17071   upb_json_printer *p = closure;
17072   UPB_UNUSED(handler_data);
17073   if (p->depth_ == 0) {
17074     upb_bytessink_start(p->output_, 0, &p->subc_);
17075   }
17076   return true;
17077 }
17078 
/* End-of-message handler that closes no '}' frame: it only ends the output
 * bytes sink when this is the outermost message (depth_ == 0). |s| is not
 * used. */
static bool printer_endmsg_noframe(
    void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  if (printer->depth_ == 0) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
17089 
printer_startmsg_fieldmask(void * closure,const void * handler_data)17090 static bool printer_startmsg_fieldmask(
17091     void *closure, const void *handler_data) {
17092   upb_json_printer *p = closure;
17093   UPB_UNUSED(handler_data);
17094   if (p->depth_ == 0) {
17095     upb_bytessink_start(p->output_, 0, &p->subc_);
17096   }
17097   print_data(p, "\"", 1);
17098   return true;
17099 }
17100 
/* End-of-message handler for a field mask: prints the closing quote, then
 * ends the output bytes sink when this is the outermost message. |s| is not
 * used. */
static bool printer_endmsg_fieldmask(
    void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  print_data(printer, "\"", 1);
  if (printer->depth_ == 0) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
17112 
/* Start-of-string handler that only emits the field key (via putkey) and then
 * hands the closure back unchanged as the closure for the string. The size
 * hint is ignored. The putkey call is guarded by CHK, which is defined
 * elsewhere in this file. */
static void *scalar_startstr_onlykey(
    void *closure, const void *handler_data, size_t size_hint) {
  UPB_UNUSED(size_hint);
  CHK(putkey(closure, handler_data));
  return closure;
}
17120 
17121 /* Set up handlers for an Any submessage. */
printer_sethandlers_any(const void * closure,upb_handlers * h)17122 void printer_sethandlers_any(const void *closure, upb_handlers *h) {
17123   const upb_msgdef *md = upb_handlers_msgdef(h);
17124 
17125   const upb_fielddef* type_field = upb_msgdef_itof(md, UPB_ANY_TYPE);
17126   const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_ANY_VALUE);
17127 
17128   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17129 
17130   /* type_url's json name is "@type" */
17131   upb_handlerattr type_name_attr = UPB_HANDLERATTR_INITIALIZER;
17132   upb_handlerattr value_name_attr = UPB_HANDLERATTR_INITIALIZER;
17133   strpc *type_url_json_name = newstrpc_str(h, "@type");
17134   strpc *value_json_name = newstrpc_str(h, "value");
17135 
17136   upb_handlerattr_sethandlerdata(&type_name_attr, type_url_json_name);
17137   upb_handlerattr_sethandlerdata(&value_name_attr, value_json_name);
17138 
17139   /* Set up handlers. */
17140   upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
17141   upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
17142 
17143   upb_handlers_setstartstr(h, type_field, scalar_startstr, &type_name_attr);
17144   upb_handlers_setstring(h, type_field, scalar_str, &empty_attr);
17145   upb_handlers_setendstr(h, type_field, scalar_endstr, &empty_attr);
17146 
17147   /* This is not the full and correct JSON encoding for the Any value field. It
17148    * requires further processing by the wrapper code based on the type URL.
17149    */
17150   upb_handlers_setstartstr(h, value_field, scalar_startstr_onlykey,
17151                            &value_name_attr);
17152 
17153   UPB_UNUSED(closure);
17154 }
17155 
17156 /* Set up handlers for a fieldmask submessage. */
printer_sethandlers_fieldmask(const void * closure,upb_handlers * h)17157 void printer_sethandlers_fieldmask(const void *closure, upb_handlers *h) {
17158   const upb_msgdef *md = upb_handlers_msgdef(h);
17159   const upb_fielddef* f = upb_msgdef_itof(md, 1);
17160 
17161   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17162 
17163   upb_handlers_setstartseq(h, f, startseq_fieldmask, &empty_attr);
17164   upb_handlers_setendseq(h, f, endseq_fieldmask, &empty_attr);
17165 
17166   upb_handlers_setstartmsg(h, printer_startmsg_fieldmask, &empty_attr);
17167   upb_handlers_setendmsg(h, printer_endmsg_fieldmask, &empty_attr);
17168 
17169   upb_handlers_setstartstr(h, f, repeated_startstr_fieldmask, &empty_attr);
17170   upb_handlers_setstring(h, f, repeated_str_fieldmask, &empty_attr);
17171 
17172   UPB_UNUSED(closure);
17173 }
17174 
17175 /* Set up handlers for a duration submessage. */
printer_sethandlers_duration(const void * closure,upb_handlers * h)17176 void printer_sethandlers_duration(const void *closure, upb_handlers *h) {
17177   const upb_msgdef *md = upb_handlers_msgdef(h);
17178 
17179   const upb_fielddef* seconds_field =
17180       upb_msgdef_itof(md, UPB_DURATION_SECONDS);
17181   const upb_fielddef* nanos_field =
17182       upb_msgdef_itof(md, UPB_DURATION_NANOS);
17183 
17184   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17185 
17186   upb_handlers_setstartmsg(h, printer_startdurationmsg, &empty_attr);
17187   upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
17188   upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
17189   upb_handlers_setendmsg(h, printer_enddurationmsg, &empty_attr);
17190 
17191   UPB_UNUSED(closure);
17192 }
17193 
17194 /* Set up handlers for a timestamp submessage. Instead of printing fields
17195  * separately, the json representation of timestamp follows RFC 3339 */
printer_sethandlers_timestamp(const void * closure,upb_handlers * h)17196 void printer_sethandlers_timestamp(const void *closure, upb_handlers *h) {
17197   const upb_msgdef *md = upb_handlers_msgdef(h);
17198 
17199   const upb_fielddef* seconds_field =
17200       upb_msgdef_itof(md, UPB_TIMESTAMP_SECONDS);
17201   const upb_fielddef* nanos_field =
17202       upb_msgdef_itof(md, UPB_TIMESTAMP_NANOS);
17203 
17204   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17205 
17206   upb_handlers_setstartmsg(h, printer_starttimestampmsg, &empty_attr);
17207   upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
17208   upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
17209   upb_handlers_setendmsg(h, printer_endtimestampmsg, &empty_attr);
17210 
17211   UPB_UNUSED(closure);
17212 }
17213 
/* Registers the JSON printer handlers for a Value message. The message uses
 * the frameless start/end handlers, and every field gets a handler chosen by
 * its type; none of the handlers carries handler data. The closure argument
 * is not used. */
void printer_sethandlers_value(const void *closure, upb_handlers *h) {
  const upb_msgdef *msgdef = upb_handlers_msgdef(h);
  upb_handlerattr no_data_attr = UPB_HANDLERATTR_INITIALIZER;
  upb_msg_field_iter it;

  UPB_UNUSED(closure);

  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &no_data_attr);
  upb_handlers_setendmsg(h, printer_endmsg_noframe, &no_data_attr);

  for (upb_msg_field_begin(&it, msgdef); !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef *field = upb_msg_iter_field(&it);

    switch (upb_fielddef_type(field)) {
      case UPB_TYPE_MESSAGE:
        /* No handler is registered here for message-typed fields. */
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, field, putbool, &no_data_attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, field, putdouble, &no_data_attr);
        break;
      case UPB_TYPE_ENUM:
        /* Enum-typed fields are handled by putnull. */
        upb_handlers_setint32(h, field, putnull, &no_data_attr);
        break;
      case UPB_TYPE_STRING:
        upb_handlers_setstartstr(h, field, scalar_startstr_nokey,
                                 &no_data_attr);
        upb_handlers_setstring(h, field, scalar_str, &no_data_attr);
        upb_handlers_setendstr(h, field, scalar_endstr, &no_data_attr);
        break;
      default:
        /* Any other field type is not expected in this message. */
        UPB_ASSERT(false);
        break;
    }
  }
}
17252 
17253 #define WRAPPER_SETHANDLERS(wrapper, type, putmethod)                      \
17254 void printer_sethandlers_##wrapper(const void *closure, upb_handlers *h) { \
17255   const upb_msgdef *md = upb_handlers_msgdef(h);                           \
17256   const upb_fielddef* f = upb_msgdef_itof(md, 1);                          \
17257   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;                \
17258   upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);      \
17259   upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);          \
17260   upb_handlers_set##type(h, f, putmethod, &empty_attr);                    \
17261   UPB_UNUSED(closure);                                                     \
17262 }
17263 
WRAPPER_SETHANDLERS(doublevalue,double,putdouble)17264 WRAPPER_SETHANDLERS(doublevalue, double, putdouble)
17265 WRAPPER_SETHANDLERS(floatvalue,  float,  putfloat)
17266 WRAPPER_SETHANDLERS(int64value,  int64,  putint64_t)
17267 WRAPPER_SETHANDLERS(uint64value, uint64, putuint64_t)
17268 WRAPPER_SETHANDLERS(int32value,  int32,  putint32_t)
17269 WRAPPER_SETHANDLERS(uint32value, uint32, putuint32_t)
17270 WRAPPER_SETHANDLERS(boolvalue,   bool,   putbool)
17271 WRAPPER_SETHANDLERS(stringvalue, string, putstr_nokey)
17272 WRAPPER_SETHANDLERS(bytesvalue,  string, putbytes)
17273 
17274 #undef WRAPPER_SETHANDLERS
17275 
17276 void printer_sethandlers_listvalue(const void *closure, upb_handlers *h) {
17277   const upb_msgdef *md = upb_handlers_msgdef(h);
17278   const upb_fielddef* f = upb_msgdef_itof(md, 1);
17279 
17280   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17281 
17282   upb_handlers_setstartseq(h, f, startseq_nokey, &empty_attr);
17283   upb_handlers_setendseq(h, f, endseq, &empty_attr);
17284 
17285   upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
17286   upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
17287 
17288   upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr);
17289 
17290   UPB_UNUSED(closure);
17291 }
17292 
/* Registers the JSON printer handlers for a Struct message: field 1 is
 * printed as a map without a key, the message itself uses the frameless
 * start/end handlers, and each entry starts a repeated submessage. None of
 * the handlers carries handler data. The closure argument is not used. */
void printer_sethandlers_structvalue(const void *closure, upb_handlers *h) {
  const upb_msgdef *msgdef = upb_handlers_msgdef(h);
  const upb_fielddef *fields_f = upb_msgdef_itof(msgdef, 1);
  upb_handlerattr no_data_attr = UPB_HANDLERATTR_INITIALIZER;

  UPB_UNUSED(closure);

  /* Map framing for field 1. */
  upb_handlers_setstartseq(h, fields_f, startmap_nokey, &no_data_attr);
  upb_handlers_setendseq(h, fields_f, endmap, &no_data_attr);

  /* Message framing. */
  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &no_data_attr);
  upb_handlers_setendmsg(h, printer_endmsg_noframe, &no_data_attr);

  /* Entry handler. */
  upb_handlers_setstartsubmsg(h, fields_f, repeated_startsubmsg,
                              &no_data_attr);
}
17309 
/* Top-level handler registration callback for the JSON printer.
 *
 * closure points at a bool telling whether original field names are to be
 * preserved; it is forwarded to newstrpc() and set_enum_hd().
 *
 * Map-entry messages and well-known types are delegated to their dedicated
 * printer_sethandlers_*() routines. Every other message gets the regular
 * start/end message handlers plus, for each field, handlers selected by the
 * field's type and by whether it is a map, a sequence or a scalar. */
void printer_sethandlers(const void *closure, upb_handlers *h) {
  const upb_msgdef *msgdef = upb_handlers_msgdef(h);
  const bool preserve_fieldnames = *(const bool *)closure;
  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
  upb_msg_field_iter it;

  if (upb_msgdef_mapentry(msgdef)) {
    /* Map entries differ enough from ordinary messages to have their own
     * setup routine. */
    printer_sethandlers_mapentry(closure, preserve_fieldnames, h);
    return;
  }

  /* Well-known types: hand off to the specialised routine and stop. Only an
   * unspecified well-known type falls through to the generic setup below. */
  switch (upb_msgdef_wellknowntype(msgdef)) {
    case UPB_WELLKNOWN_UNSPECIFIED:
      break;
    case UPB_WELLKNOWN_ANY:
      printer_sethandlers_any(closure, h);
      return;
    case UPB_WELLKNOWN_FIELDMASK:
      printer_sethandlers_fieldmask(closure, h);
      return;
    case UPB_WELLKNOWN_DURATION:
      printer_sethandlers_duration(closure, h);
      return;
    case UPB_WELLKNOWN_TIMESTAMP:
      printer_sethandlers_timestamp(closure, h);
      return;
    case UPB_WELLKNOWN_VALUE:
      printer_sethandlers_value(closure, h);
      return;
    case UPB_WELLKNOWN_LISTVALUE:
      printer_sethandlers_listvalue(closure, h);
      return;
    case UPB_WELLKNOWN_STRUCT:
      printer_sethandlers_structvalue(closure, h);
      return;
    case UPB_WELLKNOWN_DOUBLEVALUE:
      printer_sethandlers_doublevalue(closure, h);
      return;
    case UPB_WELLKNOWN_FLOATVALUE:
      printer_sethandlers_floatvalue(closure, h);
      return;
    case UPB_WELLKNOWN_INT64VALUE:
      printer_sethandlers_int64value(closure, h);
      return;
    case UPB_WELLKNOWN_UINT64VALUE:
      printer_sethandlers_uint64value(closure, h);
      return;
    case UPB_WELLKNOWN_INT32VALUE:
      printer_sethandlers_int32value(closure, h);
      return;
    case UPB_WELLKNOWN_UINT32VALUE:
      printer_sethandlers_uint32value(closure, h);
      return;
    case UPB_WELLKNOWN_BOOLVALUE:
      printer_sethandlers_boolvalue(closure, h);
      return;
    case UPB_WELLKNOWN_STRINGVALUE:
      printer_sethandlers_stringvalue(closure, h);
      return;
    case UPB_WELLKNOWN_BYTESVALUE:
      printer_sethandlers_bytesvalue(closure, h);
      return;
  }

  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);

/* One switch case for a primitive field type: a scalar field gets the
 * scalar_* handler together with the field-name attribute, a repeated field
 * gets the repeated_* handler with the empty attribute. Relies on the locals
 * h, f, is_seq, name_attr and empty_attr of the loop below. */
#define JSON_PRIMITIVE_CASE(fieldtype, setter, ctype)                         \
  case fieldtype:                                                             \
    if (!is_seq) {                                                            \
      upb_handlers_set##setter(h, f, scalar_##ctype, &name_attr);             \
    } else {                                                                  \
      upb_handlers_set##setter(h, f, repeated_##ctype, &empty_attr);          \
    }                                                                         \
    break;

  for (upb_msg_field_begin(&it, msgdef); !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef *f = upb_msg_iter_field(&it);
    const bool is_seq = upb_fielddef_isseq(f);
    upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;

    /* name_attr carries the string produced by newstrpc() for this field. */
    upb_handlerattr_sethandlerdata(&name_attr,
                                   newstrpc(h, f, preserve_fieldnames));

    /* Container framing; the map check takes precedence over the sequence
     * check. */
    if (upb_fielddef_ismap(f)) {
      upb_handlers_setstartseq(h, f, startmap, &name_attr);
      upb_handlers_setendseq(h, f, endmap, &name_attr);
    } else if (is_seq) {
      upb_handlers_setstartseq(h, f, startseq, &name_attr);
      upb_handlers_setendseq(h, f, endseq, &empty_attr);
    }

    switch (upb_fielddef_type(f)) {
      JSON_PRIMITIVE_CASE(UPB_TYPE_FLOAT,  float,  float)
      JSON_PRIMITIVE_CASE(UPB_TYPE_DOUBLE, double, double)
      JSON_PRIMITIVE_CASE(UPB_TYPE_BOOL,   bool,   bool)
      JSON_PRIMITIVE_CASE(UPB_TYPE_INT32,  int32,  int32_t)
      JSON_PRIMITIVE_CASE(UPB_TYPE_UINT32, uint32, uint32_t)
      JSON_PRIMITIVE_CASE(UPB_TYPE_INT64,  int64,  int64_t)
      JSON_PRIMITIVE_CASE(UPB_TYPE_UINT64, uint64, uint64_t)

      case UPB_TYPE_ENUM: {
        /* Enums are always emitted by symbolic name for now. An option to
         * control this may be added later, once there is a real need. */
        upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
        set_enum_hd(h, f, preserve_fieldnames, &enum_attr);

        if (!is_seq) {
          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
        } else {
          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
        }

        upb_handlerattr_uninit(&enum_attr);
        break;
      }

      case UPB_TYPE_STRING:
        if (!is_seq) {
          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
        } else {
          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
        }
        break;

      case UPB_TYPE_BYTES:
        /* XXX: strings that span buffers are not supported yet. The base64
         * encoder has to become resumable before that can work properly. */
        if (!is_seq) {
          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
        } else {
          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
        }
        break;

      case UPB_TYPE_MESSAGE:
        /* Both variants receive the field-name attribute. */
        if (!is_seq) {
          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
        } else {
          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
        }
        break;
    }

    upb_handlerattr_uninit(&name_attr);
  }

  upb_handlerattr_uninit(&empty_attr);
#undef JSON_PRIMITIVE_CASE
}
17454 
/* Puts the printer back at nesting depth 0. No other printer field is
 * touched. */
static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
}
17458 
17459 
17460 /* Public API *****************************************************************/
17461 
upb_json_printer_create(upb_env * e,const upb_handlers * h,upb_bytessink * output)17462 upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
17463                                           upb_bytessink *output) {
17464 #ifndef NDEBUG
17465   size_t size_before = upb_env_bytesallocated(e);
17466 #endif
17467 
17468   upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
17469   if (!p) return NULL;
17470 
17471   p->output_ = output;
17472   json_printer_reset(p);
17473   upb_sink_reset(&p->input_, h, p);
17474   p->seconds = 0;
17475   p->nanos = 0;
17476 
17477   /* If this fails, increase the value in printer.h. */
17478   UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(e) - size_before <=
17479                       UPB_JSON_PRINTER_SIZE);
17480   return p;
17481 }
17482 
upb_json_printer_input(upb_json_printer * p)17483 upb_sink *upb_json_printer_input(upb_json_printer *p) {
17484   return &p->input_;
17485 }
17486 
upb_json_printer_newhandlers(const upb_msgdef * md,bool preserve_fieldnames,const void * owner)17487 const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
17488                                                  bool preserve_fieldnames,
17489                                                  const void *owner) {
17490   return upb_handlers_newfrozen(
17491       md, owner, printer_sethandlers, &preserve_fieldnames);
17492 }
17493 
17494 #undef UPB_SIZE
17495 #undef UPB_FIELD_AT
17496 #undef UPB_READ_ONEOF
17497 #undef UPB_WRITE_ONEOF
17498