1 /* Amalgamated source file */
2 #define _XOPEN_SOURCE 700
3 #include "upb.h"
4
/* UPB_SIZE(size32, size64) selects the value that matches the target's
 * pointer width: size32 when uintptr_t tops out at 0xffffffff, size64 for
 * every other width. */
#if UINTPTR_MAX != 0xffffffff
#define UPB_SIZE(size32, size64) size64
#else
#define UPB_SIZE(size32, size64) size32
#endif
10
/* Accesses the object of type `fieldtype` located `offset` bytes into `msg`.
 * The expansion is a fully parenthesized lvalue, so it can be read, assigned
 * to, or embedded in a larger expression without precedence surprises. */
#define UPB_FIELD_AT(msg, fieldtype, offset) \
  (*(fieldtype*)((const char*)(msg) + (offset)))

/* Evaluates to the oneof member stored at `offset` when the int case word at
 * `case_offset` equals `case_val`; otherwise evaluates to `default`.  The
 * whole conditional is parenthesized so it composes with other operators. */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  (UPB_FIELD_AT(msg, int, case_offset) == (case_val)                           \
       ? UPB_FIELD_AT(msg, fieldtype, offset)                                  \
       : (default))

/* Stores `case_val` in the int case word at `case_offset`, then stores
 * `value` in the member at `offset`.  Both stores form ONE expression
 * statement, so an unbraced `if (...) UPB_WRITE_ONEOF(...)` guards both of
 * them.  The trailing semicolon is kept so existing call sites written with
 * or without their own semicolon keep compiling. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  (UPB_FIELD_AT(msg, int, case_offset) = (case_val),                          \
   UPB_FIELD_AT(msg, fieldtype, offset) = (value));
22 /* This file was generated by upbc (the upb compiler) from the input
23 * file:
24 *
25 * google/protobuf/descriptor.proto
26 *
27 * Do not edit -- your changes will be discarded when the file is
28 * regenerated. */
29
30 #include <stddef.h>
31
32
33 static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = {
34 &google_protobuf_FileDescriptorProto_msginit,
35 };
36
37 static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = {
38 {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
39 };
40
41 const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
42 &google_protobuf_FileDescriptorSet_submsgs[0],
43 &google_protobuf_FileDescriptorSet__fields[0],
44 UPB_SIZE(4, 8), 1, false,
45 };
46
47 static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
48 &google_protobuf_DescriptorProto_msginit,
49 &google_protobuf_EnumDescriptorProto_msginit,
50 &google_protobuf_FieldDescriptorProto_msginit,
51 &google_protobuf_FileOptions_msginit,
52 &google_protobuf_ServiceDescriptorProto_msginit,
53 &google_protobuf_SourceCodeInfo_msginit,
54 };
55
56 static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
57 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
58 {2, UPB_SIZE(12, 24), 2, 0, 9, 1},
59 {3, UPB_SIZE(36, 72), 0, 0, 9, 3},
60 {4, UPB_SIZE(40, 80), 0, 0, 11, 3},
61 {5, UPB_SIZE(44, 88), 0, 1, 11, 3},
62 {6, UPB_SIZE(48, 96), 0, 4, 11, 3},
63 {7, UPB_SIZE(52, 104), 0, 2, 11, 3},
64 {8, UPB_SIZE(28, 56), 4, 3, 11, 1},
65 {9, UPB_SIZE(32, 64), 5, 5, 11, 1},
66 {10, UPB_SIZE(56, 112), 0, 0, 5, 3},
67 {11, UPB_SIZE(60, 120), 0, 0, 5, 3},
68 {12, UPB_SIZE(20, 40), 3, 0, 9, 1},
69 };
70
71 const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
72 &google_protobuf_FileDescriptorProto_submsgs[0],
73 &google_protobuf_FileDescriptorProto__fields[0],
74 UPB_SIZE(64, 128), 12, false,
75 };
76
77 static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
78 &google_protobuf_DescriptorProto_msginit,
79 &google_protobuf_DescriptorProto_ExtensionRange_msginit,
80 &google_protobuf_DescriptorProto_ReservedRange_msginit,
81 &google_protobuf_EnumDescriptorProto_msginit,
82 &google_protobuf_FieldDescriptorProto_msginit,
83 &google_protobuf_MessageOptions_msginit,
84 &google_protobuf_OneofDescriptorProto_msginit,
85 };
86
87 static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
88 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
89 {2, UPB_SIZE(16, 32), 0, 4, 11, 3},
90 {3, UPB_SIZE(20, 40), 0, 0, 11, 3},
91 {4, UPB_SIZE(24, 48), 0, 3, 11, 3},
92 {5, UPB_SIZE(28, 56), 0, 1, 11, 3},
93 {6, UPB_SIZE(32, 64), 0, 4, 11, 3},
94 {7, UPB_SIZE(12, 24), 2, 5, 11, 1},
95 {8, UPB_SIZE(36, 72), 0, 6, 11, 3},
96 {9, UPB_SIZE(40, 80), 0, 2, 11, 3},
97 {10, UPB_SIZE(44, 88), 0, 0, 9, 3},
98 };
99
100 const upb_msglayout google_protobuf_DescriptorProto_msginit = {
101 &google_protobuf_DescriptorProto_submsgs[0],
102 &google_protobuf_DescriptorProto__fields[0],
103 UPB_SIZE(48, 96), 10, false,
104 };
105
106 static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
107 &google_protobuf_ExtensionRangeOptions_msginit,
108 };
109
110 static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
111 {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
112 {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
113 {3, UPB_SIZE(12, 16), 3, 0, 11, 1},
114 };
115
116 const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = {
117 &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
118 &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
119 UPB_SIZE(16, 24), 3, false,
120 };
121
122 static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
123 {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
124 {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
125 };
126
127 const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
128 NULL,
129 &google_protobuf_DescriptorProto_ReservedRange__fields[0],
130 UPB_SIZE(12, 12), 2, false,
131 };
132
133 static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
134 &google_protobuf_UninterpretedOption_msginit,
135 };
136
137 static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = {
138 {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
139 };
140
141 const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
142 &google_protobuf_ExtensionRangeOptions_submsgs[0],
143 &google_protobuf_ExtensionRangeOptions__fields[0],
144 UPB_SIZE(4, 8), 1, false,
145 };
146
147 static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
148 &google_protobuf_FieldOptions_msginit,
149 };
150
151 static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[10] = {
152 {1, UPB_SIZE(32, 32), 5, 0, 9, 1},
153 {2, UPB_SIZE(40, 48), 6, 0, 9, 1},
154 {3, UPB_SIZE(24, 24), 3, 0, 5, 1},
155 {4, UPB_SIZE(8, 8), 1, 0, 14, 1},
156 {5, UPB_SIZE(16, 16), 2, 0, 14, 1},
157 {6, UPB_SIZE(48, 64), 7, 0, 9, 1},
158 {7, UPB_SIZE(56, 80), 8, 0, 9, 1},
159 {8, UPB_SIZE(72, 112), 10, 0, 11, 1},
160 {9, UPB_SIZE(28, 28), 4, 0, 5, 1},
161 {10, UPB_SIZE(64, 96), 9, 0, 9, 1},
162 };
163
164 const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = {
165 &google_protobuf_FieldDescriptorProto_submsgs[0],
166 &google_protobuf_FieldDescriptorProto__fields[0],
167 UPB_SIZE(80, 128), 10, false,
168 };
169
170 static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = {
171 &google_protobuf_OneofOptions_msginit,
172 };
173
174 static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
175 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
176 {2, UPB_SIZE(12, 24), 2, 0, 11, 1},
177 };
178
179 const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = {
180 &google_protobuf_OneofDescriptorProto_submsgs[0],
181 &google_protobuf_OneofDescriptorProto__fields[0],
182 UPB_SIZE(16, 32), 2, false,
183 };
184
185 static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = {
186 &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
187 &google_protobuf_EnumOptions_msginit,
188 &google_protobuf_EnumValueDescriptorProto_msginit,
189 };
190
191 static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
192 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
193 {2, UPB_SIZE(16, 32), 0, 2, 11, 3},
194 {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
195 {4, UPB_SIZE(20, 40), 0, 0, 11, 3},
196 {5, UPB_SIZE(24, 48), 0, 0, 9, 3},
197 };
198
199 const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
200 &google_protobuf_EnumDescriptorProto_submsgs[0],
201 &google_protobuf_EnumDescriptorProto__fields[0],
202 UPB_SIZE(32, 64), 5, false,
203 };
204
205 static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
206 {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
207 {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
208 };
209
210 const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
211 NULL,
212 &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
213 UPB_SIZE(12, 12), 2, false,
214 };
215
216 static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
217 &google_protobuf_EnumValueOptions_msginit,
218 };
219
220 static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
221 {1, UPB_SIZE(8, 8), 2, 0, 9, 1},
222 {2, UPB_SIZE(4, 4), 1, 0, 5, 1},
223 {3, UPB_SIZE(16, 24), 3, 0, 11, 1},
224 };
225
226 const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = {
227 &google_protobuf_EnumValueDescriptorProto_submsgs[0],
228 &google_protobuf_EnumValueDescriptorProto__fields[0],
229 UPB_SIZE(24, 32), 3, false,
230 };
231
232 static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = {
233 &google_protobuf_MethodDescriptorProto_msginit,
234 &google_protobuf_ServiceOptions_msginit,
235 };
236
237 static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
238 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
239 {2, UPB_SIZE(16, 32), 0, 0, 11, 3},
240 {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
241 };
242
243 const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = {
244 &google_protobuf_ServiceDescriptorProto_submsgs[0],
245 &google_protobuf_ServiceDescriptorProto__fields[0],
246 UPB_SIZE(24, 48), 3, false,
247 };
248
249 static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = {
250 &google_protobuf_MethodOptions_msginit,
251 };
252
253 static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
254 {1, UPB_SIZE(4, 8), 3, 0, 9, 1},
255 {2, UPB_SIZE(12, 24), 4, 0, 9, 1},
256 {3, UPB_SIZE(20, 40), 5, 0, 9, 1},
257 {4, UPB_SIZE(28, 56), 6, 0, 11, 1},
258 {5, UPB_SIZE(1, 1), 1, 0, 8, 1},
259 {6, UPB_SIZE(2, 2), 2, 0, 8, 1},
260 };
261
262 const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
263 &google_protobuf_MethodDescriptorProto_submsgs[0],
264 &google_protobuf_MethodDescriptorProto__fields[0],
265 UPB_SIZE(32, 64), 6, false,
266 };
267
268 static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
269 &google_protobuf_UninterpretedOption_msginit,
270 };
271
272 static const upb_msglayout_field google_protobuf_FileOptions__fields[19] = {
273 {1, UPB_SIZE(28, 32), 11, 0, 9, 1},
274 {8, UPB_SIZE(36, 48), 12, 0, 9, 1},
275 {9, UPB_SIZE(8, 8), 1, 0, 14, 1},
276 {10, UPB_SIZE(16, 16), 2, 0, 8, 1},
277 {11, UPB_SIZE(44, 64), 13, 0, 9, 1},
278 {16, UPB_SIZE(17, 17), 3, 0, 8, 1},
279 {17, UPB_SIZE(18, 18), 4, 0, 8, 1},
280 {18, UPB_SIZE(19, 19), 5, 0, 8, 1},
281 {20, UPB_SIZE(20, 20), 6, 0, 8, 1},
282 {23, UPB_SIZE(21, 21), 7, 0, 8, 1},
283 {27, UPB_SIZE(22, 22), 8, 0, 8, 1},
284 {31, UPB_SIZE(23, 23), 9, 0, 8, 1},
285 {36, UPB_SIZE(52, 80), 14, 0, 9, 1},
286 {37, UPB_SIZE(60, 96), 15, 0, 9, 1},
287 {39, UPB_SIZE(68, 112), 16, 0, 9, 1},
288 {40, UPB_SIZE(76, 128), 17, 0, 9, 1},
289 {41, UPB_SIZE(84, 144), 18, 0, 9, 1},
290 {42, UPB_SIZE(24, 24), 10, 0, 8, 1},
291 {999, UPB_SIZE(92, 160), 0, 0, 11, 3},
292 };
293
294 const upb_msglayout google_protobuf_FileOptions_msginit = {
295 &google_protobuf_FileOptions_submsgs[0],
296 &google_protobuf_FileOptions__fields[0],
297 UPB_SIZE(96, 176), 19, false,
298 };
299
300 static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = {
301 &google_protobuf_UninterpretedOption_msginit,
302 };
303
304 static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = {
305 {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
306 {2, UPB_SIZE(2, 2), 2, 0, 8, 1},
307 {3, UPB_SIZE(3, 3), 3, 0, 8, 1},
308 {7, UPB_SIZE(4, 4), 4, 0, 8, 1},
309 {999, UPB_SIZE(8, 8), 0, 0, 11, 3},
310 };
311
312 const upb_msglayout google_protobuf_MessageOptions_msginit = {
313 &google_protobuf_MessageOptions_submsgs[0],
314 &google_protobuf_MessageOptions__fields[0],
315 UPB_SIZE(12, 16), 5, false,
316 };
317
318 static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
319 &google_protobuf_UninterpretedOption_msginit,
320 };
321
322 static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = {
323 {1, UPB_SIZE(8, 8), 1, 0, 14, 1},
324 {2, UPB_SIZE(24, 24), 3, 0, 8, 1},
325 {3, UPB_SIZE(25, 25), 4, 0, 8, 1},
326 {5, UPB_SIZE(26, 26), 5, 0, 8, 1},
327 {6, UPB_SIZE(16, 16), 2, 0, 14, 1},
328 {10, UPB_SIZE(27, 27), 6, 0, 8, 1},
329 {999, UPB_SIZE(28, 32), 0, 0, 11, 3},
330 };
331
332 const upb_msglayout google_protobuf_FieldOptions_msginit = {
333 &google_protobuf_FieldOptions_submsgs[0],
334 &google_protobuf_FieldOptions__fields[0],
335 UPB_SIZE(32, 40), 7, false,
336 };
337
338 static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = {
339 &google_protobuf_UninterpretedOption_msginit,
340 };
341
342 static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = {
343 {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
344 };
345
346 const upb_msglayout google_protobuf_OneofOptions_msginit = {
347 &google_protobuf_OneofOptions_submsgs[0],
348 &google_protobuf_OneofOptions__fields[0],
349 UPB_SIZE(4, 8), 1, false,
350 };
351
352 static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
353 &google_protobuf_UninterpretedOption_msginit,
354 };
355
356 static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = {
357 {2, UPB_SIZE(1, 1), 1, 0, 8, 1},
358 {3, UPB_SIZE(2, 2), 2, 0, 8, 1},
359 {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
360 };
361
362 const upb_msglayout google_protobuf_EnumOptions_msginit = {
363 &google_protobuf_EnumOptions_submsgs[0],
364 &google_protobuf_EnumOptions__fields[0],
365 UPB_SIZE(8, 16), 3, false,
366 };
367
368 static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = {
369 &google_protobuf_UninterpretedOption_msginit,
370 };
371
372 static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = {
373 {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
374 {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
375 };
376
377 const upb_msglayout google_protobuf_EnumValueOptions_msginit = {
378 &google_protobuf_EnumValueOptions_submsgs[0],
379 &google_protobuf_EnumValueOptions__fields[0],
380 UPB_SIZE(8, 16), 2, false,
381 };
382
383 static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = {
384 &google_protobuf_UninterpretedOption_msginit,
385 };
386
387 static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = {
388 {33, UPB_SIZE(1, 1), 1, 0, 8, 1},
389 {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
390 };
391
392 const upb_msglayout google_protobuf_ServiceOptions_msginit = {
393 &google_protobuf_ServiceOptions_submsgs[0],
394 &google_protobuf_ServiceOptions__fields[0],
395 UPB_SIZE(8, 16), 2, false,
396 };
397
398 static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = {
399 &google_protobuf_UninterpretedOption_msginit,
400 };
401
402 static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = {
403 {33, UPB_SIZE(16, 16), 2, 0, 8, 1},
404 {34, UPB_SIZE(8, 8), 1, 0, 14, 1},
405 {999, UPB_SIZE(20, 24), 0, 0, 11, 3},
406 };
407
408 const upb_msglayout google_protobuf_MethodOptions_msginit = {
409 &google_protobuf_MethodOptions_submsgs[0],
410 &google_protobuf_MethodOptions__fields[0],
411 UPB_SIZE(24, 32), 3, false,
412 };
413
414 static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = {
415 &google_protobuf_UninterpretedOption_NamePart_msginit,
416 };
417
418 static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
419 {2, UPB_SIZE(56, 80), 0, 0, 11, 3},
420 {3, UPB_SIZE(32, 32), 4, 0, 9, 1},
421 {4, UPB_SIZE(8, 8), 1, 0, 4, 1},
422 {5, UPB_SIZE(16, 16), 2, 0, 3, 1},
423 {6, UPB_SIZE(24, 24), 3, 0, 1, 1},
424 {7, UPB_SIZE(40, 48), 5, 0, 12, 1},
425 {8, UPB_SIZE(48, 64), 6, 0, 9, 1},
426 };
427
428 const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
429 &google_protobuf_UninterpretedOption_submsgs[0],
430 &google_protobuf_UninterpretedOption__fields[0],
431 UPB_SIZE(64, 96), 7, false,
432 };
433
434 static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
435 {1, UPB_SIZE(4, 8), 2, 0, 9, 2},
436 {2, UPB_SIZE(1, 1), 1, 0, 8, 2},
437 };
438
439 const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = {
440 NULL,
441 &google_protobuf_UninterpretedOption_NamePart__fields[0],
442 UPB_SIZE(16, 32), 2, false,
443 };
444
445 static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = {
446 &google_protobuf_SourceCodeInfo_Location_msginit,
447 };
448
449 static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = {
450 {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
451 };
452
453 const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
454 &google_protobuf_SourceCodeInfo_submsgs[0],
455 &google_protobuf_SourceCodeInfo__fields[0],
456 UPB_SIZE(4, 8), 1, false,
457 };
458
459 static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
460 {1, UPB_SIZE(20, 40), 0, 0, 5, 3},
461 {2, UPB_SIZE(24, 48), 0, 0, 5, 3},
462 {3, UPB_SIZE(4, 8), 1, 0, 9, 1},
463 {4, UPB_SIZE(12, 24), 2, 0, 9, 1},
464 {6, UPB_SIZE(28, 56), 0, 0, 9, 3},
465 };
466
467 const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
468 NULL,
469 &google_protobuf_SourceCodeInfo_Location__fields[0],
470 UPB_SIZE(32, 64), 5, false,
471 };
472
473 static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = {
474 &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
475 };
476
477 static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = {
478 {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
479 };
480
481 const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
482 &google_protobuf_GeneratedCodeInfo_submsgs[0],
483 &google_protobuf_GeneratedCodeInfo__fields[0],
484 UPB_SIZE(4, 8), 1, false,
485 };
486
487 static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
488 {1, UPB_SIZE(20, 32), 0, 0, 5, 3},
489 {2, UPB_SIZE(12, 16), 3, 0, 9, 1},
490 {3, UPB_SIZE(4, 4), 1, 0, 5, 1},
491 {4, UPB_SIZE(8, 8), 2, 0, 5, 1},
492 };
493
494 const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
495 NULL,
496 &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
497 UPB_SIZE(24, 48), 4, false,
498 };
499
500
501
502
503 /* Maps descriptor type -> upb field type. */
504 const uint8_t upb_desctype_to_fieldtype[] = {
505 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
506 UPB_TYPE_DOUBLE, /* DOUBLE */
507 UPB_TYPE_FLOAT, /* FLOAT */
508 UPB_TYPE_INT64, /* INT64 */
509 UPB_TYPE_UINT64, /* UINT64 */
510 UPB_TYPE_INT32, /* INT32 */
511 UPB_TYPE_UINT64, /* FIXED64 */
512 UPB_TYPE_UINT32, /* FIXED32 */
513 UPB_TYPE_BOOL, /* BOOL */
514 UPB_TYPE_STRING, /* STRING */
515 UPB_TYPE_MESSAGE, /* GROUP */
516 UPB_TYPE_MESSAGE, /* MESSAGE */
517 UPB_TYPE_BYTES, /* BYTES */
518 UPB_TYPE_UINT32, /* UINT32 */
519 UPB_TYPE_ENUM, /* ENUM */
520 UPB_TYPE_INT32, /* SFIXED32 */
521 UPB_TYPE_INT64, /* SFIXED64 */
522 UPB_TYPE_INT32, /* SINT32 */
523 UPB_TYPE_INT64, /* SINT64 */
524 };
525
/* State shared by the whole parse. */
typedef struct {
  /* Read cursor into the input.  It stays at the start of the field being
   * decoded until that field has been decoded completely. */
  const char *ptr;
} upb_decstate;
532
533 /* Data pertaining to a single message frame. */
534 typedef struct {
535 const char *limit;
536 int32_t group_number; /* 0 if we are not parsing a group. */
537
538 /* These members are unset for an unknown group frame. */
539 char *msg;
540 const upb_msglayout *m;
541 } upb_decframe;
542
/* Returns false from the enclosing function when `x` is false.  Wrapped in
 * do { } while (0) so that `CHK(x);` is exactly one statement and cannot
 * capture a following `else`. */
#define CHK(x) do { if (!(x)) { return false; } } while (0)
544
545 static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
546 const char *limit);
547 static bool upb_decode_message(upb_decstate *d, const char *limit,
548 int group_number, char *msg,
549 const upb_msglayout *l);
550
/* Decodes one base-128 varint starting at *ptr into *val.
 *
 * Every input byte contributes its low seven bits, least-significant group
 * first; a set high bit means another byte follows.  Returns false, leaving
 * *ptr untouched, when the input runs into `limit` or the encoding would
 * need an eleventh byte (*val may then hold a partial result).  On success
 * *ptr is advanced past the varint. */
static bool upb_decode_varint(const char **ptr, const char *limit,
                              uint64_t *val) {
  const char *cur = *ptr;
  uint8_t b;
  int shift;

  *val = 0;

  for (shift = 0; ; shift += 7) {
    if (shift >= 70 || cur >= limit) {
      return false;
    }
    b = (uint8_t)*cur++;
    *val |= (uint64_t)(b & 0x7F) << shift;
    if (!(b & 0x80)) {
      break;  /* Continuation bit clear: this was the final byte. */
    }
  }

  *ptr = cur;
  return true;
}
569
/* Decodes a varint that must fit in 32 bits.  Returns false if the varint is
 * malformed or its value exceeds UINT32_MAX; in the latter case *ptr has
 * already been advanced past the varint.  *val is written only on success. */
static bool upb_decode_varint32(const char **ptr, const char *limit,
                                uint32_t *val) {
  uint64_t wide;

  if (!upb_decode_varint(ptr, limit, &wide)) {
    return false;
  }
  if (wide > UINT32_MAX) {
    return false;
  }

  *val = wide;
  return true;
}
577
/* Reads a fixed 8-byte value at *ptr into *val and advances *ptr by 8.
 * The bytes are copied verbatim with memcpy.  Returns false, changing
 * nothing, when fewer than 8 bytes remain before `limit`. */
static bool upb_decode_64bit(const char **ptr, const char *limit,
                             uint64_t *val) {
  const char *src = *ptr;

  if (limit - src < 8) {
    return false;
  }

  memcpy(val, src, 8);
  *ptr = src + 8;
  return true;
}
585
/* Reads a fixed 4-byte value at *ptr into *val and advances *ptr by 4.
 * The bytes are copied verbatim with memcpy.  Returns false, changing
 * nothing, when fewer than 4 bytes remain before `limit`. */
static bool upb_decode_32bit(const char **ptr, const char *limit,
                             uint32_t *val) {
  const char *src = *ptr;

  if (limit - src < 4) {
    return false;
  }

  memcpy(val, src, 4);
  *ptr = src + 4;
  return true;
}
593
/* Decodes a field tag: a 32-bit varint whose low three bits are the wire
 * type and whose remaining bits are the field number.  Returns false, with
 * both outputs untouched, if the varint cannot be decoded. */
static bool upb_decode_tag(const char **ptr, const char *limit,
                           int *field_number, int *wire_type) {
  uint32_t raw_tag = 0;

  if (!upb_decode_varint32(ptr, limit, &raw_tag)) {
    return false;
  }

  *wire_type = raw_tag & 7;
  *field_number = raw_tag >> 3;
  return true;
}
602
/* Undoes 32-bit zig-zag encoding: 0, 1, 2, 3, ... map to 0, -1, 1, -2, ... */
static int32_t upb_zzdecode_32(uint32_t n) {
  uint32_t magnitude = n >> 1;
  int32_t sign_mask = -(int32_t)(n & 1);  /* 0, or -1 (all bits set). */
  return magnitude ^ sign_mask;
}
606
/* Undoes 64-bit zig-zag encoding: 0, 1, 2, 3, ... map to 0, -1, 1, -2, ... */
static int64_t upb_zzdecode_64(uint64_t n) {
  uint64_t magnitude = n >> 1;
  int64_t sign_mask = -(int64_t)(n & 1);  /* 0, or -1 (all bits set). */
  return magnitude ^ sign_mask;
}
610
/* Decodes a length-delimited value: a varint length followed by that many
 * bytes.  On success *val is a view of the payload (no copy is made) and
 * *ptr is advanced past it.  Returns false if the length varint is invalid,
 * the length is not below INT32_MAX, or the payload would extend beyond
 * `limit`. */
static bool upb_decode_string(const char **ptr, const char *limit,
                              upb_strview *val) {
  uint32_t len;

  if (!upb_decode_varint32(ptr, limit, &len)) {
    return false;
  }
  if (len >= INT32_MAX) {
    return false;
  }
  if (limit - *ptr < (int32_t)len) {
    return false;
  }

  *val = upb_strview_make(*ptr, len);
  *ptr += len;
  return true;
}
623
/* Stores the 32-bit value `val` at byte offset `ofs` inside `msg`.  memcpy
 * is used, so the destination does not have to be aligned. */
static void upb_set32(void *msg, size_t ofs, uint32_t val) {
  char *dest = (char*)msg;
  dest += ofs;
  memcpy(dest, &val, sizeof(uint32_t));
}
627
/* Hands the raw bytes of the field just parsed, [start, d->ptr), to
 * upb_msg_addunknown for the frame's message.  Always returns true. */
static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame,
                               const char *start) {
  const char *end = d->ptr;

  upb_msg_addunknown(frame->msg, start, end - start);
  return true;
}
633
/* Skips the payload of a field according to its wire type, advancing d->ptr.
 * The decoded values are discarded.  An END_GROUP tag is accepted only if
 * its field number matches the frame's group number, in which case the
 * frame's limit is pulled in to the current position.  Returns false for
 * malformed data or an unrecognized wire type. */
static bool upb_skip_unknownfielddata(upb_decstate *d, upb_decframe *frame,
                                      int field_number, int wire_type) {
  uint64_t scratch64;
  uint32_t scratch32;
  upb_strview scratch_view;

  switch (wire_type) {
    case UPB_WIRE_TYPE_VARINT:
      return upb_decode_varint(&d->ptr, frame->limit, &scratch64);
    case UPB_WIRE_TYPE_32BIT:
      return upb_decode_32bit(&d->ptr, frame->limit, &scratch32);
    case UPB_WIRE_TYPE_64BIT:
      return upb_decode_64bit(&d->ptr, frame->limit, &scratch64);
    case UPB_WIRE_TYPE_DELIMITED:
      return upb_decode_string(&d->ptr, frame->limit, &scratch_view);
    case UPB_WIRE_TYPE_START_GROUP:
      return upb_skip_unknowngroup(d, field_number, frame->limit);
    case UPB_WIRE_TYPE_END_GROUP:
      if (field_number != frame->group_number) {
        return false;
      }
      frame->limit = d->ptr;
      return true;
    default:
      return false;
  }
}
662
/* Enlarges arr's storage so that it can hold at least arr->len + elements
 * entries.  The capacity starts at max(current size, 8) and doubles until it
 * is large enough.
 *
 * Returns false, leaving the array untouched, if the reallocation fails or
 * if the element count, the doubled capacity, or the resulting byte count
 * would not fit in size_t. */
static bool upb_array_grow(upb_array *arr, size_t elements) {
  size_t needed = arr->len + elements;
  size_t new_size = UPB_MAX(arr->size, 8);
  size_t new_bytes;
  size_t old_bytes;
  void *new_data;
  upb_alloc *alloc = upb_arena_alloc(arr->arena);

  /* arr->len + elements wrapped around. */
  CHK(needed >= arr->len);

  while (new_size < needed) {
    /* Doubling again would wrap, and the loop would then never terminate. */
    CHK(new_size <= SIZE_MAX / 2);
    new_size *= 2;
  }

  /* The byte count handed to the allocator must be representable too. */
  CHK(arr->element_size == 0 || new_size <= SIZE_MAX / arr->element_size);

  old_bytes = arr->len * arr->element_size;
  new_bytes = new_size * arr->element_size;
  new_data = upb_realloc(alloc, arr->data, old_bytes, new_bytes);
  CHK(new_data);

  arr->data = new_data;
  arr->size = new_size;
  return true;
}
684
/* Makes sure arr has room for `elements` more entries beyond arr->len,
 * growing it if necessary, and returns a pointer to the first free slot.
 * arr->len is not changed.  Returns NULL if the array could not be grown. */
static void *upb_array_reserve(upb_array *arr, size_t elements) {
  if (arr->size - arr->len < elements) {
    /* This function returns a pointer, so report failure as NULL explicitly
     * rather than through a macro that returns `false`. */
    if (!upb_array_grow(arr, elements)) {
      return NULL;
    }
  }
  return (char*)arr->data + (arr->len * arr->element_size);
}
691
/* Appends `elements` uninitialized entries to arr and returns a pointer to
 * the first of them.  Returns NULL when the reservation fails; arr->len is
 * only advanced on success, so a failed call leaves the length unchanged. */
static void *upb_array_add(upb_array *arr, size_t elements) {
  void *ret = upb_array_reserve(arr, elements);

  if (ret) {
    arr->len += elements;
  }
  return ret;
}
697
upb_getarr(upb_decframe * frame,const upb_msglayout_field * field)698 static upb_array *upb_getarr(upb_decframe *frame,
699 const upb_msglayout_field *field) {
700 UPB_ASSERT(field->label == UPB_LABEL_REPEATED);
701 return *(upb_array**)&frame->msg[field->offset];
702 }
703
upb_getorcreatearr(upb_decframe * frame,const upb_msglayout_field * field)704 static upb_array *upb_getorcreatearr(upb_decframe *frame,
705 const upb_msglayout_field *field) {
706 upb_array *arr = upb_getarr(frame, field);
707
708 if (!arr) {
709 upb_fieldtype_t type = upb_desctype_to_fieldtype[field->descriptortype];
710 arr = upb_array_new(type, upb_msg_arena(frame->msg));
711 if (!arr) {
712 return NULL;
713 }
714 *(upb_array**)&frame->msg[field->offset] = arr;
715 }
716
717 return arr;
718 }
719
/* Sets the field's has-bit in the message.  field->presence (which must be
 * positive) is the bit index: bit (presence % 8) of byte (presence / 8). */
static void upb_sethasbit(upb_decframe *frame,
                          const upb_msglayout_field *field) {
  int32_t bit = field->presence;
  char *byte;

  UPB_ASSERT(field->presence > 0);
  byte = &frame->msg[bit / 8];
  *byte |= (1 << (bit % 8));
}
726
/* Records the field as the active member of its oneof by writing the field
 * number into the case word.  field->presence must be negative; its bitwise
 * complement is the byte offset of the case word in the message. */
static void upb_setoneofcase(upb_decframe *frame,
                             const upb_msglayout_field *field) {
  size_t case_ofs;

  UPB_ASSERT(field->presence < 0);
  case_ofs = ~field->presence;
  upb_set32(frame->msg, case_ofs, field->number);
}
732
/* Returns the memory a decoded value for `field` should be written to.
 *
 * For a non-repeated field this is the field's storage inside the message.
 * For a repeated field the array is created on demand and space for one more
 * element is reserved; the returned slot lies just past the current length,
 * which is not advanced here.
 *
 * Returns NULL if the array cannot be created or grown. */
static char *upb_decode_prepareslot(upb_decframe *frame,
                                    const upb_msglayout_field *field) {
  upb_array *arr;

  if (field->label != UPB_LABEL_REPEATED) {
    return frame->msg + field->offset;
  }

  arr = upb_getorcreatearr(frame, field);
  if (!arr) {
    /* Array creation failed; reserving on a NULL array would dereference it. */
    return NULL;
  }
  return upb_array_reserve(arr, 1);
}
745
/* Marks a freshly decoded value as present:
 *  - repeated field: the array length is advanced over the slot that was
 *    reserved beforehand;
 *  - negative presence: the oneof case word is updated;
 *  - positive presence: the has-bit is set;
 *  - presence of zero: nothing is recorded. */
static void upb_decode_setpresent(upb_decframe *frame,
                                  const upb_msglayout_field *field) {
  if (field->label == UPB_LABEL_REPEATED) {
    upb_array *arr = upb_getarr(frame, field);
    UPB_ASSERT(arr->len < arr->size);
    arr->len++;
    return;
  }

  if (field->presence < 0) {
    upb_setoneofcase(frame, field);
    return;
  }

  if (field->presence > 0) {
    upb_sethasbit(frame, field);
  }
}
758
/* Decodes a sub-message for `field` from the input up to `limit`.
 *
 * An existing sub-message of a non-repeated field is reused, so the new data
 * is decoded into it; otherwise a new message is created in the parent's
 * arena and stored in the field's slot.  `group_number` is forwarded to the
 * nested parse.
 *
 * Returns false if the slot or the message cannot be allocated, or if
 * decoding the sub-message fails. */
static bool upb_decode_submsg(upb_decstate *d, upb_decframe *frame,
                              const char *limit,
                              const upb_msglayout_field *field,
                              int group_number) {
  char *submsg_slot = upb_decode_prepareslot(frame, field);
  char *submsg = NULL;
  const upb_msglayout *subm;

  /* The slot is NULL when a repeated field's array could not be set up. */
  CHK(submsg_slot);

  subm = frame->m->submsgs[field->submsg_index];
  UPB_ASSERT(subm);

  /* Only a non-repeated field's slot holds a meaningful pointer.  For a
   * repeated field the slot is reserved space past the array's current
   * length and is not known to be initialized, so it is treated as empty.
   * NOTE(review): this function does not advance the array length for a
   * repeated field; confirm the caller accounts for the new element. */
  if (field->label != UPB_LABEL_REPEATED) {
    submsg = *(void **)submsg_slot;
  }

  if (!submsg) {
    submsg = upb_msg_new(subm, upb_msg_arena(frame->msg));
    CHK(submsg);
    *(void**)submsg_slot = submsg;
  }

  /* Propagate parse errors from the nested message instead of dropping them. */
  return upb_decode_message(d, limit, group_number, submsg, subm);
}
780
/* Decodes a varint-encoded field and stores it in the field's slot using the
 * C representation that matches its descriptor type: 64-bit as is, 32-bit
 * and enum truncated to 32 bits, bool as "non-zero", sint32/sint64 zig-zag
 * decoded.  A field whose descriptor type is not varint-encoded is appended
 * to the unknown fields instead.
 *
 * Returns false if the slot cannot be prepared or the varint is malformed. */
static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame,
                                   const char *field_start,
                                   const upb_msglayout_field *field) {
  uint64_t raw;
  void *slot = upb_decode_prepareslot(frame, field);

  if (!slot) {
    return false;
  }
  if (!upb_decode_varint(&d->ptr, frame->limit, &raw)) {
    return false;
  }

  switch ((upb_descriptortype_t)field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      memcpy(slot, &raw, sizeof(raw));
      break;
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_ENUM: {
      uint32_t narrow = raw;
      memcpy(slot, &narrow, sizeof(narrow));
      break;
    }
    case UPB_DESCRIPTOR_TYPE_BOOL: {
      bool flag = raw != 0;
      memcpy(slot, &flag, sizeof(flag));
      break;
    }
    case UPB_DESCRIPTOR_TYPE_SINT32: {
      int32_t zz32 = upb_zzdecode_32(raw);
      memcpy(slot, &zz32, sizeof(zz32));
      break;
    }
    case UPB_DESCRIPTOR_TYPE_SINT64: {
      int64_t zz64 = upb_zzdecode_64(raw);
      memcpy(slot, &zz64, sizeof(zz64));
      break;
    }
    default:
      /* Wire type and declared type disagree: keep the bytes as unknown. */
      return upb_append_unknown(d, frame, field_start);
  }

  upb_decode_setpresent(frame, field);
  return true;
}
825
/* Decodes a fixed 64-bit field.  For double, fixed64 and sfixed64 the eight
 * bytes are stored in the field's slot and the field is marked present; for
 * any other descriptor type the field is appended to the unknown fields.
 *
 * Returns false if the slot cannot be prepared or fewer than eight bytes
 * remain in the frame. */
static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame,
                                  const char *field_start,
                                  const upb_msglayout_field *field) {
  upb_descriptortype_t type = (upb_descriptortype_t)field->descriptortype;
  uint64_t bits;
  void *slot = upb_decode_prepareslot(frame, field);

  if (!slot) {
    return false;
  }
  if (!upb_decode_64bit(&d->ptr, frame->limit, &bits)) {
    return false;
  }

  if (type != UPB_DESCRIPTOR_TYPE_DOUBLE &&
      type != UPB_DESCRIPTOR_TYPE_FIXED64 &&
      type != UPB_DESCRIPTOR_TYPE_SFIXED64) {
    return upb_append_unknown(d, frame, field_start);
  }

  memcpy(slot, &bits, sizeof(bits));
  upb_decode_setpresent(frame, field);
  return true;
}
849
/* Decodes a fixed 32-bit field.  For float, fixed32 and sfixed32 the four
 * bytes are stored in the field's slot and the field is marked present; for
 * any other descriptor type the field is appended to the unknown fields.
 *
 * Returns false if the slot cannot be prepared or fewer than four bytes
 * remain in the frame. */
static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame,
                                  const char *field_start,
                                  const upb_msglayout_field *field) {
  upb_descriptortype_t type = (upb_descriptortype_t)field->descriptortype;
  uint32_t bits;
  void *slot = upb_decode_prepareslot(frame, field);

  if (!slot) {
    return false;
  }
  if (!upb_decode_32bit(&d->ptr, frame->limit, &bits)) {
    return false;
  }

  if (type != UPB_DESCRIPTOR_TYPE_FLOAT &&
      type != UPB_DESCRIPTOR_TYPE_FIXED32 &&
      type != UPB_DESCRIPTOR_TYPE_SFIXED32) {
    return upb_append_unknown(d, frame, field_start);
  }

  memcpy(slot, &bits, sizeof(bits));
  upb_decode_setpresent(frame, field);
  return true;
}
873
/* Appends a packed run of fixed-width values to arr.  `data` must consist of
 * a whole number of elem_size-byte elements, which are copied verbatim.
 *
 * An empty payload is valid and appends nothing.  Returns false if elem_size
 * is not positive, if data.size is not a multiple of elem_size, or if the
 * array cannot be grown. */
static bool upb_decode_fixedpacked(upb_array *arr, upb_strview data,
                                   int elem_size) {
  size_t width;
  size_t elements;
  void *field_mem;

  CHK(elem_size > 0);
  width = (size_t)elem_size;

  if (data.size == 0) {
    /* Nothing to copy.  Returning here also avoids asking the array for a
     * zero-element slot, which yields NULL when the array has no storage
     * yet and would otherwise be reported as an allocation failure. */
    return true;
  }

  /* Count in size_t so large payloads are not truncated to int. */
  elements = data.size / width;
  CHK(elements * width == data.size);

  field_mem = upb_array_add(arr, elements);
  CHK(field_mem);
  memcpy(field_mem, data.data, data.size);
  return true;
}
885
/* Appends the contents of one length-delimited value `val` to the array of a
 * repeated field:
 *  - string/bytes: the view itself becomes one element;
 *  - fixed-width scalars: `val` is a packed run copied verbatim;
 *  - varint scalars: `val` is a packed run decoded value by value;
 *  - message: a new sub-message is created, appended, and decoded from `val`;
 *  - group: the field is appended to the unknown fields.
 *
 * Returns false on allocation failure or malformed data. */
static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame,
                               const char *field_start,
                               const upb_msglayout_field *field,
                               upb_strview val) {
  upb_array *arr = upb_getorcreatearr(frame, field);

  /* Array creation can fail; the element-storing branches below would
   * otherwise dereference a NULL array. */
  CHK(arr);

#define VARINT_CASE(ctype, decode) { \
  const char *ptr = val.data; \
  const char *limit = ptr + val.size; \
  while (ptr < limit) { \
    uint64_t raw; \
    void *field_mem; \
    ctype decoded; \
    CHK(upb_decode_varint(&ptr, limit, &raw)); \
    decoded = (decode)(raw); \
    field_mem = upb_array_add(arr, 1); \
    CHK(field_mem); \
    memcpy(field_mem, &decoded, sizeof(ctype)); \
  } \
  return true; \
}

  switch ((upb_descriptortype_t)field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      void *field_mem = upb_array_add(arr, 1);
      CHK(field_mem);
      memcpy(field_mem, &val, sizeof(val));
      return true;
    }
    case UPB_DESCRIPTOR_TYPE_FLOAT:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      return upb_decode_fixedpacked(arr, val, sizeof(int32_t));
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
      return upb_decode_fixedpacked(arr, val, sizeof(int64_t));
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* TODO: proto2 enum field that isn't in the enum. */
      VARINT_CASE(uint32_t, uint32_t);
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      VARINT_CASE(uint64_t, uint64_t);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      VARINT_CASE(bool, bool);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      VARINT_CASE(int32_t, upb_zzdecode_32);
    case UPB_DESCRIPTOR_TYPE_SINT64:
      VARINT_CASE(int64_t, upb_zzdecode_64);
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      const upb_msglayout *subm;
      char *submsg;
      void *field_mem;

      CHK(val.size <= (size_t)(frame->limit - val.data));
      /* Rewind to the start of the payload so the nested parse reads it. */
      d->ptr -= val.size;

      /* Create the element message. */
      subm = frame->m->submsgs[field->submsg_index];
      UPB_ASSERT(subm);

      submsg = upb_msg_new(subm, upb_msg_arena(frame->msg));
      CHK(submsg);

      field_mem = upb_array_add(arr, 1);
      CHK(field_mem);
      *(void**)field_mem = submsg;

      /* A length-delimited element is not a group, so it is decoded with
       * group number 0 rather than inheriting the enclosing frame's group
       * number; otherwise an END_GROUP tag for the enclosing group could be
       * accepted inside the element. */
      return upb_decode_message(d, val.data + val.size, 0, submsg, subm);
    }
    case UPB_DESCRIPTOR_TYPE_GROUP:
      return upb_append_unknown(d, frame, field_start);
  }
#undef VARINT_CASE
  UPB_UNREACHABLE();
}
966
/* Decodes one length-delimited value for |field|.
 *
 * Repeated fields are forwarded to upb_decode_toarray().  For singular
 * fields, string/bytes store the string view in the field's slot, messages
 * are parsed as submessages, and any other type is kept as unknown data. */
static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame,
                                      const char *field_start,
                                      const upb_msglayout_field *field) {
  upb_strview payload;
  upb_descriptortype_t type;

  CHK(upb_decode_string(&d->ptr, frame->limit, &payload));

  if (field->label == UPB_LABEL_REPEATED) {
    return upb_decode_toarray(d, frame, field_start, field, payload);
  }

  type = (upb_descriptortype_t)field->descriptortype;

  if (type == UPB_DESCRIPTOR_TYPE_STRING ||
      type == UPB_DESCRIPTOR_TYPE_BYTES) {
    void *slot = upb_decode_prepareslot(frame, field);
    CHK(slot);
    memcpy(slot, &payload, sizeof(payload));
  } else if (type == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    /* Rewind to the start of the payload so the submessage parser reads it. */
    CHK(payload.size <= (size_t)(frame->limit - payload.data));
    d->ptr -= payload.size;
    CHK(upb_decode_submsg(d, frame, payload.data + payload.size, field, 0));
  } else {
    /* TODO(haberman): should we accept the last element of a packed? */
    return upb_append_unknown(d, frame, field_start);
  }

  upb_decode_setpresent(frame, field);
  return true;
}
998
upb_find_field(const upb_msglayout * l,uint32_t field_number)999 static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
1000 uint32_t field_number) {
1001 /* Lots of optimization opportunities here. */
1002 int i;
1003 for (i = 0; i < l->field_count; i++) {
1004 if (l->fields[i].number == field_number) {
1005 return &l->fields[i];
1006 }
1007 }
1008
1009 return NULL; /* Unknown field. */
1010 }
1011
/* Decodes a single tag/value pair at d->ptr into the message of |frame|.
 *
 * Fields missing from the layout are skipped and appended to the unknown
 * field data (field number 0 is rejected).  Known fields are dispatched on
 * wire type.  An END_GROUP tag must match the frame's group number and ends
 * the frame by moving its limit to the current position. */
static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) {
  const char *tag_start = d->ptr;
  const upb_msglayout_field *field;
  int field_number;
  int wire_type;

  CHK(upb_decode_tag(&d->ptr, frame->limit, &field_number, &wire_type));
  field = upb_find_field(frame->m, field_number);

  if (!field) {
    CHK(field_number != 0);
    CHK(upb_skip_unknownfielddata(d, frame, field_number, wire_type));
    CHK(upb_append_unknown(d, frame, tag_start));
    return true;
  }

  switch (wire_type) {
    case UPB_WIRE_TYPE_VARINT:
      return upb_decode_varintfield(d, frame, tag_start, field);
    case UPB_WIRE_TYPE_32BIT:
      return upb_decode_32bitfield(d, frame, tag_start, field);
    case UPB_WIRE_TYPE_64BIT:
      return upb_decode_64bitfield(d, frame, tag_start, field);
    case UPB_WIRE_TYPE_DELIMITED:
      return upb_decode_delimitedfield(d, frame, tag_start, field);
    case UPB_WIRE_TYPE_START_GROUP:
      CHK(field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP);
      return upb_decode_submsg(d, frame, frame->limit, field, field_number);
    case UPB_WIRE_TYPE_END_GROUP:
      CHK(frame->group_number == field_number);
      frame->limit = d->ptr;
      return true;
    default:
      return false;
  }
}
1048
/* Skips the contents of an unknown group numbered |field_number|, reading
 * tags and discarding their data until d->ptr reaches the frame limit.  A
 * scratch frame with no message or layout is used for the skip. */
static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
                                  const char *limit) {
  upb_decframe frame;

  frame.limit = limit;
  frame.group_number = field_number;
  frame.m = NULL;
  frame.msg = NULL;

  while (d->ptr < frame.limit) {
    int inner_number;
    int inner_wire_type;

    CHK(upb_decode_tag(&d->ptr, frame.limit, &inner_number,
                       &inner_wire_type));
    CHK(upb_skip_unknownfielddata(d, &frame, inner_number, inner_wire_type));
  }

  return true;
}
1067
/* Decodes fields into |msg| (laid out per |l|) until d->ptr reaches the
 * frame limit.  The limit is re-read on every iteration because
 * upb_decode_field() lowers it when it consumes an END_GROUP tag. */
static bool upb_decode_message(upb_decstate *d, const char *limit,
                               int group_number, char *msg,
                               const upb_msglayout *l) {
  upb_decframe frame;

  frame.m = l;
  frame.msg = msg;
  frame.limit = limit;
  frame.group_number = group_number;

  for (;;) {
    if (d->ptr >= frame.limit) {
      break;
    }
    CHK(upb_decode_field(d, &frame));
  }

  return true;
}
1083
/* Public entry point: parses the bytes of |buf| into |msg|, whose layout is
 * |l|.  Returns false if decoding fails. */
bool upb_decode(upb_strview buf, void *msg, const upb_msglayout *l) {
  const char *end = buf.data + buf.size;
  upb_decstate state;

  state.ptr = buf.data;
  /* Group number 0: the top-level message is not inside a group. */
  return upb_decode_message(&state, end, 0, msg, l);
}
1090
1091 #undef CHK
1092
1093
1094 #include <ctype.h>
1095 #include <stdlib.h>
1096 #include <string.h>
1097
/* Heap string with an explicit length.  The struct is over-allocated so that
 * |str| holds |len| bytes of data followed by a terminating NUL. */
typedef struct {
  size_t len;   /* Number of data bytes, not counting the terminator. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
1102
newstr(const char * data,size_t len)1103 static str_t *newstr(const char *data, size_t len) {
1104 str_t *ret = upb_gmalloc(sizeof(*ret) + len);
1105 if (!ret) return NULL;
1106 ret->len = len;
1107 memcpy(ret->str, data, len);
1108 ret->str[len] = '\0';
1109 return ret;
1110 }
1111
/* Releases a str_t through the global allocator. */
static void freestr(str_t *s) {
  upb_gfree(s);
}
1113
1114 /* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */
/* True when |c| lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  return !(c < low || c > high);
}
1118
/* True for ASCII letters and underscore. */
static bool upb_isletter(char c) {
  if (upb_isbetween(c, 'A', 'Z')) {
    return true;
  }
  if (upb_isbetween(c, 'a', 'z')) {
    return true;
  }
  return c == '_';
}
1122
/* True for anything upb_isletter() accepts, plus the ASCII digits. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) {
    return true;
  }
  return upb_isbetween(c, '0', '9');
}
1126
/* Checks that the |len| bytes at |str| form a valid identifier.
 *
 * Each component must start with a letter or underscore and continue with
 * letters, digits or underscores.  When |full| is true, components may be
 * separated by single dots; otherwise any dot is an error.  An empty name,
 * or one ending in a dot, is rejected without setting |s|.  On the other
 * failures an error message is stored in |s|. */
static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
  bool at_component_start = true;
  size_t pos = 0;

  while (pos < len) {
    char ch = str[pos++];

    if (ch == '.') {
      if (at_component_start || !full) {
        upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
        return false;
      }
      at_component_start = true;
      continue;
    }

    if (at_component_start) {
      if (!upb_isletter(ch)) {
        upb_status_seterrf(
            s, "invalid name: path components must start with a letter (%s)",
            str);
        return false;
      }
      at_component_start = false;
      continue;
    }

    if (!upb_isalphanum(ch)) {
      upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
                         str);
      return false;
    }
  }

  /* Still expecting a component start means empty input or a trailing dot. */
  return !at_component_start;
}
1156
upb_isoneof(const upb_refcounted * def)1157 static bool upb_isoneof(const upb_refcounted *def) {
1158 return def->vtbl == &upb_oneofdef_vtbl;
1159 }
1160
upb_isfield(const upb_refcounted * def)1161 static bool upb_isfield(const upb_refcounted *def) {
1162 return def->vtbl == &upb_fielddef_vtbl;
1163 }
1164
upb_trygetoneof(const upb_refcounted * def)1165 static const upb_oneofdef *upb_trygetoneof(const upb_refcounted *def) {
1166 return upb_isoneof(def) ? (const upb_oneofdef*)def : NULL;
1167 }
1168
upb_trygetfield(const upb_refcounted * def)1169 static const upb_fielddef *upb_trygetfield(const upb_refcounted *def) {
1170 return upb_isfield(def) ? (const upb_fielddef*)def : NULL;
1171 }
1172
1173
1174 /* upb_def ********************************************************************/
1175
upb_def_type(const upb_def * d)1176 upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
1177
upb_def_fullname(const upb_def * d)1178 const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
1179
upb_def_name(const upb_def * d)1180 const char *upb_def_name(const upb_def *d) {
1181 const char *p;
1182
1183 if (d->fullname == NULL) {
1184 return NULL;
1185 } else if ((p = strrchr(d->fullname, '.')) == NULL) {
1186 /* No '.' in the name, return the full string. */
1187 return d->fullname;
1188 } else {
1189 /* Return one past the last '.'. */
1190 return p + 1;
1191 }
1192 }
1193
/* Replaces the full name of |def| with a private copy of |fullname|.
 *
 * The name must be a valid dotted identifier.  On failure (invalid name or
 * out of memory) the previous name is left in place, |s| describes the error
 * and false is returned.  |def| must not be frozen. */
bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
  const char *copy;

  UPB_ASSERT(!upb_def_isfrozen(def));

  if (!upb_isident(fullname, strlen(fullname), true, s)) {
    return false;
  }

  copy = upb_gstrdup(fullname);
  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* Swap in the new copy only once it is known to exist. */
  upb_gfree((void*)def->fullname);
  def->fullname = copy;
  return true;
}
1210
upb_def_file(const upb_def * d)1211 const upb_filedef *upb_def_file(const upb_def *d) { return d->file; }
1212
/* Initializes the base part shared by all defs: sets up the refcounted
 * header with |vtbl| and |owner|, records |type| and clears the name, file
 * and came_from_user fields.  Returns false if the refcounted header could
 * not be initialized. */
static bool upb_def_init(upb_def *def, upb_deftype_t type,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
  if (upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) {
    def->type = type;
    def->file = NULL;
    def->fullname = NULL;
    def->came_from_user = false;
    return true;
  }
  return false;
}
1223
/* Frees the resources owned by the base def: its copy of the full name. */
static void upb_def_uninit(upb_def *def) {
  void *owned_name = (void*)def->fullname;
  upb_gfree(owned_name);
}
1227
/* Full name of |m| for use in error messages; a placeholder is substituted
 * when the message has no name. */
static const char *msgdef_name(const upb_msgdef *m) {
  const char *fullname = upb_def_fullname(upb_msgdef_upcast(m));

  if (fullname) {
    return fullname;
  }
  return "(anonymous)";
}
1232
/* Checks the subdef of a field that requires one: it must be resolved (not
 * symbolic), present, and either frozen or part of the set being frozen. */
static bool upb_validate_field_subdef(upb_fielddef *f, upb_status *s) {
  const upb_def *target;

  if (f->subdef_is_symbolic) {
    upb_status_seterrf(s, "field '%s.%s' has not been resolved",
                       msgdef_name(f->msg.def), upb_fielddef_name(f));
    return false;
  }

  target = upb_fielddef_subdef(f);
  if (target == NULL) {
    upb_status_seterrf(s, "field %s.%s is missing required subdef",
                       msgdef_name(f->msg.def), upb_fielddef_name(f));
    return false;
  }

  if (!(upb_def_isfrozen(target) || target->came_from_user)) {
    upb_status_seterrf(s,
                       "subdef of field %s.%s is not frozen or being frozen",
                       msgdef_name(f->msg.def), upb_fielddef_name(f));
    return false;
  }

  return true;
}

/* Checks that an enum field's default is a member of its enum, by both name
 * and number, then stores the effective numeric default on the field. */
static bool upb_validate_field_enumdefault(upb_fielddef *f, upb_status *s) {
  bool by_name = upb_fielddef_enumhasdefaultstr(f);
  bool by_number = upb_fielddef_enumhasdefaultint32(f);

  /* Previously verified by upb_validate_enumdef(). */
  UPB_ASSERT(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);

  /* The enumdef exists and is non-empty, so at least one form of default is
   * available: either the one the user set (int32 or string) or, if nothing
   * was set, the enum's own default. */
  UPB_ASSERT(by_name || by_number);

  if (!by_name) {
    upb_status_seterrf(s,
                       "enum default for field %s.%s (%d) is not in the enum",
                       msgdef_name(f->msg.def), upb_fielddef_name(f),
                       upb_fielddef_defaultint32(f));
    return false;
  }

  if (!by_number) {
    upb_status_seterrf(s,
                       "enum default for field %s.%s (%s) is not in the enum",
                       msgdef_name(f->msg.def), upb_fielddef_name(f),
                       upb_fielddef_defaultstr(f, NULL));
    return false;
  }

  /* Lift the effective numeric default into the field's default slot, in case
   * we were only getting it "by reference" from the enumdef. */
  upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
  return true;
}

/* Checks that a MapEntry submessage is only referenced from a repeated
 * field, never from an optional/required (singular) one. */
static bool upb_validate_field_mapentry(upb_fielddef *f, upb_status *s) {
  const upb_msgdef *entry = upb_fielddef_msgsubdef(f);

  if (entry != NULL && upb_msgdef_mapentry(entry) && !upb_fielddef_isseq(f)) {
    upb_status_seterrf(s,
                       "Field %s refers to mapentry message but is not "
                       "a repeated field",
                       upb_fielddef_name(f) ? upb_fielddef_name(f) :
                       "(unnamed)");
    return false;
  }

  return true;
}

/* Validates a single field before freezing.  Checks, in order: name and
 * number are set; the type is set; only submessage fields are lazy; the
 * subdef (if required) is resolved and frozen or being frozen; an enum
 * default is a member of the enum; mapentry messages are only used by
 * repeated fields.  On failure |s| describes the first problem found. */
static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "fielddef must have name and number set");
    return false;
  }

  if (!f->type_is_set_) {
    upb_status_seterrmsg(s, "fielddef type was not initialized");
    return false;
  }

  if (upb_fielddef_lazy(f) &&
      upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
    upb_status_seterrmsg(s,
                         "only length-delimited submessage fields may be lazy");
    return false;
  }

  if (upb_fielddef_hassubdef(f) && !upb_validate_field_subdef(f, s)) {
    return false;
  }

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM &&
      !upb_validate_field_enumdefault(f, s)) {
    return false;
  }

  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
      !upb_validate_field_mapentry(f, s)) {
    return false;
  }

  return true;
}
1327
/* An enum is valid only if it has at least one value; otherwise an error is
 * stored in |s| and false is returned. */
static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
  if (upb_enumdef_numvals(e) != 0) {
    return true;
  }

  upb_status_seterrf(s, "enum %s has no members (must have at least one)",
                     upb_enumdef_fullname(e));
  return false;
}
1337
1338 /* All submessage fields are lower than all other fields.
1339 * Secondly, fields are increasing in order. */
/* Sort key for a field: its number, with bit 30 set for fields that are not
 * submessages so that they rank after every submessage field. */
uint32_t field_rank(const upb_fielddef *f) {
  const uint32_t non_submsg_bit = 1 << 30;
  uint32_t rank = upb_fielddef_number(f);

  UPB_ASSERT(rank < non_submsg_bit);
  return upb_fielddef_issubmsg(f) ? rank : (rank | non_submsg_bit);
}
1348
cmp_fields(const void * p1,const void * p2)1349 int cmp_fields(const void *p1, const void *p2) {
1350 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
1351 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
1352 return field_rank(f1) - field_rank(f2);
1353 }
1354
/* Validates every field of |m|, then assigns field indexes, selector bases
 * and oneof indexes.
 *
 * Fields are sorted with cmp_fields() before indexes are handed out.  Also
 * sets m->submsg_field_count and m->selector_count.  Returns false, with |s|
 * set, if a field fails validation or memory runs out. */
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
  /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
   * lowest indexes, but we do not publicly guarantee this. */
  upb_msg_field_iter field_it;
  upb_msg_oneof_iter oneof_it;
  upb_fielddef **sorted;
  uint32_t next_selector;
  int count = upb_msgdef_numfields(m);
  int pos;

  if (count == 0) {
    m->submsg_field_count = 0;
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    return true;
  }

  sorted = upb_gmalloc(count * sizeof(*sorted));
  if (sorted == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* Collect and validate the fields, counting submessage fields as we go. */
  m->submsg_field_count = 0;
  pos = 0;
  upb_msg_field_begin(&field_it, m);
  while (!upb_msg_field_done(&field_it)) {
    upb_fielddef *f = upb_msg_iter_field(&field_it);
    UPB_ASSERT(f->msg.def == m);
    if (!upb_validate_field(f, s)) {
      upb_gfree(sorted);
      return false;
    }
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    sorted[pos] = f;
    pos++;
    upb_msg_field_next(&field_it);
  }

  qsort(sorted, count, sizeof(*sorted), cmp_fields);

  /* Hand out indexes in sorted order and give each field its selector
   * range. */
  next_selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (pos = 0; pos < count; pos++) {
    upb_fielddef *f = sorted[pos];
    f->index_ = pos;
    f->selector_base = next_selector + upb_handlers_selectorbaseoffset(f);
    next_selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = next_selector;

#ifndef NDEBUG
  {
    /* Verify that all selectors for the message are distinct. */
#define TRY(type) \
  if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);

    upb_inttable t;
    upb_value v;
    upb_selector_t sel;

    upb_inttable_init(&t, UPB_CTYPE_BOOL);
    v = upb_value_bool(true);
    upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
    upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
    upb_inttable_insert(&t, UPB_UNKNOWN_SELECTOR, v);
    upb_msg_field_begin(&field_it, m);
    while (!upb_msg_field_done(&field_it)) {
      upb_fielddef *f = upb_msg_iter_field(&field_it);
      /* These calls will assert-fail in upb_table if the value already
       * exists. */
      TRY(UPB_HANDLER_INT32);
      TRY(UPB_HANDLER_INT64);
      TRY(UPB_HANDLER_UINT32);
      TRY(UPB_HANDLER_UINT64);
      TRY(UPB_HANDLER_FLOAT);
      TRY(UPB_HANDLER_DOUBLE);
      TRY(UPB_HANDLER_BOOL);
      TRY(UPB_HANDLER_STARTSTR);
      TRY(UPB_HANDLER_STRING);
      TRY(UPB_HANDLER_ENDSTR);
      TRY(UPB_HANDLER_STARTSUBMSG);
      TRY(UPB_HANDLER_ENDSUBMSG);
      TRY(UPB_HANDLER_STARTSEQ);
      TRY(UPB_HANDLER_ENDSEQ);
      upb_msg_field_next(&field_it);
    }
    upb_inttable_uninit(&t);
  }
#undef TRY
#endif

  /* Number the oneofs in iteration order. */
  pos = 0;
  upb_msg_oneof_begin(&oneof_it, m);
  while (!upb_msg_oneof_done(&oneof_it)) {
    upb_oneofdef *o = upb_msg_iter_oneof(&oneof_it);
    o->index = pos;
    pos++;
    upb_msg_oneof_next(&oneof_it);
  }

  upb_gfree(sorted);
  return true;
}
1455
/* Sets m->well_known_type from the message's full name.  Messages with no
 * name, or with a name not in the table, get UPB_WELLKNOWN_UNSPECIFIED. */
static void assign_msg_wellknowntype(upb_msgdef *m) {
  static const struct {
    const char *fullname;
    int type;
  } known[] = {
    {"google.protobuf.Any", UPB_WELLKNOWN_ANY},
    {"google.protobuf.FieldMask", UPB_WELLKNOWN_FIELDMASK},
    {"google.protobuf.Duration", UPB_WELLKNOWN_DURATION},
    {"google.protobuf.Timestamp", UPB_WELLKNOWN_TIMESTAMP},
    {"google.protobuf.DoubleValue", UPB_WELLKNOWN_DOUBLEVALUE},
    {"google.protobuf.FloatValue", UPB_WELLKNOWN_FLOATVALUE},
    {"google.protobuf.Int64Value", UPB_WELLKNOWN_INT64VALUE},
    {"google.protobuf.UInt64Value", UPB_WELLKNOWN_UINT64VALUE},
    {"google.protobuf.Int32Value", UPB_WELLKNOWN_INT32VALUE},
    {"google.protobuf.UInt32Value", UPB_WELLKNOWN_UINT32VALUE},
    {"google.protobuf.BoolValue", UPB_WELLKNOWN_BOOLVALUE},
    {"google.protobuf.StringValue", UPB_WELLKNOWN_STRINGVALUE},
    {"google.protobuf.BytesValue", UPB_WELLKNOWN_BYTESVALUE},
    {"google.protobuf.Value", UPB_WELLKNOWN_VALUE},
    {"google.protobuf.ListValue", UPB_WELLKNOWN_LISTVALUE},
    {"google.protobuf.Struct", UPB_WELLKNOWN_STRUCT}
  };
  const char *name = upb_msgdef_fullname(m);
  int found = UPB_WELLKNOWN_UNSPECIFIED;
  size_t i;

  if (name != NULL) {
    for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
      if (strcmp(name, known[i].fullname) == 0) {
        found = known[i].type;
        break;
      }
    }
  }

  m->well_known_type = found;
}
1498
/* Validates the |n| defs in |defs| as a set that is about to be frozen.
 *
 * Pass one rejects already-frozen defs and standalone fielddefs, marks each
 * def as came_from_user and validates enums.  Pass two compacts the lookup
 * tables and, for messages, validates fields and assigns indexes, selectors
 * and the well-known type.  On failure the came_from_user marks are cleared
 * on every def, |s| holds the error and false is returned. */
bool _upb_def_validate(upb_def *const*defs, size_t n, upb_status *s) {
  size_t i;

  /* First perform validation, in two passes so we can check that we have a
   * transitive closure without needing to search. */
  for (i = 0; i < n; i++) {
    upb_def *def = defs[i];

    if (upb_def_isfrozen(def)) {
      /* Could relax this requirement if it's annoying. */
      upb_status_seterrmsg(s, "def is already frozen");
      goto fail;
    }

    if (def->type == UPB_DEF_FIELD) {
      upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
      goto fail;
    }

    /* Set now to detect transitive closure in the second pass. */
    def->came_from_user = true;

    if (def->type == UPB_DEF_ENUM &&
        !upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
      goto fail;
    }
  }

  /* Second pass of validation.  Also assign selector bases and indexes, and
   * compact tables. */
  for (i = 0; i < n; i++) {
    upb_def *def = defs[i];
    upb_msgdef *msg = upb_dyncast_msgdef_mutable(def);
    upb_enumdef *en = upb_dyncast_enumdef_mutable(def);

    if (msg) {
      upb_inttable_compact(&msg->itof);
      if (!assign_msg_indices(msg, s)) {
        goto fail;
      }
      assign_msg_wellknowntype(msg);
    } else if (en) {
      upb_inttable_compact(&en->iton);
    }
  }

  return true;

fail:
  /* Undo the marks from pass one so the defs look untouched. */
  for (i = 0; i < n; i++) {
    defs[i]->came_from_user = false;
  }
  UPB_ASSERT(!(s && upb_ok(s)));
  return false;
}
1552
/* Validates the |n| defs in |defs| and, if they all pass, freezes them
 * together.  Returns false with |s| set on failure. */
bool upb_def_freeze(upb_def *const* defs, size_t n, upb_status *s) {
  /* Def graph contains FieldDefs between each MessageDef, so double the
   * limit. */
  const size_t maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
  bool valid = _upb_def_validate(defs, n, s);

  if (valid) {
    /* Validation all passed; freeze the objects. */
    return upb_refcounted_freeze((upb_refcounted *const*)defs, n, s, maxdepth);
  }
  return false;
}
1566
1567
1568 /* upb_enumdef ****************************************************************/
1569
/* Refcounted "visit" callback for enumdefs: reports the owning filedef, if
 * any, as the only outgoing reference. */
static void visitenum(const upb_refcounted *r, upb_refcounted_visit *visit,
                      void *closure) {
  const upb_enumdef *e = (const upb_enumdef*)r;
  const upb_filedef *file = upb_def_file(upb_enumdef_upcast(e));

  if (file) {
    visit(r, upb_filedef_upcast(file), closure);
  }
}
1578
/* Refcounted "free" callback for enumdefs: frees the name strings held in
 * the number-to-name table, both tables, the base def, and the enumdef. */
static void freeenum(upb_refcounted *r) {
  upb_enumdef *e = (upb_enumdef*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &e->iton);
  while (!upb_inttable_done(&it)) {
    /* To clean up the upb_gstrdup() from upb_enumdef_addval(). */
    upb_gfree(upb_value_getcstr(upb_inttable_iter_value(&it)));
    upb_inttable_next(&it);
  }

  upb_strtable_uninit(&e->ntoi);
  upb_inttable_uninit(&e->iton);
  upb_def_uninit(upb_enumdef_upcast_mutable(e));
  upb_gfree(e);
}
1592
1593 const struct upb_refcounted_vtbl upb_enumdef_vtbl = {&visitenum, &freeenum};
1594
upb_enumdef_new(const void * owner)1595 upb_enumdef *upb_enumdef_new(const void *owner) {
1596 upb_enumdef *e = upb_gmalloc(sizeof(*e));
1597 if (!e) return NULL;
1598
1599 if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM,
1600 &upb_enumdef_vtbl, owner)) {
1601 goto err2;
1602 }
1603
1604 if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
1605 if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
1606 return e;
1607
1608 err1:
1609 upb_strtable_uninit(&e->ntoi);
1610 err2:
1611 upb_gfree(e);
1612 return NULL;
1613 }
1614
/* Freezes the single enumdef |e|; see upb_def_freeze(). */
bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
  upb_def *single[1];

  single[0] = upb_enumdef_upcast_mutable(e);
  return upb_def_freeze(single, 1, status);
}
1619
upb_enumdef_fullname(const upb_enumdef * e)1620 const char *upb_enumdef_fullname(const upb_enumdef *e) {
1621 return upb_def_fullname(upb_enumdef_upcast(e));
1622 }
1623
upb_enumdef_name(const upb_enumdef * e)1624 const char *upb_enumdef_name(const upb_enumdef *e) {
1625 return upb_def_name(upb_enumdef_upcast(e));
1626 }
1627
/* Sets the enum's full name; see upb_def_setfullname() for the rules. */
bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
                             upb_status *s) {
  upb_def *base = upb_enumdef_upcast_mutable(e);
  return upb_def_setfullname(base, fullname, s);
}
1632
/* Adds the value |name| = |num| to the enum.
 *
 * |name| must be a valid single-component identifier that is not already
 * defined in the enum.  Several names may share one number; the
 * number-to-name table keeps the first name added for each number.  The
 * first value added becomes the enum's default.
 *
 * Returns false with |status| set if the name is invalid, is a duplicate, or
 * memory runs out; in that case the enum is left unchanged. */
bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
                        upb_status *status) {
  if (!upb_isident(name, strlen(name), false, status)) {
    return false;
  }

  if (upb_enumdef_ntoiz(e, name, NULL)) {
    upb_status_seterrf(status, "name '%s' is already defined", name);
    return false;
  }

  if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
    upb_status_seterrmsg(status, "out of memory");
    return false;
  }

  if (!upb_inttable_lookup(&e->iton, num, NULL)) {
    char *name2 = upb_gstrdup(name);
    if (!name2 || !upb_inttable_insert(&e->iton, num, upb_value_cstr(name2))) {
      upb_status_seterrmsg(status, "out of memory");
      /* The copy is not owned by any table on this path; release it so a
       * failed insert does not leak it. */
      upb_gfree(name2);
      /* Roll back the name->number entry added above. */
      upb_strtable_remove(&e->ntoi, name, NULL);
      return false;
    }
  }

  if (upb_enumdef_numvals(e) == 1) {
    bool ok = upb_enumdef_setdefault(e, num, NULL);
    UPB_ASSERT(ok);
  }

  return true;
}
1667
upb_enumdef_default(const upb_enumdef * e)1668 int32_t upb_enumdef_default(const upb_enumdef *e) {
1669 UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
1670 return e->defaultval;
1671 }
1672
/* Makes |val| the enum's default.  Fails, setting |s|, if |val| is not the
 * number of any member.  |e| must not be frozen. */
bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
  UPB_ASSERT(!upb_enumdef_isfrozen(e));

  if (upb_enumdef_iton(e, val)) {
    e->defaultval = val;
    return true;
  }

  upb_status_seterrf(s, "number '%d' is not in the enum.", val);
  return false;
}
1682
upb_enumdef_numvals(const upb_enumdef * e)1683 int upb_enumdef_numvals(const upb_enumdef *e) {
1684 return upb_strtable_count(&e->ntoi);
1685 }
1686
/* Starts iterating the values of |e|.  Iteration walks the name table rather
 * than the number table, to account for duplicate numbers. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  const upb_strtable *by_name = &e->ntoi;
  upb_strtable_begin(i, by_name);
}
1691
/* Advances the enum iterator to the next value. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* True once the enum iterator has moved past the last value. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
1694
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)1695 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
1696 size_t len, int32_t *num) {
1697 upb_value v;
1698 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
1699 return false;
1700 }
1701 if (num) *num = upb_value_getint32(v);
1702 return true;
1703 }
1704
upb_enumdef_iton(const upb_enumdef * def,int32_t num)1705 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
1706 upb_value v;
1707 return upb_inttable_lookup32(&def->iton, num, &v) ?
1708 upb_value_getcstr(v) : NULL;
1709 }
1710
/* Name of the value the iterator currently points at. */
const char *upb_enum_iter_name(upb_enum_iter *iter) { return upb_strtable_iter_key(iter); }
1714
/* Number of the value the iterator currently points at. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  upb_value current = upb_strtable_iter_value(iter);
  return upb_value_getint32(current);
}
1718
1719
1720 /* upb_fielddef ***************************************************************/
1721
1722 static void upb_fielddef_init_default(upb_fielddef *f);
1723
/* Frees the field's default value if it is a heap string.  The string slot
 * is only examined once the type has been set. */
static void upb_fielddef_uninit_default(upb_fielddef *f) {
  if (!f->type_is_set_) {
    return;
  }
  if (!f->default_is_string) {
    return;
  }
  if (f->defaultval.bytes) {
    freestr(f->defaultval.bytes);
  }
}
1728
upb_fielddef_fullname(const upb_fielddef * e)1729 const char *upb_fielddef_fullname(const upb_fielddef *e) {
1730 return upb_def_fullname(upb_fielddef_upcast(e));
1731 }
1732
/* Refcounted "visit" callback for fielddefs.  Reports, in this order and
 * only when present: the containing message, the containing oneof, the
 * subdef, and the owning filedef. */
static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_fielddef *f = (const upb_fielddef*)r;
  const upb_def *def = upb_fielddef_upcast(f);
  const upb_def *subdef;
  const upb_filedef *file;

  if (upb_fielddef_containingtype(f)) {
    visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
  }

  if (upb_fielddef_containingoneof(f)) {
    visit(r, upb_oneofdef_upcast(upb_fielddef_containingoneof(f)), closure);
  }

  subdef = upb_fielddef_subdef(f);
  if (subdef) {
    visit(r, upb_def_upcast(subdef), closure);
  }

  file = upb_def_file(def);
  if (file) {
    visit(r, upb_filedef_upcast(file), closure);
  }
}
1750
/* Refcounted "free" callback for fielddefs: frees the default value, any
 * symbolic (by-name) subdef or message reference, the base def, and the
 * fielddef itself. */
static void freefield(upb_refcounted *r) {
  upb_fielddef *f = (upb_fielddef*)r;

  upb_fielddef_uninit_default(f);

  /* The name strings are only owned while the reference is symbolic. */
  if (f->subdef_is_symbolic) {
    upb_gfree(f->sub.name);
  }
  if (f->msg_is_symbolic) {
    upb_gfree(f->msg.name);
  }

  upb_def_uninit(upb_fielddef_upcast_mutable(f));
  upb_gfree(f);
}
1761
enumdefaultstr(const upb_fielddef * f)1762 static const char *enumdefaultstr(const upb_fielddef *f) {
1763 const upb_enumdef *e;
1764 UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1765 e = upb_fielddef_enumsubdef(f);
1766 if (f->default_is_string && f->defaultval.bytes) {
1767 /* Default was explicitly set as a string. */
1768 str_t *s = f->defaultval.bytes;
1769 return s->str;
1770 } else if (e) {
1771 if (!f->default_is_string) {
1772 /* Default was explicitly set as an integer; look it up in enumdef. */
1773 const char *name = upb_enumdef_iton(e, f->defaultval.sint);
1774 if (name) {
1775 return name;
1776 }
1777 } else {
1778 /* Default is completely unset; pull enumdef default. */
1779 if (upb_enumdef_numvals(e) > 0) {
1780 const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
1781 UPB_ASSERT(name);
1782 return name;
1783 }
1784 }
1785 }
1786 return NULL;
1787 }
1788
enumdefaultint32(const upb_fielddef * f,int32_t * val)1789 static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
1790 const upb_enumdef *e;
1791 UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1792 e = upb_fielddef_enumsubdef(f);
1793 if (!f->default_is_string) {
1794 /* Default was explicitly set as an integer. */
1795 *val = f->defaultval.sint;
1796 return true;
1797 } else if (e) {
1798 if (f->defaultval.bytes) {
1799 /* Default was explicitly set as a str; try to lookup corresponding int. */
1800 str_t *s = f->defaultval.bytes;
1801 if (upb_enumdef_ntoiz(e, s->str, val)) {
1802 return true;
1803 }
1804 } else {
1805 /* Default is unset; try to pull in enumdef default. */
1806 if (upb_enumdef_numvals(e) > 0) {
1807 *val = upb_enumdef_default(e);
1808 return true;
1809 }
1810 }
1811 }
1812 return false;
1813 }
1814
1815 const struct upb_refcounted_vtbl upb_fielddef_vtbl = {visitfield, freefield};
1816
upb_fielddef_new(const void * o)1817 upb_fielddef *upb_fielddef_new(const void *o) {
1818 upb_fielddef *f = upb_gmalloc(sizeof(*f));
1819 if (!f) return NULL;
1820 if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD,
1821 &upb_fielddef_vtbl, o)) {
1822 upb_gfree(f);
1823 return NULL;
1824 }
1825 f->msg.def = NULL;
1826 f->sub.def = NULL;
1827 f->oneof = NULL;
1828 f->subdef_is_symbolic = false;
1829 f->msg_is_symbolic = false;
1830 f->label_ = UPB_LABEL_OPTIONAL;
1831 f->type_ = UPB_TYPE_INT32;
1832 f->number_ = 0;
1833 f->type_is_set_ = false;
1834 f->tagdelim = false;
1835 f->is_extension_ = false;
1836 f->lazy_ = false;
1837 f->packed_ = true;
1838
1839 /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
1840 * with all integer types and is in some since more "default" since the most
1841 * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
1842 *
1843 * Other options to consider:
1844 * - there is no default; users must set this manually (like type).
1845 * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
1846 * be an optimal default for signed integers. */
1847 f->intfmt = UPB_INTFMT_VARIABLE;
1848 return f;
1849 }
1850
upb_fielddef_typeisset(const upb_fielddef * f)1851 bool upb_fielddef_typeisset(const upb_fielddef *f) {
1852 return f->type_is_set_;
1853 }
1854
upb_fielddef_type(const upb_fielddef * f)1855 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
1856 UPB_ASSERT(f->type_is_set_);
1857 return f->type_;
1858 }
1859
upb_fielddef_index(const upb_fielddef * f)1860 uint32_t upb_fielddef_index(const upb_fielddef *f) {
1861 return f->index_;
1862 }
1863
upb_fielddef_label(const upb_fielddef * f)1864 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
1865 return f->label_;
1866 }
1867
upb_fielddef_intfmt(const upb_fielddef * f)1868 upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
1869 return f->intfmt;
1870 }
1871
upb_fielddef_istagdelim(const upb_fielddef * f)1872 bool upb_fielddef_istagdelim(const upb_fielddef *f) {
1873 return f->tagdelim;
1874 }
1875
upb_fielddef_number(const upb_fielddef * f)1876 uint32_t upb_fielddef_number(const upb_fielddef *f) {
1877 return f->number_;
1878 }
1879
upb_fielddef_isextension(const upb_fielddef * f)1880 bool upb_fielddef_isextension(const upb_fielddef *f) {
1881 return f->is_extension_;
1882 }
1883
upb_fielddef_lazy(const upb_fielddef * f)1884 bool upb_fielddef_lazy(const upb_fielddef *f) {
1885 return f->lazy_;
1886 }
1887
upb_fielddef_packed(const upb_fielddef * f)1888 bool upb_fielddef_packed(const upb_fielddef *f) {
1889 return f->packed_;
1890 }
1891
/* Returns the name of |f|, which is stored as the full name of the
 * underlying def.  Callers in this file treat NULL as "no name set". */
const char *upb_fielddef_name(const upb_fielddef *f) {
  const char *name = upb_def_fullname(upb_fielddef_upcast(f));
  return name;
}
1895
/* Writes the JSON name of |f| into |buf|, which has room for |len| bytes.
 *
 * The JSON name is the field name with every underscore removed and the
 * character following each underscore upper-cased.
 *
 * Returns the number of bytes the complete result occupies, including the
 * terminating NUL, or 0 if the field has no name.  If the result does not
 * fit, the output is truncated and, when |len| > 0, still NUL-terminated. */
size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) {
  const char *name = upb_fielddef_name(f);
  size_t src, dst = 0;
  bool ucase_next = false;

/* Appends one byte, counting it even when it no longer fits.  The last slot
 * of the buffer is reserved for the terminator. */
#define WRITE(byte) \
  do { \
    ++dst; \
    if (dst < len) buf[dst - 1] = (byte); \
    else if (dst == len) buf[dst - 1] = '\0'; \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() is only defined for EOF and values representable as
       * unsigned char, so a possibly-negative char must be converted first. */
      WRITE(toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
1934
upb_fielddef_containingtype(const upb_fielddef * f)1935 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
1936 return f->msg_is_symbolic ? NULL : f->msg.def;
1937 }
1938
upb_fielddef_containingoneof(const upb_fielddef * f)1939 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
1940 return f->oneof;
1941 }
1942
upb_fielddef_containingtype_mutable(upb_fielddef * f)1943 upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
1944 return (upb_msgdef*)upb_fielddef_containingtype(f);
1945 }
1946
/* Returns the symbolic name of |f|'s containing type, or NULL when the
 * containing type is not stored symbolically. */
const char *upb_fielddef_containingtypename(upb_fielddef *f) {
  if (!f->msg_is_symbolic) {
    return NULL;
  }
  return f->msg.name;
}
1950
/* Frees the containing-type name held by |f|, if one is stored symbolically.
 * Does not reset msg_is_symbolic or the pointer; callers overwrite both. */
static void release_containingtype(upb_fielddef *f) {
  if (!f->msg_is_symbolic) return;
  upb_gfree(f->msg.name);
}
1954
/* Records the containing type of |f| symbolically, by |name|.
 *
 * Fails (returning false and setting |s|) if |f| has already been added to a
 * message or if the name cannot be copied.  On success any previously stored
 * symbolic name is released and replaced by a private copy of |name|. */
bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
                                        upb_status *s) {
  char *owned_name;

  UPB_ASSERT(!upb_fielddef_isfrozen(f));

  if (upb_fielddef_containingtype(f) != NULL) {
    upb_status_seterrmsg(s, "field has already been added to a message.");
    return false;
  }

  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
   * may have a leading "."). */

  owned_name = upb_gstrdup(name);
  if (owned_name == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* Only drop the old name once the new copy is safely in hand. */
  release_containingtype(f);
  f->msg_is_symbolic = true;
  f->msg.name = owned_name;
  return true;
}
1977
/* Sets the name of |f|.  Refused (with an error in |s|) once the field
 * belongs to a message or a oneof; otherwise the result of
 * upb_def_setfullname() is returned. */
bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
  const bool already_added = upb_fielddef_containingtype(f) ||
                             upb_fielddef_containingoneof(f);
  if (already_added) {
    upb_status_seterrmsg(s, "Already added to message or oneof");
    return false;
  }

  return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
}
1985
/* Asserts that |f| has its type set and that it equals |type|.  Guards the
 * typed default-value getters. */
static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
  /* Keep both parameters "used" even if UPB_ASSERT expands to nothing. */
  UPB_UNUSED(f);
  UPB_UNUSED(type);

  UPB_ASSERT(f->type_is_set_ && upb_fielddef_type(f) == type);
}
1991
upb_fielddef_defaultint64(const upb_fielddef * f)1992 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
1993 chkdefaulttype(f, UPB_TYPE_INT64);
1994 return f->defaultval.sint;
1995 }
1996
upb_fielddef_defaultint32(const upb_fielddef * f)1997 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
1998 if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
1999 int32_t val;
2000 bool ok = enumdefaultint32(f, &val);
2001 UPB_ASSERT(ok);
2002 return val;
2003 } else {
2004 chkdefaulttype(f, UPB_TYPE_INT32);
2005 return f->defaultval.sint;
2006 }
2007 }
2008
upb_fielddef_defaultuint64(const upb_fielddef * f)2009 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
2010 chkdefaulttype(f, UPB_TYPE_UINT64);
2011 return f->defaultval.uint;
2012 }
2013
upb_fielddef_defaultuint32(const upb_fielddef * f)2014 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
2015 chkdefaulttype(f, UPB_TYPE_UINT32);
2016 return f->defaultval.uint;
2017 }
2018
upb_fielddef_defaultbool(const upb_fielddef * f)2019 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
2020 chkdefaulttype(f, UPB_TYPE_BOOL);
2021 return f->defaultval.uint;
2022 }
2023
upb_fielddef_defaultfloat(const upb_fielddef * f)2024 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
2025 chkdefaulttype(f, UPB_TYPE_FLOAT);
2026 return f->defaultval.flt;
2027 }
2028
upb_fielddef_defaultdouble(const upb_fielddef * f)2029 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
2030 chkdefaulttype(f, UPB_TYPE_DOUBLE);
2031 return f->defaultval.dbl;
2032 }
2033
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)2034 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
2035 UPB_ASSERT(f->type_is_set_);
2036 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
2037 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
2038 upb_fielddef_type(f) == UPB_TYPE_ENUM);
2039
2040 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
2041 const char *ret = enumdefaultstr(f);
2042 UPB_ASSERT(ret);
2043 /* Enum defaults can't have embedded NULLs. */
2044 if (len) *len = strlen(ret);
2045 return ret;
2046 }
2047
2048 if (f->default_is_string) {
2049 str_t *str = f->defaultval.bytes;
2050 if (len) *len = str->len;
2051 return str->str;
2052 }
2053
2054 return NULL;
2055 }
2056
/* Installs the initial default value matching |f|'s current type: zero for
 * numeric and bool types, an empty string for string/bytes, nothing for
 * messages, and the "unset" sentinel for enums. */
static void upb_fielddef_init_default(upb_fielddef *f) {
  f->default_is_string = false;

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_DOUBLE:
      f->defaultval.dbl = 0;
      break;

    case UPB_TYPE_FLOAT:
      f->defaultval.flt = 0;
      break;

    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
      f->defaultval.sint = 0;
      break;

    case UPB_TYPE_UINT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_BOOL:
      f->defaultval.uint = 0;
      break;

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      f->defaultval.bytes = newstr("", 0);
      f->default_is_string = true;
      break;

    case UPB_TYPE_MESSAGE:
      /* Message fields carry no default value. */
      break;

    case UPB_TYPE_ENUM:
      /* This is our special sentinel that indicates "not set" for an enum:
       * flagged as a string default, but with no string attached. */
      f->default_is_string = true;
      f->defaultval.bytes = NULL;
      break;
  }
}
2080
upb_fielddef_subdef(const upb_fielddef * f)2081 const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
2082 return f->subdef_is_symbolic ? NULL : f->sub.def;
2083 }
2084
upb_fielddef_msgsubdef(const upb_fielddef * f)2085 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
2086 const upb_def *def = upb_fielddef_subdef(f);
2087 return def ? upb_dyncast_msgdef(def) : NULL;
2088 }
2089
upb_fielddef_enumsubdef(const upb_fielddef * f)2090 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
2091 const upb_def *def = upb_fielddef_subdef(f);
2092 return def ? upb_dyncast_enumdef(def) : NULL;
2093 }
2094
upb_fielddef_subdef_mutable(upb_fielddef * f)2095 upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
2096 return (upb_def*)upb_fielddef_subdef(f);
2097 }
2098
upb_fielddef_subdefname(const upb_fielddef * f)2099 const char *upb_fielddef_subdefname(const upb_fielddef *f) {
2100 if (f->subdef_is_symbolic) {
2101 return f->sub.name;
2102 } else if (f->sub.def) {
2103 return upb_def_fullname(f->sub.def);
2104 } else {
2105 return NULL;
2106 }
2107 }
2108
/* Sets the field number of |f|.
 *
 * Fails (returning false and setting |s|) if the field has already been
 * added to a message, or if |number| is outside the range the protobuf wire
 * format can represent: 1 through 2^29 - 1 (a tag is a 32-bit varint whose
 * low three bits hold the wire type). */
bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
  /* NOTE(review): if upb.h already exposes a named constant for this limit,
   * prefer it over the local definition. */
  const uint32_t max_field_number = ((uint32_t)1 << 29) - 1;

  if (upb_fielddef_containingtype(f)) {
    upb_status_seterrmsg(
        s, "cannot change field number after adding to a message");
    return false;
  }
  if (number == 0 || number > max_field_number) {
    upb_status_seterrf(s, "invalid field number (%u)", number);
    return false;
  }
  f->number_ = number;
  return true;
}
2122
/* Sets the type of |f|.  Asserts that |f| is not frozen and |type| is valid.
 *
 * The existing default value is torn down before the type changes, and the
 * initial default for the new type is installed afterwards. */
void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  UPB_ASSERT(upb_fielddef_checktype(type));

  /* Must run while the old type is still in place. */
  upb_fielddef_uninit_default(f);

  f->type_is_set_ = true;
  f->type_ = type;

  /* Must run once the new type is in place. */
  upb_fielddef_init_default(f);
}
2131
/* Configures |f| from a descriptor type (a UPB_DESCRIPTOR_TYPE_* value).
 *
 * Three properties are derived from |type|: the field type, the integer
 * format (fixed for the [s]fixed types, zigzag for the sint types, variable
 * otherwise) and the tag-delimited flag (set only for groups).  An unknown
 * |type| trips an assertion and leaves the field type untouched. */
void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
  upb_fieldtype_t field_type = UPB_TYPE_INT32;
  upb_intfmt_t int_format;
  bool recognized = true;

  UPB_ASSERT(!upb_fielddef_isfrozen(f));

  /* Step 1: descriptor type -> field type. */
  switch (type) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:   field_type = UPB_TYPE_DOUBLE;  break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:    field_type = UPB_TYPE_FLOAT;   break;
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_SINT64:   field_type = UPB_TYPE_INT64;   break;
    case UPB_DESCRIPTOR_TYPE_UINT64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:  field_type = UPB_TYPE_UINT64;  break;
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
    case UPB_DESCRIPTOR_TYPE_SINT32:   field_type = UPB_TYPE_INT32;   break;
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_FIXED32:  field_type = UPB_TYPE_UINT32;  break;
    case UPB_DESCRIPTOR_TYPE_BOOL:     field_type = UPB_TYPE_BOOL;    break;
    case UPB_DESCRIPTOR_TYPE_STRING:   field_type = UPB_TYPE_STRING;  break;
    case UPB_DESCRIPTOR_TYPE_BYTES:    field_type = UPB_TYPE_BYTES;   break;
    case UPB_DESCRIPTOR_TYPE_GROUP:
    case UPB_DESCRIPTOR_TYPE_MESSAGE:  field_type = UPB_TYPE_MESSAGE; break;
    case UPB_DESCRIPTOR_TYPE_ENUM:     field_type = UPB_TYPE_ENUM;    break;
    default:
      UPB_ASSERT(false);
      recognized = false;
      break;
  }
  if (recognized) {
    upb_fielddef_settype(f, field_type);
  }

  /* Step 2: descriptor type -> integer format. */
  switch (type) {
    case UPB_DESCRIPTOR_TYPE_FIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      int_format = UPB_INTFMT_FIXED;
      break;
    case UPB_DESCRIPTOR_TYPE_SINT64:
    case UPB_DESCRIPTOR_TYPE_SINT32:
      int_format = UPB_INTFMT_ZIGZAG;
      break;
    default:
      int_format = UPB_INTFMT_VARIABLE;
      break;
  }
  upb_fielddef_setintfmt(f, int_format);

  /* Step 3: only groups are tag-delimited. */
  upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
}
2192
upb_fielddef_descriptortype(const upb_fielddef * f)2193 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
2194 switch (upb_fielddef_type(f)) {
2195 case UPB_TYPE_FLOAT: return UPB_DESCRIPTOR_TYPE_FLOAT;
2196 case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
2197 case UPB_TYPE_BOOL: return UPB_DESCRIPTOR_TYPE_BOOL;
2198 case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
2199 case UPB_TYPE_BYTES: return UPB_DESCRIPTOR_TYPE_BYTES;
2200 case UPB_TYPE_ENUM: return UPB_DESCRIPTOR_TYPE_ENUM;
2201 case UPB_TYPE_INT32:
2202 switch (upb_fielddef_intfmt(f)) {
2203 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
2204 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED32;
2205 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT32;
2206 }
2207 case UPB_TYPE_INT64:
2208 switch (upb_fielddef_intfmt(f)) {
2209 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
2210 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED64;
2211 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT64;
2212 }
2213 case UPB_TYPE_UINT32:
2214 switch (upb_fielddef_intfmt(f)) {
2215 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
2216 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED32;
2217 case UPB_INTFMT_ZIGZAG: return -1;
2218 }
2219 case UPB_TYPE_UINT64:
2220 switch (upb_fielddef_intfmt(f)) {
2221 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
2222 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED64;
2223 case UPB_INTFMT_ZIGZAG: return -1;
2224 }
2225 case UPB_TYPE_MESSAGE:
2226 return upb_fielddef_istagdelim(f) ?
2227 UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
2228 }
2229 return 0;
2230 }
2231
/* Marks |f| as being (or not being) an extension.  |f| must not be frozen. */
void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));

  f->is_extension_ = is_extension;
}
2236
/* Sets the "lazy" flag of |f|.  |f| must not be frozen. */
void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));

  f->lazy_ = lazy;
}
2241
/* Sets the "packed" flag of |f|.  |f| must not be frozen. */
void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));

  f->packed_ = packed;
}
2246
/* Sets the label of |f|.  Asserts that |f| is not frozen and that |label|
 * passes upb_fielddef_checklabel(). */
void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  UPB_ASSERT(upb_fielddef_checklabel(label));

  f->label_ = label;
}
2252
/* Sets the integer format of |f|.  Asserts that |f| is not frozen and that
 * |fmt| passes upb_fielddef_checkintfmt(). */
void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  UPB_ASSERT(upb_fielddef_checkintfmt(fmt));

  f->intfmt = fmt;
}
2258
/* Sets whether |f| is tag-delimited (upb_fielddef_setdescriptortype() sets
 * this for groups).  |f| must not be frozen. */
void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  f->tagdelim = tag_delim;
}
2264
/* Prepares |f| for having a non-string default of |type| stored in it.
 *
 * Returns false (after asserting) unless the field's type is set, the field
 * is not frozen, and its type equals |type|.  Otherwise any string default
 * currently held is freed, the field is marked as holding a non-string
 * default, and true is returned. */
static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
  const bool usable = f->type_is_set_ &&
                      !upb_fielddef_isfrozen(f) &&
                      upb_fielddef_type(f) == type;
  if (!usable) {
    UPB_ASSERT(false);
    return false;
  }

  if (f->default_is_string) {
    str_t *old_default = f->defaultval.bytes;
    /* Only enums may be flagged "string" without a string attached. */
    UPB_ASSERT(old_default || type == UPB_TYPE_ENUM);
    if (old_default) {
      freestr(old_default);
    }
  }

  f->default_is_string = false;
  return true;
}
2279
/* Sets the default of an int64 field.  Does nothing (beyond the assertion in
 * checksetdefault()) if |f| is not a settable int64 field. */
void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
  if (!checksetdefault(f, UPB_TYPE_INT64)) {
    return;
  }
  f->defaultval.sint = value;
}
2284
/* Sets the default of an int32 or enum field.
 *
 * Enum fields are tried first; if |f| is not an enum (or the enum check
 * fails) the field must pass the int32 check instead.  The value is stored
 * only when one of the checks succeeds. */
void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
  bool accepted = false;

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    accepted = checksetdefault(f, UPB_TYPE_ENUM);
  }
  if (!accepted) {
    accepted = checksetdefault(f, UPB_TYPE_INT32);
  }

  if (accepted) {
    f->defaultval.sint = value;
  }
}
2292
/* Sets the default of a uint64 field.  Does nothing (beyond the assertion in
 * checksetdefault()) if |f| is not a settable uint64 field. */
void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT64)) {
    return;
  }
  f->defaultval.uint = value;
}
2297
/* Sets the default of a uint32 field.  Does nothing (beyond the assertion in
 * checksetdefault()) if |f| is not a settable uint32 field. */
void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT32)) {
    return;
  }
  f->defaultval.uint = value;
}
2302
/* Sets the default of a bool field; the value is kept in the unsigned
 * integer slot.  Does nothing (beyond the assertion in checksetdefault()) if
 * |f| is not a settable bool field. */
void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
  if (!checksetdefault(f, UPB_TYPE_BOOL)) {
    return;
  }
  f->defaultval.uint = value;
}
2307
/* Sets the default of a float field.  Does nothing (beyond the assertion in
 * checksetdefault()) if |f| is not a settable float field. */
void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
  if (!checksetdefault(f, UPB_TYPE_FLOAT)) {
    return;
  }
  f->defaultval.flt = value;
}
2312
/* Sets the default of a double field.  Does nothing (beyond the assertion in
 * checksetdefault()) if |f| is not a settable double field. */
void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
  if (!checksetdefault(f, UPB_TYPE_DOUBLE)) {
    return;
  }
  f->defaultval.dbl = value;
}
2317
/* Sets the default of a string, bytes or enum field to the |len| bytes at
 * |str|.
 *
 * For enum fields the value must be a valid identifier; otherwise false is
 * returned with the error in |s|.  False is also returned (with an
 * out-of-memory status) if the copy of the string cannot be allocated, in
 * which case the previous default is left untouched.  On success the old
 * string default, if any, is freed and replaced. */
bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
                                upb_status *s) {
  str_t *new_default;
  UPB_ASSERT(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
  if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
    return false;

  /* Allocate the replacement before releasing the old value, so a failed
   * allocation cannot leave a string field with a NULL default (which
   * upb_fielddef_defaultstr() would dereference).
   * NOTE(review): assumes newstr() signals allocation failure by returning
   * NULL -- confirm against its definition. */
  new_default = newstr(str, len);
  if (!new_default) {
    upb_upberr_setoom(s);
    return false;
  }

  if (f->default_is_string) {
    str_t *old_default = f->defaultval.bytes;
    /* Only enums may be flagged "string" without a string attached. */
    UPB_ASSERT(old_default || f->type_ == UPB_TYPE_ENUM);
    if (old_default) freestr(old_default);
  } else {
    /* A non-string default can only be present on an enum field. */
    UPB_ASSERT(f->type_ == UPB_TYPE_ENUM);
  }

  f->defaultval.bytes = new_default;
  f->default_is_string = true;
  return true;
}
2338
/* Convenience wrapper around upb_fielddef_setdefaultstr() for NUL-terminated
 * strings.  A NULL |str| is passed on with length 0.  The type of |f| must
 * already be set.  The result of the underlying call is not reported here;
 * any error is only visible through |s|. */
void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
                                 upb_status *s) {
  size_t length;

  UPB_ASSERT(f->type_is_set_);

  length = (str != NULL) ? strlen(str) : 0;
  upb_fielddef_setdefaultstr(f, str, length, s);
}
2344
upb_fielddef_enumhasdefaultint32(const upb_fielddef * f)2345 bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
2346 int32_t val;
2347 UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
2348 return enumdefaultint32(f, &val);
2349 }
2350
upb_fielddef_enumhasdefaultstr(const upb_fielddef * f)2351 bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
2352 UPB_ASSERT(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
2353 return enumdefaultstr(f) != NULL;
2354 }
2355
/* Checks that |subdef| is an acceptable subdef for |f|: a msgdef for message
 * fields, an enumdef for enum fields.  On mismatch, or if |f| is of a type
 * that cannot have a subdef at all, sets an error on |s| and returns false. */
static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
                                 upb_status *s) {
  switch (f->type_) {
    case UPB_TYPE_MESSAGE:
      if (upb_dyncast_msgdef(subdef)) {
        return true;
      }
      upb_status_seterrmsg(s, "invalid subdef type for this submessage field");
      return false;

    case UPB_TYPE_ENUM:
      if (upb_dyncast_enumdef(subdef)) {
        return true;
      }
      upb_status_seterrmsg(s, "invalid subdef type for this enum field");
      return false;

    default:
      upb_status_seterrmsg(s, "only message and enum fields can have a subdef");
      return false;
  }
}
2371
/* Lets go of whatever subdef |f| currently holds: frees the name if it is
 * symbolic, otherwise drops the ref2 on the resolved def (if any).  The
 * fields themselves are not cleared; callers overwrite them. */
static void release_subdef(upb_fielddef *f) {
  if (f->subdef_is_symbolic) {
    upb_gfree(f->sub.name);
    return;
  }

  if (f->sub.def) {
    upb_unref2(f->sub.def, f);
  }
}
2379
/* Sets (or, with a NULL |subdef|, clears) the resolved subdef of |f|.
 *
 * Asserts that |f| is not frozen and is of a type that takes a subdef.  A
 * non-NULL |subdef| of the wrong kind is rejected with an error in |s|.  On
 * success the previous subdef is released and a ref2 is taken on the new
 * one. */
bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
                            upb_status *s) {
  UPB_ASSERT(!upb_fielddef_isfrozen(f));
  UPB_ASSERT(upb_fielddef_hassubdef(f));

  if (subdef != NULL && !upb_subdef_typecheck(f, subdef, s)) {
    return false;
  }

  release_subdef(f);
  f->subdef_is_symbolic = false;
  f->sub.def = subdef;
  if (f->sub.def != NULL) {
    upb_ref2(f->sub.def, f);
  }
  return true;
}
2391
/* Typed wrapper: sets a msgdef as the subdef of |f| via
 * upb_fielddef_setsubdef(). */
bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
                               upb_status *s) {
  bool ok = upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
  return ok;
}
2396
/* Typed wrapper: sets an enumdef as the subdef of |f| via
 * upb_fielddef_setsubdef(). */
bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
                                upb_status *s) {
  bool ok = upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
  return ok;
}
2401
/* Records the subdef of |f| symbolically, by |name|.
 *
 * Fails (returning false and setting |s|) if the field's type does not take a
 * subdef or if the name cannot be copied.  On success the previously held
 * subdef (symbolic or resolved) is released and replaced by a private copy of
 * |name|. */
bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
                                upb_status *s) {
  char *owned_name;

  UPB_ASSERT(!upb_fielddef_isfrozen(f));

  if (!upb_fielddef_hassubdef(f)) {
    upb_status_seterrmsg(s, "field type does not accept a subdef");
    return false;
  }

  owned_name = upb_gstrdup(name);
  if (owned_name == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
   * may have a leading "."). */

  /* Only drop the old subdef once the new name is safely in hand. */
  release_subdef(f);
  f->subdef_is_symbolic = true;
  f->sub.name = owned_name;
  return true;
}
2424
upb_fielddef_issubmsg(const upb_fielddef * f)2425 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
2426 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
2427 }
2428
upb_fielddef_isstring(const upb_fielddef * f)2429 bool upb_fielddef_isstring(const upb_fielddef *f) {
2430 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
2431 upb_fielddef_type(f) == UPB_TYPE_BYTES;
2432 }
2433
upb_fielddef_isseq(const upb_fielddef * f)2434 bool upb_fielddef_isseq(const upb_fielddef *f) {
2435 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
2436 }
2437
/* True if |f| is neither a string/bytes field nor a submessage field. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  return !(upb_fielddef_isstring(f) || upb_fielddef_issubmsg(f));
}
2441
/* True if |f| is a map field: a repeated submessage field whose message type
 * is flagged as a map entry.  The message subdef is handed to
 * upb_msgdef_mapentry() without a NULL check. */
bool upb_fielddef_ismap(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) return false;
  if (!upb_fielddef_issubmsg(f)) return false;
  if (!upb_msgdef_mapentry(upb_fielddef_msgsubdef(f))) return false;
  return true;
}
2446
upb_fielddef_haspresence(const upb_fielddef * f)2447 bool upb_fielddef_haspresence(const upb_fielddef *f) {
2448 if (upb_fielddef_isseq(f)) return false;
2449 if (upb_fielddef_issubmsg(f)) return true;
2450
2451 /* Primitive field: return true unless there is a message that specifies
2452 * presence should not exist. */
2453 if (f->msg_is_symbolic || !f->msg.def) return true;
2454 return f->msg.def->syntax == UPB_SYNTAX_PROTO2;
2455 }
2456
upb_fielddef_hassubdef(const upb_fielddef * f)2457 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
2458 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
2459 }
2460
/* True if |x| lies in the closed interval [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}

/* Range checks for the enumerated field properties.  Each reports whether
 * the given integer is a valid value for the corresponding enum. */

/* Labels occupy the range 1..3. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}

/* Field types occupy the range 1..11. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}

/* Integer formats occupy the range 1..3. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}

/* Descriptor types occupy the range 1..18. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  return between(type, 1, 18);
}
2472
2473 /* upb_msgdef *****************************************************************/
2474
/* Refcounting "visit" callback for msgdefs: reports to |visit| every object
 * this msgdef refers to -- each of its fields, each of its oneofs, and the
 * file it belongs to (if any). */
static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
                     void *closure) {
  const upb_msgdef *msg = (const upb_msgdef*)r;
  const upb_def *def = upb_msgdef_upcast(msg);
  upb_msg_field_iter field_it;
  upb_msg_oneof_iter oneof_it;

  /* Fields. */
  upb_msg_field_begin(&field_it, msg);
  while (!upb_msg_field_done(&field_it)) {
    upb_fielddef *field = upb_msg_iter_field(&field_it);
    visit(r, upb_fielddef_upcast2(field), closure);
    upb_msg_field_next(&field_it);
  }

  /* Oneofs. */
  upb_msg_oneof_begin(&oneof_it, msg);
  while (!upb_msg_oneof_done(&oneof_it)) {
    upb_oneofdef *oneof = upb_msg_iter_oneof(&oneof_it);
    visit(r, upb_oneofdef_upcast(oneof), closure);
    upb_msg_oneof_next(&oneof_it);
  }

  /* Owning file, when there is one. */
  if (upb_def_file(def)) {
    visit(r, upb_filedef_upcast(upb_def_file(def)), closure);
  }
}
2497
/* Refcounting "free" callback for msgdefs: tears down both lookup tables and
 * the base def, then releases the msgdef's own memory. */
static void freemsg(upb_refcounted *r) {
  upb_msgdef *msg = (upb_msgdef*)r;

  upb_strtable_uninit(&msg->ntof);
  upb_inttable_uninit(&msg->itof);
  upb_def_uninit(upb_msgdef_upcast_mutable(msg));

  upb_gfree(msg);
}
2505
2506 const struct upb_refcounted_vtbl upb_msgdef_vtbl = {visitmsg, freemsg};
2507
upb_msgdef_new(const void * owner)2508 upb_msgdef *upb_msgdef_new(const void *owner) {
2509 upb_msgdef *m = upb_gmalloc(sizeof(*m));
2510 if (!m) return NULL;
2511
2512 if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &upb_msgdef_vtbl,
2513 owner)) {
2514 goto err2;
2515 }
2516
2517 if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
2518 if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
2519 m->map_entry = false;
2520 m->syntax = UPB_SYNTAX_PROTO2;
2521 return m;
2522
2523 err1:
2524 upb_inttable_uninit(&m->itof);
2525 err2:
2526 upb_gfree(m);
2527 return NULL;
2528 }
2529
/* Freezes the single msgdef |m| by passing it to upb_def_freeze() as a
 * one-element list; returns that call's result. */
bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
  upb_def *defs[1];
  defs[0] = upb_msgdef_upcast_mutable(m);
  return upb_def_freeze(defs, 1, status);
}
2534
/* Returns the full name of |m|, as reported by upb_def_fullname(). */
const char *upb_msgdef_fullname(const upb_msgdef *m) {
  const char *fullname = upb_def_fullname(upb_msgdef_upcast(m));
  return fullname;
}
2538
/* Returns the name of |m|, as reported by upb_def_name(). */
const char *upb_msgdef_name(const upb_msgdef *m) {
  const char *name = upb_def_name(upb_msgdef_upcast(m));
  return name;
}
2542
/* Sets the full name of |m| by delegating to upb_def_setfullname(); returns
 * that call's result. */
bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
                            upb_status *s) {
  upb_def *def = upb_msgdef_upcast_mutable(m);
  return upb_def_setfullname(def, fullname, s);
}
2547
/* Sets the syntax of |m|.  Only UPB_SYNTAX_PROTO2 and UPB_SYNTAX_PROTO3 are
 * accepted; any other value leaves |m| unchanged and returns false. */
bool upb_msgdef_setsyntax(upb_msgdef *m, upb_syntax_t syntax) {
  if (syntax == UPB_SYNTAX_PROTO2 || syntax == UPB_SYNTAX_PROTO3) {
    m->syntax = syntax;
    return true;
  }
  return false;
}
2556
upb_msgdef_syntax(const upb_msgdef * m)2557 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
2558 return m->syntax;
2559 }
2560
2561 /* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
2562 * on status |s| and return false if not. */
check_field_add(const upb_msgdef * m,const upb_fielddef * f,upb_status * s)2563 static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
2564 upb_status *s) {
2565 if (upb_fielddef_containingtype(f) != NULL) {
2566 upb_status_seterrmsg(s, "fielddef already belongs to a message");
2567 return false;
2568 } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
2569 upb_status_seterrmsg(s, "field name or number were not set");
2570 return false;
2571 } else if (upb_msgdef_itof(m, upb_fielddef_number(f))) {
2572 upb_status_seterrmsg(s, "duplicate field number");
2573 return false;
2574 } else if (upb_strtable_lookup(&m->ntof, upb_fielddef_name(f), NULL)) {
2575 upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
2576 return false;
2577 }
2578 return true;
2579 }
2580
/* Unconditionally attaches |f| to |m|; callers must have validated the pair
 * first.  Replaces any symbolic containing-type name with |m|, registers the
 * field under its number and name, links the two objects with ref2 in both
 * directions and, if |ref_donor| is non-NULL, drops the donor's ref on |f|. */
static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
  /* Point the field at its new parent. */
  release_containingtype(f);
  f->msg_is_symbolic = false;
  f->msg.def = m;

  /* Make the field findable by number and by name. */
  upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));

  /* Mutual references between field and message. */
  upb_ref2(f, m);
  upb_ref2(m, f);

  if (ref_donor != NULL) {
    upb_fielddef_unref(f, ref_donor);
  }
}
2591
/* Adds field |f| to msgdef |m|.  If |ref_donor| is non-NULL, the donor's ref
 * on |f| is released when the call succeeds.
 *
 * Returns false with an error in |s| if the field fails check_field_add() or
 * is part of a oneof.  Adding a field that already belongs to |m| succeeds
 * without further changes. */
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
                         upb_status *s) {
  /* TODO: extensions need to have a separate namespace, because proto2 allows a
   * top-level extension (ie. one not in any package) to have the same name as a
   * field from the message.
   *
   * This also implies that there needs to be a separate lookup-by-name method
   * for extensions.  It seems desirable for iteration to return both extensions
   * and non-extensions though.
   *
   * We also need to validate that the field number is in an extension range iff
   * it is an extension. */

  /* This method is idempotent: if |f| is already part of this msgdef there is
   * nothing to do beyond consuming the donated ref. */
  if (upb_fielddef_containingtype(f) == m) {
    if (ref_donor != NULL) {
      upb_fielddef_unref(f, ref_donor);
    }
    return true;
  }

  /* Check all constraints before performing any action. */
  if (!check_field_add(m, f, s)) {
    return false;
  }
  if (upb_fielddef_containingoneof(f) != NULL) {
    /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
    upb_status_seterrmsg(s, "fielddef is part of a oneof");
    return false;
  }

  /* Constraint checks ok, perform the action. */
  add_field(m, f, ref_donor);
  return true;
}
2625
/* Adds oneof |o|, together with all of its fields, to msgdef |m|.  If
 * |ref_donor| is non-NULL, the donor's ref on |o| is released on success.
 *
 * Nothing is modified unless every check passes: the oneof must be named,
 * must not belong to a message yet, must not clash with an existing name in
 * |m|, and each of its fields must pass check_field_add().  On failure an
 * error is set on |s| and false is returned. */
bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
                         upb_status *s) {
  upb_oneof_iter iter;

  /* Phase 1: validate the oneof itself. */
  if (upb_oneofdef_containingtype(o)) {
    upb_status_seterrmsg(s, "oneofdef already belongs to a message");
    return false;
  }
  if (upb_oneofdef_name(o) == NULL) {
    upb_status_seterrmsg(s, "oneofdef name was not set");
    return false;
  }
  if (upb_strtable_lookup(&m->ntof, upb_oneofdef_name(o), NULL)) {
    upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
    return false;
  }

  /* Phase 2: validate that none of the oneof's fields conflict with names or
   * numbers of fields already in the message. */
  upb_oneof_begin(&iter, o);
  while (!upb_oneof_done(&iter)) {
    const upb_fielddef *candidate = upb_oneof_iter_field(&iter);
    if (!check_field_add(m, candidate, s)) {
      return false;
    }
    upb_oneof_next(&iter);
  }

  /* Phase 3: everything checks out -- commit. */

  /* Register the oneof itself first. */
  o->parent = m;
  upb_strtable_insert(&m->ntof, upb_oneofdef_name(o), upb_value_ptr(o));
  upb_ref2(o, m);
  upb_ref2(m, o);

  /* Then attach each of its fields directly to the msgdef. */
  upb_oneof_begin(&iter, o);
  while (!upb_oneof_done(&iter)) {
    upb_fielddef *member = upb_oneof_iter_field(&iter);
    add_field(m, member, NULL);
    upb_oneof_next(&iter);
  }

  if (ref_donor != NULL) {
    upb_oneofdef_unref(o, ref_donor);
  }

  return true;
}
2669
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)2670 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
2671 upb_value val;
2672 return upb_inttable_lookup32(&m->itof, i, &val) ?
2673 upb_value_getptr(val) : NULL;
2674 }
2675
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)2676 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
2677 size_t len) {
2678 upb_value val;
2679
2680 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
2681 return NULL;
2682 }
2683
2684 return upb_trygetfield(upb_value_getptr(val));
2685 }
2686
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)2687 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
2688 size_t len) {
2689 upb_value val;
2690
2691 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
2692 return NULL;
2693 }
2694
2695 return upb_trygetoneof(upb_value_getptr(val));
2696 }
2697
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)2698 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
2699 const upb_fielddef **f, const upb_oneofdef **o) {
2700 upb_value val;
2701
2702 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
2703 return false;
2704 }
2705
2706 *o = upb_trygetoneof(upb_value_getptr(val));
2707 *f = upb_trygetfield(upb_value_getptr(val));
2708 UPB_ASSERT((*o != NULL) ^ (*f != NULL)); /* Exactly one of the two should be set. */
2709 return true;
2710 }
2711
upb_msgdef_numfields(const upb_msgdef * m)2712 int upb_msgdef_numfields(const upb_msgdef *m) {
2713 /* The number table contains only fields. */
2714 return upb_inttable_count(&m->itof);
2715 }
2716
upb_msgdef_numoneofs(const upb_msgdef * m)2717 int upb_msgdef_numoneofs(const upb_msgdef *m) {
2718 /* The name table includes oneofs, and the number table does not. */
2719 return upb_strtable_count(&m->ntof) - upb_inttable_count(&m->itof);
2720 }
2721
/* Sets the map-entry flag of |m| to |map_entry|.  |m| must not be frozen
 * (asserted). */
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
  const bool frozen = upb_msgdef_isfrozen(m);
  UPB_ASSERT(!frozen);
  m->map_entry = map_entry;
}
2726
upb_msgdef_mapentry(const upb_msgdef * m)2727 bool upb_msgdef_mapentry(const upb_msgdef *m) {
2728 return m->map_entry;
2729 }
2730
upb_msgdef_wellknowntype(const upb_msgdef * m)2731 upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
2732 return m->well_known_type;
2733 }
2734
upb_msgdef_isnumberwrapper(const upb_msgdef * m)2735 bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
2736 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
2737 return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
2738 type <= UPB_WELLKNOWN_UINT32VALUE;
2739 }
2740
/* Starts iteration over the fields of |m| by positioning |iter| at the
 * beginning of the message's number table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  const upb_inttable *by_number = &m->itof;
  upb_inttable_begin(iter, by_number);
}
2744
/* Advances the field iterator to the next entry of the number table. */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
2746
/* Returns true once the field iterator has run past the last entry. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
2750
upb_msg_iter_field(const upb_msg_field_iter * iter)2751 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
2752 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
2753 }
2754
/* Forces the field iterator into its "done" state by delegating to
 * upb_inttable_iter_setdone(). */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_inttable_iter_setdone(iter);
  return;
}
2758
/* Starts iteration over the oneofs of |m|.  The iterator walks the name
 * table, so any leading entries that are not oneofs are skipped here. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  upb_strtable_begin(iter, &m->ntof);

  for (;;) {
    if (upb_strtable_done(iter)) {
      break;
    }
    if (upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter)))) {
      break;  /* Found the first oneof. */
    }
    upb_strtable_next(iter);
  }
}
2767
/* Advances the oneof iterator: steps at least once, then keeps stepping past
 * non-oneof entries until a oneof or the end of the table is reached. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  upb_strtable_next(iter);

  while (!upb_strtable_done(iter) &&
         !upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter)))) {
    upb_strtable_next(iter);
  }
}
2775
/* Returns true once the oneof iterator has reached the end of the name
 * table. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  const bool finished = upb_strtable_done(iter);
  return finished;
}
2779
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)2780 upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
2781 return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
2782 }
2783
/* Forces the oneof iterator into its "done" state by delegating to
 * upb_strtable_iter_setdone(). */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_strtable_iter_setdone(iter);
  return;
}
2787
2788 /* upb_oneofdef ***************************************************************/
2789
/* Refcounted "visit" callback for oneofdefs: invokes |visit| once for every
 * field of the oneof and then, if the oneof has a parent message, once for
 * that message. */
static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_oneofdef *self = (const upb_oneofdef*)r;
  upb_oneof_iter iter;

  upb_oneof_begin(&iter, self);
  while (!upb_oneof_done(&iter)) {
    const upb_fielddef *member = upb_oneof_iter_field(&iter);
    visit(r, upb_fielddef_upcast2(member), closure);
    upb_oneof_next(&iter);
  }

  if (self->parent != NULL) {
    visit(r, upb_msgdef_upcast2(self->parent), closure);
  }
}
2802
/* Refcounted "free" callback for oneofdefs: tears down both lookup tables,
 * releases the name string, and finally frees the object itself. */
static void freeoneof(upb_refcounted *r) {
  upb_oneofdef *self = (upb_oneofdef*)r;
  void *owned_name = (void*)self->name;

  upb_strtable_uninit(&self->ntof);
  upb_inttable_uninit(&self->itof);
  upb_gfree(owned_name);
  upb_gfree(self);
}
2810
2811 const struct upb_refcounted_vtbl upb_oneofdef_vtbl = {visitoneof, freeoneof};
2812
upb_oneofdef_new(const void * owner)2813 upb_oneofdef *upb_oneofdef_new(const void *owner) {
2814 upb_oneofdef *o = upb_gmalloc(sizeof(*o));
2815
2816 if (!o) {
2817 return NULL;
2818 }
2819
2820 o->parent = NULL;
2821 o->name = NULL;
2822
2823 if (!upb_refcounted_init(upb_oneofdef_upcast_mutable(o), &upb_oneofdef_vtbl,
2824 owner)) {
2825 goto err2;
2826 }
2827
2828 if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
2829 if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
2830
2831 return o;
2832
2833 err1:
2834 upb_inttable_uninit(&o->itof);
2835 err2:
2836 upb_gfree(o);
2837 return NULL;
2838 }
2839
upb_oneofdef_name(const upb_oneofdef * o)2840 const char *upb_oneofdef_name(const upb_oneofdef *o) { return o->name; }
2841
/* Sets the name of |o| to a private copy of |name|.
 *
 * |o| must not be frozen (asserted).  Returns false and sets an error on |s|
 * when |o| already belongs to a message, when |name| is rejected by
 * upb_isident(), or when the copy cannot be allocated.  On success the
 * previously stored name (if any) is freed and true is returned. */
bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s) {
  UPB_ASSERT(!upb_oneofdef_isfrozen(o));
  if (upb_oneofdef_containingtype(o)) {
    upb_status_seterrmsg(s, "oneof already added to a message");
    return false;
  }

  if (!upb_isident(name, strlen(name), true, s)) {
    return false;
  }

  /* From here on |name| refers to our own heap copy. */
  name = upb_gstrdup(name);
  if (!name) {
    /* The message previously read "One of memory" (typo). */
    upb_status_seterrmsg(s, "Out of memory");
    return false;
  }

  upb_gfree((void*)o->name);
  o->name = name;
  return true;
}
2863
upb_oneofdef_containingtype(const upb_oneofdef * o)2864 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
2865 return o->parent;
2866 }
2867
upb_oneofdef_numfields(const upb_oneofdef * o)2868 int upb_oneofdef_numfields(const upb_oneofdef *o) {
2869 return upb_strtable_count(&o->ntof);
2870 }
2871
upb_oneofdef_index(const upb_oneofdef * o)2872 uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
2873 return o->index;
2874 }
2875
/* Adds field |f| to oneof |o|.
 *
 * Neither |o| nor its parent message (if any) may be frozen (asserted).  The
 * call is idempotent: if |f| already belongs to |o| it returns true at once.
 *
 * Returns false and sets an error on |s| when:
 *   - |f| does not have the OPTIONAL label;
 *   - |f| has no name or its number is 0;
 *   - |o| already has a field with the same name or number;
 *   - |f| already belongs to some other oneof;
 *   - |o| has no parent message but |f| already belongs to a message;
 *   - |o| has a parent message and |f| belongs to a different one;
 *   - |f| must first be added to |o|'s parent and that addition fails.
 *
 * On success |f| is entered into |o|'s tables, |f| and |o| take refs on each
 * other, and -- if |ref_donor| is non-NULL -- the donor's ref on |f| is
 * dropped. */
bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
                           const void *ref_donor,
                           upb_status *s) {
  const upb_msgdef *field_msg;

  UPB_ASSERT(!upb_oneofdef_isfrozen(o));
  UPB_ASSERT(!o->parent || !upb_msgdef_isfrozen(o->parent));

  /* Already a member of this very oneof: nothing to do. */
  if (upb_fielddef_containingoneof(f) == o) {
    return true;
  }

  if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
    upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
    return false;
  }

  /* The field needs an identity, must not collide with an existing member,
   * and must not already live in another oneof. */
  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "field name or number were not set");
    return false;
  }

  if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
      upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
    upb_status_seterrmsg(s, "duplicate field name or number");
    return false;
  }

  if (upb_fielddef_containingoneof(f) != NULL) {
    upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
    return false;
  }

  /* Message-membership rules.  A parentless oneof only accepts parentless
   * fields (otherwise the oneof would have to be silently attached to the
   * field's message).  A oneof inside a message accepts fields that are
   * either in no message yet or already in that same message. */
  field_msg = upb_fielddef_containingtype(f);
  if (o->parent == NULL) {
    if (field_msg != NULL) {
      upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
                              "oneof does not");
      return false;
    }
  } else if (field_msg != NULL && field_msg != o->parent) {
    upb_status_seterrmsg(s, "fielddef belongs to a different message "
                            "than oneof");
    return false;
  }

  /* Commit.  Adding to the parent message comes first because it is the one
   * step that can still fail. */
  if (o->parent != NULL && field_msg == NULL) {
    if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
      return false;
    }
  }

  release_containingtype(f);
  f->oneof = o;
  upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
  upb_ref2(f, o);
  upb_ref2(o, f);

  if (ref_donor) {
    upb_fielddef_unref(f, ref_donor);
  }

  return true;
}
2950
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)2951 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
2952 const char *name, size_t length) {
2953 upb_value val;
2954 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
2955 upb_value_getptr(val) : NULL;
2956 }
2957
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)2958 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
2959 upb_value val;
2960 return upb_inttable_lookup32(&o->itof, num, &val) ?
2961 upb_value_getptr(val) : NULL;
2962 }
2963
/* Starts iteration over the fields of |o| by positioning |iter| at the
 * beginning of the oneof's number table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  const upb_inttable *by_number = &o->itof;
  upb_inttable_begin(iter, by_number);
}
2967
/* Advances the oneof-field iterator to the next table entry. */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_inttable_next(iter);
  return;
}
2971
/* Returns true once the oneof-field iterator has run past the last entry. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
2975
upb_oneof_iter_field(const upb_oneof_iter * iter)2976 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
2977 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
2978 }
2979
/* Forces the oneof-field iterator into its "done" state by delegating to
 * upb_inttable_iter_setdone(). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(iter);
  return;
}
2983
2984 /* upb_filedef ****************************************************************/
2985
/* Refcounted "visit" callback for filedefs: invokes |visit| once for every
 * def recorded in the file. */
static void visitfiledef(const upb_refcounted *r, upb_refcounted_visit *visit,
                         void *closure) {
  const upb_filedef *file = (const upb_filedef*)r;
  size_t idx = 0;

  while (idx < upb_filedef_defcount(file)) {
    visit(r, upb_def_upcast(upb_filedef_def(file, idx)), closure);
    idx++;
  }
}
2995
/* Refcounted "free" callback for filedefs: drops the ref held on every
 * dependency, tears down both tables, frees the owned strings, and finally
 * frees the object itself. */
static void freefiledef(upb_refcounted *r) {
  upb_filedef *file = (upb_filedef*)r;
  size_t idx = 0;

  /* Release the refs taken in upb_filedef_adddep(). */
  while (idx < upb_filedef_depcount(file)) {
    upb_filedef_unref(upb_filedef_dep(file, idx), file);
    idx++;
  }

  upb_inttable_uninit(&file->defs);
  upb_inttable_uninit(&file->deps);

  upb_gfree((void*)file->name);
  upb_gfree((void*)file->package);
  upb_gfree((void*)file->phpprefix);
  upb_gfree((void*)file->phpnamespace);

  upb_gfree(file);
}
3012
3013 const struct upb_refcounted_vtbl upb_filedef_vtbl = {visitfiledef, freefiledef};
3014
upb_filedef_new(const void * owner)3015 upb_filedef *upb_filedef_new(const void *owner) {
3016 upb_filedef *f = upb_gmalloc(sizeof(*f));
3017
3018 if (!f) {
3019 return NULL;
3020 }
3021
3022 f->package = NULL;
3023 f->name = NULL;
3024 f->phpprefix = NULL;
3025 f->phpnamespace = NULL;
3026 f->syntax = UPB_SYNTAX_PROTO2;
3027
3028 if (!upb_refcounted_init(upb_filedef_upcast_mutable(f), &upb_filedef_vtbl,
3029 owner)) {
3030 goto err;
3031 }
3032
3033 if (!upb_inttable_init(&f->defs, UPB_CTYPE_CONSTPTR)) {
3034 goto err;
3035 }
3036
3037 if (!upb_inttable_init(&f->deps, UPB_CTYPE_CONSTPTR)) {
3038 goto err2;
3039 }
3040
3041 return f;
3042
3043
3044 err2:
3045 upb_inttable_uninit(&f->defs);
3046
3047 err:
3048 upb_gfree(f);
3049 return NULL;
3050 }
3051
upb_filedef_name(const upb_filedef * f)3052 const char *upb_filedef_name(const upb_filedef *f) {
3053 return f->name;
3054 }
3055
upb_filedef_package(const upb_filedef * f)3056 const char *upb_filedef_package(const upb_filedef *f) {
3057 return f->package;
3058 }
3059
upb_filedef_phpprefix(const upb_filedef * f)3060 const char *upb_filedef_phpprefix(const upb_filedef *f) {
3061 return f->phpprefix;
3062 }
3063
upb_filedef_phpnamespace(const upb_filedef * f)3064 const char *upb_filedef_phpnamespace(const upb_filedef *f) {
3065 return f->phpnamespace;
3066 }
3067
upb_filedef_syntax(const upb_filedef * f)3068 upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
3069 return f->syntax;
3070 }
3071
upb_filedef_defcount(const upb_filedef * f)3072 size_t upb_filedef_defcount(const upb_filedef *f) {
3073 return upb_inttable_count(&f->defs);
3074 }
3075
upb_filedef_depcount(const upb_filedef * f)3076 size_t upb_filedef_depcount(const upb_filedef *f) {
3077 return upb_inttable_count(&f->deps);
3078 }
3079
upb_filedef_def(const upb_filedef * f,size_t i)3080 const upb_def *upb_filedef_def(const upb_filedef *f, size_t i) {
3081 upb_value v;
3082
3083 if (upb_inttable_lookup32(&f->defs, i, &v)) {
3084 return upb_value_getconstptr(v);
3085 } else {
3086 return NULL;
3087 }
3088 }
3089
upb_filedef_dep(const upb_filedef * f,size_t i)3090 const upb_filedef *upb_filedef_dep(const upb_filedef *f, size_t i) {
3091 upb_value v;
3092
3093 if (upb_inttable_lookup32(&f->deps, i, &v)) {
3094 return upb_value_getconstptr(v);
3095 } else {
3096 return NULL;
3097 }
3098 }
3099
/* Replaces |f|'s name with a private copy of |name|.  Returns false and
 * reports out-of-memory on |s| if the copy cannot be allocated; the old name
 * is kept in that case.  On success the old name is freed. */
bool upb_filedef_setname(upb_filedef *f, const char *name, upb_status *s) {
  const char *copy = upb_gstrdup(name);

  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->name);
  f->name = copy;
  return true;
}
3110
/* Replaces |f|'s package with a private copy of |package|.  Returns false
 * when |package| is rejected by upb_isident() or when the copy cannot be
 * allocated (out-of-memory is reported on |s|); the old package is kept in
 * both cases.  On success the old package is freed. */
bool upb_filedef_setpackage(upb_filedef *f, const char *package,
                            upb_status *s) {
  const char *copy;

  if (!upb_isident(package, strlen(package), true, s)) {
    return false;
  }

  copy = upb_gstrdup(package);
  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->package);
  f->package = copy;
  return true;
}
3123
/* Replaces |f|'s PHP prefix with a private copy of |phpprefix|.  Returns
 * false and reports out-of-memory on |s| if the copy cannot be allocated; the
 * old value is kept in that case.  On success the old value is freed. */
bool upb_filedef_setphpprefix(upb_filedef *f, const char *phpprefix,
                              upb_status *s) {
  const char *copy = upb_gstrdup(phpprefix);

  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->phpprefix);
  f->phpprefix = copy;
  return true;
}
3135
/* Replaces |f|'s PHP namespace with a private copy of |phpnamespace|.
 * Returns false and reports out-of-memory on |s| if the copy cannot be
 * allocated; the old value is kept in that case.  On success the old value
 * is freed. */
bool upb_filedef_setphpnamespace(upb_filedef *f, const char *phpnamespace,
                                 upb_status *s) {
  const char *copy = upb_gstrdup(phpnamespace);

  if (copy == NULL) {
    upb_upberr_setoom(s);
    return false;
  }

  upb_gfree((void*)f->phpnamespace);
  f->phpnamespace = copy;
  return true;
}
3147
/* Sets the syntax of |f| and of every message def already recorded in it.
 *
 * Returns false and sets an error on |s| when |syntax| is neither
 * UPB_SYNTAX_PROTO2 nor UPB_SYNTAX_PROTO3; nothing is changed in that case.
 *
 * (A stale UPB_UNUSED(s) marker was removed: |s| is used for the error
 * report below.) */
bool upb_filedef_setsyntax(upb_filedef *f, upb_syntax_t syntax,
                           upb_status *s) {
  size_t i;

  if (syntax != UPB_SYNTAX_PROTO2 &&
      syntax != UPB_SYNTAX_PROTO3) {
    upb_status_seterrmsg(s, "Unknown syntax value.");
    return false;
  }

  f->syntax = syntax;

  /* Set all messages in this file to match. */
  for (i = 0; i < upb_filedef_defcount(f); i++) {
    /* Casting const away is safe since all defs in mutable filedef must
     * also be mutable. */
    upb_def *def = (upb_def*)upb_filedef_def(f, i);
    upb_msgdef *m = upb_dyncast_msgdef_mutable(def);

    if (m) {
      m->syntax = syntax;
    }
  }

  return true;
}
3175
/* Records |def| as belonging to file |f|.
 *
 * Returns false and sets an error on |s| when |def| already has a file, or
 * (out-of-memory) when it cannot be appended to |f|'s def table.  On success
 * |def| and |f| take refs on each other, the donor's ref on |def| is dropped
 * if |ref_donor| is non-NULL, and a message def inherits |f|'s syntax. */
bool upb_filedef_adddef(upb_filedef *f, upb_def *def, const void *ref_donor,
                        upb_status *s) {
  if (def->file) {
    upb_status_seterrmsg(s, "Def is already part of another filedef.");
    return false;
  }

  if (!upb_inttable_push(&f->defs, upb_value_constptr(def))) {
    upb_upberr_setoom(s);
    return false;
  }

  def->file = f;
  upb_ref2(def, f);
  upb_ref2(f, def);

  if (ref_donor) {
    upb_def_unref(def, ref_donor);
  }

  if (def->type == UPB_DEF_MSG) {
    upb_downcast_msgdef_mutable(def)->syntax = f->syntax;
  }

  return true;
}
3197
/* Appends |dep| to |f|'s dependency table and takes a ref on it owned by
 * |f|.  Returns false, without taking a ref, if the append fails. */
bool upb_filedef_adddep(upb_filedef *f, const upb_filedef *dep) {
  if (!upb_inttable_push(&f->deps, upb_value_constptr(dep))) {
    return false;
  }

  /* Regular ref instead of ref2 because files can't form cycles. */
  upb_filedef_ref(dep, f);
  return true;
}
3207
/* Destroys symbol table |s|: drops the ref it holds on every def in the
 * table, tears the table down, and frees |s| itself. */
void upb_symtab_free(upb_symtab *s) {
  upb_strtable_iter iter;

  upb_strtable_begin(&iter, &s->symtab);
  while (!upb_strtable_done(&iter)) {
    const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
    upb_def_unref(def, s);
    upb_strtable_next(&iter);
  }

  upb_strtable_uninit(&s->symtab);
  upb_gfree(s);
}
3218
upb_symtab_new()3219 upb_symtab *upb_symtab_new() {
3220 upb_symtab *s = upb_gmalloc(sizeof(*s));
3221 if (!s) {
3222 return NULL;
3223 }
3224
3225 upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
3226 return s;
3227 }
3228
upb_symtab_lookup(const upb_symtab * s,const char * sym)3229 const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
3230 upb_value v;
3231 upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
3232 upb_value_getptr(v) : NULL;
3233 return ret;
3234 }
3235
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)3236 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
3237 upb_value v;
3238 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3239 upb_value_getptr(v) : NULL;
3240 return def ? upb_dyncast_msgdef(def) : NULL;
3241 }
3242
upb_symtab_lookupmsg2(const upb_symtab * s,const char * sym,size_t len)3243 const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
3244 size_t len) {
3245 upb_value v;
3246 upb_def *def = upb_strtable_lookup2(&s->symtab, sym, len, &v) ?
3247 upb_value_getptr(v) : NULL;
3248 return def ? upb_dyncast_msgdef(def) : NULL;
3249 }
3250
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)3251 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
3252 upb_value v;
3253 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3254 upb_value_getptr(v) : NULL;
3255 return def ? upb_dyncast_enumdef(def) : NULL;
3256 }
3257
3258 /* Given a symbol and the base symbol inside which it is defined, find the
3259 * symbol's definition in t. */
upb_resolvename(const upb_strtable * t,const char * base,const char * sym)3260 static upb_def *upb_resolvename(const upb_strtable *t,
3261 const char *base, const char *sym) {
3262 if(strlen(sym) == 0) return NULL;
3263 if(sym[0] == '.') {
3264 /* Symbols starting with '.' are absolute, so we do a single lookup.
3265 * Slice to omit the leading '.' */
3266 upb_value v;
3267 return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
3268 } else {
3269 /* Remove components from base until we find an entry or run out.
3270 * TODO: This branch is totally broken, but currently not used. */
3271 (void)base;
3272 UPB_ASSERT(false);
3273 return NULL;
3274 }
3275 }
3276
upb_symtab_resolve(const upb_symtab * s,const char * base,const char * sym)3277 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
3278 const char *sym) {
3279 upb_def *ret = upb_resolvename(&s->symtab, base, sym);
3280 return ret;
3281 }
3282
3283 /* TODO(haberman): we need a lot more testing of error conditions. */
/* Adds the |n| mutable defs in |defs| to symbol table |s| as one batch and
 * freezes them (plus |freeze_also|, if non-NULL).
 *
 * Steps:
 *   1. Collect the defs into a temporary table keyed by full name, rejecting
 *      frozen defs, anonymous defs, fielddefs (extensions), and names that
 *      collide within the batch or with |s|.  Each collected def's ref is
 *      moved from |ref_donor| to |s|.
 *   2. For every message in the batch, resolve fields that carry a subdef
 *      name but no subdef yet, looking in the batch first and in |s| second.
 *   3. Validate and freeze the batch.
 *   4. Insert the defs into |s|.
 *
 * Returns true on success (also when there is nothing to do).  On failure
 * returns false with |status| set, after handing the moved refs back to
 * |ref_donor|. */
static bool symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
                       void *ref_donor, upb_refcounted *freeze_also,
                       upb_status *status) {
  size_t idx;
  size_t def_count;
  size_t obj_count;
  size_t capacity;
  upb_strtable_iter it;
  upb_refcounted **add_objs = NULL;
  upb_def **add_defs = NULL;
  upb_strtable addtab;

  if (n == 0 && !freeze_also) {
    return true;
  }

  if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
    upb_status_seterrmsg(status, "out of memory");
    return false;
  }

  /* Step 1: add new defs to our "add" set. */
  for (idx = 0; idx < n; idx++) {
    upb_def *def = defs[idx];
    const char *fullname;
    upb_fielddef *f;

    if (upb_def_isfrozen(def)) {
      upb_status_seterrmsg(status, "added defs must be mutable");
      goto err;
    }

    fullname = upb_def_fullname(def);
    if (fullname == NULL) {
      upb_status_seterrmsg(
          status, "Anonymous defs cannot be added to a symtab");
      goto err;
    }

    f = upb_dyncast_fielddef_mutable(def);
    if (f != NULL) {
      /* Fielddefs are never accepted; only the error message differs. */
      if (!upb_fielddef_containingtypename(f)) {
        upb_status_seterrmsg(status,
                             "Standalone fielddefs must have a containing type "
                             "(extendee) name set");
        goto err;
      }
      /* TODO(haberman): allow adding extensions attached to files. */
      upb_status_seterrf(status, "Can't add extensions to symtab.\n");
      goto err;
    }

    if (upb_strtable_lookup(&addtab, fullname, NULL)) {
      upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
      goto err;
    }
    if (upb_strtable_lookup(&s->symtab, fullname, NULL)) {
      upb_status_seterrf(status, "Symtab already has a def named '%s'",
                         fullname);
      goto err;
    }
    if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) {
      goto oom_err;
    }
    upb_def_donateref(def, ref_donor, s);
  }

  /* Step 2: using the table, resolve symbolic references for subdefs. */
  for (upb_strtable_begin(&it, &addtab);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    const char *base;
    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&it));
    upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
    upb_msg_field_iter j;

    if (m == NULL) {
      continue;
    }

    /* Type names are resolved relative to the message in which they appear. */
    base = upb_msgdef_fullname(m);

    for (upb_msg_field_begin(&j, m);
         !upb_msg_field_done(&j);
         upb_msg_field_next(&j)) {
      upb_fielddef *f = upb_msg_iter_field(&j);
      const char *name = upb_fielddef_subdefname(f);
      upb_def *subdef;

      if (!name || upb_fielddef_subdef(f)) {
        continue;  /* Nothing symbolic left to resolve on this field. */
      }

      /* Try the lookup in the current set of to-be-added defs first.  If not
       * there, try existing defs. */
      subdef = upb_resolvename(&addtab, base, name);
      if (subdef == NULL) {
        subdef = upb_resolvename(&s->symtab, base, name);
      }

      if (subdef == NULL) {
        upb_status_seterrf(
            status, "couldn't resolve name '%s' in message '%s'", name, base);
        goto err;
      }
      if (!upb_fielddef_setsubdef(f, subdef, status)) {
        goto err;
      }
    }
  }

  /* Step 3: we need an array of the defs in addtab, for passing to
   * upb_refcounted_freeze().  Leave room for |freeze_also|. */
  capacity = upb_strtable_count(&addtab);
  if (freeze_also) {
    capacity++;
  }

  add_defs = upb_gmalloc(sizeof(void*) * capacity);
  if (add_defs == NULL) {
    goto oom_err;
  }

  def_count = 0;
  for (upb_strtable_begin(&it, &addtab);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    add_defs[def_count++] = upb_value_getptr(upb_strtable_iter_value(&it));
  }

  /* Validate defs. */
  if (!_upb_def_validate(add_defs, def_count, status)) {
    goto err;
  }

  /* Cheat a little and give the array a new type.
   * This is probably undefined behavior, but this code will be deleted soon. */
  add_objs = (upb_refcounted**)add_defs;

  obj_count = def_count;
  if (freeze_also) {
    add_objs[obj_count++] = freeze_also;
  }

  if (!upb_refcounted_freeze(add_objs, obj_count, status,
                             UPB_MAX_MESSAGE_DEPTH * 2)) {
    goto err;
  }

  /* This must be delayed until all errors have been detected, since error
   * recovery code uses this table to cleanup defs. */
  upb_strtable_uninit(&addtab);

  /* Step 4.
   * TODO(haberman) we don't properly handle errors after this point (like
   * OOM in upb_strtable_insert() below). */
  for (idx = 0; idx < def_count; idx++) {
    upb_def *def = (upb_def*)add_objs[idx];
    const char *name = upb_def_fullname(def);
    bool inserted = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
    UPB_ASSERT(inserted);
  }

  upb_gfree(add_defs);
  return true;

oom_err:
  upb_status_seterrmsg(status, "out of memory");
err:
  /* We need to donate the refs back. */
  for (upb_strtable_begin(&it, &addtab);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&it));
    upb_def_donateref(def, s, ref_donor);
  }
  upb_strtable_uninit(&addtab);
  upb_gfree(add_defs);
  UPB_ASSERT(!upb_ok(status));
  return false;
}
3453
/* Adds the |n| defs in |defs| to |s|; a thin wrapper around symtab_add()
 * with no extra object to freeze. */
bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
                    void *ref_donor, upb_status *status) {
  upb_refcounted *no_extra_object = NULL;
  return symtab_add(s, defs, n, ref_donor, no_extra_object, status);
}
3458
/* Adds every def of |file| to |s| and has the file itself frozen along with
 * them.
 *
 * A file without defs is a no-op that returns true.  Returns false with
 * |status| set if the temporary def array cannot be allocated or if
 * symtab_add() fails. */
bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status *status) {
  const size_t count = upb_filedef_defcount(file);
  upb_def **list;
  size_t k;
  bool ok;

  if (count == 0) {
    return true;
  }

  list = upb_gmalloc(sizeof(*list) * count);
  if (list == NULL) {
    upb_status_seterrmsg(status, "Out of memory");
    return false;
  }

  /* Gather the file's defs into a flat array for symtab_add(). */
  for (k = 0; k < count; k++) {
    list[k] = upb_filedef_mutabledef(file, k);
  }

  ok = symtab_add(s, list, count, NULL, upb_filedef_upcast_mutable(file),
                  status);

  upb_gfree(list);
  return ok;
}
3485
3486 /* Iteration. */
3487
/* Moves |iter| forward until it rests on a def whose type equals the
 * iterator's filter type, or until the table is exhausted.  With the filter
 * UPB_DEF_ANY every entry matches, so nothing is skipped. */
static void advance_to_matching(upb_symtab_iter *iter) {
  if (iter->type != UPB_DEF_ANY) {
    for (;;) {
      if (upb_strtable_done(&iter->iter)) {
        break;
      }
      if (upb_symtab_iter_def(iter)->type == iter->type) {
        break;  /* Current entry matches the filter. */
      }
      upb_strtable_next(&iter->iter);
    }
  }
}
3497
/* Starts iteration over the defs in |s| whose type matches |type|
 * (UPB_DEF_ANY matches everything) and positions |iter| on the first
 * match. */
void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
                      upb_deftype_t type) {
  iter->type = type;
  upb_strtable_begin(&iter->iter, &s->symtab);
  advance_to_matching(iter);
}
3504
/* Steps |iter| past the current def and on to the next def that matches the
 * iterator's type filter. */
void upb_symtab_next(upb_symtab_iter *iter) {
  upb_strtable_iter *inner = &iter->iter;
  upb_strtable_next(inner);
  advance_to_matching(iter);
}
3509
upb_symtab_done(const upb_symtab_iter * iter)3510 bool upb_symtab_done(const upb_symtab_iter *iter) {
3511 return upb_strtable_done(&iter->iter);
3512 }
3513
upb_symtab_iter_def(const upb_symtab_iter * iter)3514 const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
3515 return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
3516 }
3517 /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
3518
3519
/* Number of bytes reserved for a single varint before it is written. */
#define UPB_PB_VARINT_MAX_LEN 10

/* Makes the enclosing function return false when |x| evaluates to false. */
#define CHK(x)        \
  do {                \
    if (!(x)) {       \
      return false;   \
    }                 \
  } while (0)
3522
3523 /* Maps descriptor type -> upb field type. */
3524 static const uint8_t upb_desctype_to_fieldtype2[] = {
3525 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
3526 UPB_TYPE_DOUBLE, /* DOUBLE */
3527 UPB_TYPE_FLOAT, /* FLOAT */
3528 UPB_TYPE_INT64, /* INT64 */
3529 UPB_TYPE_UINT64, /* UINT64 */
3530 UPB_TYPE_INT32, /* INT32 */
3531 UPB_TYPE_UINT64, /* FIXED64 */
3532 UPB_TYPE_UINT32, /* FIXED32 */
3533 UPB_TYPE_BOOL, /* BOOL */
3534 UPB_TYPE_STRING, /* STRING */
3535 UPB_TYPE_MESSAGE, /* GROUP */
3536 UPB_TYPE_MESSAGE, /* MESSAGE */
3537 UPB_TYPE_BYTES, /* BYTES */
3538 UPB_TYPE_UINT32, /* UINT32 */
3539 UPB_TYPE_ENUM, /* ENUM */
3540 UPB_TYPE_INT32, /* SFIXED32 */
3541 UPB_TYPE_INT64, /* SFIXED64 */
3542 UPB_TYPE_INT32, /* SINT32 */
3543 UPB_TYPE_INT64, /* SINT64 */
3544 };
3545
/* Writes |val| to |buf| as a base-128 varint, least-significant group first,
 * with the high bit of every byte except the last set.  Always writes at
 * least one byte (zero encodes as a single 0x00) and returns the number of
 * bytes written.  |buf| must have room for the whole encoding. */
static size_t upb_encode_varint(uint64_t val, char *buf) {
  size_t written = 0;

  do {
    uint8_t group = val & 0x7fU;
    val >>= 7;
    if (val) {
      group |= 0x80U;  /* More groups follow. */
    }
    buf[written++] = group;
  } while (val);

  return written;
}
3558
/* ZigZag-encodes |n|: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * The shift is performed on the unsigned representation because left-shifting
 * a negative (or overflowing) signed value is undefined behavior in C.  The
 * sign mask is built explicitly rather than with a signed right shift, whose
 * result for negative values is implementation-defined. */
static uint32_t upb_zzencode_32(int32_t n) {
  const uint32_t sign_mask = (n < 0) ? ~(uint32_t)0 : (uint32_t)0;
  return ((uint32_t)n << 1) ^ sign_mask;
}
/* ZigZag-encodes |n|: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * The shift is performed on the unsigned representation because left-shifting
 * a negative (or overflowing) signed value is undefined behavior in C.  The
 * sign mask is built explicitly rather than with a signed right shift, whose
 * result for negative values is implementation-defined. */
static uint64_t upb_zzencode_64(int64_t n) {
  const uint64_t sign_mask = (n < 0) ? ~(uint64_t)0 : (uint64_t)0;
  return ((uint64_t)n << 1) ^ sign_mask;
}
3561
3562 typedef struct {
3563 upb_alloc *alloc;
3564 char *buf, *ptr, *limit;
3565 } upb_encstate;
3566
/* Returns the smallest value of the form 128 * 2^k (k >= 0) that is not less
 * than |bytes|; in particular, anything up to 128 rounds to 128. */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t capacity;

  for (capacity = 128; capacity < bytes; capacity *= 2) {
    /* Keep doubling. */
  }
  return capacity;
}
3574
/* Grows the encoder buffer so that at least |bytes| more bytes fit in front
 * of the data already written, keeping that data flush against the end of
 * the new buffer.  Returns false, leaving |e| unchanged, if reallocation
 * fails.
 *
 * All sizes are captured before reallocating, and the relocation below reads
 * from the NEW buffer: once upb_realloc() succeeds the old block may have
 * been released, so the old |e->buf| must no longer be read (it may also be
 * NULL on the very first growth). */
static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
  size_t old_size = e->limit - e->buf;
  size_t used = e->limit - e->ptr;
  size_t new_size = upb_roundup_pow2(bytes + used);
  char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
  CHK(new_buf);

  /* We want previous data at the end, realloc() put it at the beginning. */
  if (old_size > 0) {
    memmove(new_buf + new_size - old_size, new_buf, old_size);
  }

  e->ptr = new_buf + new_size - used;
  e->limit = new_buf + new_size;
  e->buf = new_buf;
  return true;
}
3589
3590 /* Call to ensure that at least "bytes" bytes are available for writing at
3591 * e->ptr. Returns false if the bytes could not be allocated. */
upb_encode_reserve(upb_encstate * e,size_t bytes)3592 static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
3593 CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) ||
3594 upb_encode_growbuffer(e, bytes));
3595
3596 e->ptr -= bytes;
3597 return true;
3598 }
3599
3600 /* Writes the given bytes to the buffer, handling reserve/advance. */
upb_put_bytes(upb_encstate * e,const void * data,size_t len)3601 static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
3602 CHK(upb_encode_reserve(e, len));
3603 memcpy(e->ptr, data, len);
3604 return true;
3605 }
3606
/* Writes "val" as a fixed 64-bit value.  The bytes are emitted explicitly in
 * little-endian order, so the output does not depend on host endianness.
 * Returns false if the space could not be reserved. */
static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
  unsigned char le[8];
  size_t i;

  for (i = 0; i < sizeof(le); i++) {
    le[i] = (unsigned char)(val & 0xffU);
    val >>= 8;
  }
  return upb_put_bytes(e, le, sizeof(le));
}
3611
/* Writes "val" as a fixed 32-bit value.  The bytes are emitted explicitly in
 * little-endian order, so the output does not depend on host endianness.
 * Returns false if the space could not be reserved. */
static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
  unsigned char le[4];
  size_t i;

  for (i = 0; i < sizeof(le); i++) {
    le[i] = (unsigned char)(val & 0xffU);
    val >>= 8;
  }
  return upb_put_bytes(e, le, sizeof(le));
}
3616
/* Writes "val" to the output as a varint.  Space for the longest possible
 * varint is reserved, the value is encoded at the bottom of that space, and
 * the encoded bytes are then slid up against the data already written so no
 * gap is left.  Returns false if the space could not be reserved. */
static bool upb_put_varint(upb_encstate *e, uint64_t val) {
  char *scratch;
  char *dst;
  size_t n;

  CHK(upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN));

  scratch = e->ptr;
  n = upb_encode_varint(val, scratch);
  dst = scratch + UPB_PB_VARINT_MAX_LEN - n;
  memmove(dst, scratch, n);
  e->ptr = dst;
  return true;
}
3627
/* Writes the object representation of "d" as a fixed 64-bit value. */
static bool upb_put_double(upb_encstate *e, double d) {
  uint64_t bits;
  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  /* Copy the bits rather than converting the numeric value. */
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed64(e, bits);
}
3634
/* Writes the object representation of "d" as a fixed 32-bit value. */
static bool upb_put_float(upb_encstate *e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  /* Copy the bits rather than converting the numeric value. */
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed32(e, bits);
}
3641
upb_readcase(const char * msg,const upb_msglayout_field * f)3642 static uint32_t upb_readcase(const char *msg, const upb_msglayout_field *f) {
3643 uint32_t ret;
3644 uint32_t offset = ~f->presence;
3645 memcpy(&ret, msg + offset, sizeof(ret));
3646 return ret;
3647 }
3648
upb_readhasbit(const char * msg,const upb_msglayout_field * f)3649 static bool upb_readhasbit(const char *msg, const upb_msglayout_field *f) {
3650 uint32_t hasbit = f->presence;
3651 UPB_ASSERT(f->presence > 0);
3652 return msg[hasbit / 8] & (1 << (hasbit % 8));
3653 }
3654
/* Writes a field tag: the field number shifted left by three bits, combined
 * with the wire type, encoded as a varint.  The tag is assembled in unsigned
 * 32-bit arithmetic; a signed shift would overflow for field numbers of 2^28
 * and above and then widen into an incorrect ten-byte varint.
 * Returns false if the space could not be reserved. */
static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
  uint32_t tag = ((uint32_t)field_number << 3) | (uint32_t)wire_type;
  return upb_put_varint(e, tag);
}
3658
/* Writes the raw contents of "arr", whose elements are "size" bytes each,
 * followed by a varint holding the total byte count.  Because output is
 * built back to front, the count precedes the data in the final encoding. */
static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
                               size_t size) {
  size_t payload = arr->len * size;
  if (!upb_put_bytes(e, arr->data, payload)) {
    return false;
  }
  return upb_put_varint(e, payload);
}
3664
3665 bool upb_encode_message(upb_encstate *e, const char *msg,
3666 const upb_msglayout *m, size_t *size);
3667
/* Encodes the repeated field "f" of a message whose layout is "m".
 * "field_mem" points at the slot in the message that holds the upb_array
 * pointer.  A NULL or empty array produces no output.
 *
 * Elements are visited last to first because the output buffer is filled
 * back to front.  Primitive arrays are written as a single length-delimited
 * run; strings, bytes, groups and messages get one tagged record each.
 * Returns false if any write fails. */
static bool upb_encode_array(upb_encstate *e, const char *field_mem,
                             const upb_msglayout *m,
                             const upb_msglayout_field *f) {
  const upb_array *arr = *(const upb_array**)field_mem;

  if (!arr || arr->len == 0) {
    return true;
  }

  UPB_ASSERT(arr->type == upb_desctype_to_fieldtype2[f->descriptortype]);

/* Writes every element as a varint (last element first), then the byte
 * length of the run.  "encode" is an expression over the element pointer
 * "elem".  The array is known to be non-empty at this point. */
#define PACKED_VARINTS(ctype, encode) { \
  ctype *first = arr->data; \
  ctype *elem = first + arr->len; \
  size_t before = e->limit - e->ptr; \
  while (elem != first) { \
    elem--; \
    CHK(upb_put_varint(e, encode)); \
  } \
  CHK(upb_put_varint(e, e->limit - e->ptr - before)); \
} \
break

  switch (f->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      CHK(upb_put_fixedarray(e, arr, sizeof(double)));
      break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      CHK(upb_put_fixedarray(e, arr, sizeof(float)));
      break;
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t)));
      break;
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t)));
      break;
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      PACKED_VARINTS(uint64_t, *elem);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      PACKED_VARINTS(uint32_t, *elem);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Widen through int64_t so negative values are sign-extended. */
      PACKED_VARINTS(int32_t, (int64_t)*elem);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      PACKED_VARINTS(bool, *elem);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      PACKED_VARINTS(int32_t, upb_zzencode_32(*elem));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      PACKED_VARINTS(int64_t, upb_zzencode_64(*elem));
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      upb_strview *first = arr->data;
      upb_strview *view = first + arr->len;
      while (view != first) {
        view--;
        /* Payload, then length, then tag: reversed on the wire. */
        CHK(upb_put_bytes(e, view->data, view->size));
        CHK(upb_put_varint(e, view->size));
        CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
      }
      return true;
    }
    case UPB_DESCRIPTOR_TYPE_GROUP: {
      const upb_msglayout *sublayout = m->submsgs[f->submsg_index];
      void **first = arr->data;
      void **item = first + arr->len;
      while (item != first) {
        size_t size;
        item--;
        /* End tag is written first so that it lands after the body. */
        CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP));
        CHK(upb_encode_message(e, *item, sublayout, &size));
        CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP));
      }
      return true;
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      const upb_msglayout *sublayout = m->submsgs[f->submsg_index];
      void **first = arr->data;
      void **item = first + arr->len;
      while (item != first) {
        size_t size;
        item--;
        CHK(upb_encode_message(e, *item, sublayout, &size));
        CHK(upb_put_varint(e, size));
        CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
      }
      return true;
    }
  }
#undef PACKED_VARINTS

  /* Only the primitive cases reach this point.  They are always encoded as
   * packed, regardless of what the .proto file specified, so one
   * length-delimited tag covers the whole run.  One-element arrays could be
   * special-cased. */
  CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
  return true;
}
3767
/* Encodes the non-repeated field "f" of a message whose layout is "m".
 * "field_mem" points at the field's storage inside the message.  When
 * "skip_zero_value" is set, a zero number or an empty string produces no
 * output.  A NULL submessage pointer never produces output.
 *
 * The value is written before its tag because the output buffer is filled
 * back to front.  Returns false if any write fails. */
static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem,
                                   const upb_msglayout *m,
                                   const upb_msglayout_field *f,
                                   bool skip_zero_value) {
/* Loads the field as "ctype" into "v", then writes "encodeval" (an
 * expression over "v") with upb_put_<type>() followed by the field tag.
 * Every expansion returns from the function. */
#define PUT_SCALAR(ctype, type, wire_type, encodeval) { \
  ctype v = *(ctype*)field_mem; \
  if (skip_zero_value && v == 0) { \
    return true; \
  } \
  if (!upb_put_ ## type(e, encodeval)) { \
    return false; \
  } \
  return upb_put_tag(e, f->number, wire_type); \
}

  switch (f->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      PUT_SCALAR(double, double, UPB_WIRE_TYPE_64BIT, v);
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      PUT_SCALAR(float, float, UPB_WIRE_TYPE_32BIT, v);
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      PUT_SCALAR(uint64_t, varint, UPB_WIRE_TYPE_VARINT, v);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      PUT_SCALAR(uint32_t, varint, UPB_WIRE_TYPE_VARINT, v);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Widen through int64_t so negative values are sign-extended. */
      PUT_SCALAR(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)v);
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      PUT_SCALAR(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, v);
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      PUT_SCALAR(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, v);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      PUT_SCALAR(bool, varint, UPB_WIRE_TYPE_VARINT, v);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      PUT_SCALAR(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(v));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      PUT_SCALAR(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(v));
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      upb_strview str = *(upb_strview*)field_mem;
      if (skip_zero_value && str.size == 0) {
        return true;
      }
      if (!upb_put_bytes(e, str.data, str.size)) return false;
      if (!upb_put_varint(e, str.size)) return false;
      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
    }
    case UPB_DESCRIPTOR_TYPE_GROUP: {
      size_t size;
      void *child = *(void **)field_mem;
      const upb_msglayout *sublayout = m->submsgs[f->submsg_index];
      if (!child) {
        return true;
      }
      /* End tag is written first so that it lands after the body. */
      if (!upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP)) return false;
      if (!upb_encode_message(e, child, sublayout, &size)) return false;
      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      size_t size;
      void *child = *(void **)field_mem;
      const upb_msglayout *sublayout = m->submsgs[f->submsg_index];
      if (!child) {
        return true;
      }
      if (!upb_encode_message(e, child, sublayout, &size)) return false;
      if (!upb_put_varint(e, size)) return false;
      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
    }
  }
#undef PUT_SCALAR
  UPB_UNREACHABLE();
}
3842
/* Encodes every field of "msg" (layout "m") into the encoder state and
 * stores the number of bytes this message contributed in "*size".
 *
 * Fields are visited from the last layout entry to the first because the
 * output buffer is filled back to front.  Unknown-field data is written
 * last, so it ends up in front of the known fields in the final encoding.
 *
 * Returns false if any write fails; "*size" is not written in that case. */
bool upb_encode_message(upb_encstate *e, const char *msg,
                        const upb_msglayout *m, size_t *size) {
  int i;
  size_t pre_len = e->limit - e->ptr;
  const char *unknown;
  size_t unknown_size;

  for (i = m->field_count - 1; i >= 0; i--) {
    const upb_msglayout_field *f = &m->fields[i];

    if (f->label == UPB_LABEL_REPEATED) {
      CHK(upb_encode_array(e, msg + f->offset, m, f));
    } else {
      bool skip_empty = false;
      if (f->presence == 0) {
        /* Proto3 presence. */
        skip_empty = true;
      } else if (f->presence > 0) {
        /* Proto2 presence: hasbit. */
        if (!upb_readhasbit(msg, f)) {
          continue;
        }
      } else {
        /* Field is in a oneof. */
        if (upb_readcase(msg, f) != f->number) {
          continue;
        }
      }
      CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty));
    }
  }

  unknown = upb_msg_getunknown(msg, &unknown_size);

  if (unknown) {
    /* Propagate failure instead of silently dropping the unknown fields. */
    CHK(upb_put_bytes(e, unknown, unknown_size));
  }

  *size = (e->limit - e->ptr) - pre_len;
  return true;
}
3884
/* Encodes "msg" (layout "m") using the allocator of "arena".  On success
 * returns a pointer to the encoded bytes and stores their count in "*size";
 * an empty encoding yields a pointer to a static byte and a size of zero, so
 * the result is never NULL on success.  On failure returns NULL and sets
 * "*size" to zero. */
char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
                 size_t *size) {
  static char empty;
  upb_encstate e;

  e.alloc = upb_arena_alloc(arena);
  e.buf = NULL;
  e.ptr = NULL;
  e.limit = NULL;

  if (!upb_encode_message(&e, msg, m, size)) {
    *size = 0;
    return NULL;
  }

  *size = e.limit - e.ptr;

  if (*size != 0) {
    UPB_ASSERT(e.ptr);
    return e.ptr;
  }

  /* Nothing was written, so there is no buffer to hand back. */
  return &empty;
}
3908
3909 #undef CHK
3910 /*
3911 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
3912 ** UPB_ASSERT() or return false.
3913 */
3914
3915
3916 #include <string.h>
3917
3918
/* Allocates "size" bytes with upb_gmalloc() and clears them to zero.
 * Returns NULL if the allocation fails. */
static void *upb_calloc(size_t size) {
  void *block = upb_gmalloc(size);
  if (block == NULL) {
    return NULL;
  }
  memset(block, 0, size);
  return block;
}
3926
/* Exists only so that its address can serve as the unique pointer value
 * behind UPB_NO_CLOSURE; the contents are never meaningful. */
char _upb_noclosure;
3930
/* Destroys a upb_handlers object: runs every registered cleanup function on
 * its associated pointer, then releases the cleanup table, the reference on
 * the message definition, the subhandlers array and the object itself. */
static void freehandlers(upb_refcounted *r) {
  upb_handlers *h = (upb_handlers*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &h->cleanup_);
  while (!upb_inttable_done(&it)) {
    /* The table key is the pointer that was passed to addcleanup(). */
    void *target = (void*)upb_inttable_iter_key(&it);
    upb_value entry = upb_inttable_iter_value(&it);
    upb_handlerfree *cleanup = upb_value_getfptr(entry);
    cleanup(target);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&h->cleanup_);
  upb_msgdef_unref(h->msg, h);
  upb_gfree(h->sub);
  upb_gfree(h);
}
3948
/* Reports to "visit" every subhandlers object that is attached to a
 * submessage field of the handlers object "r". */
static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
                          void *closure) {
  const upb_handlers *h = (const upb_handlers*)r;
  upb_msg_field_iter it;

  upb_msg_field_begin(&it, h->msg);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *f = upb_msg_iter_field(&it);
    if (upb_fielddef_issubmsg(f)) {
      const upb_handlers *sub = upb_handlers_getsubhandlers(h, f);
      if (sub) {
        visit(r, upb_handlers_upcast(sub), closure);
      }
    }
    upb_msg_field_next(&it);
  }
}
3963
3964 static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
3965
3966 typedef struct {
3967 upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */
3968 upb_handlers_callback *callback;
3969 const void *closure;
3970 } dfs_state;
3971
3972 /* TODO(haberman): discard upb_handlers* objects that do not actually have any
3973 * handlers set and cannot reach any upb_handlers* object that does. This is
3974 * slightly tricky to do correctly. */
newformsg(const upb_msgdef * m,const void * owner,dfs_state * s)3975 static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
3976 dfs_state *s) {
3977 upb_msg_field_iter i;
3978 upb_handlers *h = upb_handlers_new(m, owner);
3979 if (!h) return NULL;
3980 if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
3981
3982 s->callback(s->closure, h);
3983
3984 /* For each submessage field, get or create a handlers object and set it as
3985 * the subhandlers. */
3986 for(upb_msg_field_begin(&i, m);
3987 !upb_msg_field_done(&i);
3988 upb_msg_field_next(&i)) {
3989 upb_fielddef *f = upb_msg_iter_field(&i);
3990 const upb_msgdef *subdef;
3991 upb_value subm_ent;
3992
3993 if (!upb_fielddef_issubmsg(f)) continue;
3994
3995 subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
3996 if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
3997 upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
3998 } else {
3999 upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
4000 if (!sub_mh) goto oom;
4001 upb_handlers_setsubhandlers(h, f, sub_mh);
4002 upb_handlers_unref(sub_mh, &sub_mh);
4003 }
4004 }
4005 return h;
4006
4007 oom:
4008 upb_handlers_unref(h, owner);
4009 return NULL;
4010 }
4011
/* Given a selector for a STARTSUBMSG handler, resolves to the slot holding
 * the subhandlers for this submessage field.  The result is an lvalue.  The
 * arguments are parenthesized so that any expression may be passed. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, (f)->index_)
4018
/* Looks up the selector for handler "type" on field "f" of the unfrozen
 * handlers "h".  Returns the selector, or -1 after recording an error in the
 * status of "h" when the field belongs to a different message or does not
 * accept that handler type. */
static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
                         upb_handlertype_t type) {
  upb_selector_t sel;
  const upb_msgdef *md;

  UPB_ASSERT(!upb_handlers_isfrozen(h));

  md = upb_handlers_msgdef(h);
  if (md != upb_fielddef_containingtype(f)) {
    upb_status_seterrf(
        &h->status_, "type mismatch: field %s does not belong to message %s",
        upb_fielddef_name(f), upb_msgdef_fullname(md));
    return -1;
  }

  if (upb_handlers_getselector(f, type, &sel)) {
    return sel;
  }

  upb_status_seterrf(
      &h->status_,
      "type mismatch: cannot register handler type %d for field %s",
      type, upb_fielddef_name(f));
  return -1;
}
4038
handlers_getsel(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)4039 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
4040 upb_handlertype_t type) {
4041 int32_t sel = trygetsel(h, f, type);
4042 UPB_ASSERT(sel >= 0);
4043 return sel;
4044 }
4045
/* Returns the address of the return-closure-type slot stored in the
 * attributes of handler "type" for field "f". */
static const void **returntype(upb_handlers *h, const upb_fielddef *f,
                               upb_handlertype_t type) {
  upb_selector_t sel = handlers_getsel(h, f, type);
  return &h->table[sel].attr.return_closure_type_;
}
4050
/* Installs "func" with attributes "attr" (defaults when NULL) in the handler
 * slot "sel" of the unfrozen handlers "h".  "f" is the field the handler
 * belongs to, or NULL for message-level handlers.
 *
 * Fails, recording an error in the status of "h", when the selector is
 * negative, the slot is already occupied, or the closure types given in the
 * attributes conflict with types established earlier.  Returns true when the
 * handler was stored. */
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
                  upb_handlertype_t type, upb_func *func,
                  upb_handlerattr *attr) {
  upb_handlerattr new_attr = UPB_HANDLERATTR_INITIALIZER;
  const void *wanted;
  const void **established;

  UPB_ASSERT(!upb_handlers_isfrozen(h));

  if (sel < 0) {
    upb_status_seterrmsg(&h->status_,
                         "incorrect handler type for this field.");
    return false;
  }

  if (h->table[sel].func) {
    upb_status_seterrmsg(&h->status_,
                         "cannot change handler once it has been set.");
    return false;
  }

  if (attr) {
    new_attr = *attr;
  }

  /* Find the slot that records the closure type for this handler's context:
   * the STARTSTR return type for string data, the STARTSEQ return type for
   * handlers that run inside a sequence, the top-level type otherwise. */
  wanted = upb_handlerattr_closuretype(&new_attr);

  if (type == UPB_HANDLER_STRING) {
    established = returntype(h, f, UPB_HANDLER_STARTSTR);
  } else if (f && upb_fielddef_isseq(f) &&
             type != UPB_HANDLER_STARTSEQ &&
             type != UPB_HANDLER_ENDSEQ) {
    established = returntype(h, f, UPB_HANDLER_STARTSEQ);
  } else {
    established = &h->top_closure_type;
  }

  if (wanted && *established && wanted != *established) {
    /* TODO(haberman): better message for debugging. */
    if (f) {
      upb_status_seterrf(&h->status_,
                         "closure type does not match for field %s",
                         upb_fielddef_name(f));
    } else {
      upb_status_seterrmsg(
          &h->status_, "closure type does not match for message-level handler");
    }
    return false;
  }

  if (wanted) {
    *established = wanted;
  }

  /* A STARTSEQ or STARTSTR handler also declares the closure type it
   * returns; that must agree with anything already recorded in its slot. */
  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
    const void *declared = upb_handlerattr_returnclosuretype(&new_attr);
    const void *recorded =
        upb_handlerattr_returnclosuretype(&h->table[sel].attr);

    if (declared && recorded && declared != recorded) {
      upb_status_seterrmsg(&h->status_, "closure return type does not match");
      return false;
    }

    /* Keep the recorded type when the new attributes do not specify one. */
    if (recorded && !declared) {
      upb_handlerattr_setreturnclosuretype(&new_attr, recorded);
    }
  }

  h->table[sel].func = func;
  h->table[sel].attr = new_attr;
  return true;
}
4126
4127 /* Returns the effective closure type for this handler (which will propagate
4128 * from outer frames if this frame has no START* handler). Not implemented for
4129 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is
4130 * the effective closure type is unspecified (either no handler was registered
4131 * to specify it or the handler that was registered did not specify the closure
4132 * type). */
effective_closure_type(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)4133 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
4134 upb_handlertype_t type) {
4135 const void *ret;
4136 upb_selector_t sel;
4137
4138 UPB_ASSERT(type != UPB_HANDLER_STRING);
4139 ret = h->top_closure_type;
4140
4141 if (upb_fielddef_isseq(f) &&
4142 type != UPB_HANDLER_STARTSEQ &&
4143 type != UPB_HANDLER_ENDSEQ &&
4144 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
4145 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
4146 }
4147
4148 if (type == UPB_HANDLER_STRING &&
4149 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
4150 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
4151 }
4152
4153 /* The effective type of the submessage; not used yet.
4154 * if (type == SUBMESSAGE &&
4155 * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
4156 * ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
4157 * } */
4158
4159 return ret;
4160 }
4161
4162 /* Checks whether the START* handler specified by f & type is missing even
4163 * though it is required to convert the established type of an outer frame
4164 * ("closure_type") into the established type of an inner frame (represented in
4165 * the return closure type of this handler's attr. */
checkstart(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type,upb_status * status)4166 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
4167 upb_status *status) {
4168 const void *closure_type;
4169 const upb_handlerattr *attr;
4170 const void *return_closure_type;
4171
4172 upb_selector_t sel = handlers_getsel(h, f, type);
4173 if (h->table[sel].func) return true;
4174 closure_type = effective_closure_type(h, f, type);
4175 attr = &h->table[sel].attr;
4176 return_closure_type = upb_handlerattr_returnclosuretype(attr);
4177 if (closure_type && return_closure_type &&
4178 closure_type != return_closure_type) {
4179 upb_status_seterrf(status,
4180 "expected start handler to return sub type for field %f",
4181 upb_fielddef_name(f));
4182 return false;
4183 }
4184 return true;
4185 }
4186
4187 /* Public interface ***********************************************************/
4188
upb_handlers_new(const upb_msgdef * md,const void * owner)4189 upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
4190 int extra;
4191 upb_handlers *h;
4192
4193 UPB_ASSERT(upb_msgdef_isfrozen(md));
4194
4195 extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
4196 h = upb_calloc(sizeof(*h) + extra);
4197 if (!h) return NULL;
4198
4199 h->msg = md;
4200 upb_msgdef_ref(h->msg, h);
4201 upb_status_clear(&h->status_);
4202
4203 if (md->submsg_field_count > 0) {
4204 h->sub = upb_calloc(md->submsg_field_count * sizeof(*h->sub));
4205 if (!h->sub) goto oom;
4206 } else {
4207 h->sub = 0;
4208 }
4209
4210 if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
4211 goto oom;
4212 if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
4213
4214 /* calloc() above initialized all handlers to NULL. */
4215 return h;
4216
4217 oom:
4218 freehandlers(upb_handlers_upcast_mutable(h));
4219 return NULL;
4220 }
4221
upb_handlers_newfrozen(const upb_msgdef * m,const void * owner,upb_handlers_callback * callback,const void * closure)4222 const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
4223 const void *owner,
4224 upb_handlers_callback *callback,
4225 const void *closure) {
4226 dfs_state state;
4227 upb_handlers *ret;
4228 bool ok;
4229 upb_refcounted *r;
4230
4231 state.callback = callback;
4232 state.closure = closure;
4233 if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
4234
4235 ret = newformsg(m, owner, &state);
4236
4237 upb_inttable_uninit(&state.tab);
4238 if (!ret) return NULL;
4239
4240 r = upb_handlers_upcast_mutable(ret);
4241 ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
4242 UPB_ASSERT(ok);
4243
4244 return ret;
4245 }
4246
upb_handlers_status(upb_handlers * h)4247 const upb_status *upb_handlers_status(upb_handlers *h) {
4248 UPB_ASSERT(!upb_handlers_isfrozen(h));
4249 return &h->status_;
4250 }
4251
/* Resets the error status of "h".  "h" must not be frozen. */
void upb_handlers_clearerr(upb_handlers *h) {
  upb_status *status = &h->status_;
  UPB_ASSERT(!upb_handlers_isfrozen(h));
  upb_status_clear(status);
}
4256
4257 #define SETTER(name, handlerctype, handlertype) \
4258 bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
4259 handlerctype func, upb_handlerattr *attr) { \
4260 int32_t sel = trygetsel(h, f, handlertype); \
4261 return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
4262 }
4263
SETTER(int32,upb_int32_handlerfunc *,UPB_HANDLER_INT32)4264 SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32)
4265 SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64)
4266 SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32)
4267 SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64)
4268 SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT)
4269 SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE)
4270 SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL)
4271 SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR)
4272 SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING)
4273 SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR)
4274 SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ)
4275 SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG)
4276 SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG)
4277 SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ)
4278
4279 #undef SETTER
4280
4281 bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func,
4282 upb_handlerattr *attr) {
4283 return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32,
4284 (upb_func *)func, attr);
4285 }
4286
/* Registers "func" in the start-of-message handler slot of "h".  No field is
 * involved, so doset() is given a NULL field; UPB_HANDLER_INT32 is passed as
 * the handler type for this message-level slot. */
bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
                              upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;
  return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic,
               attr);
}
4292
/* Registers "func" in the end-of-message handler slot of "h", which must not
 * be frozen.  No field is involved, so doset() is given a NULL field;
 * UPB_HANDLER_INT32 is passed as the handler type for this message-level
 * slot. */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;
  UPB_ASSERT(!upb_handlers_isfrozen(h));
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic, attr);
}
4299
/* Attaches "sub" as the handlers for submessage field "f" of the unfrozen
 * handlers "h" and takes a reference on it.  Returns false, changing
 * nothing, if the field already has subhandlers or if "sub" was built for a
 * message type other than the field's. */
bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
                                 const upb_handlers *sub) {
  UPB_ASSERT(sub);
  UPB_ASSERT(!upb_handlers_isfrozen(h));
  UPB_ASSERT(upb_fielddef_issubmsg(f));

  /* Subhandlers can be set only once, and only for the matching type. */
  if (SUBH_F(h, f) ||
      upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
    return false;
  }

  SUBH_F(h, f) = sub;
  upb_ref2(sub, h);
  return true;
}
4313
/* Returns the subhandlers attached to submessage field "f" of "h"; the slot
 * is empty (NULL) until upb_handlers_setsubhandlers() fills it. */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  const upb_handlers *sub;
  UPB_ASSERT(upb_fielddef_issubmsg(f));
  sub = SUBH_F(h, f);
  return sub;
}
4319
/* Copies the attributes of the handler at selector "sel" into "*attr".
 * Returns false, leaving "*attr" untouched, when no handler is set there. */
bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
                          upb_handlerattr *attr) {
  if (upb_handlers_gethandler(h, sel)) {
    *attr = h->table[sel].attr;
    return true;
  }
  return false;
}
4327
/* Returns the subhandlers for the submessage field whose STARTSUBMSG
 * selector is "sel".  Subtracting the number of static selectors turns that
 * selector into an index into the subhandlers array. */
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
                                                    upb_selector_t sel) {
  const upb_handlers *sub = SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
  return sub;
}
4333
upb_handlers_msgdef(const upb_handlers * h)4334 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
4335
/* Registers "func" to be called with "p" when "h" is destroyed.  Each
 * pointer can be registered only once.
 *
 * Returns false if "p" is already registered or if the registration could
 * not be stored; in either case "func" will not be called for this request,
 * so the caller keeps responsibility for "p". */
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
  if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
    return false;
  }
  /* Report an insertion failure instead of claiming success. */
  return upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
}
4345
4346
4347 /* "Static" methods ***********************************************************/
4348
/* Validates and freezes the "n" handlers objects in "handlers".
 *
 * Fails if any object carries an error status, if a required START* handler
 * is missing, if an empty subhandlers object cannot be allocated, or if the
 * underlying refcounted freeze fails.  On failure the reason is recorded in
 * "s" and false is returned. */
bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
  /* TODO: verify we have a transitive closure. */
  int i;
  for (i = 0; i < n; i++) {
    upb_msg_field_iter j;
    upb_handlers *h = handlers[i];

    if (!upb_ok(&h->status_)) {
      upb_status_seterrf(s, "handlers for message %s had error status: %s",
                         upb_msgdef_fullname(upb_handlers_msgdef(h)),
                         upb_status_errmsg(&h->status_));
      return false;
    }

    /* Check that there are no closure mismatches due to missing Start* handlers
     * or subhandlers with different type-level types. */
    for(upb_msg_field_begin(&j, h->msg);
        !upb_msg_field_done(&j);
        upb_msg_field_next(&j)) {

      const upb_fielddef *f = upb_msg_iter_field(&j);
      if (upb_fielddef_isseq(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
          return false;
      }

      if (upb_fielddef_isstring(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
          return false;
      }

      if (upb_fielddef_issubmsg(f)) {
        bool hashandler = false;
        if (upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
            upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
          hashandler = true;
        }

        if (upb_fielddef_isseq(f) &&
            (upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
             upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
          hashandler = true;
        }

        if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
          /* For now we add an empty subhandlers in this case.  It makes the
           * decoder code generator simpler, because it only has to handle two
           * cases (submessage has handlers or not) as opposed to three
           * (submessage has handlers in enclosing message but no subhandlers).
           *
           * This makes parsing less efficient in the case that we want to
           * notice a submessage but skip its contents (like if we're testing
           * for submessage presence or counting the number of repeated
           * submessages).  In this case we will end up parsing the submessage
           * field by field and throwing away the results for each, instead of
           * skipping the whole delimited thing at once.  If this is an issue we
           * can revisit it, but do remember that this only arises when you have
           * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
           * submessage but no subhandlers.  The use cases for this are
           * limited. */
          upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
          if (!sub) {
            /* Creation can fail; do not hand a NULL pointer onwards. */
            upb_status_seterrmsg(s, "out of memory");
            return false;
          }
          upb_handlers_setsubhandlers(h, f, sub);
          upb_handlers_unref(sub, &sub);
        }

        /* TODO(haberman): check type of submessage.
         * This is slightly tricky; also consider whether we should check that
         * they match at setsubhandlers time. */
      }
    }
  }

  if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
                             UPB_MAX_HANDLER_DEPTH)) {
    return false;
  }

  return true;
}
4432
upb_handlers_getprimitivehandlertype(const upb_fielddef * f)4433 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
4434 switch (upb_fielddef_type(f)) {
4435 case UPB_TYPE_INT32:
4436 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
4437 case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
4438 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
4439 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
4440 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
4441 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
4442 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
4443 default: UPB_ASSERT(false); return -1; /* Invalid input. */
4444 }
4445 }
4446
/* Computes the selector for handler type |type| on field |f| and stores it in
 * |*s|.  Returns false (leaving |*s| untouched) when |type| does not apply to
 * the field; returns true otherwise. */
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
                              upb_selector_t *s) {
  switch (type) {
    case UPB_HANDLER_INT32:
    case UPB_HANDLER_INT64:
    case UPB_HANDLER_UINT32:
    case UPB_HANDLER_UINT64:
    case UPB_HANDLER_FLOAT:
    case UPB_HANDLER_DOUBLE:
    case UPB_HANDLER_BOOL:
      /* The value handler must match the field's own primitive type. */
      if (upb_fielddef_isprimitive(f) &&
          upb_handlers_getprimitivehandlertype(f) == type) {
        *s = f->selector_base;
        break;
      }
      return false;

    case UPB_HANDLER_STRING:
      if (upb_fielddef_isstring(f)) {
        *s = f->selector_base;
        break;
      }
      if (upb_fielddef_lazy(f)) {
        *s = f->selector_base + 3;
        break;
      }
      return false;

    case UPB_HANDLER_STARTSTR:
      if (!(upb_fielddef_isstring(f) || upb_fielddef_lazy(f))) {
        return false;
      }
      *s = f->selector_base + 1;
      break;

    case UPB_HANDLER_ENDSTR:
      if (!(upb_fielddef_isstring(f) || upb_fielddef_lazy(f))) {
        return false;
      }
      *s = f->selector_base + 2;
      break;

    case UPB_HANDLER_STARTSEQ:
      if (!upb_fielddef_isseq(f)) {
        return false;
      }
      *s = f->selector_base - 2;
      break;

    case UPB_HANDLER_ENDSEQ:
      if (!upb_fielddef_isseq(f)) {
        return false;
      }
      *s = f->selector_base - 1;
      break;

    case UPB_HANDLER_STARTSUBMSG:
      if (!upb_fielddef_issubmsg(f)) {
        return false;
      }
      /* STARTSUBMSG selectors sit at the front of the table so the selector
       * can double as an index into the "sub" array of subhandlers.  The two
       * tables use the same indexes, except that the handler table begins
       * with the static selectors. */
      *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
      break;

    case UPB_HANDLER_ENDSUBMSG:
      if (!upb_fielddef_issubmsg(f)) {
        return false;
      }
      *s = f->selector_base;
      break;
  }

  UPB_ASSERT((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
  return true;
}
4509
/* Returns 2 for sequence fields and 0 for every other field. */
uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
4513
/* Returns how many selectors field |f| is counted as needing: one base
 * selector plus extras depending on the kind of field. */
uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;

  if (upb_fielddef_isseq(f)) {
    count += 2;  /* STARTSEQ/ENDSEQ */
  }
  if (upb_fielddef_isstring(f)) {
    count += 2;  /* [STRING]/STARTSTR/ENDSTR */
  }
  /* A submessage adds nothing for ENDSUBMSG (STARTSUBMSG is at the table
   * beginning); only lazy submessages need the extra string selectors. */
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) {
    count += 3;  /* STARTSTR/ENDSTR/STRING (for lazy) */
  }
  return count;
}
4528
4529
4530 /* upb_handlerattr ************************************************************/
4531
/* Resets |attr| to the values of UPB_HANDLERATTR_INITIALIZER. */
void upb_handlerattr_init(upb_handlerattr *attr) {
  const upb_handlerattr defaults = UPB_HANDLERATTR_INITIALIZER;

  memcpy(attr, &defaults, sizeof(defaults));
}
4536
/* Counterpart of upb_handlerattr_init().  Currently does nothing with
 * |attr|. */
void upb_handlerattr_uninit(upb_handlerattr *attr) {
  UPB_UNUSED(attr);
}
4540
/* Stores |hd| as the handler data of |attr|.  Always returns true. */
bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
  attr->handler_data_ = hd;

  return true;
}
4545
/* Stores |type| as the closure type of |attr|.  Always returns true. */
bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
  attr->closure_type_ = type;

  return true;
}
4550
upb_handlerattr_closuretype(const upb_handlerattr * attr)4551 const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
4552 return attr->closure_type_;
4553 }
4554
/* Stores |type| as the return closure type of |attr|.  Always returns true. */
bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
                                          const void *type) {
  attr->return_closure_type_ = type;

  return true;
}
4560
upb_handlerattr_returnclosuretype(const upb_handlerattr * attr)4561 const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
4562 return attr->return_closure_type_;
4563 }
4564
/* Stores the |alwaysok| flag in |attr|.  Always returns true. */
bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
  attr->alwaysok_ = alwaysok;

  return true;
}
4569
upb_handlerattr_alwaysok(const upb_handlerattr * attr)4570 bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
4571 return attr->alwaysok_;
4572 }
4573
4574 /* upb_bufhandle **************************************************************/
4575
upb_bufhandle_objofs(const upb_bufhandle * h)4576 size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
4577 return h->objofs_;
4578 }
4579
4580 /* upb_byteshandler ***********************************************************/
4581
/* Zero-fills the whole bytes handler |h|. */
void upb_byteshandler_init(upb_byteshandler *h) {
  const size_t handler_bytes = sizeof(*h);

  memset(h, 0, handler_bytes);
}
4585
4586 /* For when we support handlerfree callbacks. */
upb_byteshandler_uninit(upb_byteshandler * h)4587 void upb_byteshandler_uninit(upb_byteshandler* h) {
4588 UPB_UNUSED(h);
4589 }
4590
/* Installs |func| and its handler data |d| in the UPB_STARTSTR_SELECTOR slot
 * of |h|.  Always returns true. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
                                  upb_startstr_handlerfunc *func, void *d) {
  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;

  return true;
}
4597
/* Installs |func| and its handler data |d| in the UPB_STRING_SELECTOR slot of
 * |h|.  Always returns true. */
bool upb_byteshandler_setstring(upb_byteshandler *h,
                                upb_string_handlerfunc *func, void *d) {
  h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;

  return true;
}
4604
/* Installs |func| and its handler data |d| in the UPB_ENDSTR_SELECTOR slot of
 * |h|.  Always returns true. */
bool upb_byteshandler_setendstr(upb_byteshandler *h,
                                upb_endfield_handlerfunc *func, void *d) {
  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;

  return true;
}
4611
4612
4613 /** Handlers for upb_msg ******************************************************/
4614
/* Handler data shared by the generic scalar writers below. */
typedef struct {
  size_t offset;   /* Byte offset in the message at which the value is stored. */
  int32_t hasbit;  /* Hasbit index; the bit is only set when this is > 0. */
} upb_msg_handlerdata;

/* Fallback implementation if the handler is not specialized by the producer.
 * Each expansion defines upb_msg_set<type>(): the closure |c| is treated as
 * the message bytes, the hasbit named by the handler data is set when it is
 * positive, and |val| is stored at the configured offset. */
#define MSG_WRITER(type, ctype)                                          \
  bool upb_msg_set##type(void *c, const void *hd, ctype val) {           \
    const upb_msg_handlerdata *info = hd;                                \
    uint8_t *bytes = c;                                                  \
    if (info->hasbit > 0) {                                              \
      *(uint8_t *)&bytes[info->hasbit / 8] |= 1 << (info->hasbit % 8);   \
    }                                                                    \
    *(ctype *)&bytes[info->offset] = val;                                \
    return true;                                                         \
  }

MSG_WRITER(double, double)
MSG_WRITER(float, float)
MSG_WRITER(int32, int32_t)
MSG_WRITER(int64, int64_t)
MSG_WRITER(uint32, uint32_t)
MSG_WRITER(uint64, uint64_t)
MSG_WRITER(bool, bool)
4638
4639 bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f,
4640 size_t offset, int32_t hasbit) {
4641 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
4642 bool ok;
4643
4644 upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d));
4645 if (!d) return false;
4646 d->offset = offset;
4647 d->hasbit = hasbit;
4648
4649 upb_handlerattr_sethandlerdata(&attr, d);
4650 upb_handlerattr_setalwaysok(&attr, true);
4651 upb_handlers_addcleanup(h, d, upb_gfree);
4652
4653 #define TYPE(u, l) \
4654 case UPB_TYPE_##u: \
4655 ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break;
4656
4657 ok = false;
4658
4659 switch (upb_fielddef_type(f)) {
4660 TYPE(INT64, int64);
4661 TYPE(INT32, int32);
4662 TYPE(ENUM, int32);
4663 TYPE(UINT64, uint64);
4664 TYPE(UINT32, uint32);
4665 TYPE(DOUBLE, double);
4666 TYPE(FLOAT, float);
4667 TYPE(BOOL, bool);
4668 default: UPB_ASSERT(false); break;
4669 }
4670 #undef TYPE
4671
4672 upb_handlerattr_uninit(&attr);
4673 return ok;
4674 }
4675
/* Checks whether the handler registered on |h| for selector |s| is one of the
 * generic upb_msg_set<type>() writers.  If so, reports the matching field
 * type plus the offset and hasbit from its handler data and returns true;
 * otherwise returns false without writing to the outputs. */
bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
                                  upb_selector_t s,
                                  upb_fieldtype_t *type,
                                  size_t *offset,
                                  int32_t *hasbit) {
  upb_func *handler = upb_handlers_gethandler(h, s);
  const upb_msg_handlerdata *data;
  upb_fieldtype_t matched;

  if ((upb_int64_handlerfunc*)handler == upb_msg_setint64) {
    matched = UPB_TYPE_INT64;
  } else if ((upb_int32_handlerfunc*)handler == upb_msg_setint32) {
    matched = UPB_TYPE_INT32;
  } else if ((upb_uint64_handlerfunc*)handler == upb_msg_setuint64) {
    matched = UPB_TYPE_UINT64;
  } else if ((upb_uint32_handlerfunc*)handler == upb_msg_setuint32) {
    matched = UPB_TYPE_UINT32;
  } else if ((upb_double_handlerfunc*)handler == upb_msg_setdouble) {
    matched = UPB_TYPE_DOUBLE;
  } else if ((upb_float_handlerfunc*)handler == upb_msg_setfloat) {
    matched = UPB_TYPE_FLOAT;
  } else if ((upb_bool_handlerfunc*)handler == upb_msg_setbool) {
    matched = UPB_TYPE_BOOL;
  } else {
    return false;
  }

  *type = matched;

  data = upb_handlers_gethandlerdata(h, s);
  *offset = data->offset;
  *hasbit = data->hasbit;
  return true;
}
4707
4708
/* Returns true when |type| is accepted as a map key type: bool, the 32- and
 * 64-bit integer types, and string. */
bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_STRING:
      return true;
    default:
      return false;
  }
}
4714
/* PTR_AT yields a |type|* pointing |ofs| bytes past |msg|.  The arguments and
 * the whole expansion are parenthesized so that compound expressions (e.g. a
 * conditional offset) and postfix operators applied to the result bind as
 * expected. */
#define PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))
/* Same as PTR_AT but yields a void*. */
#define VOIDPTR_AT(msg, ofs) PTR_AT(msg, ofs, void)
#define ENCODE_MAX_NESTING 64
/* Returns false from the enclosing function when |x| evaluates to false. */
#define CHECK_TRUE(x) if (!(x)) { return false; }
4719
4720 /** upb_msgval ****************************************************************/
4721
/* Alignment of type |t|, measured as the offset of a |t| member placed right
 * after a single char in a struct. */
#define upb_alignof(t) offsetof(struct { char pad_; t value_; }, value_)
4723
4724 /* These functions will generate real memcpy() calls on ARM sadly, because
4725 * the compiler assumes they might not be aligned. */
4726
upb_msgval_read(const void * p,size_t ofs,uint8_t size)4727 static upb_msgval upb_msgval_read(const void *p, size_t ofs,
4728 uint8_t size) {
4729 upb_msgval val;
4730 p = (char*)p + ofs;
4731 memcpy(&val, p, size);
4732 return val;
4733 }
4734
/* Copies the first |size| bytes of |val| to the location |ofs| bytes past
 * |p|. */
static void upb_msgval_write(void *p, size_t ofs, upb_msgval val,
                             uint8_t size) {
  char *dst = (char*)p + ofs;

  memcpy(dst, &val, size);
}
4740
/* Returns the number of bytes a value of field type |type| is stored in. */
static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
4762
upb_msg_fieldsize(const upb_msglayout_field * field)4763 static uint8_t upb_msg_fieldsize(const upb_msglayout_field *field) {
4764 if (field->label == UPB_LABEL_REPEATED) {
4765 return sizeof(void*);
4766 } else {
4767 return upb_msgval_sizeof(upb_desctype_to_fieldtype[field->descriptortype]);
4768 }
4769 }
4770
4771 /* TODO(haberman): this is broken right now because upb_msgval can contain
4772 * a char* / size_t pair, which is too big for a upb_value. To fix this
4773 * we'll probably need to dynamically allocate a upb_msgval and store a
4774 * pointer to that in the tables for extensions/maps. */
upb_toval(upb_msgval val)4775 static upb_value upb_toval(upb_msgval val) {
4776 upb_value ret;
4777 UPB_UNUSED(val);
4778 memset(&ret, 0, sizeof(upb_value)); /* XXX */
4779 return ret;
4780 }
4781
upb_msgval_fromval(upb_value val)4782 static upb_msgval upb_msgval_fromval(upb_value val) {
4783 upb_msgval ret;
4784 UPB_UNUSED(val);
4785 memset(&ret, 0, sizeof(upb_msgval)); /* XXX */
4786 return ret;
4787 }
4788
upb_fieldtotabtype(upb_fieldtype_t type)4789 static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) {
4790 switch (type) {
4791 case UPB_TYPE_FLOAT: return UPB_CTYPE_FLOAT;
4792 case UPB_TYPE_DOUBLE: return UPB_CTYPE_DOUBLE;
4793 case UPB_TYPE_BOOL: return UPB_CTYPE_BOOL;
4794 case UPB_TYPE_BYTES:
4795 case UPB_TYPE_MESSAGE:
4796 case UPB_TYPE_STRING: return UPB_CTYPE_CONSTPTR;
4797 case UPB_TYPE_ENUM:
4798 case UPB_TYPE_INT32: return UPB_CTYPE_INT32;
4799 case UPB_TYPE_UINT32: return UPB_CTYPE_UINT32;
4800 case UPB_TYPE_INT64: return UPB_CTYPE_INT64;
4801 case UPB_TYPE_UINT64: return UPB_CTYPE_UINT64;
4802 default: UPB_ASSERT(false); return 0;
4803 }
4804 }
4805
4806
4807 /** upb_msg *******************************************************************/
4808
/* If we always read/write as a consistent type to each address, this shouldn't
 * violate aliasing.
 *
 * The expansion is parenthesized so that operators written after the macro
 * apply to the dereferenced object rather than to the pointer expression. */
#define DEREF(msg, ofs, type) (*PTR_AT(msg, ofs, type))
4813
4814 /* Internal members of a upb_msg. We can change this without breaking binary
4815 * compatibility. We put these before the user's data. The user's upb_msg*
4816 * points after the upb_msg_internal. */
4817
4818 /* Used when a message is not extendable. */
4819 typedef struct {
4820 /* TODO(haberman): use pointer tagging so we we are slim when known unknown
4821 * fields are not present. */
4822 upb_arena *arena;
4823 char *unknown;
4824 size_t unknown_len;
4825 size_t unknown_size;
4826 } upb_msg_internal;
4827
4828 /* Used when a message is extendable. */
4829 typedef struct {
4830 upb_inttable *extdict;
4831 upb_msg_internal base;
4832 } upb_msg_internal_withext;
4833
upb_msg_internalsize(const upb_msglayout * l)4834 static int upb_msg_internalsize(const upb_msglayout *l) {
4835 return sizeof(upb_msg_internal) - l->extendable * sizeof(void *);
4836 }
4837
upb_msg_getinternal(upb_msg * msg)4838 static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
4839 return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
4840 }
4841
upb_msg_getinternal_const(const upb_msg * msg)4842 static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
4843 return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
4844 }
4845
upb_msg_getinternalwithext(upb_msg * msg,const upb_msglayout * l)4846 static upb_msg_internal_withext *upb_msg_getinternalwithext(
4847 upb_msg *msg, const upb_msglayout *l) {
4848 UPB_ASSERT(l->extendable);
4849 return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext));
4850 }
4851
/* Appends |len| bytes from |data| to the unknown-field buffer of |msg|,
 * growing the buffer through the message's arena allocator when needed.
 *
 * The function has no way to report failure, so if the buffer cannot be grown
 * the new bytes are dropped and the existing buffer is left intact (the
 * previous code overwrote the buffer pointer with the failed result and then
 * copied into it).  A zero-length append is a no-op, which also avoids
 * calling memcpy() on a NULL buffer. */
void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len) {
  upb_msg_internal* in = upb_msg_getinternal(msg);

  if (len == 0) {
    return;
  }

  if (len > in->unknown_size - in->unknown_len) {
    upb_alloc *alloc = upb_arena_alloc(in->arena);
    size_t need = in->unknown_size + len;
    size_t newsize = UPB_MAX(in->unknown_size * 2, need);
    char *grown = upb_realloc(alloc, in->unknown, in->unknown_size, newsize);

    if (!grown) {
      /* Out of memory: keep the old buffer and drop the new data. */
      return;
    }

    in->unknown = grown;
    in->unknown_size = newsize;
  }

  memcpy(in->unknown + in->unknown_len, data, len);
  in->unknown_len += len;
}
4864
upb_msg_getunknown(const upb_msg * msg,size_t * len)4865 const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
4866 const upb_msg_internal* in = upb_msg_getinternal_const(msg);
4867 *len = in->unknown_len;
4868 return in->unknown;
4869 }
4870
upb_msg_checkfield(int field_index,const upb_msglayout * l)4871 static const upb_msglayout_field *upb_msg_checkfield(int field_index,
4872 const upb_msglayout *l) {
4873 UPB_ASSERT(field_index >= 0 && field_index < l->field_count);
4874 return &l->fields[field_index];
4875 }
4876
upb_msg_inoneof(const upb_msglayout_field * field)4877 static bool upb_msg_inoneof(const upb_msglayout_field *field) {
4878 return field->presence < 0;
4879 }
4880
upb_msg_oneofcase(const upb_msg * msg,int field_index,const upb_msglayout * l)4881 static uint32_t *upb_msg_oneofcase(const upb_msg *msg, int field_index,
4882 const upb_msglayout *l) {
4883 const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4884 UPB_ASSERT(upb_msg_inoneof(field));
4885 return PTR_AT(msg, ~field->presence, uint32_t);
4886 }
4887
upb_msg_sizeof(const upb_msglayout * l)4888 static size_t upb_msg_sizeof(const upb_msglayout *l) {
4889 return l->size + upb_msg_internalsize(l);
4890 }
4891
upb_msg_new(const upb_msglayout * l,upb_arena * a)4892 upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) {
4893 upb_alloc *alloc = upb_arena_alloc(a);
4894 void *mem = upb_malloc(alloc, upb_msg_sizeof(l));
4895 upb_msg_internal *in;
4896 upb_msg *msg;
4897
4898 if (!mem) {
4899 return NULL;
4900 }
4901
4902 msg = VOIDPTR_AT(mem, upb_msg_internalsize(l));
4903
4904 /* Initialize normal members. */
4905 memset(msg, 0, l->size);
4906
4907 /* Initialize internal members. */
4908 in = upb_msg_getinternal(msg);
4909 in->arena = a;
4910 in->unknown = NULL;
4911 in->unknown_len = 0;
4912 in->unknown_size = 0;
4913
4914 if (l->extendable) {
4915 upb_msg_getinternalwithext(msg, l)->extdict = NULL;
4916 }
4917
4918 return msg;
4919 }
4920
upb_msg_arena(const upb_msg * msg)4921 upb_arena *upb_msg_arena(const upb_msg *msg) {
4922 return upb_msg_getinternal_const(msg)->arena;
4923 }
4924
upb_msg_has(const upb_msg * msg,int field_index,const upb_msglayout * l)4925 bool upb_msg_has(const upb_msg *msg,
4926 int field_index,
4927 const upb_msglayout *l) {
4928 const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4929
4930 UPB_ASSERT(field->presence);
4931
4932 if (upb_msg_inoneof(field)) {
4933 /* Oneofs are set when the oneof number is set to this field. */
4934 return *upb_msg_oneofcase(msg, field_index, l) == field->number;
4935 } else {
4936 /* Other fields are set when their hasbit is set. */
4937 uint32_t hasbit = field->presence;
4938 return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8));
4939 }
4940 }
4941
upb_msg_get(const upb_msg * msg,int field_index,const upb_msglayout * l)4942 upb_msgval upb_msg_get(const upb_msg *msg, int field_index,
4943 const upb_msglayout *l) {
4944 const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4945 int size = upb_msg_fieldsize(field);
4946 return upb_msgval_read(msg, field->offset, size);
4947 }
4948
/* Stores |val| into the field at |field_index| of |msg|, copying as many
 * bytes as the field occupies in the layout. */
void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val,
                 const upb_msglayout *l) {
  const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
  int nbytes = upb_msg_fieldsize(field);

  upb_msgval_write(msg, field->offset, val, nbytes);
}
4955
4956
4957 /** upb_array *****************************************************************/
4958
/* Element |i| of |arr|'s data buffer viewed as an array of |type|.  |arr| is
 * parenthesized so that arguments such as &array bind to the whole argument
 * before the member access. */
#define DEREF_ARR(arr, i, type) (((type*)(arr)->data)[i])
4960
upb_array_new(upb_fieldtype_t type,upb_arena * a)4961 upb_array *upb_array_new(upb_fieldtype_t type, upb_arena *a) {
4962 upb_alloc *alloc = upb_arena_alloc(a);
4963 upb_array *ret = upb_malloc(alloc, sizeof(upb_array));
4964
4965 if (!ret) {
4966 return NULL;
4967 }
4968
4969 ret->type = type;
4970 ret->data = NULL;
4971 ret->len = 0;
4972 ret->size = 0;
4973 ret->element_size = upb_msgval_sizeof(type);
4974 ret->arena = a;
4975
4976 return ret;
4977 }
4978
upb_array_size(const upb_array * arr)4979 size_t upb_array_size(const upb_array *arr) {
4980 return arr->len;
4981 }
4982
upb_array_type(const upb_array * arr)4983 upb_fieldtype_t upb_array_type(const upb_array *arr) {
4984 return arr->type;
4985 }
4986
upb_array_get(const upb_array * arr,size_t i)4987 upb_msgval upb_array_get(const upb_array *arr, size_t i) {
4988 UPB_ASSERT(i < arr->len);
4989 return upb_msgval_read(arr->data, i * arr->element_size, arr->element_size);
4990 }
4991
/* Stores |val| at index |i| of |arr|.  |i| may equal the current length, in
 * which case the array grows by one element, reallocating (at least 8
 * elements, otherwise doubling) through the array's arena when full.
 * Returns false only if that reallocation fails; the array is unchanged
 * then. */
bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) {
  const bool appending = (i == arr->len);

  UPB_ASSERT(i <= arr->len);

  if (appending && i == arr->size) {
    /* Storage is full: grow it before extending. */
    size_t grown_count = UPB_MAX(arr->size * 2, 8);
    size_t grown_bytes = grown_count * arr->element_size;
    size_t current_bytes = arr->size * arr->element_size;
    upb_alloc *alloc = upb_arena_alloc(arr->arena);
    upb_msgval *grown =
        upb_realloc(alloc, arr->data, current_bytes, grown_bytes);

    if (grown == NULL) {
      return false;
    }

    arr->data = grown;
    arr->size = grown_count;
  }

  if (appending) {
    arr->len = i + 1;
  }

  upb_msgval_write(arr->data, i * arr->element_size, val, arr->element_size);
  return true;
}
5021
5022
5023 /** upb_map *******************************************************************/
5024
5025 struct upb_map {
5026 upb_fieldtype_t key_type;
5027 upb_fieldtype_t val_type;
5028 /* We may want to optimize this to use inttable where possible, for greater
5029 * efficiency and lower memory footprint. */
5030 upb_strtable strtab;
5031 upb_arena *arena;
5032 };
5033
/* Produces the byte-string form of map key |key| for key type |type|.  String
 * keys point at their own character data; bool and integer keys point at the
 * bytes of the key value itself.  Types that cannot be map keys reach
 * UPB_UNREACHABLE(). */
static void upb_map_tokey(upb_fieldtype_t type, upb_msgval *key,
                          const char **out_key, size_t *out_len) {
  switch (type) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      /* Point to the key itself.  XXX: big-endian. */
      *out_key = (const char*)key;
      *out_len = upb_msgval_sizeof(type);
      return;

    case UPB_TYPE_STRING:
      /* Point to string data of the input key. */
      *out_key = key->str.data;
      *out_len = key->str.size;
      return;

    case UPB_TYPE_BYTES:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_MESSAGE:
      break;  /* Cannot be a map key. */
  }
  UPB_UNREACHABLE();
}
5060
upb_map_fromkey(upb_fieldtype_t type,const char * key,size_t len)5061 static upb_msgval upb_map_fromkey(upb_fieldtype_t type, const char *key,
5062 size_t len) {
5063 switch (type) {
5064 case UPB_TYPE_STRING:
5065 return upb_msgval_makestr(key, len);
5066 case UPB_TYPE_BOOL:
5067 case UPB_TYPE_INT32:
5068 case UPB_TYPE_UINT32:
5069 case UPB_TYPE_INT64:
5070 case UPB_TYPE_UINT64:
5071 return upb_msgval_read(key, 0, upb_msgval_sizeof(type));
5072 case UPB_TYPE_BYTES:
5073 case UPB_TYPE_DOUBLE:
5074 case UPB_TYPE_ENUM:
5075 case UPB_TYPE_FLOAT:
5076 case UPB_TYPE_MESSAGE:
5077 break; /* Cannot be a map key. */
5078 }
5079 UPB_UNREACHABLE();
5080 }
5081
upb_map_new(upb_fieldtype_t ktype,upb_fieldtype_t vtype,upb_arena * a)5082 upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype,
5083 upb_arena *a) {
5084 upb_ctype_t vtabtype = upb_fieldtotabtype(vtype);
5085 upb_alloc *alloc = upb_arena_alloc(a);
5086 upb_map *map = upb_malloc(alloc, sizeof(upb_map));
5087
5088 if (!map) {
5089 return NULL;
5090 }
5091
5092 UPB_ASSERT(upb_fieldtype_mapkeyok(ktype));
5093 map->key_type = ktype;
5094 map->val_type = vtype;
5095 map->arena = a;
5096
5097 if (!upb_strtable_init2(&map->strtab, vtabtype, alloc)) {
5098 return NULL;
5099 }
5100
5101 return map;
5102 }
5103
upb_map_size(const upb_map * map)5104 size_t upb_map_size(const upb_map *map) {
5105 return upb_strtable_count(&map->strtab);
5106 }
5107
upb_map_keytype(const upb_map * map)5108 upb_fieldtype_t upb_map_keytype(const upb_map *map) {
5109 return map->key_type;
5110 }
5111
upb_map_valuetype(const upb_map * map)5112 upb_fieldtype_t upb_map_valuetype(const upb_map *map) {
5113 return map->val_type;
5114 }
5115
/* Looks up |key| in |map|.  On a hit the raw table value is copied into
 * |*val| and true is returned; on a miss |*val| is left untouched and false
 * is returned. */
bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
  const char *key_bytes;
  size_t key_size;
  upb_value found;

  upb_map_tokey(map->key_type, &key, &key_bytes, &key_size);

  if (!upb_strtable_lookup2(&map->strtab, key_bytes, key_size, &found)) {
    return false;
  }

  memcpy(val, &found, sizeof(found));
  return true;
}
5130
/* Inserts |val| under |key| in |map|, replacing any existing entry.  Returns
 * the result of the table insertion.
 *
 * When an existing entry is replaced and |removed| is non-NULL, the raw bytes
 * of the old table value are copied into |*removed|.  The previous code
 * copied into the address of the local |removed| pointer instead, so the
 * caller never received the value, and it had no NULL check.  The copy is
 * bounded by the smaller of the two object sizes, and |*removed| is zeroed
 * first so any bytes not covered by the copy are defined. */
bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
                 upb_msgval *removed) {
  const char *key_str;
  size_t key_len;
  upb_value tabval = upb_toval(val);
  upb_value removedtabval;
  upb_alloc *a = upb_arena_alloc(map->arena);

  upb_map_tokey(map->key_type, &key, &key_str, &key_len);

  /* TODO(haberman): add overwrite operation to minimize number of lookups. */
  if (upb_strtable_lookup2(&map->strtab, key_str, key_len, NULL)) {
    upb_strtable_remove3(&map->strtab, key_str, key_len, &removedtabval, a);
    if (removed) {
      size_t copy_len = sizeof(*removed) < sizeof(removedtabval)
                            ? sizeof(*removed)
                            : sizeof(removedtabval);
      memset(removed, 0, sizeof(*removed));
      memcpy(removed, &removedtabval, copy_len);
    }
  }

  return upb_strtable_insert3(&map->strtab, key_str, key_len, tabval, a);
}
5149
/* Removes the entry for |key| from |map| and returns the result of the table
 * removal. */
bool upb_map_del(upb_map *map, upb_msgval key) {
  upb_alloc *alloc = upb_arena_alloc(map->arena);
  const char *key_bytes;
  size_t key_size;

  upb_map_tokey(map->key_type, &key, &key_bytes, &key_size);
  return upb_strtable_remove3(&map->strtab, key_bytes, key_size, NULL, alloc);
}
5158
5159
5160 /** upb_mapiter ***************************************************************/
5161
5162 struct upb_mapiter {
5163 upb_strtable_iter iter;
5164 upb_fieldtype_t key_type;
5165 };
5166
upb_mapiter_sizeof()5167 size_t upb_mapiter_sizeof() {
5168 return sizeof(upb_mapiter);
5169 }
5170
/* Positions iterator |i| at the start of |map| and records the map's key
 * type. */
void upb_mapiter_begin(upb_mapiter *i, const upb_map *map) {
  i->key_type = map->key_type;
  upb_strtable_begin(&i->iter, &map->strtab);
}
5175
upb_mapiter_new(const upb_map * t,upb_alloc * a)5176 upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a) {
5177 upb_mapiter *ret = upb_malloc(a, upb_mapiter_sizeof());
5178
5179 if (!ret) {
5180 return NULL;
5181 }
5182
5183 upb_mapiter_begin(ret, t);
5184 return ret;
5185 }
5186
/* Releases iterator |i| back to allocator |a|. */
void upb_mapiter_free(upb_mapiter *i, upb_alloc *a) {
  upb_free(a, i);
}
5190
/* Advances |i| to the next table entry. */
void upb_mapiter_next(upb_mapiter *i) {
  upb_strtable_next(&i->iter);
}
5194
upb_mapiter_done(const upb_mapiter * i)5195 bool upb_mapiter_done(const upb_mapiter *i) {
5196 return upb_strtable_done(&i->iter);
5197 }
5198
upb_mapiter_key(const upb_mapiter * i)5199 upb_msgval upb_mapiter_key(const upb_mapiter *i) {
5200 return upb_map_fromkey(i->key_type, upb_strtable_iter_key(&i->iter),
5201 upb_strtable_iter_keylength(&i->iter));
5202 }
5203
upb_mapiter_value(const upb_mapiter * i)5204 upb_msgval upb_mapiter_value(const upb_mapiter *i) {
5205 return upb_msgval_fromval(upb_strtable_iter_value(&i->iter));
5206 }
5207
/* Marks the underlying table iterator of |i| as done. */
void upb_mapiter_setdone(upb_mapiter *i) {
  upb_strtable_iter_setdone(&i->iter);
}
5211
upb_mapiter_isequal(const upb_mapiter * i1,const upb_mapiter * i2)5212 bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2) {
5213 return upb_strtable_iter_isequal(&i1->iter, &i2->iter);
5214 }
5215
5216
/* Returns true when |val| has at most one bit set.  Note that this makes 0
 * count as a power of two as well. */
static bool is_power_of_two(size_t val) {
  const size_t without_lowest_bit = val & (val - 1);

  return without_lowest_bit == 0;
}
5220
/* Rounds |val| up to the next multiple of |align|, which is asserted to be a
 * power of 2. */
static size_t align_up(size_t val, size_t align) {
  const size_t mask = align - 1;

  UPB_ASSERT(is_power_of_two(align));
  return (val + mask) & ~mask;
}
5226
/* Returns |n| divided by |d|, rounded up. */
static size_t div_round_up(size_t n, size_t d) {
  const size_t biased = n + d - 1;

  return biased / d;
}
5230
/* Returns the number of bytes a value of field type |type| is stored in. */
static size_t upb_msgval_sizeof2(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
5252
/* Returns the in-message storage size for field |f|: a pointer for sequence
 * fields, otherwise the size of the field's value type. */
static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) {
    return upb_msgval_sizeof2(upb_fielddef_type(f));
  }
  return sizeof(void*);
}
5260
5261
5262 /** upb_msglayout *************************************************************/
5263
/* Frees layout |l| together with the field and submessage tables that
 * upb_msglayout_init() allocates with upb_gmalloc() and stores in the layout.
 * Previously only |l| itself was released, leaking both tables.
 *
 * NOTE(review): this assumes every layout passed here was set up by
 * upb_msglayout_init() (which zeroes the layout before it can fail) -
 * confirm against the callers. */
static void upb_msglayout_free(upb_msglayout *l) {
  if (!l) {
    return;
  }
  upb_gfree((void*)l->fields);
  upb_gfree((void*)l->submsgs);
  upb_gfree(l);
}
5267
/* Reserves |size| bytes at the end of layout |l|, first aligning the current
 * size to |size| itself.  Returns the offset of the reserved region. */
static size_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  const size_t offset = align_up(l->size, size);

  l->size = offset;
  l->size += size;
  return offset;
}
5276
/* Builds the in-memory layout |l| for message definition |m|.
 *
 * Allocates the field and submessage tables with upb_gmalloc(), assigns a
 * hasbit to every non-oneof field that tracks presence, then places regular
 * fields followed by oneofs (a uint32 case word plus storage sized for the
 * largest member).  Layouts of submessage fields are obtained from
 * |factory|.  Returns false if a table allocation fails, in which case |l|
 * is left zeroed.
 *
 * Hasbit numbering starts at 1.  A presence value of 0 means "no presence"
 * everywhere else in this file (upb_msg_has() asserts it is non-zero and the
 * generic writers only set hasbits > 0), so handing out hasbit 0, as the
 * previous code did, made the first presence-tracking field look like it had
 * none. */
static bool upb_msglayout_init(const upb_msgdef *m,
                               upb_msglayout *l,
                               upb_msgfactory *factory) {
  upb_msg_field_iter it;
  upb_msg_oneof_iter oit;
  size_t hasbit;
  size_t submsg_count = 0;
  const upb_msglayout **submsgs;
  upb_msglayout_field *fields;

  /* Count submessage fields so the submsgs table can be sized. */
  for (upb_msg_field_begin(&it, m);
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    if (upb_fielddef_issubmsg(f)) {
      submsg_count++;
    }
  }

  memset(l, 0, sizeof(*l));

  fields = upb_gmalloc(upb_msgdef_numfields(m) * sizeof(*fields));
  submsgs = upb_gmalloc(submsg_count * sizeof(*submsgs));

  /* A NULL result only counts as failure when a non-empty table was
   * requested. */
  if ((!fields && upb_msgdef_numfields(m)) ||
      (!submsgs && submsg_count)) {
    /* OOM. */
    upb_gfree(fields);
    upb_gfree(submsgs);
    return false;
  }

  l->field_count = upb_msgdef_numfields(m);
  l->fields = fields;
  l->submsgs = submsgs;

  /* Allocate data offsets in three stages:
   *
   * 1. hasbits.
   * 2. regular fields.
   * 3. oneof fields.
   *
   * OPT: There is a lot of room for optimization here to minimize the size.
   */

  /* Allocate hasbits and set basic field attributes. */
  submsg_count = 0;
  for (upb_msg_field_begin(&it, m), hasbit = 0;
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    upb_msglayout_field *field = &fields[upb_fielddef_index(f)];

    field->number = upb_fielddef_number(f);
    field->descriptortype = upb_fielddef_descriptortype(f);
    field->label = upb_fielddef_label(f);

    if (upb_fielddef_issubmsg(f)) {
      const upb_msglayout *sub_layout =
          upb_msgfactory_getlayout(factory, upb_fielddef_msgsubdef(f));
      field->submsg_index = submsg_count++;
      submsgs[field->submsg_index] = sub_layout;
    }

    if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) {
      /* Hasbit 0 is never handed out, so that presence == 0 can keep meaning
       * "no presence".  This wastes one bit. */
      field->presence = (++hasbit);
    } else {
      field->presence = 0;
    }
  }

  /* Account for space used by hasbits.  Bits 1..hasbit are in use, so
   * hasbit + 1 bits are needed whenever any hasbit was assigned. */
  l->size = hasbit ? div_round_up(hasbit + 1, 8) : 0;

  /* Allocate non-oneof fields. */
  for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    size_t field_size = upb_msg_fielddefsize(f);
    size_t index = upb_fielddef_index(f);

    if (upb_fielddef_containingoneof(f)) {
      /* Oneofs are handled separately below. */
      continue;
    }

    fields[index].offset = upb_msglayout_place(l, field_size);
  }

  /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
   * and space for the actual data. */
  for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
       upb_msg_oneof_next(&oit)) {
    const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
    upb_oneof_iter fit;

    size_t case_size = sizeof(uint32_t);  /* Could potentially optimize this. */
    size_t field_size = 0;
    uint32_t case_offset;
    uint32_t data_offset;

    /* Calculate field size: the max of all field sizes. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
    }

    /* Align and allocate case offset. */
    case_offset = upb_msglayout_place(l, case_size);
    data_offset = upb_msglayout_place(l, field_size);

    /* All members share the data slot; presence stores the complemented case
     * offset, which is negative and thereby marks the field as a oneof
     * member. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      fields[upb_fielddef_index(f)].offset = data_offset;
      fields[upb_fielddef_index(f)].presence = ~case_offset;
    }
  }

  /* Size of the entire structure should be a multiple of its greatest
   * alignment.  TODO: track overall alignment for real? */
  l->size = align_up(l->size, 8);

  return true;
}
5405
5406
5407 /** upb_msgfactory ************************************************************/
5408
/* State of a message factory.  Both tables are initialized in
 * upb_msgfactory_new() and torn down in upb_msgfactory_free(). */
struct upb_msgfactory {
  /* We own a ref.  NOTE(review): neither upb_msgfactory_new() nor
   * upb_msgfactory_free() visibly takes or releases this ref -- confirm. */
  const upb_symtab *symtab;
  /* Keyed by upb_msgdef pointer; values are the upb_msglayout pointers
   * created by upb_msgfactory_getlayout(). */
  upb_inttable layouts;
  /* Values are const upb_handlers pointers; each is unref'd with the factory
   * as owner in upb_msgfactory_free(). */
  upb_inttable mergehandlers;
};
5414
upb_msgfactory_new(const upb_symtab * symtab)5415 upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab) {
5416 upb_msgfactory *ret = upb_gmalloc(sizeof(*ret));
5417
5418 ret->symtab = symtab;
5419 upb_inttable_init(&ret->layouts, UPB_CTYPE_PTR);
5420 upb_inttable_init(&ret->mergehandlers, UPB_CTYPE_CONSTPTR);
5421
5422 return ret;
5423 }
5424
/* Destroys |f|: frees every cached layout, drops the factory's reference on
 * every cached handlers object, tears down both tables and finally frees the
 * factory itself. */
void upb_msgfactory_free(upb_msgfactory *f) {
  upb_inttable_iter it;

  /* Layouts are owned by the factory. */
  for (upb_inttable_begin(&it, &f->layouts); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_msglayout *layout = upb_value_getptr(upb_inttable_iter_value(&it));
    upb_msglayout_free(layout);
  }

  /* Handlers are refcounted; the factory is the owner of each ref. */
  for (upb_inttable_begin(&it, &f->mergehandlers); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    const upb_handlers *handlers =
        upb_value_getconstptr(upb_inttable_iter_value(&it));
    upb_handlers_unref(handlers, f);
  }

  upb_inttable_uninit(&f->layouts);
  upb_inttable_uninit(&f->mergehandlers);
  upb_gfree(f);
}
5443
upb_msgfactory_symtab(const upb_msgfactory * f)5444 const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f) {
5445 return f->symtab;
5446 }
5447
upb_msgfactory_getlayout(upb_msgfactory * f,const upb_msgdef * m)5448 const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
5449 const upb_msgdef *m) {
5450 upb_value v;
5451 UPB_ASSERT(upb_symtab_lookupmsg(f->symtab, upb_msgdef_fullname(m)) == m);
5452 UPB_ASSERT(!upb_msgdef_mapentry(m));
5453
5454 if (upb_inttable_lookupptr(&f->layouts, m, &v)) {
5455 UPB_ASSERT(upb_value_getptr(v));
5456 return upb_value_getptr(v);
5457 } else {
5458 /* In case of circular dependency, layout has to be inserted first. */
5459 upb_msglayout *l = upb_gmalloc(sizeof(*l));
5460 upb_msgfactory *mutable_f = (void*)f;
5461 upb_inttable_insertptr(&mutable_f->layouts, m, upb_value_ptr(l));
5462 UPB_ASSERT(l);
5463 if (!upb_msglayout_init(m, l, f)) {
5464 upb_msglayout_free(l);
5465 }
5466 return l;
5467 }
5468 }
5469
/* Layout helper macros.  They are #undef'd again directly below, and nothing
 * between these definitions and the #undefs uses them. */

/* Selects |size32| when uintptr_t is 32 bits wide, |size64| otherwise. */
#if UINTPTR_MAX == 0xffffffff
#define UPB_SIZE(size32, size64) size32
#else
#define UPB_SIZE(size32, size64) size64
#endif

/* Lvalue of type |fieldtype| located |offset| bytes into |msg|. */
#define UPB_FIELD_AT(msg, fieldtype, offset) \
  *(fieldtype*)((const char*)(msg) + offset)

/* Reads the field at |offset| if the int at |case_offset| equals |case_val|,
 * otherwise yields |default|. */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  UPB_FIELD_AT(msg, int, case_offset) == case_val \
      ? UPB_FIELD_AT(msg, fieldtype, offset) \
      : default

/* Stores |case_val| at |case_offset| and |value| at |offset|.  Expands to two
 * statements, so it is not safe as the body of an unbraced if/else. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  UPB_FIELD_AT(msg, int, case_offset) = case_val; \
  UPB_FIELD_AT(msg, fieldtype, offset) = value;

#undef UPB_SIZE
#undef UPB_FIELD_AT
#undef UPB_READ_ONEOF
#undef UPB_WRITE_ONEOF
5492 /*
5493 ** upb::RefCounted Implementation
5494 **
5495 ** Our key invariants are:
5496 ** 1. reference cycles never span groups
5497 ** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
5498 **
5499 ** The previous two are how we avoid leaking cycles. Other important
5500 ** invariants are:
5501 ** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
5502 ** this implies group(from) == group(to). (In practice, what we implement
5503 ** is even stronger; "from" and "to" will share a group if there has *ever*
5504 ** been a ref2(to, from), but all that is necessary for correctness is the
5505 ** weaker one).
5506 ** 4. mutable and immutable objects are never in the same group.
5507 */
5508
5509
5510 #include <setjmp.h>
5511
5512 static void freeobj(upb_refcounted *o);
5513
/* Sentinel owner value.  The UPB_DEBUG_REFS versions of track() and untrack()
 * return immediately when the owner equals UPB_UNTRACKED_REF, so such refs are
 * never recorded.  In the code visible here only the address of untracked_val
 * is used, never its value. */
const char untracked_val;
const void *UPB_UNTRACKED_REF = &untracked_val;
5516
5517 /* arch-specific atomic primitives *******************************************/
5518
#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/

/* Plain, non-atomic versions used when UPB_THREAD_UNSAFE is defined. */

/* Increments *a. */
static void atomic_inc(uint32_t *a) { (*a)++; }
/* Decrements *a and returns true iff the new value is zero. */
static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }

#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/

/* Versions built on the GCC/Clang __sync builtins. */

/* Increments *a. */
static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
/* Decrements *a and returns true iff the new value is zero. */
static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }

#elif defined(WIN32) /*-------------------------------------------------------*/

#include <Windows.h>

/* Versions built on the Win32 Interlocked API.
 * NOTE(review): these take upb_atomic_t* while the other branches take
 * uint32_t*, and refgroup()/unrefgroup() below pass a uint32_t* -- confirm
 * that this branch still compiles. */
static void atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); }
static bool atomic_dec(upb_atomic_t *a) {
  return InterlockedDecrement(&a->val) == 0;
}

#else
#error Atomic primitives not defined for your platform/CPU. \
       Implement them or compile with UPB_THREAD_UNSAFE.
#endif
5542
/* All static objects point to this refcount.
 * It is special-cased in ref/unref below: refgroup() and unrefgroup() compare
 * against its address and never modify it.  The initializer -1 converts to
 * UINT32_MAX. */
uint32_t static_refcount = -1;
5546
5547 /* We can avoid atomic ops for statically-declared objects.
5548 * This is a minor optimization but nice since we can avoid degrading under
5549 * contention in this case. */
5550
refgroup(uint32_t * group)5551 static void refgroup(uint32_t *group) {
5552 if (group != &static_refcount)
5553 atomic_inc(group);
5554 }
5555
unrefgroup(uint32_t * group)5556 static bool unrefgroup(uint32_t *group) {
5557 if (group == &static_refcount) {
5558 return false;
5559 } else {
5560 return atomic_dec(group);
5561 }
5562 }
5563
5564
5565 /* Reference tracking (debug only) ********************************************/
5566
5567 #ifdef UPB_DEBUG_REFS
5568
/* Lock used by the ref-tracking code below to guard its tables. */
#ifdef UPB_THREAD_UNSAFE

/* No-op lock/unlock when UPB_THREAD_UNSAFE is defined. */
static void upb_lock() {}
static void upb_unlock() {}

#else

/* User must define functions that lock/unlock a global mutex and link this
 * file against them. */
void upb_lock();
void upb_unlock();

#endif
5582
/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
 * code-paths that can normally never fail, like upb_refcounted_ref().  Since
 * we have no way to propagate out-of-memory errors back to the user, and since
 * these errors can only occur in UPB_DEBUG_REFS mode, we use an allocator that
 * immediately aborts on failure (avoiding the global allocator, which might
 * inject failures). */
5589
5590 #include <stdlib.h>
5591
/* Allocation callback for upb_alloc_debugrefs.
 *
 * A |size| of zero frees |ptr| and returns NULL.  Any other size resizes
 * |ptr| with realloc() and returns the block; if realloc() fails the process
 * is aborted, so a non-zero request never returns NULL.  |alloc| and
 * |oldsize| are ignored. */
static void *upb_debugrefs_allocfunc(upb_alloc *alloc, void *ptr,
                                     size_t oldsize, size_t size) {
  void *block;

  UPB_UNUSED(alloc);
  UPB_UNUSED(oldsize);

  if (size != 0) {
    block = realloc(ptr, size);
    if (block == NULL) {
      abort();
    }
    return block;
  }

  free(ptr);
  return NULL;
}
5609
/* Allocator backed by upb_debugrefs_allocfunc(); used for all ref-tracking
 * bookkeeping below. */
upb_alloc upb_alloc_debugrefs = {&upb_debugrefs_allocfunc};
5611
/* Bookkeeping record for the refs one owner holds on one object.  Stored as
 * the value in the object's "refs" table, keyed by owner pointer. */
typedef struct {
  int count;     /* How many refs there are (duplicates only allowed for ref2). */
  bool is_ref2;  /* Whether this was recorded as a ref2 (see track()). */
} trackedref;
5616
trackedref_new(bool is_ref2)5617 static trackedref *trackedref_new(bool is_ref2) {
5618 trackedref *ret = upb_malloc(&upb_alloc_debugrefs, sizeof(*ret));
5619 ret->count = 1;
5620 ret->is_ref2 = is_ref2;
5621 return ret;
5622 }
5623
track(const upb_refcounted * r,const void * owner,bool ref2)5624 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
5625 upb_value v;
5626
5627 UPB_ASSERT(owner);
5628 if (owner == UPB_UNTRACKED_REF) return;
5629
5630 upb_lock();
5631 if (upb_inttable_lookupptr(r->refs, owner, &v)) {
5632 trackedref *ref = upb_value_getptr(v);
5633 /* Since we allow multiple ref2's for the same to/from pair without
5634 * allocating separate memory for each one, we lose the fine-grained
5635 * tracking behavior we get with regular refs. Since ref2s only happen
5636 * inside upb, we'll accept this limitation until/unless there is a really
5637 * difficult upb-internal bug that can't be figured out without it. */
5638 UPB_ASSERT(ref2);
5639 UPB_ASSERT(ref->is_ref2);
5640 ref->count++;
5641 } else {
5642 trackedref *ref = trackedref_new(ref2);
5643 upb_inttable_insertptr2(r->refs, owner, upb_value_ptr(ref),
5644 &upb_alloc_debugrefs);
5645 if (ref2) {
5646 /* We know this cast is safe when it is a ref2, because it's coming from
5647 * another refcounted object. */
5648 const upb_refcounted *from = owner;
5649 UPB_ASSERT(!upb_inttable_lookupptr(from->ref2s, r, NULL));
5650 upb_inttable_insertptr2(from->ref2s, r, upb_value_ptr(NULL),
5651 &upb_alloc_debugrefs);
5652 }
5653 }
5654 upb_unlock();
5655 }
5656
untrack(const upb_refcounted * r,const void * owner,bool ref2)5657 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
5658 upb_value v;
5659 bool found;
5660 trackedref *ref;
5661
5662 UPB_ASSERT(owner);
5663 if (owner == UPB_UNTRACKED_REF) return;
5664
5665 upb_lock();
5666 found = upb_inttable_lookupptr(r->refs, owner, &v);
5667 /* This assert will fail if an owner attempts to release a ref it didn't have. */
5668 UPB_ASSERT(found);
5669 ref = upb_value_getptr(v);
5670 UPB_ASSERT(ref->is_ref2 == ref2);
5671 if (--ref->count == 0) {
5672 free(ref);
5673 upb_inttable_removeptr(r->refs, owner, NULL);
5674 if (ref2) {
5675 /* We know this cast is safe when it is a ref2, because it's coming from
5676 * another refcounted object. */
5677 const upb_refcounted *from = owner;
5678 bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
5679 UPB_ASSERT(removed);
5680 }
5681 }
5682 upb_unlock();
5683 }
5684
checkref(const upb_refcounted * r,const void * owner,bool ref2)5685 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
5686 upb_value v;
5687 bool found;
5688 trackedref *ref;
5689
5690 upb_lock();
5691 found = upb_inttable_lookupptr(r->refs, owner, &v);
5692 UPB_ASSERT(found);
5693 ref = upb_value_getptr(v);
5694 UPB_ASSERT(ref->is_ref2 == ref2);
5695 upb_unlock();
5696 }
5697
5698 /* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
5699 * originate from the given owner. */
getref2s(const upb_refcounted * owner,upb_inttable * tab)5700 static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
5701 upb_inttable_iter i;
5702
5703 upb_lock();
5704 upb_inttable_begin(&i, owner->ref2s);
5705 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
5706 upb_value v;
5707 upb_value count;
5708 trackedref *ref;
5709 bool found;
5710
5711 upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
5712
5713 /* To get the count we need to look in the target's table. */
5714 found = upb_inttable_lookupptr(to->refs, owner, &v);
5715 UPB_ASSERT(found);
5716 ref = upb_value_getptr(v);
5717 count = upb_value_int32(ref->count);
5718
5719 upb_inttable_insertptr2(tab, to, count, &upb_alloc_debugrefs);
5720 }
5721 upb_unlock();
5722 }
5723
/* Closure passed to visit_check() by the debug visit() below. */
typedef struct {
  upb_inttable ref2;          /* subobj -> number of visits still expected. */
  const upb_refcounted *obj;  /* The object whose visit() is being verified. */
} check_state;
5728
visit_check(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)5729 static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
5730 void *closure) {
5731 check_state *s = closure;
5732 upb_inttable *ref2 = &s->ref2;
5733 upb_value v;
5734 bool removed;
5735 int32_t newcount;
5736
5737 UPB_ASSERT(obj == s->obj);
5738 UPB_ASSERT(subobj);
5739 removed = upb_inttable_removeptr(ref2, subobj, &v);
5740 /* The following assertion will fail if the visit() function visits a subobj
5741 * that it did not have a ref2 on, or visits the same subobj too many times. */
5742 UPB_ASSERT(removed);
5743 newcount = upb_value_getint32(v) - 1;
5744 if (newcount > 0) {
5745 upb_inttable_insert2(ref2, (uintptr_t)subobj, upb_value_int32(newcount),
5746 &upb_alloc_debugrefs);
5747 }
5748 }
5749
/* Debug wrapper around r->vtbl->visit.  Before forwarding to the real visitor
 * |v|, it runs the object's visit() once with visit_check to assert that it
 * reports exactly the ref2s recorded by the tracking tables. */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
   * exactly the set of nodes that visit() should visit. */
  check_state expected;

  upb_inttable_init2(&expected.ref2, UPB_CTYPE_INT32, &upb_alloc_debugrefs);
  expected.obj = r;
  getref2s(r, &expected.ref2);

  /* This should visit any children in the ref2 table. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, visit_check, &expected);
  }

  /* This assertion will fail if the visit() function missed any children. */
  UPB_ASSERT(upb_inttable_count(&expected.ref2) == 0);
  upb_inttable_uninit2(&expected.ref2, &upb_alloc_debugrefs);

  /* Verification done; now perform the visit the caller asked for. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, v, closure);
  }
}
5768
/* Allocates and initializes the two tracking tables of |r|, using the
 * aborting debug allocator. */
static void trackinit(upb_refcounted *r) {
  r->refs = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->refs));
  upb_inttable_init2(r->refs, UPB_CTYPE_PTR, &upb_alloc_debugrefs);

  r->ref2s = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->ref2s));
  upb_inttable_init2(r->ref2s, UPB_CTYPE_PTR, &upb_alloc_debugrefs);
}
5775
trackfree(const upb_refcounted * r)5776 static void trackfree(const upb_refcounted *r) {
5777 upb_inttable_uninit2(r->refs, &upb_alloc_debugrefs);
5778 upb_inttable_uninit2(r->ref2s, &upb_alloc_debugrefs);
5779 upb_free(&upb_alloc_debugrefs, r->refs);
5780 upb_free(&upb_alloc_debugrefs, r->ref2s);
5781 }
5782
5783 #else
5784
/* Ref tracking is compiled out without UPB_DEBUG_REFS: does nothing. */
static void track(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
5790
/* Ref tracking is compiled out without UPB_DEBUG_REFS: does nothing. */
static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
5796
/* Ref checking is compiled out without UPB_DEBUG_REFS: does nothing. */
static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
5802
/* No tracking tables exist without UPB_DEBUG_REFS: nothing to set up. */
static void trackinit(upb_refcounted *r) {
  UPB_UNUSED(r);
}
5806
/* No tracking tables exist without UPB_DEBUG_REFS: nothing to tear down. */
static void trackfree(const upb_refcounted *r) {
  UPB_UNUSED(r);
}
5810
/* Invokes |r|'s visit callback with visitor |v| and |closure|.  Objects whose
 * vtable has no visit callback are skipped. */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  if (!r->vtbl->visit) {
    return;
  }
  r->vtbl->visit(r, v, closure);
}
5815
5816 #endif /* UPB_DEBUG_REFS */
5817
5818
5819 /* freeze() *******************************************************************/
5820
5821 /* The freeze() operation is by far the most complicated part of this scheme.
5822 * We compute strongly-connected components and then mutate the graph such that
5823 * we preserve the invariants documented at the top of this file. And we must
5824 * handle out-of-memory errors gracefully (without leaving the graph
5825 * inconsistent), which adds to the fun. */
5826
/* The state used by the freeze operation (shared across many functions). */
typedef struct {
  int depth;     /* Current visit nesting depth (see tarjan_visit()). */
  int maxdepth;  /* Exceeding this depth aborts the freeze with an error. */
  uint64_t index;  /* Next Tarjan index to hand out (see push()). */
  /* Maps upb_refcounted* -> attributes (color, etc).  attr layout varies by
   * color. */
  upb_inttable objattr;
  upb_inttable stack;   /* stack of upb_refcounted* for Tarjan's algorithm. */
  /* Pairs of [uint32_t* refcount, upb_refcounted* leader], one pair per new
   * group; the refcounts are malloc'd (see tarjan_newgroup()). */
  upb_inttable groups;
  upb_status *status;  /* Receives the message when an error is raised. */
  jmp_buf err;         /* Set in freeze(); err() longjmps here. */
} tarjan;
5840
5841 static void release_ref2(const upb_refcounted *obj,
5842 const upb_refcounted *subobj,
5843 void *closure);
5844
5845 /* Node attributes -----------------------------------------------------------*/
5846
5847 /* After our analysis phase all nodes will be either GRAY or WHITE. */
5848
/* Node color, stored in the low 2 bits of an object's attribute word.  BLACK
 * must stay 0: objects absent from the attribute map read back as 0 (see
 * trygetattr()). */
typedef enum {
  BLACK = 0, /* Object has not been seen. */
  GRAY,  /* Object has been found via a refgroup but may not be reachable. */
  GREEN, /* Object is reachable and is currently on the Tarjan stack. */
  WHITE  /* Object is reachable and has been assigned a group (SCC). */
} color_t;
5855
err(tarjan * t)5856 UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
oom(tarjan * t)5857 UPB_NORETURN static void oom(tarjan *t) {
5858 upb_status_seterrmsg(t->status, "out of memory");
5859 err(t);
5860 }
5861
trygetattr(const tarjan * t,const upb_refcounted * r)5862 static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
5863 upb_value v;
5864 return upb_inttable_lookupptr(&t->objattr, r, &v) ?
5865 upb_value_getuint64(v) : 0;
5866 }
5867
getattr(const tarjan * t,const upb_refcounted * r)5868 static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
5869 upb_value v;
5870 bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
5871 UPB_ASSERT(found);
5872 return upb_value_getuint64(v);
5873 }
5874
/* Stores |attr| as the attribute word of |r|, replacing any previous value.
 *
 * If the table cannot grow to hold the entry, this raises an out-of-memory
 * error through oom() and does not return.  It must therefore only be called
 * while the setjmp() in freeze() is active, which holds for all callers in
 * this file (set_gray(), push(), pop() and set_lowlink()). */
static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
  upb_inttable_removeptr(&t->objattr, r, NULL);
  if (!upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr))) {
    oom(t);
  }
}
5879
color(tarjan * t,const upb_refcounted * r)5880 static color_t color(tarjan *t, const upb_refcounted *r) {
5881 return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. */
5882 }
5883
/* Marks a so-far unseen (BLACK) object as GRAY. */
static void set_gray(tarjan *t, const upb_refcounted *r) {
  UPB_ASSERT(color(t, r) == BLACK);

  setattr(t, r, GRAY);
}
5888
5889 /* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
push(tarjan * t,const upb_refcounted * r)5890 static void push(tarjan *t, const upb_refcounted *r) {
5891 UPB_ASSERT(color(t, r) == BLACK || color(t, r) == GRAY);
5892 /* This defines the attr layout for the GREEN state. "index" and "lowlink"
5893 * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
5894 setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
5895 if (++t->index == 0x80000000) {
5896 upb_status_seterrmsg(t->status, "too many objects to freeze");
5897 err(t);
5898 }
5899 upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
5900 }
5901
5902 /* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
5903 * SCC group. */
pop(tarjan * t)5904 static upb_refcounted *pop(tarjan *t) {
5905 upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
5906 UPB_ASSERT(color(t, r) == GREEN);
5907 /* This defines the attr layout for nodes in the WHITE state.
5908 * Top of group stack is [group, NULL]; we point at group. */
5909 setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
5910 return r;
5911 }
5912
/* Starts a new group (SCC): allocates a zeroed refcount and appends the pair
 * [refcount, NULL leader] to t->groups.  The leader slot is filled in later
 * by groupleader().
 *
 * On allocation failure, raises an out-of-memory error through oom() and
 * does not return. */
static void tarjan_newgroup(tarjan *t) {
  uint32_t *refcount = upb_gmalloc(sizeof(*refcount));

  if (!refcount) oom(t);
  *refcount = 0;

  if (!upb_inttable_push(&t->groups, upb_value_ptr(refcount))) {
    /* The table never took ownership, so release the refcount here. */
    upb_gfree(refcount);
    oom(t);
  }

  /* Push the empty group leader (we'll fill in leader later). */
  if (!upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
    /* The refcount is already stored in t->groups, and the error path of
     * freeze() frees every value in that table.  Freeing it here as well
     * would be a double free. */
    oom(t);
  }
}
5924
/* Returns the Tarjan index of a GREEN object (bits 2..32 of its attribute
 * word). */
static uint32_t idx(tarjan *t, const upb_refcounted *r) {
  uint64_t attr;

  UPB_ASSERT(color(t, r) == GREEN);
  attr = getattr(t, r);
  return (attr >> 2) & 0x7FFFFFFF;
}
5929
/* Returns the lowlink of |r| (the bits above bit 32 of its attribute word),
 * or UINT32_MAX if |r| is not GREEN. */
static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
  if (color(t, r) != GREEN) {
    return UINT32_MAX;
  }
  return getattr(t, r) >> 33;
}
5937
/* Replaces the lowlink of a GREEN object, keeping its color and index (the
 * low 33 bits of the attribute word) unchanged. */
static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
  uint64_t color_and_index;
  uint64_t shifted_lowlink;

  UPB_ASSERT(color(t, r) == GREEN);

  shifted_lowlink = (uint64_t)lowlink << 33;
  color_and_index = getattr(t, r) & 0x1FFFFFFFF;
  setattr(t, r, shifted_lowlink | color_and_index);
}
5942
/* Returns the refcount of the new group that WHITE object |r| was assigned
 * to.  The group number is stored above the low 8 bits of its attribute
 * word. */
static uint32_t *group(tarjan *t, upb_refcounted *r) {
  upb_value slot;
  uint64_t number;
  bool present;

  UPB_ASSERT(color(t, r) == WHITE);

  number = getattr(t, r) >> 8;
  present = upb_inttable_lookup(&t->groups, number, &slot);
  UPB_ASSERT(present);
  return upb_value_getptr(slot);
}
5954
5955 /* If the group leader for this object's group has not previously been set,
5956 * the given object is assigned to be its leader. */
groupleader(tarjan * t,upb_refcounted * r)5957 static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
5958 uint64_t leader_slot;
5959 upb_value v;
5960 bool found;
5961
5962 UPB_ASSERT(color(t, r) == WHITE);
5963 leader_slot = (getattr(t, r) >> 8) + 1;
5964 found = upb_inttable_lookup(&t->groups, leader_slot, &v);
5965 UPB_ASSERT(found);
5966 if (upb_value_getptr(v)) {
5967 return upb_value_getptr(v);
5968 } else {
5969 upb_inttable_remove(&t->groups, leader_slot, NULL);
5970 upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
5971 return r;
5972 }
5973 }
5974
5975
5976 /* Tarjan's algorithm --------------------------------------------------------*/
5977
5978 /* See:
5979 * http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
5980 static void do_tarjan(const upb_refcounted *obj, tarjan *t);
5981
tarjan_visit(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)5982 static void tarjan_visit(const upb_refcounted *obj,
5983 const upb_refcounted *subobj,
5984 void *closure) {
5985 tarjan *t = closure;
5986 if (++t->depth > t->maxdepth) {
5987 upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
5988 err(t);
5989 } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
5990 /* Do nothing: we don't want to visit or color already-frozen nodes,
5991 * and WHITE nodes have already been assigned a SCC. */
5992 } else if (color(t, subobj) < GREEN) {
5993 /* Subdef has not yet been visited; recurse on it. */
5994 do_tarjan(subobj, t);
5995 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
5996 } else if (color(t, subobj) == GREEN) {
5997 /* Subdef is in the stack and hence in the current SCC. */
5998 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
5999 }
6000 --t->depth;
6001 }
6002
/* Runs Tarjan's algorithm starting at |obj|: pushes it, visits its
 * subobjects, and, if |obj| turns out to be the root of an SCC, creates a new
 * group and pops the stack down to and including |obj|. */
static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
  if (color(t, obj) == BLACK) {
    /* We haven't seen this object's group; mark the whole group GRAY. */
    const upb_refcounted *member = obj;
    do {
      set_gray(t, member);
      member = member->next;
    } while (member != obj);
  }

  push(t, obj);
  visit(obj, tarjan_visit, t);

  if (lowlink(t, obj) != idx(t, obj)) {
    return;  /* Not an SCC root; stays on the stack for an ancestor. */
  }

  tarjan_newgroup(t);
  for (;;) {
    if (pop(t) == obj) break;
  }
}
6018
6019
6020 /* freeze() ------------------------------------------------------------------*/
6021
crossref(const upb_refcounted * r,const upb_refcounted * subobj,void * _t)6022 static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
6023 void *_t) {
6024 tarjan *t = _t;
6025 UPB_ASSERT(color(t, r) > BLACK);
6026 if (color(t, subobj) > BLACK && r->group != subobj->group) {
6027 /* Previously this ref was not reflected in subobj->group because they
6028 * were in the same group; now that they are split a ref must be taken. */
6029 refgroup(subobj->group);
6030 }
6031 }
6032
/* Freezes the |n| objects in |roots| and every mutable object reachable from
 * them.
 *
 * The work is split into an analysis phase (Tarjan's algorithm, which may
 * fail) and a mutation phase (three passes, which cannot fail).  Errors in
 * the analysis phase are reported by longjmp'ing to the setjmp() below with
 * the message already stored in |s|.
 *
 * Returns true on success.  Returns false if one of the work tables could
 * not be initialized or the analysis raised an error (out of memory, graph
 * deeper than |maxdepth|, or too many objects). */
static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
                   int maxdepth) {
  /* volatile: read at err4 after a possible longjmp back into this frame. */
  volatile bool ret = false;
  int i;
  upb_inttable_iter iter;

  /* We run in two passes so that we can allocate all memory before performing
   * any mutation of the input -- this allows us to leave the input unchanged
   * in the case of memory allocation failure. */
  tarjan t;
  t.index = 0;
  t.depth = 0;
  t.maxdepth = maxdepth;
  t.status = s;
  if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
  if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
  if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
  /* err() and oom() land here with a non-zero value. */
  if (setjmp(t.err) != 0) goto err4;


  /* Analysis phase: compute SCCs for everything reachable from the roots. */
  for (i = 0; i < n; i++) {
    if (color(&t, roots[i]) < GREEN) {
      do_tarjan(roots[i], &t);
    }
  }

  /* If we've made it this far, no further errors are possible so it's safe to
   * mutate the objects without risk of leaving them in an inconsistent state. */
  ret = true;

  /* The transformation that follows requires care.  The preconditions are:
   * - all objects in attr map are WHITE or GRAY, and are in mutable groups
   *   (groups of all mutable objs)
   * - no ref2(to, from) refs have incremented count(to) if both "to" and
   *   "from" are in our attr map (this follows from invariants (2) and (3)) */

  /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
   * new groups according to the SCC's we computed.  These new groups will
   * consist of only frozen objects.  None will be immediately collectible,
   * because WHITE objects are by definition reachable from one of "roots",
   * which the caller must own refs on. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    /* Since removal from a singly-linked list requires access to the object's
     * predecessor, we consider obj->next instead of obj for moving.  With the
     * while() loop we guarantee that we will visit every node's predecessor.
     * Proof:
     *  1. every node's predecessor is in our attr map.
     *  2. though the loop body may change a node's predecessor, it will only
     *     change it to be the node we are currently operating on, so with a
     *     while() loop we guarantee ourselves the chance to remove each node. */
    while (color(&t, obj->next) == WHITE &&
           group(&t, obj->next) != obj->next->group) {
      upb_refcounted *leader;

      /* Remove from old group. */
      upb_refcounted *move = obj->next;
      if (obj == move) {
        /* Removing the last object from a group. */
        UPB_ASSERT(*obj->group == obj->individual_count);
        upb_gfree(obj->group);
      } else {
        obj->next = move->next;
        /* This may decrease to zero; we'll collect GRAY objects (if any) that
         * remain in the group in the third pass. */
        UPB_ASSERT(*move->group >= move->individual_count);
        *move->group -= move->individual_count;
      }

      /* Add to new group. */
      leader = groupleader(&t, move);
      if (move == leader) {
        /* First object added to new group is its leader. */
        move->group = group(&t, move);
        move->next = move;
        *move->group = move->individual_count;
      } else {
        /* Group already has at least one object in it. */
        UPB_ASSERT(leader->group == group(&t, move));
        move->group = group(&t, move);
        move->next = leader->next;
        leader->next = move;
        *move->group += move->individual_count;
      }

      move->is_frozen = true;
    }
  }

  /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
   * increment count(to) if group(obj) != group(to) (which could now be the
   * case if "to" was just frozen). */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    visit(obj, crossref, &t);
  }

  /* Pass 3: GRAY objects are collected if their group's refcount dropped to
   * zero when we removed its white nodes.  This can happen if they had only
   * been kept alive by virtue of sharing a group with an object that was just
   * frozen.
   *
   * It is important that we do this last, since the GRAY object's free()
   * function could call unref2() on just-frozen objects, which will decrement
   * refs that were added in pass 2. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    if (obj->group == NULL || *obj->group == 0) {
      if (obj->group) {
        upb_refcounted *o;

        /* We eagerly free() the group's count (since we can't easily determine
         * the group's remaining size it's the easiest way to ensure it gets
         * done). */
        upb_gfree(obj->group);

        /* Visit to release ref2's (done in a separate pass since release_ref2
         * depends on o->group being unmodified so it can test merged()). */
        o = obj;
        do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);

        /* Mark "group" fields as NULL so we know to free the objects later in
         * this loop, but also don't try to delete the group twice. */
        o = obj;
        do { o->group = NULL; } while ((o = o->next) != obj);
      }
      freeobj(obj);
    }
  }

err4:
  if (!ret) {
    /* The analysis failed: the new group refcounts were never attached to
     * any object, so free them here.  Leader slots hold NULL at this point. */
    upb_inttable_begin(&iter, &t.groups);
    for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
      upb_gfree(upb_value_getptr(upb_inttable_iter_value(&iter)));
  }
  upb_inttable_uninit(&t.groups);
err3:
  upb_inttable_uninit(&t.stack);
err2:
  upb_inttable_uninit(&t.objattr);
err1:
  return ret;
}
6180
6181
6182 /* Misc internal functions ***************************************************/
6183
merged(const upb_refcounted * r,const upb_refcounted * r2)6184 static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
6185 return r->group == r2->group;
6186 }
6187
/* Merges the group of |from| into the group of |r|: the refcounts are summed
 * into r's count, from's count is freed, every member of from's ring is
 * pointed at the merged count, and the two rings are spliced into one.  Does
 * nothing if the two objects already share a group. */
static void merge(upb_refcounted *r, upb_refcounted *from) {
  upb_refcounted *const first = from;
  upb_refcounted *cursor;
  upb_refcounted *r_successor;

  if (merged(r, from)) return;

  *r->group += *from->group;
  upb_gfree(from->group);

  /* Set all refcount pointers in the "from" chain to the merged refcount.
   *
   * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
   * if the user continuously extends a group by one object.  Prevent this by
   * using one of the techniques in this paper:
   *     http://bioinfo.ict.ac.cn/~dbu/AlgorithmCourses/Lectures/Union-Find-Tarjan.pdf */
  cursor = first;
  do {
    cursor->group = r->group;
    cursor = cursor->next;
  } while (cursor != first);

  /* Merge the two circularly linked lists by swapping their next pointers. */
  r_successor = r->next;
  r->next = first->next;
  first->next = r_successor;
}
6210
6211 static void unref(const upb_refcounted *r);
6212
release_ref2(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)6213 static void release_ref2(const upb_refcounted *obj,
6214 const upb_refcounted *subobj,
6215 void *closure) {
6216 UPB_UNUSED(closure);
6217 untrack(subobj, obj, true);
6218 if (!merged(obj, subobj)) {
6219 UPB_ASSERT(subobj->is_frozen);
6220 unref(subobj);
6221 }
6222 }
6223
unref(const upb_refcounted * r)6224 static void unref(const upb_refcounted *r) {
6225 if (unrefgroup(r->group)) {
6226 const upb_refcounted *o;
6227
6228 upb_gfree(r->group);
6229
6230 /* In two passes, since release_ref2 needs a guarantee that any subobjs
6231 * are alive. */
6232 o = r;
6233 do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
6234
6235 o = r;
6236 do {
6237 const upb_refcounted *next = o->next;
6238 UPB_ASSERT(o->is_frozen || o->individual_count == 0);
6239 freeobj((upb_refcounted*)o);
6240 o = next;
6241 } while(o != r);
6242 }
6243 }
6244
/* Notifies trackfree() about o and then destroys o through the free function
 * in its vtable. */
static void freeobj(upb_refcounted *o) {
  trackfree(o);
  o->vtbl->free(o);
}
6249
6250
6251 /* Public interface ***********************************************************/
6252
/* Initializes r as an unfrozen object that forms a one-element ring, uses
 * vtbl, and owns a freshly allocated group counter.  On success a reference
 * is taken on behalf of owner and true is returned.  Returns false if the
 * group counter cannot be allocated. */
bool upb_refcounted_init(upb_refcounted *r,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
#ifndef NDEBUG
  /* Endianness check.  Unrelated to refcounting; it lives here only because
   * this function is a convenient place that runs in basically every program
   * using upb. */
  const int probe = 1;
#ifdef UPB_BIG_ENDIAN
  UPB_ASSERT(*(char*)&probe != 1);
#else
  UPB_ASSERT(*(char*)&probe == 1);
#endif
#endif

  r->vtbl = vtbl;
  r->next = r;  /* Ring containing only this object. */
  r->is_frozen = false;
  r->individual_count = 0;

  r->group = upb_gmalloc(sizeof(*r->group));
  if (r->group == NULL) return false;
  *r->group = 0;

  trackinit(r);
  upb_refcounted_ref(r, owner);
  return true;
}
6279
upb_refcounted_isfrozen(const upb_refcounted * r)6280 bool upb_refcounted_isfrozen(const upb_refcounted *r) {
6281 return r->is_frozen;
6282 }
6283
upb_refcounted_ref(const upb_refcounted * r,const void * owner)6284 void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
6285 track(r, owner, false);
6286 if (!r->is_frozen)
6287 ((upb_refcounted*)r)->individual_count++;
6288 refgroup(r->group);
6289 }
6290
upb_refcounted_unref(const upb_refcounted * r,const void * owner)6291 void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
6292 untrack(r, owner, false);
6293 if (!r->is_frozen)
6294 ((upb_refcounted*)r)->individual_count--;
6295 unref(r);
6296 }
6297
/* Records a ref2 from "from" (which must not be frozen) to r.  A frozen r
 * simply gains a group reference; an unfrozen r has its group merged with
 * the group of "from". */
void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
  UPB_ASSERT(!from->is_frozen);  /* Non-const pointer implies this. */
  track(r, from, true);

  if (!r->is_frozen) {
    merge((upb_refcounted*)r, from);
    return;
  }
  refgroup(r->group);
}
6307
/* Drops the ref2 from "from" (which must not be frozen) to r.  A frozen r
 * loses a group reference; for an unfrozen r the only action is asserting
 * that the two objects share a group. */
void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
  UPB_ASSERT(!from->is_frozen);  /* Non-const pointer implies this. */
  untrack(r, from, true);

  if (!r->is_frozen) {
    UPB_ASSERT(merged(r, from));
    return;
  }
  unref(r);
}
6317
/* Moves a reference on r from one owner to another.  The new ref (if "to" is
 * non-NULL) is taken before the old one (if "from" is non-NULL) is released.
 * "from" and "to" must differ. */
void upb_refcounted_donateref(
    const upb_refcounted *r, const void *from, const void *to) {
  UPB_ASSERT(from != to);

  if (to) {
    upb_refcounted_ref(r, to);
  }
  if (from) {
    upb_refcounted_unref(r, from);
  }
}
6326
/* Public wrapper around checkref() for a regular (non-ref2) reference held
 * by owner on r. */
void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
  const bool is_ref2 = false;
  checkref(r, owner, is_ref2);
}
6330
/* Freezes the n objects in roots by delegating to freeze().  Every root is
 * asserted to be unfrozen beforehand, and when a status object is supplied
 * the return value is asserted to agree with upb_ok(s). */
bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
                           int maxdepth) {
  bool ok;
  int idx = 0;

  while (idx < n) {
    UPB_ASSERT(!roots[idx]->is_frozen);
    idx++;
  }

  ok = freeze(roots, n, s, maxdepth);
  UPB_ASSERT(!s || ok == upb_ok(s));
  return ok;
}
6342
6343
/* Pushes the len bytes at buf through sink as one start / putbuf / end
 * sequence.  putbuf is skipped for an empty buffer and counts as failed when
 * the sink reports fewer than len bytes; end is only issued when everything
 * before it succeeded.  Returns the overall success flag. */
bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink *sink) {
  upb_bufhandle handle;
  void *subc;
  bool ok;

  upb_bufhandle_init(&handle);
  upb_bufhandle_setbuf(&handle, buf, 0);

  ok = upb_bytessink_start(sink, len, &subc);
  if (ok) {
    if (len != 0) {
      ok = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len);
    }
    if (ok) {
      ok = upb_bytessink_end(sink);
    }
  }

  upb_bufhandle_uninit(&handle);
  return ok;
}
6360
/* A bytes sink that accumulates everything written to it in a single
 * growable buffer. */
struct upb_bufsink {
  upb_byteshandler handler;  /* Start/string handlers installed by upb_bufsink_new(). */
  upb_bytessink sink;        /* Sink bound to "handler"; returned by upb_bufsink_sink(). */
  upb_env *env;              /* Environment used for every allocation and free. */
  char *ptr;                 /* Start of the accumulated data. */
  size_t len, size;          /* Bytes currently stored / bytes allocated at ptr. */
};
6368
upb_bufsink_start(void * _sink,const void * hd,size_t size_hint)6369 static void *upb_bufsink_start(void *_sink, const void *hd, size_t size_hint) {
6370 upb_bufsink *sink = _sink;
6371 UPB_UNUSED(hd);
6372 UPB_UNUSED(size_hint);
6373 sink->len = 0;
6374 return sink;
6375 }
6376
upb_bufsink_string(void * _sink,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)6377 static size_t upb_bufsink_string(void *_sink, const void *hd, const char *ptr,
6378 size_t len, const upb_bufhandle *handle) {
6379 upb_bufsink *sink = _sink;
6380 size_t new_size = sink->size;
6381
6382 UPB_ASSERT(new_size > 0);
6383 UPB_UNUSED(hd);
6384 UPB_UNUSED(handle);
6385
6386 while (sink->len + len > new_size) {
6387 new_size *= 2;
6388 }
6389
6390 if (new_size != sink->size) {
6391 sink->ptr = upb_env_realloc(sink->env, sink->ptr, sink->size, new_size);
6392 sink->size = new_size;
6393 }
6394
6395 memcpy(sink->ptr + sink->len, ptr, len);
6396 sink->len += len;
6397
6398 return len;
6399 }
6400
upb_bufsink_new(upb_env * env)6401 upb_bufsink *upb_bufsink_new(upb_env *env) {
6402 upb_bufsink *sink = upb_env_malloc(env, sizeof(upb_bufsink));
6403 upb_byteshandler_init(&sink->handler);
6404 upb_byteshandler_setstartstr(&sink->handler, upb_bufsink_start, NULL);
6405 upb_byteshandler_setstring(&sink->handler, upb_bufsink_string, NULL);
6406
6407 upb_bytessink_reset(&sink->sink, &sink->handler, sink);
6408
6409 sink->env = env;
6410 sink->size = 32;
6411 sink->ptr = upb_env_malloc(env, sink->size);
6412 sink->len = 0;
6413
6414 return sink;
6415 }
6416
/* Releases the sink's data buffer and then the sink object itself, both
 * through the environment the sink was created with. */
void upb_bufsink_free(upb_bufsink *sink) {
  upb_env *env = sink->env;

  upb_env_free(env, sink->ptr);
  upb_env_free(env, sink);
}
6421
upb_bufsink_sink(upb_bufsink * sink)6422 upb_bytessink *upb_bufsink_sink(upb_bufsink *sink) {
6423 return &sink->sink;
6424 }
6425
upb_bufsink_getdata(const upb_bufsink * sink,size_t * len)6426 const char *upb_bufsink_getdata(const upb_bufsink *sink, size_t *len) {
6427 *len = sink->len;
6428 return sink->ptr;
6429 }
6430 /*
6431 ** upb_table Implementation
6432 **
6433 ** Implementation is heavily inspired by Lua's ltable.c.
6434 */
6435
6436
6437 #include <string.h>
6438
/* Upper bound on the values log2ceil() returns, and hence the largest
 * power-of-two bucket index used by upb_inttable_compact2(). */
#define UPB_MAXARRSIZE 16  /* 64k. */
6440
/* From Chromium.
 *
 * Number of elements in the array x.  The second factor divides by zero
 * whenever sizeof(x) is not a multiple of the element size. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
6444
/* Debug-only sanity check that "a" is the allocator recorded in the table.
 * Both parameters are marked unused for builds where the check is not
 * evaluated. */
static void upb_check_alloc(upb_table *t, upb_alloc *a) {
  UPB_UNUSED(a);
  UPB_UNUSED(t);
  UPB_ASSERT_DEBUGVAR(t->alloc == a);
}
6450
/* Maximum load factor of the hash part.  isfull() reports a table as full
 * when adding one more entry would push count/size above this value. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
6457
/* Returns true if v has at most one bit set, i.e. v is zero or a power of
 * two. */
bool is_pow2(uint64_t v) {
  if (v == 0) {
    return true;
  }
  /* Clearing the lowest set bit leaves zero only for a single-bit value. */
  return (v & (v - 1)) == 0;
}
6459
log2ceil(uint64_t v)6460 int log2ceil(uint64_t v) {
6461 int ret = 0;
6462 bool pow2 = is_pow2(v);
6463 while (v >>= 1) ret++;
6464 ret = pow2 ? ret : ret + 1; /* Ceiling. */
6465 return UPB_MIN(UPB_MAXARRSIZE, ret);
6466 }
6467
/* Duplicates the NUL-terminated string s using allocator a.  Returns
 * whatever upb_strdup2() returns for the measured length. */
char *upb_strdup(const char *s, upb_alloc *a) {
  const size_t length = strlen(s);
  return upb_strdup2(s, length, a);
}
6471
/* Copies the len bytes at s into a new (len + 1)-byte buffer obtained from
 * allocator a and appends a terminating NUL.  The input need not be
 * NUL-terminated, since it may be a raw binary buffer.  Returns NULL when
 * len is SIZE_MAX (len + 1 would overflow) or when allocation fails. */
char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
  char *copy;

  if (len == SIZE_MAX) return NULL;

  copy = upb_malloc(a, len + 1);
  if (copy == NULL) return NULL;

  memcpy(copy, s, len);
  copy[len] = 0;
  return copy;
}
6488
/* A type to represent the lookup key of either a strtable or an inttable.
 * Which member is valid depends on the table kind: intkey() fills "num",
 * strkey2() fills "str". */
typedef union {
  uintptr_t num;      /* Integer key. */
  struct {
    const char *str;  /* First byte of the key; not copied by strkey2(). */
    size_t len;       /* Length of the key in bytes. */
  } str;
} lookupkey_t;
6497
strkey2(const char * str,size_t len)6498 static lookupkey_t strkey2(const char *str, size_t len) {
6499 lookupkey_t k;
6500 k.str.str = str;
6501 k.str.len = len;
6502 return k;
6503 }
6504
intkey(uintptr_t key)6505 static lookupkey_t intkey(uintptr_t key) {
6506 lookupkey_t k;
6507 k.num = key;
6508 return k;
6509 }
6510
/* Computes the hash of a key as stored in the table (see strhash, inthash). */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Compares a stored key against a lookup key (see streql, inteql). */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
6513
6514 /* Base table (shared code) ***************************************************/
6515
6516 /* For when we need to cast away const. */
mutable_entries(upb_table * t)6517 static upb_tabent *mutable_entries(upb_table *t) {
6518 return (upb_tabent*)t->entries;
6519 }
6520
/* Returns true if the table has no slots at all, or if inserting one more
 * entry would raise the load factor above MAX_LOAD. */
static bool isfull(upb_table *t) {
  double load_after_insert;

  if (upb_table_size(t) == 0) return true;

  load_after_insert = (double)(t->count + 1) / upb_table_size(t);
  return load_after_insert > MAX_LOAD;
}
6528
/* Initializes t as an empty table of value type ctype and log2 size size_lg2.
 * A table with a non-zero slot count gets a zero-filled entry array from
 * allocator a; otherwise entries is set to NULL.  Returns false only if that
 * allocation fails. */
static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2,
                 upb_alloc *a) {
  size_t nbytes;

  /* size_lg2 is stored first; upb_table_size(t) is evaluated afterwards. */
  t->size_lg2 = size_lg2;
  t->ctype = ctype;
  t->count = 0;
  if (upb_table_size(t)) {
    t->mask = upb_table_size(t) - 1;
  } else {
    t->mask = 0;
  }
#ifndef NDEBUG
  t->alloc = a;  /* Remembered for upb_check_alloc(). */
#endif

  nbytes = upb_table_size(t) * sizeof(upb_tabent);
  if (nbytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = upb_malloc(a, nbytes);
  if (!t->entries) return false;
  memset(mutable_entries(t), 0, nbytes);
  return true;
}
6550
/* Frees the table's entry array through allocator a, after checking that a
 * is the allocator the table was created with. */
static void uninit(upb_table *t, upb_alloc *a) {
  upb_tabent *ents;

  upb_check_alloc(t, a);
  ents = mutable_entries(t);
  upb_free(a, ents);
}
6555
emptyent(upb_table * t)6556 static upb_tabent *emptyent(upb_table *t) {
6557 upb_tabent *e = mutable_entries(t) + upb_table_size(t);
6558 while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
6559 }
6560
getentry_mutable(upb_table * t,uint32_t hash)6561 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
6562 return (upb_tabent*)upb_getentry(t, hash);
6563 }
6564
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)6565 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
6566 uint32_t hash, eqlfunc_t *eql) {
6567 const upb_tabent *e;
6568
6569 if (t->size_lg2 == 0) return NULL;
6570 e = upb_getentry(t, hash);
6571 if (upb_tabent_isempty(e)) return NULL;
6572 while (1) {
6573 if (eql(e->key, key)) return e;
6574 if ((e = e->next) == NULL) return NULL;
6575 }
6576 }
6577
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)6578 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
6579 uint32_t hash, eqlfunc_t *eql) {
6580 return (upb_tabent*)findentry(t, key, hash, eql);
6581 }
6582
/* Returns whether key is present in t.  When it is and v is non-NULL, the
 * stored value is written to *v tagged with the table's ctype. */
static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
                   uint32_t hash, eqlfunc_t *eql) {
  const upb_tabent *hit = findentry(t, key, hash, eql);

  if (hit == NULL) return false;

  if (v != NULL) {
    _upb_value_setval(v, hit->val.val, t->ctype);
  }
  return true;
}
6595
/* Inserts (tabkey, val) into t.  The given key must not already exist in the
 * table.
 *
 * key is the lookup form of the key and is only used by the assertions;
 * tabkey is what gets stored.  hash must be the hash of the key, hashfunc
 * must be able to rehash any key already stored in t, and eql compares a
 * stored key with a lookup key.
 *
 * This function never grows the table: an occupied main position is resolved
 * by taking a free slot from emptyent(), so the callers in this file check
 * isfull() and resize beforehand. */
static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash,
                   hashfunc_t *hashfunc, eqlfunc_t *eql) {
  upb_tabent *mainpos_e;
  upb_tabent *our_e;

  UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
  UPB_ASSERT_DEBUGVAR(val.ctype == t->ctype);

  t->count++;
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent *new_e = emptyent(t);
    /* Head of collider's chain. */
    upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain).  Insert to new ent and append to this
       * chain. */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain).  This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e;  /* copies next. */
      /* Find the predecessor of the evicted ent so it can be re-linked to the
       * ent's new location. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        UPB_ASSERT(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  /* Fill in the slot chosen above. */
  our_e->key = tabkey;
  our_e->val.val = val.val;
  UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
}
6642
/* Removes the entry matching key from t.
 *
 * Returns true if an entry was removed.  In that case the removed value is
 * written to *val (if val is non-NULL) and the stored key to *removed (if
 * removed is non-NULL).  Returns false, leaving t untouched, if no entry
 * matches.  A slot is marked empty by setting its key to 0. */
static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
  upb_tabent *chain;

  /* A table with size_lg2 == 0 has no slots (init() leaves entries NULL), so
   * there is nothing to probe.  findentry() applies the same guard. */
  if (t->size_lg2 == 0) return false;

  chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val, t->ctype);
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Pull the second element into the head slot so the chain still starts
       * at its main position, then free the slot it came from. */
      upb_tabent *move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0;  /* Make the slot empty. */
    } else {
      chain->key = 0;  /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove; "chain" is its predecessor. */
      upb_tabent *victim = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, victim->val.val, t->ctype);
      if (removed) *removed = victim->key;
      victim->key = 0;  /* Make the slot empty. */
      chain->next = victim->next;
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}
6681
next(const upb_table * t,size_t i)6682 static size_t next(const upb_table *t, size_t i) {
6683 do {
6684 if (++i >= upb_table_size(t))
6685 return SIZE_MAX;
6686 } while(upb_tabent_isempty(&t->entries[i]));
6687
6688 return i;
6689 }
6690
/* Returns the index of the first non-empty entry, or SIZE_MAX for a table
 * without entries.  Starting next() at (size_t)-1 makes its pre-increment
 * wrap around to index 0. */
static size_t begin(const upb_table *t) {
  const size_t before_first = (size_t)-1;
  return next(t, before_first);
}
6694
6695
6696 /* upb_strtable ***************************************************************/
6697
6698 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
6699
strcopy(lookupkey_t k2,upb_alloc * a)6700 static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
6701 uint32_t len = (uint32_t) k2.str.len;
6702 char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
6703 if (str == NULL) return 0;
6704 memcpy(str, &len, sizeof(uint32_t));
6705 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
6706 return (uintptr_t)str;
6707 }
6708
/* hashfunc_t for string tables: hashes the bytes of a stored string key with
 * MurmurHash2 and seed 0. */
static uint32_t strhash(upb_tabkey key) {
  uint32_t nbytes;
  char *bytes = upb_tabstr(key, &nbytes);

  return MurmurHash2(bytes, nbytes, 0);
}
6714
/* eqlfunc_t for string tables: a stored key equals a lookup key when the
 * lengths match and the bytes compare equal. */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t stored_len;
  char *stored = upb_tabstr(k1, &stored_len);

  if (stored_len != k2.str.len) return false;
  return memcmp(stored, k2.str.str, stored_len) == 0;
}
6720
/* Initializes an empty string table holding values of type ctype, using
 * allocator a and an initial size_lg2 of 2.  Returns false if allocation
 * fails. */
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
  const uint8_t initial_size_lg2 = 2;
  return init(&t->t, ctype, initial_size_lg2, a);
}
6724
/* Destroys a string table: the key stored in every slot is passed to
 * upb_free() (empty slots hold a key of 0), then the entry array itself is
 * released. */
void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
  size_t slot = 0;

  while (slot < upb_table_size(&t->t)) {
    upb_free(a, (void*)t->t.entries[slot].key);
    slot++;
  }
  uninit(&t->t, a);
}
6731
/* Rebuilds t as a table of log2 size size_lg2 containing the same entries.
 *
 * Every key is copied into a freshly initialized table, which then replaces
 * the old one.  Returns true on success.  If allocating the new table or
 * copying any entry fails, the partially built table is destroyed, t is left
 * exactly as it was, and false is returned. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
  upb_strtable new_table;
  upb_strtable_iter i;

  upb_check_alloc(&t->t, a);

  if (!init(&new_table.t, t->t.ctype, size_lg2, a))
    return false;
  upb_strtable_begin(&i, t);
  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
    if (!upb_strtable_insert3(
            &new_table,
            upb_strtable_iter_key(&i),
            upb_strtable_iter_keylength(&i),
            upb_strtable_iter_value(&i),
            a)) {
      /* Silently dropping this entry would lose data once the old table is
       * destroyed below, so undo the partial copy and report the failure. */
      upb_strtable_uninit2(&new_table, a);
      return false;
    }
  }
  upb_strtable_uninit2(t, a);
  *t = new_table;
  return true;
}
6753
/* Inserts the len-byte key k with value v.  The table first grows to twice
 * its size if isfull() reports it as full, and the key bytes are copied into
 * table-owned storage.  Returns false if growing the table or copying the
 * key fails.  The key must not already be present (see insert()). */
bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
                          upb_value v, upb_alloc *a) {
  lookupkey_t key;
  upb_tabkey tabkey;
  uint32_t hash;

  upb_check_alloc(&t->t, a);

  /* Need to resize?  New table of double the size, add old elements to it. */
  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
    return false;
  }

  key = strkey2(k, len);
  tabkey = strcopy(key, a);
  if (tabkey == 0) {
    return false;
  }

  hash = MurmurHash2(key.str.str, key.str.len, 0);
  insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
  return true;
}
6777
/* Looks up the len-byte key.  Returns true if it is present and, when v is
 * non-NULL, stores the associated value in *v. */
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
                          upb_value *v) {
  const lookupkey_t wanted = strkey2(key, len);
  const uint32_t hash = MurmurHash2(key, len, 0);

  return lookup(&t->t, wanted, v, hash, &streql);
}
6783
/* Removes the len-byte key.  On success the table's copy of the key is freed
 * through alloc, the removed value is stored in *val when val is non-NULL,
 * and true is returned.  Returns false if the key is not present. */
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
                          upb_value *val, upb_alloc *alloc) {
  upb_tabkey stored_key;
  const uint32_t hash = MurmurHash2(key, len, 0);

  if (!rm(&t->t, strkey2(key, len), val, &stored_key, hash, &streql)) {
    return false;
  }

  upb_free(alloc, (void*)stored_key);
  return true;
}
6795
6796 /* Iteration */
6797
str_tabent(const upb_strtable_iter * i)6798 static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
6799 return &i->t->t.entries[i->index];
6800 }
6801
/* Positions the iterator on the first entry of t, or at the end if t has no
 * entries. */
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  i->index = begin(&t->t);
  i->t = t;
}
6806
/* Advances the iterator to the next occupied entry; the index becomes
 * SIZE_MAX when there is none. */
void upb_strtable_next(upb_strtable_iter *i) {
  const upb_table *tab = &i->t->t;
  i->index = next(tab, i->index);
}
6810
upb_strtable_done(const upb_strtable_iter * i)6811 bool upb_strtable_done(const upb_strtable_iter *i) {
6812 return i->index >= upb_table_size(&i->t->t) ||
6813 upb_tabent_isempty(str_tabent(i));
6814 }
6815
upb_strtable_iter_key(const upb_strtable_iter * i)6816 const char *upb_strtable_iter_key(const upb_strtable_iter *i) {
6817 UPB_ASSERT(!upb_strtable_done(i));
6818 return upb_tabstr(str_tabent(i)->key, NULL);
6819 }
6820
upb_strtable_iter_keylength(const upb_strtable_iter * i)6821 size_t upb_strtable_iter_keylength(const upb_strtable_iter *i) {
6822 uint32_t len;
6823 UPB_ASSERT(!upb_strtable_done(i));
6824 upb_tabstr(str_tabent(i)->key, &len);
6825 return len;
6826 }
6827
upb_strtable_iter_value(const upb_strtable_iter * i)6828 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
6829 UPB_ASSERT(!upb_strtable_done(i));
6830 return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
6831 }
6832
/* Forces the iterator into the "done" state by giving it the SIZE_MAX
 * index that next() uses to signal the end. */
void upb_strtable_iter_setdone(upb_strtable_iter *i) {
  const size_t end_marker = SIZE_MAX;
  i->index = end_marker;
}
6836
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)6837 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
6838 const upb_strtable_iter *i2) {
6839 if (upb_strtable_done(i1) && upb_strtable_done(i2))
6840 return true;
6841 return i1->t == i2->t && i1->index == i2->index;
6842 }
6843
6844
6845 /* upb_inttable ***************************************************************/
6846
6847 /* For inttables we use a hybrid structure where small keys are kept in an
6848 * array and large keys are put in the hash table. */
6849
/* hashfunc_t for integer tables; forwards to upb_inthash(). */
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
6851
/* eqlfunc_t for integer tables: plain numeric equality between the stored
 * key and the lookup key. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  const bool same = (k1 == k2.num);
  return same;
}
6855
mutable_array(upb_inttable * t)6856 static upb_tabval *mutable_array(upb_inttable *t) {
6857 return (upb_tabval*)t->array;
6858 }
6859
inttable_val(upb_inttable * t,uintptr_t key)6860 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
6861 if (key < t->array_size) {
6862 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
6863 } else {
6864 upb_tabent *e =
6865 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
6866 return e ? &e->val : NULL;
6867 }
6868 }
6869
inttable_val_const(const upb_inttable * t,uintptr_t key)6870 static const upb_tabval *inttable_val_const(const upb_inttable *t,
6871 uintptr_t key) {
6872 return inttable_val((upb_inttable*)t, key);
6873 }
6874
upb_inttable_count(const upb_inttable * t)6875 size_t upb_inttable_count(const upb_inttable *t) {
6876 return t->t.count + t->array_count;
6877 }
6878
/* Consistency check that is compiled in only when UPB_DEBUG_TABLE is defined
 * and NDEBUG is not: every key reachable by iteration must be found by
 * lookup, and the number of iterated entries must match
 * upb_inttable_count().  Otherwise this function does nothing. */
static void check(upb_inttable *t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* This check is very expensive (makes inserts/deletes O(N)). */
    upb_inttable_iter it;
    size_t seen = 0;

    for (upb_inttable_begin(&it, t);
         !upb_inttable_done(&it);
         upb_inttable_next(&it)) {
      UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&it), NULL));
      seen++;
    }
    UPB_ASSERT(seen == upb_inttable_count(t));
  }
#endif
}
6894
/* Initializes an integer table whose array part has asize slots (at least 1)
 * and whose hash part has log2 size hsize_lg2.  Returns false, leaving
 * nothing allocated, if either allocation fails. */
bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
                            size_t asize, int hsize_lg2, upb_alloc *a) {
  size_t array_bytes;

  if (!init(&t->t, ctype, hsize_lg2, a)) return false;

  t->array_count = 0;
  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_size = UPB_MAX(1, asize);

  /* NOTE(review): sized with sizeof(upb_value) although the array is accessed
   * as upb_tabval elements elsewhere in this file -- confirm this is
   * intentional. */
  array_bytes = t->array_size * sizeof(upb_value);
  t->array = upb_malloc(a, array_bytes);
  if (t->array == NULL) {
    uninit(&t->t, a);
    return false;
  }

  /* Fill every byte with 0xff to initialize all array slots. */
  memset(mutable_array(t), 0xff, array_bytes);
  check(t);
  return true;
}
6914
/* Initializes an integer table with the default geometry: a requested array
 * size of 0 and a hash part of log2 size 4. */
bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
  const size_t default_asize = 0;
  const int default_hsize_lg2 = 4;

  return upb_inttable_sizedinit(t, ctype, default_asize, default_hsize_lg2, a);
}
6918
/* Releases both halves of the table: first the hash part, then the array
 * part. */
void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
  upb_tabval *arr;

  uninit(&t->t, a);
  arr = mutable_array(t);
  upb_free(a, arr);
}
6923
/* Inserts key -> val.  Keys below array_size go into the array part (the slot
 * is asserted to be free); all others go into the hash part, which is first
 * rebuilt at twice its size when isfull() reports it as full.  The array
 * part is never resized here.  Returns false only if that rebuild cannot
 * allocate its new table. */
bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
                          upb_alloc *a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_ASSERT(upb_arrhas(tabval));  /* This will reject (uint64_t)-1.  Fix this. */

  upb_check_alloc(&t->t, a);

  if (key < t->array_size) {
    /* Array part. */
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    mutable_array(t)[key].val = val.val;
    t->array_count++;
    check(t);
    return true;
  }

  if (isfull(&t->t)) {
    /* Need to resize the hash part, but we re-use the array part. */
    upb_table grown;
    size_t pos;

    if (!init(&grown, t->t.ctype, t->t.size_lg2 + 1, a)) {
      return false;
    }

    /* Re-insert every existing hash entry into the larger table. */
    pos = begin(&t->t);
    while (pos < upb_table_size(&t->t)) {
      const upb_tabent *old = &t->t.entries[pos];
      upb_value moved;

      _upb_value_setval(&moved, old->val.val, t->t.ctype);
      insert(&grown, intkey(old->key), old->key, moved,
             upb_inthash(old->key), &inthash, &inteql);
      pos = next(&t->t, pos);
    }

    UPB_ASSERT(t->t.count == grown.count);

    uninit(&t->t, a);
    t->t = grown;
  }

  insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  check(t);
  return true;
}
6966
/* Returns true if key is present.  When it is and v is non-NULL, the stored
 * value is written to *v tagged with the table's ctype. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
  const upb_tabval *slot = inttable_val_const(t, key);

  if (slot == NULL) return false;

  if (v != NULL) {
    _upb_value_setval(v, slot->val, t->t.ctype);
  }
  return true;
}
6973
/* Overwrites the value stored for an existing key.  Returns false, changing
 * nothing, if the key is not present. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
  upb_tabval *slot = inttable_val(t, key);

  if (slot == NULL) return false;

  slot->val = val.val;
  return true;
}
6980
/* Removes key from the table.  Returns true if it was present, in which case
 * the removed value is written to *val when val is non-NULL.  Array-part
 * slots are reset to the empty marker; hash-part keys are handed to rm(). */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
  bool removed = false;

  if (key >= t->array_size) {
    /* Hash part. */
    removed = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  } else if (upb_arrhas(t->array[key])) {
    /* Occupied array slot. */
    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
    t->array_count--;
    if (val) {
      _upb_value_setval(val, t->array[key].val, t->t.ctype);
    }
    mutable_array(t)[key] = empty;
    removed = true;
  }

  check(t);
  return removed;
}
7001
/* Appends val using the current entry count as its key, so the table can be
 * used as a stack together with upb_inttable_pop(). */
bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
  uintptr_t next_key;

  upb_check_alloc(&t->t, a);
  next_key = upb_inttable_count(t);
  return upb_inttable_insert2(t, next_key, val, a);
}
7006
upb_inttable_pop(upb_inttable * t)7007 upb_value upb_inttable_pop(upb_inttable *t) {
7008 upb_value val;
7009 bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
7010 UPB_ASSERT(ok);
7011 return val;
7012 }
7013
/* Inserts val under a pointer key; the pointer's integer value is used as
 * the table key. */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
                             upb_alloc *a) {
  uintptr_t int_key;

  upb_check_alloc(&t->t, a);
  int_key = (uintptr_t)key;
  return upb_inttable_insert2(t, int_key, val, a);
}
7019
/* Pointer-keyed variant of upb_inttable_lookup(). */
bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
                            upb_value *v) {
  const uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_lookup(t, int_key, v);
}
7024
/* Pointer-keyed variant of upb_inttable_remove(). */
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  const uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_remove(t, int_key, val);
}
7028
/* Rebuilds t with an array part and hash part sized for its current keys.
 *
 * The keys are bucketed by log2ceil(); the array part is then made just large
 * enough for the largest bucket that still keeps array utilization at or
 * above MIN_DENSITY, and the remaining keys go into a hash part sized for
 * MAX_LOAD.
 *
 * This function returns void, so it cannot report failure.  If the new table
 * cannot be allocated or populated, the partially built table is discarded
 * and t is left unchanged (uncompacted but fully valid). */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  upb_check_alloc(&t->t, a);

  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    /* Too sparse: the keys of this bucket will live in the hash part. */
    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1;  /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    size_t hashsize_lg2 = log2ceil(hash_size);

    if (!upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2,
                                a)) {
      /* new_t was never set up; keep the existing table as it is. */
      return;
    }
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      if (!upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a)) {
        /* Replacing t with an incomplete copy would lose entries. */
        upb_inttable_uninit2(&new_t, a);
        return;
      }
    }
    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  upb_inttable_uninit2(t, a);
  *t = new_t;
}
7087
7088 /* Iteration. */
7089
int_tabent(const upb_inttable_iter * i)7090 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
7091 UPB_ASSERT(!i->array_part);
7092 return &i->t->t.entries[i->index];
7093 }
7094
int_arrent(const upb_inttable_iter * i)7095 static upb_tabval int_arrent(const upb_inttable_iter *i) {
7096 UPB_ASSERT(i->array_part);
7097 return i->t->array[i->index];
7098 }
7099
/* Positions the iterator on the first entry of t.  The iterator is placed
 * just before array slot 0 and then advanced once. */
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  i->array_part = true;
  i->index = -1;  /* Incremented to 0 by upb_inttable_next(). */
  i->t = t;
  upb_inttable_next(i);
}
7106
/* Advances the iterator to the next occupied entry.  The array part is
 * traversed first; once it is exhausted the iterator switches to the hash
 * part and continues there. */
void upb_inttable_next(upb_inttable_iter *iter) {
  const upb_inttable *t = iter->t;

  if (!iter->array_part) {
    iter->index = next(&t->t, iter->index);
    return;
  }

  /* Skip over unoccupied array slots. */
  while (++iter->index < t->array_size) {
    if (upb_arrhas(int_arrent(iter))) {
      return;
    }
  }

  /* Array part exhausted; move on to the first hash entry. */
  iter->array_part = false;
  iter->index = begin(&t->t);
}
7121
upb_inttable_done(const upb_inttable_iter * i)7122 bool upb_inttable_done(const upb_inttable_iter *i) {
7123 if (i->array_part) {
7124 return i->index >= i->t->array_size ||
7125 !upb_arrhas(int_arrent(i));
7126 } else {
7127 return i->index >= upb_table_size(&i->t->t) ||
7128 upb_tabent_isempty(int_tabent(i));
7129 }
7130 }
7131
upb_inttable_iter_key(const upb_inttable_iter * i)7132 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
7133 UPB_ASSERT(!upb_inttable_done(i));
7134 return i->array_part ? i->index : int_tabent(i)->key;
7135 }
7136
upb_inttable_iter_value(const upb_inttable_iter * i)7137 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
7138 UPB_ASSERT(!upb_inttable_done(i));
7139 return _upb_value_val(
7140 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
7141 i->t->t.ctype);
7142 }
7143
/* Forces the iterator into the "done" state: it is moved to the hash part
 * with the largest possible index, which upb_inttable_done() treats as past
 * the end. */
void upb_inttable_iter_setdone(upb_inttable_iter *i) {
  i->array_part = false;
  i->index = SIZE_MAX;
}
7148
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)7149 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
7150 const upb_inttable_iter *i2) {
7151 if (upb_inttable_done(i1) && upb_inttable_done(i2))
7152 return true;
7153 return i1->t == i2->t && i1->index == i2->index &&
7154 i1->array_part == i2->array_part;
7155 }
7156
7157 #ifdef UPB_UNALIGNED_READS_OK
7158 /* -----------------------------------------------------------------------------
7159 * MurmurHash2, by Austin Appleby (released as public domain).
7160 * Reformatted and C99-ified by Joshua Haberman.
7161 * Note - This code makes a few assumptions about how your machine behaves -
7162 * 1. We can read a 4-byte value from any address without crashing
 * 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
7164 * And it has a few limitations -
7165 * 1. It will not work incrementally.
7166 * 2. It will not produce the same results on little-endian and big-endian
7167 * machines. */
/* Hashes `len` bytes starting at `key`, mixing in `seed`.  This variant reads
 * the input one 32-bit word at a time directly from `key`, so it relies on
 * the platform tolerating such reads at any address. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  /* Mixing constants of the algorithm, determined offline by its author. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  const uint8_t *p = (const uint8_t *)key;
  size_t remaining = len;

  /* Seed the state with a value that depends on the input length. */
  uint32_t h = seed ^ len;

  /* Body: consume the input four bytes at a time. */
  for (; remaining >= 4; remaining -= 4, p += 4) {
    uint32_t k = *(const uint32_t *)p;

    k *= m;
    k ^= k >> r;
    k *= m;

    h = (h * m) ^ k;
  }

  /* Tail: fold in the 0-3 bytes that did not fill a whole word. */
  if (remaining == 3) {
    h ^= p[2] << 16;
  }
  if (remaining >= 2) {
    h ^= p[1] << 8;
  }
  if (remaining >= 1) {
    h ^= p[0];
    h *= m;
  }

  /* Final avalanche so the tail bytes influence every output bit. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
7208
7209 #else /* !UPB_UNALIGNED_READS_OK */
7210
7211 /* -----------------------------------------------------------------------------
7212 * MurmurHashAligned2, by Austin Appleby
7213 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
7214 * on certain platforms.
7215 * Performance will be lower than MurmurHash2 */
7216
/* Mixing constants shared by the helpers below. */
static const uint32_t murmur_m = 0x5bd1e995;
static const int32_t murmur_r = 24;

/* Mixes one 32-bit input word `k` into hash state `h` and returns the new
 * state. */
static uint32_t murmur_mix(uint32_t h, uint32_t k) {
  k *= murmur_m;
  k ^= k >> murmur_r;
  k *= murmur_m;

  h *= murmur_m;
  h ^= k;
  return h;
}

/* Folds the trailing `len` (0-3) bytes at `data` into hash state `h`. */
static uint32_t murmur_tail(uint32_t h, const uint8_t *data, size_t len) {
  if (len == 3) {
    h ^= data[2] << 16;
  }
  if (len >= 2) {
    h ^= data[1] << 8;
  }
  if (len >= 1) {
    h ^= data[0];
    h *= murmur_m;
  }
  return h;
}

/* Final avalanche applied to the hash state before it is returned. */
static uint32_t murmur_finalize(uint32_t h) {
  h ^= h >> 13;
  h *= murmur_m;
  h ^= h >> 15;
  return h;
}

/* Hashes `len` bytes starting at `key`, mixing in `seed`.  Word-sized reads
 * are only issued at 4-byte-aligned addresses: when `key` is misaligned the
 * leading bytes are loaded individually and each input word is stitched
 * together from two neighbouring aligned words. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint8_t *data = (const uint8_t *)key;
  uint32_t h = seed ^ len;
  uint8_t align = (uintptr_t)data & 3;

  if (align == 0 || len < 4) {
    /* Either already aligned, or too short for any word read at all. */
    while (len >= 4) {
      h = murmur_mix(h, *(const uint32_t *)data);
      data += 4;
      len -= 4;
    }

    h = murmur_tail(h, data, len);
    return murmur_finalize(h);
  }

  {
    /* `carry` holds bytes already read but not yet hashed; `word` is the most
     * recently loaded aligned word. */
    uint32_t carry = 0;
    uint32_t word = 0;
    const int32_t sl = 8 * (4 - align);
    const int32_t sr = 8 * align;

    /* Pre-load the (4 - align) bytes that precede the first aligned word. */
    if (align == 1) {
      carry |= data[2] << 16;
    }
    if (align <= 2) {
      carry |= data[1] << 8;
    }
    carry |= data[0];

    carry <<= (8 * align);

    data += 4 - align;
    len -= 4 - align;

    /* Main loop: combine the carried bytes with each new aligned word. */
    while (len >= 4) {
      word = *(const uint32_t *)data;
      h = murmur_mix(h, (carry >> sr) | (word << sl));
      carry = word;

      data += 4;
      len -= 4;
    }

    /* Flush whatever is still held in the carry register. */
    word = 0;

    if (len >= align) {
      /* Enough input remains to complete one more full word. */
      if (align == 3) {
        word |= data[2] << 16;
      }
      if (align >= 2) {
        word |= data[1] << 8;
      }
      word |= data[0];

      h = murmur_mix(h, (carry >> sr) | (word << sl));

      data += align;
      len -= align;

      h = murmur_tail(h, data, len);
    } else {
      /* Carry plus remaining bytes form a partial word only. */
      if (len == 3) {
        word |= data[2] << 16;
      }
      if (len >= 2) {
        word |= data[1] << 8;
      }
      if (len >= 1) {
        word |= data[0];
      }
      h ^= (carry >> sr) | (word << sl);
      h *= murmur_m;
    }

    return murmur_finalize(h);
  }
}
7332
7333 #endif /* UPB_UNALIGNED_READS_OK */
7334
7335 #include <errno.h>
7336 #include <stdarg.h>
7337 #include <stddef.h>
7338 #include <stdint.h>
7339 #include <stdio.h>
7340 #include <stdlib.h>
7341 #include <string.h>
7342
/* Error callback that prints the status message, followed by a newline, to
 * stderr.  `closure` is ignored.  Always returns false. */
bool upb_dumptostderr(void *closure, const upb_status* status) {
  const char *text = upb_status_errmsg(status);
  UPB_UNUSED(closure);
  fprintf(stderr, "%s\n", text);
  return false;
}
7348
7349 /* Guarantee null-termination and provide ellipsis truncation.
7350 * It may be tempting to "optimize" this by initializing these final
7351 * four bytes up-front and then being careful never to overwrite them,
7352 * this is safer and simpler. */
nullz(upb_status * status)7353 static void nullz(upb_status *status) {
7354 const char *ellipsis = "...";
7355 size_t len = strlen(ellipsis);
7356 UPB_ASSERT(sizeof(status->msg) > len);
7357 memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
7358 }
7359
7360
7361 /* upb_upberr *****************************************************************/
7362
/* Error space instance labelled "upb error"; upb_upberr_setoom() tags a
 * status with it. */
upb_errorspace upb_upberr = {"upb error"};
7364
/* Records an out-of-memory error in `status`: the error space is set to
 * upb_upberr and the message to "Out of memory".
 *
 * `status` may be NULL, in which case nothing happens.  This matches the
 * other status setters in this file, which all ignore a NULL status. */
void upb_upberr_setoom(upb_status *status) {
  if (!status) return;
  status->error_space_ = &upb_upberr;
  upb_status_seterrmsg(status, "Out of memory");
}
7369
7370
7371 /* upb_status *****************************************************************/
7372
/* Resets `status` to the "ok" state: ok flag set, error code zero, empty
 * message.  A NULL status is ignored. */
void upb_status_clear(upb_status *status) {
  if (status) {
    status->msg[0] = '\0';
    status->code_ = 0;
    status->ok_ = true;
  }
}
7379
upb_ok(const upb_status * status)7380 bool upb_ok(const upb_status *status) { return status->ok_; }
7381
upb_status_errspace(const upb_status * status)7382 upb_errorspace *upb_status_errspace(const upb_status *status) {
7383 return status->error_space_;
7384 }
7385
upb_status_errcode(const upb_status * status)7386 int upb_status_errcode(const upb_status *status) { return status->code_; }
7387
upb_status_errmsg(const upb_status * status)7388 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
7389
/* Marks `status` as failed and stores a copy of `msg` in its message buffer.
 * The copy is bounded by the buffer size and then passed through nullz().
 * A NULL status is ignored. */
void upb_status_seterrmsg(upb_status *status, const char *msg) {
  if (status) {
    status->ok_ = false;
    strncpy(status->msg, msg, sizeof(status->msg));
    nullz(status);
  }
}
7396
/* printf-style variant of upb_status_seterrmsg(): packages the variadic
 * arguments and forwards them to upb_status_vseterrf(). */
void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  upb_status_vseterrf(status, fmt, ap);
  va_end(ap);
}
7403
/* Marks `status` as failed and formats the message into its buffer from
 * `fmt` and `args`, bounded by the buffer size, then passes it through
 * nullz().  A NULL status is ignored. */
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
  if (status) {
    status->ok_ = false;
    _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
    nullz(status);
  }
}
7410
/* Copies the whole status structure from `from` into `to`.  Nothing happens
 * when `to` is NULL. */
void upb_status_copy(upb_status *to, const upb_status *from) {
  if (to) {
    *to = *from;
  }
}
7415
7416
7417 /* upb_alloc ******************************************************************/
7418
/* Allocation callback backed by the C heap.  A request for zero bytes frees
 * `ptr` and returns NULL; any other request is served by realloc().  `alloc`
 * and `oldsize` are unused. */
static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
                                  size_t size) {
  UPB_UNUSED(alloc);
  UPB_UNUSED(oldsize);

  if (size != 0) {
    return realloc(ptr, size);
  }

  free(ptr);
  return NULL;
}
7430
/* Allocator whose callback is upb_global_allocfunc(), i.e. one that defers to
 * realloc()/free().  upb_arena_init() uses it as the default block
 * allocator. */
upb_alloc upb_alloc_global = {&upb_global_allocfunc};
7432
7433
7434 /* upb_arena ******************************************************************/
7435
/* Alignment applied to every arena allocation.  16 is a conservative choice
 * in case anyone is using SSE. */
static const size_t maxalign = 16;

/* Rounds `size` up to the nearest multiple of maxalign. */
static size_t align_up_max(size_t size) {
  size_t padded = size + maxalign - 1;
  return padded - (padded % maxalign);
}
7442
/* Header stored at the start of every arena block.  Blocks are chained into a
 * singly linked list whose head is the arena's block_head. */
typedef struct mem_block {
  struct mem_block *next;  /* Next block in the arena's list. */
  size_t size;             /* Total size of the block in bytes. */
  size_t used;             /* Offset from the block start of the next free
                            * byte; starts just past the aligned header. */
  bool owned;              /* True if upb_arena_uninit() must free the
                            * block. */
  /* Data follows. */
} mem_block;
7450
/* Node in the arena's singly linked list of cleanup callbacks, which
 * upb_arena_uninit() walks and invokes. */
typedef struct cleanup_ent {
  struct cleanup_ent *next;   /* Next entry in the list. */
  upb_cleanup_func *cleanup;  /* Callback to invoke. */
  void *ud;                   /* Argument handed to the callback. */
} cleanup_ent;
7456
/* Initializes the block header at `ptr` and pushes the block onto the front
 * of the arena's block list.
 *
 *   size   total size of the memory at `ptr`, header included
 *   owned  whether upb_arena_uninit() should free the block
 *
 * The header area (rounded up to maxalign) is marked as already used. */
static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size,
                               bool owned) {
  mem_block *blk = ptr;

  blk->owned = owned;
  blk->used = align_up_max(sizeof(mem_block));
  blk->size = size;

  /* Link in as the new list head. */
  blk->next = a->block_head;
  a->block_head = blk;

  /* TODO(haberman): ASAN poison. */
}
7470
7471
upb_arena_allocblock(upb_arena * a,size_t size)7472 static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) {
7473 size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block);
7474 mem_block *block = upb_malloc(a->block_alloc, block_size);
7475
7476 if (!block) {
7477 return NULL;
7478 }
7479
7480 upb_arena_addblock(a, block, block_size, true);
7481 a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size);
7482
7483 return block;
7484 }
7485
/* Allocation callback installed by upb_arena_init(): bump-allocates from the
 * arena's current block.
 *
 *   alloc    the arena itself (upb_alloc is its initial member)
 *   ptr      previous allocation to carry data over from, if oldsize > 0
 *   oldsize  size of the previous allocation, or 0 for a fresh allocation
 *   size     requested size; 0 means "free", which is a no-op for an arena
 *
 * Returns the new memory, or NULL when size is 0 or a new block could not be
 * allocated.  The request is rounded up to maxalign.  Memory is never reused:
 * a reallocation always gets new space and the old contents are copied. */
static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
                               size_t size) {
  upb_arena *a = (upb_arena*)alloc;  /* upb_alloc is initial member. */
  mem_block *block = a->block_head;
  void *ret;

  if (size == 0) {
    return NULL;  /* We are an arena, don't need individual frees. */
  }

  size = align_up_max(size);

  /* TODO(haberman): special-case if this is a realloc of the last alloc? */

  if (!block || block->size - block->used < size) {
    /* Slow path: have to allocate a new block. */
    block = upb_arena_allocblock(a, size);

    if (!block) {
      return NULL;  /* Out of memory. */
    }
  }

  ret = (char*)block + block->used;
  block->used += size;

  if (oldsize > 0) {
    /* Preserve existing data.  Copy no more than the new allocation holds:
     * on a shrinking realloc, copying all `oldsize` bytes would write past
     * the end of the new region. */
    memcpy(ret, ptr, oldsize < size ? oldsize : size);
  }

  /* TODO(haberman): ASAN unpoison. */

  a->bytes_allocated += size;
  return ret;
}
7521
7522 /* Public Arena API ***********************************************************/
7523
/* Initializes an empty arena: no blocks, no cleanup entries, zero bytes
 * allocated.  Blocks come from upb_alloc_global; the first one is sized for
 * 256 bytes and block growth is capped at 16384 bytes. */
void upb_arena_init(upb_arena *a) {
  /* Allocator plumbing. */
  a->alloc.func = &upb_arena_doalloc;
  a->block_alloc = &upb_alloc_global;

  /* Block sizing policy. */
  a->next_block_size = 256;
  a->max_block_size = 16384;

  /* Empty state. */
  a->bytes_allocated = 0;
  a->block_head = NULL;
  a->cleanup_head = NULL;
}
7533
/* Initializes an arena like upb_arena_init(), optionally seeding it with
 * caller-provided memory and a custom block allocator.
 *
 *   mem, size  initial memory; used as a non-owned first block only when
 *              `size` exceeds the block header size
 *   alloc      block allocator for further blocks; NULL keeps the default */
void upb_arena_init2(upb_arena *a, void *mem, size_t size, upb_alloc *alloc) {
  const bool usable_mem = size > sizeof(mem_block);

  upb_arena_init(a);

  if (alloc) {
    a->block_alloc = alloc;
  }

  if (usable_mem) {
    upb_arena_addblock(a, mem, size, false);
  }
}
7545
/* Tears down the arena: runs every registered cleanup callback, then frees
 * all blocks the arena owns.  The list heads are cleared afterwards so that a
 * second call does nothing. */
void upb_arena_uninit(upb_arena *a) {
  cleanup_ent *ent = a->cleanup_head;
  mem_block *block = a->block_head;

  for (; ent; ent = ent->next) {
    ent->cleanup(ent->ud);
  }

  /* Blocks must be released only after the cleanup functions have run,
   * because the cleanup entries themselves live inside those blocks. */
  while (block) {
    mem_block *current = block;
    block = current->next;

    if (current->owned) {
      upb_free(a->block_alloc, current);
    }
  }

  /* Protect against multiple-uninit. */
  a->block_head = NULL;
  a->cleanup_head = NULL;
}
7571
/* Registers `func` to be called with `ud` when the arena is uninitialized.
 * The bookkeeping entry is allocated from the arena itself and pushed onto
 * the front of the cleanup list.  Returns false if that allocation fails. */
bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud) {
  cleanup_ent *entry = upb_malloc(&a->alloc, sizeof(cleanup_ent));

  if (entry) {
    entry->next = a->cleanup_head;
    entry->ud = ud;
    entry->cleanup = func;
    a->cleanup_head = entry;
    return true;
  }

  return false;  /* Out of memory. */
}
7585
upb_arena_bytesallocated(const upb_arena * a)7586 size_t upb_arena_bytesallocated(const upb_arena *a) {
7587 return a->bytes_allocated;
7588 }
7589
7590
7591 /* Standard error functions ***************************************************/
7592
/* Default error callback for an env: ignores both arguments and returns
 * false. */
static bool default_err(void *ud, const upb_status *status) {
  UPB_UNUSED(status);
  UPB_UNUSED(ud);
  return false;
}
7598
write_err_to(void * ud,const upb_status * status)7599 static bool write_err_to(void *ud, const upb_status *status) {
7600 upb_status *copy_to = ud;
7601 upb_status_copy(copy_to, status);
7602 return false;
7603 }
7604
7605
7606 /* upb_env ********************************************************************/
7607
/* Initializes the error-handling state of `e` only (the arena is left
 * untouched): the env is marked ok and default_err is installed as the error
 * callback with no user data. */
void upb_env_initonly(upb_env *e) {
  e->error_ud_ = NULL;
  e->error_func_ = &default_err;
  e->ok_ = true;
}
7613
/* Fully initializes `e`: its arena with default settings, then its
 * error-handling state. */
void upb_env_init(upb_env *e) {
  upb_arena *arena = &e->arena_;

  upb_arena_init(arena);
  upb_env_initonly(e);
}
7618
/* Fully initializes `e`, forwarding the initial memory (`mem`, `n`) and the
 * block allocator `alloc` to upb_arena_init2(), then sets up the
 * error-handling state. */
void upb_env_init2(upb_env *e, void *mem, size_t n, upb_alloc *alloc) {
  upb_arena *arena = &e->arena_;

  upb_arena_init2(arena, mem, n, alloc);
  upb_env_initonly(e);
}
7623
/* Releases everything held by `e` by uninitializing its arena. */
void upb_env_uninit(upb_env *e) {
  upb_arena *arena = &e->arena_;
  upb_arena_uninit(arena);
}
7627
/* Installs `func` as the env's error callback; `ud` is passed to it on every
 * report. */
void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud) {
  e->error_ud_ = ud;
  e->error_func_ = func;
}
7632
/* Arranges for errors reported through `e` to be copied into status `s`, by
 * installing write_err_to with `s` as its user data. */
void upb_env_reporterrorsto(upb_env *e, upb_status *s) {
  e->error_ud_ = s;
  e->error_func_ = &write_err_to;
}
7637
/* Reports `status` through the env: clears the env's ok flag, then invokes
 * the installed error callback and returns its result. */
bool upb_env_reporterror(upb_env *e, const upb_status *status) {
  upb_error_func *report = e->error_func_;
  void *ud = e->error_ud_;

  e->ok_ = false;
  return report(ud, status);
}
7642
/* Allocates `size` bytes from the env's arena. */
void *upb_env_malloc(upb_env *e, size_t size) {
  upb_alloc *alloc = &e->arena_.alloc;
  return upb_malloc(alloc, size);
}
7646
/* Reallocates `ptr` (currently `oldsize` bytes) to `size` bytes using the
 * env's arena. */
void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
  upb_alloc *alloc = &e->arena_.alloc;
  return upb_realloc(alloc, ptr, oldsize, size);
}
7650
/* Hands `ptr` back to the env's arena allocator. */
void upb_env_free(upb_env *e, void *ptr) {
  upb_alloc *alloc = &e->arena_.alloc;
  upb_free(alloc, ptr);
}
7654
/* Registers a cleanup callback on the env's arena; see
 * upb_arena_addcleanup().  Returns false on allocation failure. */
bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
  upb_arena *arena = &e->arena_;
  return upb_arena_addcleanup(arena, func, ud);
}
7658
upb_env_bytesallocated(const upb_env * e)7659 size_t upb_env_bytesallocated(const upb_env *e) {
7660 return upb_arena_bytesallocated(&e->arena_);
7661 }
7662 /* This file was generated by upbc (the upb compiler) from the input
7663 * file:
7664 *
7665 * upb/descriptor/descriptor.proto
7666 *
7667 * Do not edit -- your changes will be discarded when the file is
7668 * regenerated. */
7669
/* Forward declarations of the generated descriptor tables.  The initializers
 * below take addresses of elements in each other's arrays (for example msgs[]
 * points into arrays[] and strentries[], and fields[] points into msgs[] and
 * enums[]), so every array must be declared before any of them is defined. */
static const upb_msgdef msgs[22];
static const upb_fielddef fields[107];
static const upb_enumdef enums[5];
static const upb_tabent strentries[236];
static const upb_tabent intentries[18];
static const upb_tabval arrays[187];

/* Ref-tracking tables, present only in builds with UPB_DEBUG_REFS. */
#ifdef UPB_DEBUG_REFS
static upb_inttable reftables[268];
#endif
7680
/* Generated table of message definitions, one entry per message type named in
 * its first argument.  Each entry's integer table points into arrays[] (and
 * intentries[] where it has hash entries) and its string table points into
 * strentries[].  Generated data: do not edit by hand. */
static const upb_msgdef msgs[22] = {
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 41, 8, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[0]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[0], &reftables[1]),
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 5, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[2], &reftables[3]),
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ReservedRange", 5, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[14], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[20]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[4], &reftables[5]),
  UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 12, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[17], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[24]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[6], &reftables[7]),
  UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 9, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[21], 4, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[8], &reftables[9]),
  UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 9, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[25], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[32]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[10], &reftables[11]),
  UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[29], 2, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[36]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[12], &reftables[13]),
  UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 24, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[40]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[14], &reftables[15]),
  UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 13, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[42], 11, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[56]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[16], &reftables[17]),
  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 43, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[53], 13, 12), UPB_STRTABLE_INIT(12, 15, UPB_CTYPE_PTR, 4, &strentries[72]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[18], &reftables[19]),
  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 7, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[66], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[88]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[20], &reftables[21]),
  UPB_MSGDEF_INIT("google.protobuf.FileOptions", 38, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[68], 42, 17), UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_PTR, 5, &strentries[92]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[22], &reftables[23]),
  UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 11, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[110], 8, 4), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[124]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[24], &reftables[25]),
  UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 16, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[118], 7, 6), UPB_STRTABLE_INIT(6, 7, UPB_CTYPE_PTR, 3, &strentries[132]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[26], &reftables[27]),
  UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 8, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[10], &arrays[125], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[140]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[28], &reftables[29]),
  UPB_MSGDEF_INIT("google.protobuf.OneofDescriptorProto", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[126], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[144]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[30], &reftables[31]),
  UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 12, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[128], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[148]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[32], &reftables[33]),
  UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 8, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[14], &arrays[132], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[152]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[34], &reftables[35]),
  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 7, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[133], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[156]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[36], &reftables[37]),
  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 20, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[135], 7, 5), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[160]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[38], &reftables[39]),
  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 19, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[142], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[168]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[40], &reftables[41]),
  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 7, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[151], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[184]), false, UPB_SYNTAX_PROTO2, UPB_WELLKNOWN_UNSPECIFIED, &reftables[42], &reftables[43]),
};
7705
7706 static const upb_fielddef fields[107] = {
7707 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[20], NULL, 16, 6, {0},&reftables[44], &reftables[45]),
7708 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[4], NULL, 7, 1, {0},&reftables[46], &reftables[47]),
7709 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_enable_arenas", 31, &msgs[11], NULL, 24, 12, {0},&reftables[48], &reftables[49]),
7710 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[11], NULL, 18, 6, {0},&reftables[50], &reftables[51]),
7711 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "client_streaming", 5, &msgs[13], NULL, 14, 4, {0},&reftables[52], &reftables[53]),
7712 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "csharp_namespace", 37, &msgs[11], NULL, 28, 14, {0},&reftables[54], &reftables[55]),
7713 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[8], (const upb_def*)(&enums[2]), 7, 1, {0},&reftables[56], &reftables[57]),
7714 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[7], NULL, 17, 7, {0},&reftables[58], &reftables[59]),
7715 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[9], NULL, 31, 8, {0},&reftables[60], &reftables[61]),
7716 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[8], NULL, 9, 3, {0},&reftables[62], &reftables[63]),
7717 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[14], NULL, 7, 1, {0},&reftables[64], &reftables[65]),
7718 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[12], NULL, 9, 3, {0},&reftables[66], &reftables[67]),
7719 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 23, &msgs[11], NULL, 22, 10, {0},&reftables[68], &reftables[69]),
7720 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 1, &msgs[6], NULL, 7, 1, {0},&reftables[70], &reftables[71]),
7721 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[4], NULL, 8, 2, {0},&reftables[72], &reftables[73]),
7722 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[17], NULL, 7, 1, {0},&reftables[74], &reftables[75]),
7723 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[20], NULL, 12, 4, {0},&reftables[76], &reftables[77]),
7724 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[2], NULL, 4, 1, {0},&reftables[78], &reftables[79]),
7725 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 4, 1, {0},&reftables[80], &reftables[81]),
7726 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[9], (const upb_def*)(&msgs[3]), 14, 1, {0},&reftables[82], &reftables[83]),
7727 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[3]), 19, 2, {0},&reftables[84], &reftables[85]),
7728 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[7], NULL, 8, 2, {0},&reftables[86], &reftables[87]),
7729 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[7]), 25, 4, {0},&reftables[88], &reftables[89]),
7730 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[9], (const upb_def*)(&msgs[7]), 20, 3, {0},&reftables[90], &reftables[91]),
7731 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 22, 3, {0},&reftables[92], &reftables[93]),
7732 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[7]), 13, 0, {0},&reftables[94], &reftables[95]),
7733 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[10], (const upb_def*)(&msgs[9]), 6, 0, {0},&reftables[96], &reftables[97]),
7734 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[11], NULL, 15, 5, {0},&reftables[98], &reftables[99]),
7735 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[20], NULL, 7, 1, {0},&reftables[100], &reftables[101]),
7736 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[13], NULL, 8, 2, {0},&reftables[102], &reftables[103]),
7737 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[21], NULL, 6, 1, {0},&reftables[104], &reftables[105]),
7738 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[11], NULL, 21, 9, {0},&reftables[106], &reftables[107]),
7739 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[11], NULL, 19, 7, {0},&reftables[108], &reftables[109]),
7740 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[11], NULL, 14, 4, {0},&reftables[110], &reftables[111]),
7741 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[11], NULL, 10, 2, {0},&reftables[112], &reftables[113]),
7742 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[11], NULL, 7, 1, {0},&reftables[114], &reftables[115]),
7743 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_string_check_utf8", 27, &msgs[11], NULL, 23, 11, {0},&reftables[116], &reftables[117]),
7744 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "javanano_use_deprecated_package", 38, &msgs[11], NULL, 31, 15, {0},&reftables[118], &reftables[119]),
7745 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "json_name", 10, &msgs[7], NULL, 21, 9, {0},&reftables[120], &reftables[121]),
7746 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "jstype", 6, &msgs[8], (const upb_def*)(&enums[3]), 11, 5, {0},&reftables[122], &reftables[123]),
7747 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[7], (const upb_def*)(&enums[0]), 12, 4, {0},&reftables[124], &reftables[125]),
7748 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[8], NULL, 10, 4, {0},&reftables[126], &reftables[127]),
7749 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[19], NULL, 9, 2, {0},&reftables[128], &reftables[129]),
7750 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "leading_detached_comments", 6, &msgs[19], NULL, 17, 4, {0},&reftables[130], &reftables[131]),
7751 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[18], (const upb_def*)(&msgs[19]), 6, 0, {0},&reftables[132], &reftables[133]),
7752 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "map_entry", 7, &msgs[12], NULL, 10, 4, {0},&reftables[134], &reftables[135]),
7753 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[12], NULL, 7, 1, {0},&reftables[136], &reftables[137]),
7754 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[9], (const upb_def*)(&msgs[0]), 11, 0, {0},&reftables[138], &reftables[139]),
7755 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[16], (const upb_def*)(&msgs[13]), 7, 0, {0},&reftables[140], &reftables[141]),
7756 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[20], (const upb_def*)(&msgs[21]), 6, 0, {0},&reftables[142], &reftables[143]),
7757 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[5], NULL, 5, 1, {0},&reftables[144], &reftables[145]),
7758 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[9], NULL, 23, 6, {0},&reftables[146], &reftables[147]),
7759 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[3], NULL, 9, 2, {0},&reftables[148], &reftables[149]),
7760 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[16], NULL, 9, 2, {0},&reftables[150], &reftables[151]),
7761 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[15], NULL, 3, 0, {0},&reftables[152], &reftables[153]),
7762 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[13], NULL, 5, 1, {0},&reftables[154], &reftables[155]),
7763 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[7], NULL, 5, 1, {0},&reftables[156], &reftables[157]),
7764 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 33, 8, {0},&reftables[158], &reftables[159]),
7765 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[21], NULL, 3, 0, {0},&reftables[160], &reftables[161]),
7766 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[20], NULL, 11, 3, {0},&reftables[162], &reftables[163]),
7767 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 16, 1, {0},&reftables[164], &reftables[165]),
7768 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[12], NULL, 8, 2, {0},&reftables[166], &reftables[167]),
7769 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[7], NULL, 11, 3, {0},&reftables[168], &reftables[169]),
7770 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[5], NULL, 8, 2, {0},&reftables[170], &reftables[171]),
7771 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "objc_class_prefix", 36, &msgs[11], NULL, 25, 13, {0},&reftables[172], &reftables[173]),
7772 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "oneof_decl", 8, &msgs[0], (const upb_def*)(&msgs[15]), 29, 6, {0},&reftables[174], &reftables[175]),
7773 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "oneof_index", 9, &msgs[7], NULL, 20, 8, {0},&reftables[176], &reftables[177]),
7774 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[11], (const upb_def*)(&enums[4]), 13, 3, {0},&reftables[178], &reftables[179]),
7775 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[12]), 26, 5, {0},&reftables[180], &reftables[181]),
7776 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[9], (const upb_def*)(&msgs[11]), 21, 4, {0},&reftables[182], &reftables[183]),
7777 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[7], (const upb_def*)(&msgs[8]), 4, 0, {0},&reftables[184], &reftables[185]),
7778 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[13], (const upb_def*)(&msgs[14]), 4, 0, {0},&reftables[186], &reftables[187]),
7779 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[16], (const upb_def*)(&msgs[17]), 8, 1, {0},&reftables[188], &reftables[189]),
7780 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[3], (const upb_def*)(&msgs[4]), 8, 1, {0},&reftables[190], &reftables[191]),
7781 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[5], (const upb_def*)(&msgs[6]), 4, 0, {0},&reftables[192], &reftables[193]),
7782 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[13], NULL, 11, 3, {0},&reftables[194], &reftables[195]),
7783 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[9], NULL, 26, 7, {0},&reftables[196], &reftables[197]),
7784 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[8], NULL, 8, 2, {0},&reftables[198], &reftables[199]),
7785 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[19], NULL, 5, 0, {0},&reftables[200], &reftables[201]),
7786 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "php_class_prefix", 40, &msgs[11], NULL, 32, 16, {0},&reftables[202], &reftables[203]),
7787 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "php_namespace", 41, &msgs[11], NULL, 35, 17, {0},&reftables[204], &reftables[205]),
7788 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[20], NULL, 10, 2, {0},&reftables[206], &reftables[207]),
7789 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[9], NULL, 36, 9, {0},&reftables[208], &reftables[209]),
7790 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[11], NULL, 20, 8, {0},&reftables[210], &reftables[211]),
7791 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "reserved_name", 10, &msgs[0], NULL, 38, 9, {0},&reftables[212], &reftables[213]),
7792 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "reserved_range", 9, &msgs[0], (const upb_def*)(&msgs[2]), 32, 7, {0},&reftables[214], &reftables[215]),
7793 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "server_streaming", 6, &msgs[13], NULL, 15, 5, {0},&reftables[216], &reftables[217]),
7794 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[9], (const upb_def*)(&msgs[16]), 17, 2, {0},&reftables[218], &reftables[219]),
7795 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[9], (const upb_def*)(&msgs[18]), 22, 5, {0},&reftables[220], &reftables[221]),
7796 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[19], NULL, 8, 1, {0},&reftables[222], &reftables[223]),
7797 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[2], NULL, 3, 0, {0},&reftables[224], &reftables[225]),
7798 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 3, 0, {0},&reftables[226], &reftables[227]),
7799 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[20], NULL, 13, 5, {0},&reftables[228], &reftables[229]),
7800 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "syntax", 12, &msgs[9], NULL, 40, 11, {0},&reftables[230], &reftables[231]),
7801 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[19], NULL, 12, 3, {0},&reftables[232], &reftables[233]),
7802 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[7], (const upb_def*)(&enums[1]), 13, 5, {0},&reftables[234], &reftables[235]),
7803 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[7], NULL, 14, 6, {0},&reftables[236], &reftables[237]),
7804 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[12], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[238], &reftables[239]),
7805 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[17], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[240], &reftables[241]),
7806 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[242], &reftables[243]),
7807 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[14], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[244], &reftables[245]),
7808 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[8], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[246], &reftables[247]),
7809 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[6], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[248], &reftables[249]),
7810 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[4], (const upb_def*)(&msgs[20]), 6, 0, {0},&reftables[250], &reftables[251]),
7811 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[3], (const upb_def*)(&msgs[5]), 7, 0, {0},&reftables[252], &reftables[253]),
7812 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[8], NULL, 12, 6, {0},&reftables[254], &reftables[255]),
7813 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[9], NULL, 39, 10, {0},&reftables[256], &reftables[257]),
7814 };
7815
/* Statically initialized enum definitions, one per UPB_ENUMDEF_INIT entry.
 * Each entry carries the enum's fully qualified name, a name->number string
 * table whose slots live in strentries[] (starting at the given
 * &strentries[N]), and an integer table whose array part lives in arrays[]
 * (starting at the given &arrays[N]) with UPB_CTYPE_CSTR values --
 * presumably number->name; confirm against UPB_INTTABLE_INIT.
 *
 * The index of an entry is significant: enum-typed elements of fields[]
 * refer to these by position (e.g. "label" -> &enums[0], "type" ->
 * &enums[1], "jstype" -> &enums[3], "optimize_for" -> &enums[4]), so
 * entries must not be reordered.
 *
 * NOTE(review): in UPB_STRTABLE_INIT the second argument looks like
 * (slot count - 1) and the fourth like log2(slot count) -- e.g. (3, 3, .., 2)
 * for a 4-slot region and (18, 31, .., 5) for a 32-slot region; confirm
 * against the macro definition before relying on this. */
7816 static const upb_enumdef enums[5] = {
7817 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[188]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[154], 4, 3), 0, &reftables[258], &reftables[259]),
7818 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[192]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[158], 19, 18), 0, &reftables[260], &reftables[261]),
7819 UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[224]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[177], 3, 3), 0, &reftables[262], &reftables[263]),
7820 UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.JSType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[228]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[180], 3, 3), 0, &reftables[264], &reftables[265]),
7821 UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[232]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[183], 4, 3), 0, &reftables[266], &reftables[267]),
7822 };
7823
/* Shared backing storage for the string-keyed tables of this generated def
 * set. Individual tables are carved out of this single array by offset: the
 * enums[] initializers, for instance, point at &strentries[188], [192],
 * [224], [228] and [232].
 *
 * Every slot is a {key, value, next} triple:
 *   - key:   UPB_TABKEY_NONE for an unused slot, otherwise
 *            UPB_TABKEY_STR(b0, b1, b2, b3, str). In every entry below b0
 *            equals the length of str (e.g. "\011" == 9 for "extension")
 *            and b1..b3 are zero -- presumably a 32-bit length prefix split
 *            into bytes; confirm against the UPB_TABKEY_STR definition.
 *   - value: UPB_TABVALUE_PTR_INIT(&fields[N]) in the field-name regions,
 *            UPB_TABVALUE_INT_INIT(number) in the enum value-name regions,
 *            UPB_TABVALUE_EMPTY_INIT for unused slots.
 *   - next:  NULL or the address of another slot in this same array --
 *            presumably the collision-chain link; confirm against
 *            upb_tabent.
 *
 * NOTE(review): slot positions are presumably dictated by the generator's
 * hash function, and the &strentries[N] links are absolute indices, so
 * entries must not be inserted, removed or reordered by hand. */
7824 static const upb_tabent strentries[236] = {
7825 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
7826 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7827 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "reserved_name"), UPB_TABVALUE_PTR_INIT(&fields[84]), NULL},
7828 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL},
7829 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7830 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7831 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7832 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[25]), &strentries[12]},
7833 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[24]), &strentries[14]},
7834 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7835 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
7836 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7837 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "reserved_range"), UPB_TABVALUE_PTR_INIT(&fields[85]), NULL},
7838 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
7839 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "oneof_decl"), UPB_TABVALUE_PTR_INIT(&fields[65]), NULL},
7840 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), &strentries[13]},
7841 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[91]), NULL},
7842 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
7843 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7844 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7845 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[90]), NULL},
7846 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
7847 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7848 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7849 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7850 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[104]), NULL},
7851 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
7852 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[52]), &strentries[26]},
7853 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[103]), NULL},
7854 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
7855 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
7856 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7857 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL},
7858 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7859 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
7860 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[50]), &strentries[34]},
7861 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[102]), NULL},
7862 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
7863 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7864 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7865 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "oneof_index"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
7866 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[40]), NULL},
7867 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7868 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
7869 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7870 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7871 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7872 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7873 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[62]), &strentries[53]},
7874 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7875 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
7876 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[96]), NULL},
7877 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "json_name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
7878 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[95]), &strentries[50]},
7879 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
7880 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
7881 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
7882 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7883 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[105]), NULL},
7884 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7885 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7886 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7887 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7888 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
7889 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
7890 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7891 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
7892 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7893 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "jstype"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
7894 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[9]), NULL},
7895 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7896 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7897 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[23]), NULL},
7898 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[106]), NULL},
7899 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7900 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
7901 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[87]), NULL},
7902 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7903 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[88]), NULL},
7904 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7905 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7906 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "syntax"), UPB_TABVALUE_PTR_INIT(&fields[93]), NULL},
7907 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
7908 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
7909 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
7910 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[86]},
7911 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL},
7912 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[82]), &strentries[85]},
7913 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7914 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
7915 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7916 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7917 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7918 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7919 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
7920 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "csharp_namespace"), UPB_TABVALUE_PTR_INIT(&fields[5]), &strentries[116]},
7921 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7922 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7923 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7924 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7925 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7926 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7927 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7928 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
7929 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[120]},
7930 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7931 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7932 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
7933 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "php_namespace"), UPB_TABVALUE_PTR_INIT(&fields[80]), &strentries[113]},
7934 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7935 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7936 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7937 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[33]), &strentries[117]},
7938 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
7939 {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[32]), &strentries[118]},
7940 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[31]), NULL},
7941 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "php_class_prefix"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
7942 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "javanano_use_deprecated_package"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[123]},
7943 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[83]), NULL},
7944 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[67]), NULL},
7945 {UPB_TABKEY_STR("\026", "\000", "\000", "\000", "java_string_check_utf8"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
7946 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[12]), &strentries[119]},
7947 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "objc_class_prefix"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
7948 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "cc_enable_arenas"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
7949 {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[128]},
7950 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7951 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7952 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7953 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
7954 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[11]), NULL},
7955 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "map_entry"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
7956 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[61]), NULL},
7957 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7958 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "client_streaming"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
7959 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "server_streaming"), UPB_TABVALUE_PTR_INIT(&fields[86]), NULL},
7960 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
7961 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[29]), NULL},
7962 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7963 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
7964 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
7965 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
7966 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
7967 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7968 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7969 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7970 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7971 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7972 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[54]), NULL},
7973 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7974 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[72]), &strentries[150]},
7975 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
7976 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[149]},
7977 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
7978 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
7979 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7980 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7981 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7982 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7983 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
7984 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7985 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7986 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7987 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7988 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[89]), &strentries[167]},
7989 {UPB_TABKEY_STR("\031", "\000", "\000", "\000", "leading_detached_comments"), UPB_TABVALUE_PTR_INIT(&fields[43]), &strentries[165]},
7990 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[94]), NULL},
7991 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[42]), &strentries[164]},
7992 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
7993 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
7994 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7995 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7996 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL},
7997 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7998 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
7999 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8000 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
8001 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
8002 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8003 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8004 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8005 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8006 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[81]), NULL},
8007 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
8008 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[92]), &strentries[182]},
8009 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8010 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8011 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
8012 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
/* strentries[188..191]: value names of google.protobuf.FieldDescriptorProto.Label (enums[0]). */
8013 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[190]},
8014 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8015 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
8016 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
/* strentries[192..223]: value names of google.protobuf.FieldDescriptorProto.Type (enums[1]). */
8017 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
8018 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8019 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8020 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8021 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8022 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
8023 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[221]},
8024 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
8025 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8026 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
8027 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
8028 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
8029 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8030 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[222]},
8031 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8032 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8033 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[219]},
8034 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8035 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8036 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8037 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8038 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
8039 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
8040 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8041 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[218]},
8042 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8043 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
8044 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
8045 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
8046 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
8047 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
8048 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
/* strentries[224..227]: value names of google.protobuf.FieldOptions.CType (enums[2]). */
8049 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8050 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
8051 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[225]},
8052 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
/* strentries[228..231]: value names of google.protobuf.FieldOptions.JSType (enums[3]). */
8053 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8054 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NORMAL"), UPB_TABVALUE_INT_INIT(0), NULL},
8055 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NUMBER"), UPB_TABVALUE_INT_INIT(2), NULL},
8056 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_STRING"), UPB_TABVALUE_INT_INIT(1), NULL},
/* strentries[232..235]: value names of google.protobuf.FileOptions.OptimizeMode (enums[4]). */
8057 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
8058 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[235]},
8059 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8060 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
8061 };
8062
/* Statically initialized upb_tabent slots with integer keys.
 *
 * Occupied and empty slots alternate. Each occupied slot maps a numeric key
 * (999 or 33 in this table) to a pointer into fields[]; the third member is
 * NULL in every entry here.
 *
 * NOTE(review): presumably these are the hash parts of integer-keyed tables
 * initialized elsewhere in this file, and the keys are field numbers -- confirm
 * against the table initializers that reference intentries. */
8063 static const upb_tabent intentries[18] = {
8064 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8065 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[103]), NULL},
8066 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8067 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[102]), NULL},
8068 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8069 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
8070 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8071 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
8072 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8073 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
8074 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8075 {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
8076 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8077 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
8078 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8079 {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
8080 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
8081 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
8082 };
8083
/* Statically initialized upb_tabval slots.
 *
 * The leading part holds pointers into fields[] interleaved with empty slots.
 * The tail, starting at the "LABEL_OPTIONAL" entry, holds enum value name
 * strings. For the TYPE_*, STRING/CORD/STRING_PIECE, JS_* and
 * SPEED/CODE_SIZE/LITE_RUNTIME runs, a name's offset within its run equals the
 * number that strentries associates with that name (e.g. "TYPE_INT64" sits at
 * offset 3 of its run and strentries maps it to 3).
 *
 * NOTE(review): presumably these are the dense array parts of integer-keyed
 * tables initialized elsewhere in this file -- confirm against the table
 * initializers that reference arrays. */
8084 static const upb_tabval arrays[187] = {
8085 UPB_TABVALUE_EMPTY_INIT,
8086 UPB_TABVALUE_PTR_INIT(&fields[57]),
8087 UPB_TABVALUE_PTR_INIT(&fields[25]),
8088 UPB_TABVALUE_PTR_INIT(&fields[60]),
8089 UPB_TABVALUE_PTR_INIT(&fields[20]),
8090 UPB_TABVALUE_PTR_INIT(&fields[24]),
8091 UPB_TABVALUE_PTR_INIT(&fields[22]),
8092 UPB_TABVALUE_PTR_INIT(&fields[68]),
8093 UPB_TABVALUE_PTR_INIT(&fields[65]),
8094 UPB_TABVALUE_PTR_INIT(&fields[85]),
8095 UPB_TABVALUE_PTR_INIT(&fields[84]),
8096 UPB_TABVALUE_EMPTY_INIT,
8097 UPB_TABVALUE_PTR_INIT(&fields[91]),
8098 UPB_TABVALUE_PTR_INIT(&fields[18]),
8099 UPB_TABVALUE_EMPTY_INIT,
8100 UPB_TABVALUE_PTR_INIT(&fields[90]),
8101 UPB_TABVALUE_PTR_INIT(&fields[17]),
8102 UPB_TABVALUE_EMPTY_INIT,
8103 UPB_TABVALUE_PTR_INIT(&fields[52]),
8104 UPB_TABVALUE_PTR_INIT(&fields[104]),
8105 UPB_TABVALUE_PTR_INIT(&fields[73]),
8106 UPB_TABVALUE_EMPTY_INIT,
8107 UPB_TABVALUE_EMPTY_INIT,
8108 UPB_TABVALUE_PTR_INIT(&fields[1]),
8109 UPB_TABVALUE_PTR_INIT(&fields[14]),
8110 UPB_TABVALUE_EMPTY_INIT,
8111 UPB_TABVALUE_PTR_INIT(&fields[50]),
8112 UPB_TABVALUE_PTR_INIT(&fields[63]),
8113 UPB_TABVALUE_PTR_INIT(&fields[74]),
8114 UPB_TABVALUE_EMPTY_INIT,
8115 UPB_TABVALUE_PTR_INIT(&fields[13]),
8116 UPB_TABVALUE_EMPTY_INIT,
8117 UPB_TABVALUE_PTR_INIT(&fields[56]),
8118 UPB_TABVALUE_PTR_INIT(&fields[21]),
8119 UPB_TABVALUE_PTR_INIT(&fields[62]),
8120 UPB_TABVALUE_PTR_INIT(&fields[40]),
8121 UPB_TABVALUE_PTR_INIT(&fields[95]),
8122 UPB_TABVALUE_PTR_INIT(&fields[96]),
8123 UPB_TABVALUE_PTR_INIT(&fields[7]),
8124 UPB_TABVALUE_PTR_INIT(&fields[70]),
8125 UPB_TABVALUE_PTR_INIT(&fields[66]),
8126 UPB_TABVALUE_PTR_INIT(&fields[38]),
8127 UPB_TABVALUE_EMPTY_INIT,
8128 UPB_TABVALUE_PTR_INIT(&fields[6]),
8129 UPB_TABVALUE_PTR_INIT(&fields[77]),
8130 UPB_TABVALUE_PTR_INIT(&fields[9]),
8131 UPB_TABVALUE_EMPTY_INIT,
8132 UPB_TABVALUE_PTR_INIT(&fields[41]),
8133 UPB_TABVALUE_PTR_INIT(&fields[39]),
8134 UPB_TABVALUE_EMPTY_INIT,
8135 UPB_TABVALUE_EMPTY_INIT,
8136 UPB_TABVALUE_EMPTY_INIT,
8137 UPB_TABVALUE_PTR_INIT(&fields[105]),
8138 UPB_TABVALUE_EMPTY_INIT,
8139 UPB_TABVALUE_PTR_INIT(&fields[51]),
8140 UPB_TABVALUE_PTR_INIT(&fields[76]),
8141 UPB_TABVALUE_PTR_INIT(&fields[8]),
8142 UPB_TABVALUE_PTR_INIT(&fields[47]),
8143 UPB_TABVALUE_PTR_INIT(&fields[19]),
8144 UPB_TABVALUE_PTR_INIT(&fields[87]),
8145 UPB_TABVALUE_PTR_INIT(&fields[23]),
8146 UPB_TABVALUE_PTR_INIT(&fields[69]),
8147 UPB_TABVALUE_PTR_INIT(&fields[88]),
8148 UPB_TABVALUE_PTR_INIT(&fields[82]),
8149 UPB_TABVALUE_PTR_INIT(&fields[106]),
8150 UPB_TABVALUE_PTR_INIT(&fields[93]),
8151 UPB_TABVALUE_EMPTY_INIT,
8152 UPB_TABVALUE_PTR_INIT(&fields[26]),
8153 UPB_TABVALUE_EMPTY_INIT,
8154 UPB_TABVALUE_PTR_INIT(&fields[35]),
8155 UPB_TABVALUE_EMPTY_INIT,
8156 UPB_TABVALUE_EMPTY_INIT,
8157 UPB_TABVALUE_EMPTY_INIT,
8158 UPB_TABVALUE_EMPTY_INIT,
8159 UPB_TABVALUE_EMPTY_INIT,
8160 UPB_TABVALUE_EMPTY_INIT,
8161 UPB_TABVALUE_PTR_INIT(&fields[34]),
8162 UPB_TABVALUE_PTR_INIT(&fields[67]),
8163 UPB_TABVALUE_PTR_INIT(&fields[33]),
8164 UPB_TABVALUE_PTR_INIT(&fields[27]),
8165 UPB_TABVALUE_EMPTY_INIT,
8166 UPB_TABVALUE_EMPTY_INIT,
8167 UPB_TABVALUE_EMPTY_INIT,
8168 UPB_TABVALUE_EMPTY_INIT,
8169 UPB_TABVALUE_PTR_INIT(&fields[3]),
8170 UPB_TABVALUE_PTR_INIT(&fields[32]),
8171 UPB_TABVALUE_PTR_INIT(&fields[83]),
8172 UPB_TABVALUE_EMPTY_INIT,
8173 UPB_TABVALUE_PTR_INIT(&fields[31]),
8174 UPB_TABVALUE_EMPTY_INIT,
8175 UPB_TABVALUE_EMPTY_INIT,
8176 UPB_TABVALUE_PTR_INIT(&fields[12]),
8177 UPB_TABVALUE_EMPTY_INIT,
8178 UPB_TABVALUE_EMPTY_INIT,
8179 UPB_TABVALUE_EMPTY_INIT,
8180 UPB_TABVALUE_PTR_INIT(&fields[36]),
8181 UPB_TABVALUE_EMPTY_INIT,
8182 UPB_TABVALUE_EMPTY_INIT,
8183 UPB_TABVALUE_EMPTY_INIT,
8184 UPB_TABVALUE_PTR_INIT(&fields[2]),
8185 UPB_TABVALUE_EMPTY_INIT,
8186 UPB_TABVALUE_EMPTY_INIT,
8187 UPB_TABVALUE_EMPTY_INIT,
8188 UPB_TABVALUE_EMPTY_INIT,
8189 UPB_TABVALUE_PTR_INIT(&fields[64]),
8190 UPB_TABVALUE_PTR_INIT(&fields[5]),
8191 UPB_TABVALUE_PTR_INIT(&fields[37]),
8192 UPB_TABVALUE_EMPTY_INIT,
8193 UPB_TABVALUE_PTR_INIT(&fields[79]),
8194 UPB_TABVALUE_PTR_INIT(&fields[80]),
8195 UPB_TABVALUE_EMPTY_INIT,
8196 UPB_TABVALUE_PTR_INIT(&fields[46]),
8197 UPB_TABVALUE_PTR_INIT(&fields[61]),
8198 UPB_TABVALUE_PTR_INIT(&fields[11]),
8199 UPB_TABVALUE_EMPTY_INIT,
8200 UPB_TABVALUE_EMPTY_INIT,
8201 UPB_TABVALUE_EMPTY_INIT,
8202 UPB_TABVALUE_PTR_INIT(&fields[45]),
8203 UPB_TABVALUE_EMPTY_INIT,
8204 UPB_TABVALUE_PTR_INIT(&fields[55]),
8205 UPB_TABVALUE_PTR_INIT(&fields[29]),
8206 UPB_TABVALUE_PTR_INIT(&fields[75]),
8207 UPB_TABVALUE_PTR_INIT(&fields[71]),
8208 UPB_TABVALUE_PTR_INIT(&fields[4]),
8209 UPB_TABVALUE_PTR_INIT(&fields[86]),
8210 UPB_TABVALUE_EMPTY_INIT,
8211 UPB_TABVALUE_EMPTY_INIT,
8212 UPB_TABVALUE_PTR_INIT(&fields[54]),
8213 UPB_TABVALUE_EMPTY_INIT,
8214 UPB_TABVALUE_PTR_INIT(&fields[53]),
8215 UPB_TABVALUE_PTR_INIT(&fields[48]),
8216 UPB_TABVALUE_PTR_INIT(&fields[72]),
8217 UPB_TABVALUE_EMPTY_INIT,
8218 UPB_TABVALUE_EMPTY_INIT,
8219 UPB_TABVALUE_PTR_INIT(&fields[44]),
8220 UPB_TABVALUE_EMPTY_INIT,
8221 UPB_TABVALUE_PTR_INIT(&fields[78]),
8222 UPB_TABVALUE_PTR_INIT(&fields[89]),
8223 UPB_TABVALUE_PTR_INIT(&fields[42]),
8224 UPB_TABVALUE_PTR_INIT(&fields[94]),
8225 UPB_TABVALUE_EMPTY_INIT,
8226 UPB_TABVALUE_PTR_INIT(&fields[43]),
8227 UPB_TABVALUE_EMPTY_INIT,
8228 UPB_TABVALUE_EMPTY_INIT,
8229 UPB_TABVALUE_PTR_INIT(&fields[49]),
8230 UPB_TABVALUE_PTR_INIT(&fields[28]),
8231 UPB_TABVALUE_PTR_INIT(&fields[81]),
8232 UPB_TABVALUE_PTR_INIT(&fields[59]),
8233 UPB_TABVALUE_PTR_INIT(&fields[16]),
8234 UPB_TABVALUE_PTR_INIT(&fields[92]),
8235 UPB_TABVALUE_PTR_INIT(&fields[0]),
8236 UPB_TABVALUE_EMPTY_INIT,
8237 UPB_TABVALUE_PTR_INIT(&fields[58]),
8238 UPB_TABVALUE_PTR_INIT(&fields[30]),
8239 UPB_TABVALUE_EMPTY_INIT,
/* LABEL_* names. */
8240 UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
8241 UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
8242 UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
/* TYPE_* names; the empty slot is offset 0, "TYPE_DOUBLE" is offset 1. */
8243 UPB_TABVALUE_EMPTY_INIT,
8244 UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
8245 UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
8246 UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
8247 UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
8248 UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
8249 UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
8250 UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
8251 UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
8252 UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
8253 UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
8254 UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
8255 UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
8256 UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
8257 UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
8258 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
8259 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
8260 UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
8261 UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
/* STRING / CORD / STRING_PIECE names, offsets 0..2. */
8262 UPB_TABVALUE_PTR_INIT("STRING"),
8263 UPB_TABVALUE_PTR_INIT("CORD"),
8264 UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
/* JS_* names, offsets 0..2. */
8265 UPB_TABVALUE_PTR_INIT("JS_NORMAL"),
8266 UPB_TABVALUE_PTR_INIT("JS_STRING"),
8267 UPB_TABVALUE_PTR_INIT("JS_NUMBER"),
/* SPEED / CODE_SIZE / LITE_RUNTIME names; the empty slot is offset 0. */
8268 UPB_TABVALUE_EMPTY_INIT,
8269 UPB_TABVALUE_PTR_INIT("SPEED"),
8270 UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
8271 UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
8272 };
8273
/* Only compiled when UPB_DEBUG_REFS is defined: 268 upb_inttable objects, each
 * initialized with UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR). Unlike the tables
 * above, this array is not const.
 *
 * NOTE(review): presumably these back the debug ref-tracking of the statically
 * defined objects in this file -- confirm where reftables[] is referenced. */
8274 #ifdef UPB_DEBUG_REFS
8275 static upb_inttable reftables[268] = {
8276 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8277 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8278 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8279 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8280 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8281 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8282 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8283 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8284 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8285 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8286 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8287 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8288 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8289 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8290 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8291 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8292 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8293 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8294 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8295 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8296 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8297 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8298 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8299 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8300 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8301 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8302 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8303 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8304 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8305 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8306 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8307 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8308 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8309 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8310 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8311 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8312 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8313 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8314 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8315 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8316 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8317 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8318 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8319 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8320 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8321 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8322 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8323 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8324 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8325 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8326 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8327 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8328 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8329 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8330 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8331 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8332 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8333 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8334 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8335 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8336 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8337 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8338 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8339 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8340 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8341 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8342 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8343 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8344 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8345 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8346 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8347 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8348 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8349 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8350 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8351 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8352 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8353 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8354 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8355 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8356 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8357 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8358 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8359 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8360 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8361 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8362 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8363 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8364 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8365 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8366 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8367 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8368 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8369 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8370 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8371 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8372 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8373 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8374 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8375 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8376 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8377 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8378 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8379 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8380 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8381 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8382 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8383 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8384 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8385 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8386 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8387 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8388 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8389 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8390 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8391 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8392 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8393 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8394 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8395 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8396 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8397 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8398 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8399 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8400 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8401 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8402 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8403 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8404 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8405 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8406 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8407 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8408 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8409 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8410 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8411 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8412 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8413 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8414 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8415 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8416 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8417 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8418 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8419 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8420 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8421 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8422 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8423 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8424 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8425 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8426 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8427 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8428 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8429 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8430 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8431 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8432 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8433 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8434 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8435 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8436 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8437 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8438 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8439 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8440 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8441 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8442 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8443 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8444 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8445 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8446 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8447 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8448 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8449 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8450 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8451 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8452 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8453 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8454 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8455 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8456 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8457 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8458 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8459 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8460 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8461 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8462 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8463 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8464 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8465 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8466 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8467 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8468 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8469 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8470 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8471 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8472 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8473 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8474 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8475 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8476 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8477 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8478 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8479 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8480 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8481 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8482 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8483 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8484 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8485 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8486 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8487 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8488 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8489 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8490 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8491 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8492 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8493 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8494 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8495 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8496 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8497 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8498 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8499 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8500 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8501 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8502 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8503 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8504 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8505 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8506 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8507 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8508 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8509 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8510 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8511 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8512 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8513 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8514 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8515 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8516 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8517 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8518 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8519 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8520 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8521 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8522 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8523 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8524 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8525 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8526 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8527 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8528 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8529 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8530 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8531 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8532 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8533 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8534 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8535 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8536 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8537 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8538 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8539 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8540 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8541 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8542 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8543 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
8544 };
8545 #endif
8546
/* Calls upb_msgdef_ref() on "m" with "owner" and hands the same pointer back,
 * so a getter can ref and return a msgdef in one expression. */
static const upb_msgdef *refm(const upb_msgdef *m, const void *owner) {
  const upb_msgdef *result = m;
  upb_msgdef_ref(result, owner);
  return result;
}
8551
/* Calls upb_enumdef_ref() on "e" with "owner" and hands the same pointer back,
 * so a getter can ref and return an enumdef in one expression. */
static const upb_enumdef *refe(const upb_enumdef *e, const void *owner) {
  const upb_enumdef *result = e;
  upb_enumdef_ref(result, owner);
  return result;
}
8556
8557 /* Public API. */
upbdefs_google_protobuf_DescriptorProto_get(const void * owner)8558 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_get(const void *owner) { return refm(&msgs[0], owner); }
upbdefs_google_protobuf_DescriptorProto_ExtensionRange_get(const void * owner)8559 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ExtensionRange_get(const void *owner) { return refm(&msgs[1], owner); }
upbdefs_google_protobuf_DescriptorProto_ReservedRange_get(const void * owner)8560 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ReservedRange_get(const void *owner) { return refm(&msgs[2], owner); }
upbdefs_google_protobuf_EnumDescriptorProto_get(const void * owner)8561 const upb_msgdef *upbdefs_google_protobuf_EnumDescriptorProto_get(const void *owner) { return refm(&msgs[3], owner); }
upbdefs_google_protobuf_EnumOptions_get(const void * owner)8562 const upb_msgdef *upbdefs_google_protobuf_EnumOptions_get(const void *owner) { return refm(&msgs[4], owner); }
upbdefs_google_protobuf_EnumValueDescriptorProto_get(const void * owner)8563 const upb_msgdef *upbdefs_google_protobuf_EnumValueDescriptorProto_get(const void *owner) { return refm(&msgs[5], owner); }
upbdefs_google_protobuf_EnumValueOptions_get(const void * owner)8564 const upb_msgdef *upbdefs_google_protobuf_EnumValueOptions_get(const void *owner) { return refm(&msgs[6], owner); }
upbdefs_google_protobuf_FieldDescriptorProto_get(const void * owner)8565 const upb_msgdef *upbdefs_google_protobuf_FieldDescriptorProto_get(const void *owner) { return refm(&msgs[7], owner); }
upbdefs_google_protobuf_FieldOptions_get(const void * owner)8566 const upb_msgdef *upbdefs_google_protobuf_FieldOptions_get(const void *owner) { return refm(&msgs[8], owner); }
upbdefs_google_protobuf_FileDescriptorProto_get(const void * owner)8567 const upb_msgdef *upbdefs_google_protobuf_FileDescriptorProto_get(const void *owner) { return refm(&msgs[9], owner); }
upbdefs_google_protobuf_FileDescriptorSet_get(const void * owner)8568 const upb_msgdef *upbdefs_google_protobuf_FileDescriptorSet_get(const void *owner) { return refm(&msgs[10], owner); }
upbdefs_google_protobuf_FileOptions_get(const void * owner)8569 const upb_msgdef *upbdefs_google_protobuf_FileOptions_get(const void *owner) { return refm(&msgs[11], owner); }
upbdefs_google_protobuf_MessageOptions_get(const void * owner)8570 const upb_msgdef *upbdefs_google_protobuf_MessageOptions_get(const void *owner) { return refm(&msgs[12], owner); }
upbdefs_google_protobuf_MethodDescriptorProto_get(const void * owner)8571 const upb_msgdef *upbdefs_google_protobuf_MethodDescriptorProto_get(const void *owner) { return refm(&msgs[13], owner); }
upbdefs_google_protobuf_MethodOptions_get(const void * owner)8572 const upb_msgdef *upbdefs_google_protobuf_MethodOptions_get(const void *owner) { return refm(&msgs[14], owner); }
upbdefs_google_protobuf_OneofDescriptorProto_get(const void * owner)8573 const upb_msgdef *upbdefs_google_protobuf_OneofDescriptorProto_get(const void *owner) { return refm(&msgs[15], owner); }
upbdefs_google_protobuf_ServiceDescriptorProto_get(const void * owner)8574 const upb_msgdef *upbdefs_google_protobuf_ServiceDescriptorProto_get(const void *owner) { return refm(&msgs[16], owner); }
upbdefs_google_protobuf_ServiceOptions_get(const void * owner)8575 const upb_msgdef *upbdefs_google_protobuf_ServiceOptions_get(const void *owner) { return refm(&msgs[17], owner); }
upbdefs_google_protobuf_SourceCodeInfo_get(const void * owner)8576 const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_get(const void *owner) { return refm(&msgs[18], owner); }
upbdefs_google_protobuf_SourceCodeInfo_Location_get(const void * owner)8577 const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_Location_get(const void *owner) { return refm(&msgs[19], owner); }
upbdefs_google_protobuf_UninterpretedOption_get(const void * owner)8578 const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_get(const void *owner) { return refm(&msgs[20], owner); }
upbdefs_google_protobuf_UninterpretedOption_NamePart_get(const void * owner)8579 const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_NamePart_get(const void *owner) { return refm(&msgs[21], owner); }
8580
upbdefs_google_protobuf_FieldDescriptorProto_Label_get(const void * owner)8581 const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Label_get(const void *owner) { return refe(&enums[0], owner); }
upbdefs_google_protobuf_FieldDescriptorProto_Type_get(const void * owner)8582 const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Type_get(const void *owner) { return refe(&enums[1], owner); }
upbdefs_google_protobuf_FieldOptions_CType_get(const void * owner)8583 const upb_enumdef *upbdefs_google_protobuf_FieldOptions_CType_get(const void *owner) { return refe(&enums[2], owner); }
upbdefs_google_protobuf_FieldOptions_JSType_get(const void * owner)8584 const upb_enumdef *upbdefs_google_protobuf_FieldOptions_JSType_get(const void *owner) { return refe(&enums[3], owner); }
upbdefs_google_protobuf_FileOptions_OptimizeMode_get(const void * owner)8585 const upb_enumdef *upbdefs_google_protobuf_FileOptions_OptimizeMode_get(const void *owner) { return refe(&enums[4], owner); }
8586 /*
8587 ** XXX: The routines in this file that consume a string do not currently
8588 ** support strings that span multiple buffers. In the future, as upb_sink and
8589 ** its buffering/sharing functionality evolve, there should be an easy and
8590 ** idiomatic way of correctly handling this case. For now, we accept this
8591 ** limitation since we currently only parse descriptors from single strings.
8592 */
8593
8594
8595 #include <errno.h>
8596 #include <stdlib.h>
8597 #include <string.h>
8598
/* Compares a NUL-terminated string with a non-NUL-terminated buffer.
 *
 * Returns true iff "str" is exactly "n" bytes long and those bytes equal the
 * first "n" bytes of "buf". "buf" is not touched when n == 0, so an empty
 * buffer may be represented by a NULL pointer. */
static bool upb_streq(const char *str, const char *buf, size_t n) {
  if (strlen(str) != n) {
    return false;
  }
  /* memcmp() requires valid pointers even when the length is zero, so skip it
   * for the empty case instead of passing a possibly-NULL "buf". */
  return n == 0 || memcmp(str, buf, n) == 0;
}
8603
8604 /* We keep a stack of all the message scopes we are currently in, as well as
8605 * the top-level file scope. This is necessary to correctly qualify the
8606 * definitions that are contained inside. "name" tracks the name of the
8607 * message or package (a bare name -- not qualified by any enclosing scopes). */
8608 typedef struct {
/* Set to NULL when the frame is pushed; replaced by
 * upb_descreader_setscopename() and released with upb_gfree() in
 * upb_descreader_endcontainer(). */
8609 char *name;
8610 /* Index of the first def that is under this scope. For msgdefs, the
8611 * msgdef itself is at start-1. */
8612 int start;
/* upb_inttable_count() of the reader's "oneofs" table at the time the frame
 * was pushed; upb_descreader_getoneof() adds it to the index it is given. */
8613 uint32_t oneof_start;
/* Set to 0 when the frame is pushed. */
8614 uint32_t oneof_index;
8615 } upb_descreader_frame;
8616
8617 /* The maximum number of nested declarations that are allowed, i.e.
8618 * message Foo {
8619 * message Bar {
8620 * message Baz {
8621 * }
8622 * }
8623 * }
8624 *
8625 * This is a resource limit that affects how big our runtime stack can grow.
8626 * TODO: make this a runtime-settable property of the Reader instance. */
/* Used as the length of the "stack" array in struct upb_descreader. */
8627 #define UPB_MAX_MESSAGE_NESTING 64
8628
/* Reader state shared by the descriptor handlers in this file. The handlers
 * receive it as their "closure" argument. */
8629 struct upb_descreader {
8630 upb_sink sink;
/* Receives every upb_filedef created by fileset_startfile(). */
8631 upb_inttable files;
/* NOTE(review): not referenced in this part of the file; presumably maps a
 * file name to its filedef -- confirm against the code that fills it. */
8632 upb_strtable files_by_name;
8633 upb_filedef *file; /* The last file in files. */
/* Scope frames pushed by upb_descreader_startcontainer() and popped by
 * upb_descreader_endcontainer(); the first stack_len entries are in use. */
8634 upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
8635 int stack_len;
/* oneofdefs created on demand by upb_descreader_getoneof() and drained into
 * the enclosing msgdef by upb_descreader_endcontainer(). */
8636 upb_inttable oneofs;
8637
/* NOTE(review): the fields below are not used in this part of the file;
 * presumably scratch state for the def currently being parsed -- confirm
 * against the handlers further down. */
8638 uint32_t number;
8639 char *name;
8640 bool saw_number;
8641 bool saw_name;
8642
8643 char *default_string;
8644
8645 upb_fielddef *f;
8646 };
8647
/* Returns a newly allocated, NUL-terminated copy of the first "n" bytes of
 * "buf", obtained from upb_gmalloc().
 *
 * Returns NULL if n + 1 would overflow size_t or if the allocation fails.
 * "buf" is not read when n == 0, so it may be NULL in that case. */
static char *upb_gstrndup(const char *buf, size_t n) {
  char *ret;

  if (n + 1 < n) {
    /* n + 1 wrapped around; a zero-byte allocation could not hold the NUL. */
    return NULL;
  }

  ret = upb_gmalloc(n + 1);
  if (!ret) return NULL;

  /* memcpy() requires valid pointers even when the length is zero. */
  if (n > 0) {
    memcpy(ret, buf, n);
  }
  ret[n] = '\0';
  return ret;
}
8655
/* Returns a newly allocated string that joins the input strings with a '.',
 * for example:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz")        -> "Baz"
 * A NULL "base" is treated like an empty one. Returns NULL if allocation
 * fails. The caller owns the returned string. */
static char *upb_join(const char *base, const char *name) {
  size_t base_len;
  size_t name_len;
  char *ret;

  if (!base || base[0] == '\0') {
    return upb_gstrdup(name);
  }

  /* Measure each input once and copy at known offsets instead of chaining
   * strcat() calls, each of which would rescan the destination. */
  base_len = strlen(base);
  name_len = strlen(name);

  ret = upb_gmalloc(base_len + name_len + 2); /* '.' and trailing NUL. */
  if (!ret) {
    return NULL;
  }

  memcpy(ret, base, base_len);
  ret[base_len] = '.';
  memcpy(ret + base_len + 1, name, name_len + 1); /* Includes the NUL. */
  return ret;
}
8676
8677 /* Qualify the defname for all defs starting with offset "start" with "str". */
upb_descreader_qualify(upb_filedef * f,char * str,int32_t start)8678 static bool upb_descreader_qualify(upb_filedef *f, char *str, int32_t start) {
8679 size_t i;
8680 for (i = start; i < upb_filedef_defcount(f); i++) {
8681 upb_def *def = upb_filedef_mutabledef(f, i);
8682 char *name = upb_join(str, upb_def_fullname(def));
8683 if (!name) {
8684 /* Need better logic here; at this point we've qualified some names but
8685 * not others. */
8686 return false;
8687 }
8688 upb_def_setfullname(def, name, NULL);
8689 upb_gfree(name);
8690 }
8691 return true;
8692 }
8693
8694
8695 /* upb_descreader ************************************************************/
8696
upb_descreader_top(upb_descreader * r)8697 static upb_msgdef *upb_descreader_top(upb_descreader *r) {
8698 int index;
8699 UPB_ASSERT(r->stack_len > 1);
8700 index = r->stack[r->stack_len-1].start - 1;
8701 UPB_ASSERT(index >= 0);
8702 return upb_downcast_msgdef_mutable(upb_filedef_mutabledef(r->file, index));
8703 }
8704
upb_descreader_last(upb_descreader * r)8705 static upb_def *upb_descreader_last(upb_descreader *r) {
8706 return upb_filedef_mutabledef(r->file, upb_filedef_defcount(r->file) - 1);
8707 }
8708
8709 /* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
8710 * entities that have names and can contain sub-definitions. */
upb_descreader_startcontainer(upb_descreader * r)8711 void upb_descreader_startcontainer(upb_descreader *r) {
8712 upb_descreader_frame *f = &r->stack[r->stack_len++];
8713 f->start = upb_filedef_defcount(r->file);
8714 f->oneof_start = upb_inttable_count(&r->oneofs);
8715 f->oneof_index = 0;
8716 f->name = NULL;
8717 }
8718
/* Closes the innermost scope.
 *
 * Every oneof pushed since the scope was opened is popped and added to the
 * msgdef returned by upb_descreader_top(). Then all defs created inside the
 * scope are qualified with the scope's name. On success the name is freed,
 * the frame is popped and true is returned; if qualification fails the frame
 * is left in place and false is returned. */
bool upb_descreader_endcontainer(upb_descreader *r) {
  upb_descreader_frame *frame = &r->stack[r->stack_len - 1];

  while (upb_inttable_count(&r->oneofs) > frame->oneof_start) {
    upb_oneofdef *oneof = upb_value_getptr(upb_inttable_pop(&r->oneofs));
    bool added =
        upb_msgdef_addoneof(upb_descreader_top(r), oneof, &r->oneofs, NULL);
    UPB_ASSERT(added);
  }

  if (upb_descreader_qualify(r->file, frame->name, frame->start)) {
    upb_gfree(frame->name);
    frame->name = NULL;
    r->stack_len--;
    return true;
  }

  return false;
}
8737
/* Replaces the scope name of the innermost frame with "str".  The previous
 * name is freed and "str" itself (not a copy) is stored in the frame. */
void upb_descreader_setscopename(upb_descreader *r, char *str) {
  upb_descreader_frame *frame = &r->stack[r->stack_len - 1];
  char *previous = frame->name;
  frame->name = str;
  upb_gfree(previous);
}
8743
upb_descreader_getoneof(upb_descreader * r,uint32_t index)8744 static upb_oneofdef *upb_descreader_getoneof(upb_descreader *r,
8745 uint32_t index) {
8746 bool found;
8747 upb_value val;
8748 upb_descreader_frame *f = &r->stack[r->stack_len-1];
8749
8750 /* DescriptorProto messages can be nested, so we will see the nested messages
8751 * between when we see the FieldDescriptorProto and the OneofDescriptorProto.
8752 * We need to preserve the oneofs in between these two things. */
8753 index += f->oneof_start;
8754
8755 while (upb_inttable_count(&r->oneofs) <= index) {
8756 upb_inttable_push(&r->oneofs, upb_value_ptr(upb_oneofdef_new(&r->oneofs)));
8757 }
8758
8759 found = upb_inttable_lookup(&r->oneofs, index, &val);
8760 UPB_ASSERT(found);
8761 return upb_value_getptr(val);
8762 }
8763
8764 /** Handlers for google.protobuf.FileDescriptorSet. ***************************/
8765
fileset_startfile(void * closure,const void * hd)8766 static void *fileset_startfile(void *closure, const void *hd) {
8767 upb_descreader *r = closure;
8768 UPB_UNUSED(hd);
8769 r->file = upb_filedef_new(&r->files);
8770 upb_inttable_push(&r->files, upb_value_ptr(r->file));
8771 return r;
8772 }
8773
8774 /** Handlers for google.protobuf.FileDescriptorProto. *************************/
8775
file_start(void * closure,const void * hd)8776 static bool file_start(void *closure, const void *hd) {
8777 upb_descreader *r = closure;
8778 UPB_UNUSED(hd);
8779 upb_descreader_startcontainer(r);
8780 return true;
8781 }
8782
/* End-message handler for FileDescriptorProto: closes the file's container
 * frame and returns the result of doing so.  "status" is not used. */
static bool file_end(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  bool closed;
  UPB_UNUSED(hd);
  UPB_UNUSED(status);
  closed = upb_descreader_endcontainer(reader);
  return closed;
}
8789
file_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8790 static size_t file_onname(void *closure, const void *hd, const char *buf,
8791 size_t n, const upb_bufhandle *handle) {
8792 upb_descreader *r = closure;
8793 char *name;
8794 bool ok;
8795 UPB_UNUSED(hd);
8796 UPB_UNUSED(handle);
8797
8798 name = upb_gstrndup(buf, n);
8799 upb_strtable_insert(&r->files_by_name, name, upb_value_ptr(r->file));
8800 /* XXX: see comment at the top of the file. */
8801 ok = upb_filedef_setname(r->file, name, NULL);
8802 upb_gfree(name);
8803 UPB_ASSERT(ok);
8804 return n;
8805 }
8806
file_onpackage(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8807 static size_t file_onpackage(void *closure, const void *hd, const char *buf,
8808 size_t n, const upb_bufhandle *handle) {
8809 upb_descreader *r = closure;
8810 char *package;
8811 bool ok;
8812 UPB_UNUSED(hd);
8813 UPB_UNUSED(handle);
8814
8815 package = upb_gstrndup(buf, n);
8816 /* XXX: see comment at the top of the file. */
8817 upb_descreader_setscopename(r, package);
8818 ok = upb_filedef_setpackage(r->file, package, NULL);
8819 UPB_ASSERT(ok);
8820 return n;
8821 }
8822
file_startphpnamespace(void * closure,const void * hd,size_t size_hint)8823 static void *file_startphpnamespace(void *closure, const void *hd,
8824 size_t size_hint) {
8825 upb_descreader *r = closure;
8826 bool ok;
8827 UPB_UNUSED(hd);
8828 UPB_UNUSED(size_hint);
8829
8830 ok = upb_filedef_setphpnamespace(r->file, "", NULL);
8831 UPB_ASSERT(ok);
8832 return closure;
8833 }
8834
file_onphpnamespace(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8835 static size_t file_onphpnamespace(void *closure, const void *hd,
8836 const char *buf, size_t n,
8837 const upb_bufhandle *handle) {
8838 upb_descreader *r = closure;
8839 char *php_namespace;
8840 bool ok;
8841 UPB_UNUSED(hd);
8842 UPB_UNUSED(handle);
8843
8844 php_namespace = upb_gstrndup(buf, n);
8845 ok = upb_filedef_setphpnamespace(r->file, php_namespace, NULL);
8846 upb_gfree(php_namespace);
8847 UPB_ASSERT(ok);
8848 return n;
8849 }
8850
file_onphpprefix(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8851 static size_t file_onphpprefix(void *closure, const void *hd, const char *buf,
8852 size_t n, const upb_bufhandle *handle) {
8853 upb_descreader *r = closure;
8854 char *prefix;
8855 bool ok;
8856 UPB_UNUSED(hd);
8857 UPB_UNUSED(handle);
8858
8859 prefix = upb_gstrndup(buf, n);
8860 ok = upb_filedef_setphpprefix(r->file, prefix, NULL);
8861 upb_gfree(prefix);
8862 UPB_ASSERT(ok);
8863 return n;
8864 }
8865
file_onsyntax(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8866 static size_t file_onsyntax(void *closure, const void *hd, const char *buf,
8867 size_t n, const upb_bufhandle *handle) {
8868 upb_descreader *r = closure;
8869 bool ok;
8870 UPB_UNUSED(hd);
8871 UPB_UNUSED(handle);
8872 /* XXX: see comment at the top of the file. */
8873 if (upb_streq("proto2", buf, n)) {
8874 ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO2, NULL);
8875 } else if (upb_streq("proto3", buf, n)) {
8876 ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO3, NULL);
8877 } else {
8878 ok = false;
8879 }
8880
8881 UPB_ASSERT(ok);
8882 return n;
8883 }
8884
file_startmsg(void * closure,const void * hd)8885 static void *file_startmsg(void *closure, const void *hd) {
8886 upb_descreader *r = closure;
8887 upb_msgdef *m = upb_msgdef_new(&m);
8888 bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
8889 UPB_UNUSED(hd);
8890 UPB_ASSERT(ok);
8891 return r;
8892 }
8893
file_startenum(void * closure,const void * hd)8894 static void *file_startenum(void *closure, const void *hd) {
8895 upb_descreader *r = closure;
8896 upb_enumdef *e = upb_enumdef_new(&e);
8897 bool ok = upb_filedef_addenum(r->file, e, &e, NULL);
8898 UPB_UNUSED(hd);
8899 UPB_ASSERT(ok);
8900 return r;
8901 }
8902
file_startext(void * closure,const void * hd)8903 static void *file_startext(void *closure, const void *hd) {
8904 upb_descreader *r = closure;
8905 r->f = upb_fielddef_new(r);
8906 UPB_UNUSED(hd);
8907 return r;
8908 }
8909
file_endext(void * closure,const void * hd)8910 static bool file_endext(void *closure, const void *hd) {
8911 /* The current symtab code can't handle extensions, so we just discard
8912 * them for now. */
8913 upb_descreader *r = closure;
8914 upb_fielddef_unref(r->f, r);
8915 UPB_UNUSED(hd);
8916 r->f = NULL;
8917 return true;
8918 }
8919
file_ondep(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8920 static size_t file_ondep(void *closure, const void *hd, const char *buf,
8921 size_t n, const upb_bufhandle *handle) {
8922 upb_descreader *r = closure;
8923 upb_value val;
8924 if (upb_strtable_lookup2(&r->files_by_name, buf, n, &val)) {
8925 upb_filedef_adddep(r->file, upb_value_getptr(val));
8926 }
8927 UPB_UNUSED(hd);
8928 UPB_UNUSED(handle);
8929 return n;
8930 }
8931
8932 /** Handlers for google.protobuf.EnumValueDescriptorProto. *********************/
8933
enumval_startmsg(void * closure,const void * hd)8934 static bool enumval_startmsg(void *closure, const void *hd) {
8935 upb_descreader *r = closure;
8936 UPB_UNUSED(hd);
8937 r->saw_number = false;
8938 r->saw_name = false;
8939 return true;
8940 }
8941
enumval_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8942 static size_t enumval_onname(void *closure, const void *hd, const char *buf,
8943 size_t n, const upb_bufhandle *handle) {
8944 upb_descreader *r = closure;
8945 UPB_UNUSED(hd);
8946 UPB_UNUSED(handle);
8947 /* XXX: see comment at the top of the file. */
8948 upb_gfree(r->name);
8949 r->name = upb_gstrndup(buf, n);
8950 r->saw_name = true;
8951 return n;
8952 }
8953
enumval_onnumber(void * closure,const void * hd,int32_t val)8954 static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
8955 upb_descreader *r = closure;
8956 UPB_UNUSED(hd);
8957 r->number = val;
8958 r->saw_number = true;
8959 return true;
8960 }
8961
/* End-message handler for EnumValueDescriptorProto: adds the pending
 * (name, number) pair to the most recently added def, which is treated as the
 * enclosing enum.
 *
 * Returns false, with an error in "status", if the value lacked a name or a
 * number.  Also returns false if upb_enumdef_addval() rejects the value; that
 * call receives "status" so it can report the reason.  The pending name is
 * released in either case once the add has been attempted. */
static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_enumdef *e;
  bool ok;
  UPB_UNUSED(hd);

  if(!r->saw_number || !r->saw_name) {
    upb_status_seterrmsg(status, "Enum value missing name or number.");
    return false;
  }
  e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
  /* Propagate failure instead of reporting success for a rejected value. */
  ok = upb_enumdef_addval(e, r->name, r->number, status);
  upb_gfree(r->name);
  r->name = NULL;
  return ok;
}
8977
8978 /** Handlers for google.protobuf.EnumDescriptorProto. *************************/
8979
/* End-message handler for EnumDescriptorProto: checks that the most recently
 * added def has a name and at least one value.  On failure sets an error
 * message in "status" and returns false. */
static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_def *def;
  upb_enumdef *e;
  UPB_UNUSED(hd);

  def = upb_descreader_last(r);
  e = upb_downcast_enumdef_mutable(def);
  if (upb_def_fullname(def) == NULL) {
    upb_status_seterrmsg(status, "Enum had no name.");
    return false;
  }
  if (upb_enumdef_numvals(e) == 0) {
    upb_status_seterrmsg(status, "Enum had no values.");
    return false;
  }
  return true;
}
8996
enum_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)8997 static size_t enum_onname(void *closure, const void *hd, const char *buf,
8998 size_t n, const upb_bufhandle *handle) {
8999 upb_descreader *r = closure;
9000 char *fullname = upb_gstrndup(buf, n);
9001 UPB_UNUSED(hd);
9002 UPB_UNUSED(handle);
9003 /* XXX: see comment at the top of the file. */
9004 upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
9005 upb_gfree(fullname);
9006 return n;
9007 }
9008
9009 /** Handlers for google.protobuf.FieldDescriptorProto *************************/
9010
field_startmsg(void * closure,const void * hd)9011 static bool field_startmsg(void *closure, const void *hd) {
9012 upb_descreader *r = closure;
9013 UPB_UNUSED(hd);
9014 UPB_ASSERT(r->f);
9015 upb_gfree(r->default_string);
9016 r->default_string = NULL;
9017
9018 /* fielddefs default to packed, but descriptors default to non-packed. */
9019 upb_fielddef_setpacked(r->f, false);
9020 return true;
9021 }
9022
9023 /* Converts the default value in string "str" into "d". Passes a ref on str.
9024 * Returns true on success. */
parse_default(char * str,upb_fielddef * f)9025 static bool parse_default(char *str, upb_fielddef *f) {
9026 bool success = true;
9027 char *end;
9028 switch (upb_fielddef_type(f)) {
9029 case UPB_TYPE_INT32: {
9030 long val = strtol(str, &end, 0);
9031 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
9032 success = false;
9033 else
9034 upb_fielddef_setdefaultint32(f, val);
9035 break;
9036 }
9037 case UPB_TYPE_INT64: {
9038 /* XXX: Need to write our own strtoll, since it's not available in c89. */
9039 long long val = strtol(str, &end, 0);
9040 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
9041 success = false;
9042 else
9043 upb_fielddef_setdefaultint64(f, val);
9044 break;
9045 }
9046 case UPB_TYPE_UINT32: {
9047 unsigned long val = strtoul(str, &end, 0);
9048 if (val > UINT32_MAX || errno == ERANGE || *end)
9049 success = false;
9050 else
9051 upb_fielddef_setdefaultuint32(f, val);
9052 break;
9053 }
9054 case UPB_TYPE_UINT64: {
9055 /* XXX: Need to write our own strtoull, since it's not available in c89. */
9056 unsigned long long val = strtoul(str, &end, 0);
9057 if (val > UINT64_MAX || errno == ERANGE || *end)
9058 success = false;
9059 else
9060 upb_fielddef_setdefaultuint64(f, val);
9061 break;
9062 }
9063 case UPB_TYPE_DOUBLE: {
9064 double val = strtod(str, &end);
9065 if (errno == ERANGE || *end)
9066 success = false;
9067 else
9068 upb_fielddef_setdefaultdouble(f, val);
9069 break;
9070 }
9071 case UPB_TYPE_FLOAT: {
9072 /* XXX: Need to write our own strtof, since it's not available in c89. */
9073 float val = strtod(str, &end);
9074 if (errno == ERANGE || *end)
9075 success = false;
9076 else
9077 upb_fielddef_setdefaultfloat(f, val);
9078 break;
9079 }
9080 case UPB_TYPE_BOOL: {
9081 if (strcmp(str, "false") == 0)
9082 upb_fielddef_setdefaultbool(f, false);
9083 else if (strcmp(str, "true") == 0)
9084 upb_fielddef_setdefaultbool(f, true);
9085 else
9086 success = false;
9087 break;
9088 }
9089 default: abort();
9090 }
9091 return success;
9092 }
9093
/* End-message handler for FieldDescriptorProto: applies the saved default
 * string (if any) to the current field.  String and enum fields take the
 * string as-is; other types go through parse_default().  Returns false with
 * an error in "status" for a submessage default or an unparsable value. */
static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_fielddef *field = r->f;
  UPB_UNUSED(hd);

  /* TODO: verify that all required fields were present. */
  UPB_ASSERT(upb_fielddef_number(field) != 0);
  UPB_ASSERT(upb_fielddef_name(field) != NULL);
  UPB_ASSERT((upb_fielddef_subdefname(field) != NULL) ==
             upb_fielddef_hassubdef(field));

  /* No default given: nothing more to do. */
  if (r->default_string == NULL) {
    return true;
  }

  if (upb_fielddef_issubmsg(field)) {
    upb_status_seterrmsg(status, "Submessages cannot have defaults.");
    return false;
  }

  if (upb_fielddef_isstring(field) ||
      upb_fielddef_type(field) == UPB_TYPE_ENUM) {
    upb_fielddef_setdefaultcstr(field, r->default_string, NULL);
    return true;
  }

  if (!parse_default(r->default_string, field)) {
    /* We don't worry too much about giving a great error message since the
     * compiler should have ensured this was correct. */
    upb_status_seterrmsg(status, "Error converting default value.");
    return false;
  }
  return true;
}
9122
field_onlazy(void * closure,const void * hd,bool val)9123 static bool field_onlazy(void *closure, const void *hd, bool val) {
9124 upb_descreader *r = closure;
9125 UPB_UNUSED(hd);
9126
9127 upb_fielddef_setlazy(r->f, val);
9128 return true;
9129 }
9130
field_onpacked(void * closure,const void * hd,bool val)9131 static bool field_onpacked(void *closure, const void *hd, bool val) {
9132 upb_descreader *r = closure;
9133 UPB_UNUSED(hd);
9134
9135 upb_fielddef_setpacked(r->f, val);
9136 return true;
9137 }
9138
field_ontype(void * closure,const void * hd,int32_t val)9139 static bool field_ontype(void *closure, const void *hd, int32_t val) {
9140 upb_descreader *r = closure;
9141 UPB_UNUSED(hd);
9142
9143 upb_fielddef_setdescriptortype(r->f, val);
9144 return true;
9145 }
9146
field_onlabel(void * closure,const void * hd,int32_t val)9147 static bool field_onlabel(void *closure, const void *hd, int32_t val) {
9148 upb_descreader *r = closure;
9149 UPB_UNUSED(hd);
9150
9151 upb_fielddef_setlabel(r->f, val);
9152 return true;
9153 }
9154
field_onnumber(void * closure,const void * hd,int32_t val)9155 static bool field_onnumber(void *closure, const void *hd, int32_t val) {
9156 upb_descreader *r = closure;
9157 bool ok;
9158 UPB_UNUSED(hd);
9159
9160 ok = upb_fielddef_setnumber(r->f, val, NULL);
9161 UPB_ASSERT(ok);
9162 return true;
9163 }
9164
field_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9165 static size_t field_onname(void *closure, const void *hd, const char *buf,
9166 size_t n, const upb_bufhandle *handle) {
9167 upb_descreader *r = closure;
9168 char *name = upb_gstrndup(buf, n);
9169 UPB_UNUSED(hd);
9170 UPB_UNUSED(handle);
9171
9172 /* XXX: see comment at the top of the file. */
9173 upb_fielddef_setname(r->f, name, NULL);
9174 upb_gfree(name);
9175 return n;
9176 }
9177
field_ontypename(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9178 static size_t field_ontypename(void *closure, const void *hd, const char *buf,
9179 size_t n, const upb_bufhandle *handle) {
9180 upb_descreader *r = closure;
9181 char *name = upb_gstrndup(buf, n);
9182 UPB_UNUSED(hd);
9183 UPB_UNUSED(handle);
9184
9185 /* XXX: see comment at the top of the file. */
9186 upb_fielddef_setsubdefname(r->f, name, NULL);
9187 upb_gfree(name);
9188 return n;
9189 }
9190
field_onextendee(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9191 static size_t field_onextendee(void *closure, const void *hd, const char *buf,
9192 size_t n, const upb_bufhandle *handle) {
9193 upb_descreader *r = closure;
9194 char *name = upb_gstrndup(buf, n);
9195 UPB_UNUSED(hd);
9196 UPB_UNUSED(handle);
9197
9198 /* XXX: see comment at the top of the file. */
9199 upb_fielddef_setcontainingtypename(r->f, name, NULL);
9200 upb_gfree(name);
9201 return n;
9202 }
9203
field_ondefaultval(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9204 static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
9205 size_t n, const upb_bufhandle *handle) {
9206 upb_descreader *r = closure;
9207 UPB_UNUSED(hd);
9208 UPB_UNUSED(handle);
9209
9210 /* Have to convert from string to the correct type, but we might not know the
9211 * type yet, so we save it as a string until the end of the field.
9212 * XXX: see comment at the top of the file. */
9213 upb_gfree(r->default_string);
9214 r->default_string = upb_gstrndup(buf, n);
9215 return n;
9216 }
9217
field_ononeofindex(void * closure,const void * hd,int32_t index)9218 static bool field_ononeofindex(void *closure, const void *hd, int32_t index) {
9219 upb_descreader *r = closure;
9220 upb_oneofdef *o = upb_descreader_getoneof(r, index);
9221 bool ok = upb_oneofdef_addfield(o, r->f, &r->f, NULL);
9222 UPB_UNUSED(hd);
9223
9224 UPB_ASSERT(ok);
9225 return true;
9226 }
9227
9228 /** Handlers for google.protobuf.OneofDescriptorProto. ************************/
9229
oneof_name(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9230 static size_t oneof_name(void *closure, const void *hd, const char *buf,
9231 size_t n, const upb_bufhandle *handle) {
9232 upb_descreader *r = closure;
9233 upb_descreader_frame *f = &r->stack[r->stack_len-1];
9234 upb_oneofdef *o = upb_descreader_getoneof(r, f->oneof_index++);
9235 char *name_null_terminated = upb_gstrndup(buf, n);
9236 bool ok = upb_oneofdef_setname(o, name_null_terminated, NULL);
9237 UPB_UNUSED(hd);
9238 UPB_UNUSED(handle);
9239
9240 UPB_ASSERT(ok);
9241 free(name_null_terminated);
9242 return n;
9243 }
9244
9245 /** Handlers for google.protobuf.DescriptorProto ******************************/
9246
msg_start(void * closure,const void * hd)9247 static bool msg_start(void *closure, const void *hd) {
9248 upb_descreader *r = closure;
9249 UPB_UNUSED(hd);
9250
9251 upb_descreader_startcontainer(r);
9252 return true;
9253 }
9254
/* End-message handler for DescriptorProto: fails with an error in "status" if
 * the message never received a name; otherwise closes the message's container
 * frame and returns the result of doing so. */
static bool msg_end(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_msgdef *msg = upb_descreader_top(r);
  UPB_UNUSED(hd);

  if (upb_def_fullname(upb_msgdef_upcast_mutable(msg)) == NULL) {
    upb_status_seterrmsg(status, "Encountered message with no name.");
    return false;
  }
  return upb_descreader_endcontainer(r);
}
9266
msg_name(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)9267 static size_t msg_name(void *closure, const void *hd, const char *buf,
9268 size_t n, const upb_bufhandle *handle) {
9269 upb_descreader *r = closure;
9270 upb_msgdef *m = upb_descreader_top(r);
9271 /* XXX: see comment at the top of the file. */
9272 char *name = upb_gstrndup(buf, n);
9273 UPB_UNUSED(hd);
9274 UPB_UNUSED(handle);
9275
9276 upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
9277 upb_descreader_setscopename(r, name); /* Passes ownership of name. */
9278
9279 return n;
9280 }
9281
msg_startmsg(void * closure,const void * hd)9282 static void *msg_startmsg(void *closure, const void *hd) {
9283 upb_descreader *r = closure;
9284 upb_msgdef *m = upb_msgdef_new(&m);
9285 bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
9286 UPB_UNUSED(hd);
9287 UPB_ASSERT(ok);
9288 return r;
9289 }
9290
msg_startext(void * closure,const void * hd)9291 static void *msg_startext(void *closure, const void *hd) {
9292 upb_descreader *r = closure;
9293 r->f = upb_fielddef_new(r);
9294 UPB_UNUSED(hd);
9295 return r;
9296 }
9297
msg_endext(void * closure,const void * hd)9298 static bool msg_endext(void *closure, const void *hd) {
9299 /* The current symtab code can't handle extensions, so we just discard
9300 * them for now. */
9301 upb_descreader *r = closure;
9302 upb_fielddef_unref(r->f, r);
9303 UPB_UNUSED(hd);
9304 r->f = NULL;
9305 return true;
9306 }
9307
msg_startfield(void * closure,const void * hd)9308 static void *msg_startfield(void *closure, const void *hd) {
9309 upb_descreader *r = closure;
9310 r->f = upb_fielddef_new(&r->f);
9311 /* We can't add the new field to the message until its name/number are
9312 * filled in. */
9313 UPB_UNUSED(hd);
9314 return r;
9315 }
9316
msg_endfield(void * closure,const void * hd)9317 static bool msg_endfield(void *closure, const void *hd) {
9318 upb_descreader *r = closure;
9319 upb_msgdef *m = upb_descreader_top(r);
9320 bool ok;
9321 UPB_UNUSED(hd);
9322
9323 /* Oneof fields are added to the msgdef through their oneof, so don't need to
9324 * be added here. */
9325 if (upb_fielddef_containingoneof(r->f) == NULL) {
9326 ok = upb_msgdef_addfield(m, r->f, &r->f, NULL);
9327 UPB_ASSERT(ok);
9328 }
9329 r->f = NULL;
9330 return true;
9331 }
9332
msg_onmapentry(void * closure,const void * hd,bool mapentry)9333 static bool msg_onmapentry(void *closure, const void *hd, bool mapentry) {
9334 upb_descreader *r = closure;
9335 upb_msgdef *m = upb_descreader_top(r);
9336 UPB_UNUSED(hd);
9337
9338 upb_msgdef_setmapentry(m, mapentry);
9339 r->f = NULL;
9340 return true;
9341 }
9342
9343
9344
9345 /** Code to register handlers *************************************************/
9346
9347 #define F(msg, field) upbdefs_google_protobuf_ ## msg ## _f_ ## field(m)
9348
/* Handler-registration callback: installs the descriptor-reader handlers that
 * apply to the message type "h" was created for.  Message types without a
 * matching branch get no handlers.  Asserts afterwards that "h" recorded no
 * error.  "closure" is unused. */
static void reghandlers(const void *closure, upb_handlers *h) {
  /* Must be called "m": the F() macro refers to it by name. */
  const upb_msgdef *m = upb_handlers_msgdef(h);
  UPB_UNUSED(closure);

  if (upbdefs_google_protobuf_FileDescriptorSet_is(m)) {
    upb_handlers_setstartsubmsg(h, F(FileDescriptorSet, file),
                                &fileset_startfile, NULL);
  } else if (upbdefs_google_protobuf_DescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &msg_start, NULL);
    upb_handlers_setendmsg(h, &msg_end, NULL);
    upb_handlers_setstring(h, F(DescriptorProto, name), &msg_name, NULL);
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, extension),
                                &msg_startext, NULL);
    upb_handlers_setendsubmsg(h, F(DescriptorProto, extension),
                              &msg_endext, NULL);
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, nested_type),
                                &msg_startmsg, NULL);
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, field),
                                &msg_startfield, NULL);
    upb_handlers_setendsubmsg(h, F(DescriptorProto, field),
                              &msg_endfield, NULL);
    upb_handlers_setstartsubmsg(h, F(DescriptorProto, enum_type),
                                &file_startenum, NULL);
  } else if (upbdefs_google_protobuf_FileDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &file_start, NULL);
    upb_handlers_setendmsg(h, &file_end, NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, name),
                           &file_onname, NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, package),
                           &file_onpackage, NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, syntax),
                           &file_onsyntax, NULL);
    upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, message_type),
                                &file_startmsg, NULL);
    upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, enum_type),
                                &file_startenum, NULL);
    upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, extension),
                                &file_startext, NULL);
    upb_handlers_setendsubmsg(h, F(FileDescriptorProto, extension),
                              &file_endext, NULL);
    upb_handlers_setstring(h, F(FileDescriptorProto, dependency),
                           &file_ondep, NULL);
  } else if (upbdefs_google_protobuf_EnumValueDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
    upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
    upb_handlers_setstring(h, F(EnumValueDescriptorProto, name),
                           &enumval_onname, NULL);
    upb_handlers_setint32(h, F(EnumValueDescriptorProto, number),
                          &enumval_onnumber, NULL);
  } else if (upbdefs_google_protobuf_EnumDescriptorProto_is(m)) {
    upb_handlers_setendmsg(h, &enum_endmsg, NULL);
    upb_handlers_setstring(h, F(EnumDescriptorProto, name),
                           &enum_onname, NULL);
  } else if (upbdefs_google_protobuf_FieldDescriptorProto_is(m)) {
    upb_handlers_setstartmsg(h, &field_startmsg, NULL);
    upb_handlers_setendmsg(h, &field_endmsg, NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, type),
                          &field_ontype, NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, label),
                          &field_onlabel, NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, number),
                          &field_onnumber, NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, name),
                           &field_onname, NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, type_name),
                           &field_ontypename, NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, extendee),
                           &field_onextendee, NULL);
    upb_handlers_setstring(h, F(FieldDescriptorProto, default_value),
                           &field_ondefaultval, NULL);
    upb_handlers_setint32(h, F(FieldDescriptorProto, oneof_index),
                          &field_ononeofindex, NULL);
  } else if (upbdefs_google_protobuf_OneofDescriptorProto_is(m)) {
    upb_handlers_setstring(h, F(OneofDescriptorProto, name),
                           &oneof_name, NULL);
  } else if (upbdefs_google_protobuf_FieldOptions_is(m)) {
    upb_handlers_setbool(h, F(FieldOptions, lazy), &field_onlazy, NULL);
    upb_handlers_setbool(h, F(FieldOptions, packed), &field_onpacked, NULL);
  } else if (upbdefs_google_protobuf_MessageOptions_is(m)) {
    upb_handlers_setbool(h, F(MessageOptions, map_entry),
                         &msg_onmapentry, NULL);
  } else if (upbdefs_google_protobuf_FileOptions_is(m)) {
    upb_handlers_setstring(h, F(FileOptions, php_class_prefix),
                           &file_onphpprefix, NULL);
    upb_handlers_setstartstr(h, F(FileOptions, php_namespace),
                             &file_startphpnamespace, NULL);
    upb_handlers_setstring(h, F(FileOptions, php_namespace),
                           &file_onphpnamespace, NULL);
  }

  UPB_ASSERT(upb_ok(upb_handlers_status(h)));
}
9437
9438 #undef F
9439
descreader_cleanup(void * _r)9440 void descreader_cleanup(void *_r) {
9441 upb_descreader *r = _r;
9442 size_t i;
9443
9444 for (i = 0; i < upb_descreader_filecount(r); i++) {
9445 upb_filedef_unref(upb_descreader_file(r, i), &r->files);
9446 }
9447
9448 upb_gfree(r->name);
9449 upb_inttable_uninit(&r->files);
9450 upb_strtable_uninit(&r->files_by_name);
9451 upb_inttable_uninit(&r->oneofs);
9452 upb_gfree(r->default_string);
9453 while (r->stack_len > 0) {
9454 upb_descreader_frame *f = &r->stack[--r->stack_len];
9455 upb_gfree(f->name);
9456 }
9457 }
9458
9459
9460 /* Public API ****************************************************************/
9461
upb_descreader_create(upb_env * e,const upb_handlers * h)9462 upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
9463 upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
9464 if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
9465 return NULL;
9466 }
9467
9468 upb_inttable_init(&r->files, UPB_CTYPE_PTR);
9469 upb_strtable_init(&r->files_by_name, UPB_CTYPE_PTR);
9470 upb_inttable_init(&r->oneofs, UPB_CTYPE_PTR);
9471 upb_sink_reset(upb_descreader_input(r), h, r);
9472 r->stack_len = 0;
9473 r->name = NULL;
9474 r->default_string = NULL;
9475
9476 return r;
9477 }
9478
upb_descreader_filecount(const upb_descreader * r)9479 size_t upb_descreader_filecount(const upb_descreader *r) {
9480 return upb_inttable_count(&r->files);
9481 }
9482
upb_descreader_file(const upb_descreader * r,size_t i)9483 upb_filedef *upb_descreader_file(const upb_descreader *r, size_t i) {
9484 upb_value v;
9485 if (upb_inttable_lookup(&r->files, i, &v)) {
9486 return upb_value_getptr(v);
9487 } else {
9488 return NULL;
9489 }
9490 }
9491
upb_descreader_input(upb_descreader * r)9492 upb_sink *upb_descreader_input(upb_descreader *r) {
9493 return &r->sink;
9494 }
9495
upb_descreader_newhandlers(const void * owner)9496 const upb_handlers *upb_descreader_newhandlers(const void *owner) {
9497 const upb_msgdef *m = upbdefs_google_protobuf_FileDescriptorSet_get(&m);
9498 const upb_handlers *h = upb_handlers_newfrozen(m, owner, reghandlers, NULL);
9499 upb_msgdef_unref(m, &m);
9500 return h;
9501 }
9502 /*
9503 ** protobuf decoder bytecode compiler
9504 **
9505 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
9506 ** according to that specific schema and destination handlers.
9507 **
9508 ** Compiling to bytecode is always the first step. If we are using the
9509 ** interpreted decoder we leave it as bytecode and interpret that. If we are
9510 ** using a JIT decoder we use a code generator to turn the bytecode into native
9511 ** code, LLVM IR, etc.
9512 **
9513 ** Bytecode definition is in decoder.int.h.
9514 */
9515
9516 #include <stdarg.h>
9517
9518 #ifdef UPB_DUMP_BYTECODE
9519 #include <stdio.h>
9520 #endif
9521
9522 #define MAXLABEL 5
9523 #define EMPTYLABEL -1
9524
9525 /* mgroup *********************************************************************/
9526
/* Refcounted "free" callback for an mgroup: tears down the method table,
 * releases JIT state when built with UPB_USE_JIT_X64, then frees the bytecode
 * and the group itself. */
static void freegroup(upb_refcounted *r) {
  mgroup *group = (mgroup*)r;
  upb_inttable_uninit(&group->methods);
#ifdef UPB_USE_JIT_X64
  upb_pbdecoder_freejit(group);
#endif
  upb_gfree(group->bytecode);
  upb_gfree(group);
}
9536
/* Refcounted "visit" callback for an mgroup: reports every decoder method in
 * the group's method table to "visit". */
static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const mgroup *group = (const mgroup*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &group->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    visit(r, upb_pbdecodermethod_upcast(method), closure);
    upb_inttable_next(&it);
  }
}
9547
newgroup(const void * owner)9548 mgroup *newgroup(const void *owner) {
9549 mgroup *g = upb_gmalloc(sizeof(*g));
9550 static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
9551 upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
9552 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
9553 g->bytecode = NULL;
9554 g->bytecode_end = NULL;
9555 return g;
9556 }
9557
9558
9559 /* upb_pbdecodermethod ********************************************************/
9560
/* Refcounted "free" callback for a decoder method: releases the ref on the
 * destination handlers (if any), tears down the dispatch table and frees the
 * method. */
static void freemethod(upb_refcounted *r) {
  upb_pbdecodermethod *m = (upb_pbdecodermethod*)r;

  if (m->dest_handlers_ != NULL) {
    upb_handlers_unref(m->dest_handlers_, m);
  }

  upb_inttable_uninit(&m->dispatch);
  upb_gfree(m);
}
9571
/* Refcounted "visit" callback for a decoder method: its only outgoing
 * reference reported here is the group it belongs to. */
static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
                        void *closure) {
  const upb_pbdecodermethod *method = (const upb_pbdecodermethod*)r;
  visit(r, method->group, closure);
}
9577
newmethod(const upb_handlers * dest_handlers,mgroup * group)9578 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
9579 mgroup *group) {
9580 static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
9581 upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
9582 upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
9583 upb_byteshandler_init(&ret->input_handler_);
9584
9585 /* The method references the group and vice-versa, in a circular reference. */
9586 upb_ref2(ret, group);
9587 upb_ref2(group, ret);
9588 upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
9589 upb_pbdecodermethod_unref(ret, &ret);
9590
9591 ret->group = mgroup_upcast_mutable(group);
9592 ret->dest_handlers_ = dest_handlers;
9593 ret->is_native_ = false; /* If we JIT, it will update this later. */
9594 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
9595
9596 if (ret->dest_handlers_) {
9597 upb_handlers_ref(ret->dest_handlers_, ret);
9598 }
9599 return ret;
9600 }
9601
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)9602 const upb_handlers *upb_pbdecodermethod_desthandlers(
9603 const upb_pbdecodermethod *m) {
9604 return m->dest_handlers_;
9605 }
9606
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)9607 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
9608 const upb_pbdecodermethod *m) {
9609 return &m->input_handler_;
9610 }
9611
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)9612 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
9613 return m->is_native_;
9614 }
9615
upb_pbdecodermethod_new(const upb_pbdecodermethodopts * opts,const void * owner)9616 const upb_pbdecodermethod *upb_pbdecodermethod_new(
9617 const upb_pbdecodermethodopts *opts, const void *owner) {
9618 const upb_pbdecodermethod *ret;
9619 upb_pbcodecache cache;
9620
9621 upb_pbcodecache_init(&cache);
9622 ret = upb_pbcodecache_getdecodermethod(&cache, opts);
9623 upb_pbdecodermethod_ref(ret, owner);
9624 upb_pbcodecache_uninit(&cache);
9625 return ret;
9626 }
9627
9628
9629 /* bytecode compiler **********************************************************/
9630
9631 /* Data used only at compilation time. */
9632 typedef struct {
9633 mgroup *group;
9634
9635 uint32_t *pc;
9636 int fwd_labels[MAXLABEL];
9637 int back_labels[MAXLABEL];
9638
9639 /* For fields marked "lazy", parse them lazily or eagerly? */
9640 bool lazy;
9641 } compiler;
9642
newcompiler(mgroup * group,bool lazy)9643 static compiler *newcompiler(mgroup *group, bool lazy) {
9644 compiler *ret = upb_gmalloc(sizeof(*ret));
9645 int i;
9646
9647 ret->group = group;
9648 ret->lazy = lazy;
9649 for (i = 0; i < MAXLABEL; i++) {
9650 ret->fwd_labels[i] = EMPTYLABEL;
9651 ret->back_labels[i] = EMPTYLABEL;
9652 }
9653 return ret;
9654 }
9655
/* Releases a compiler obtained from newcompiler().  Only the compiler struct
 * itself is freed; the group it points at is left alone. */
static void freecompiler(compiler *c) {
  compiler *doomed = c;
  upb_gfree(doomed);
}
9659
9660 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
9661
9662 /* How many words an instruction is. */
instruction_len(uint32_t instr)9663 static int instruction_len(uint32_t instr) {
9664 switch (getop(instr)) {
9665 case OP_SETDISPATCH: return 1 + ptr_words;
9666 case OP_TAGN: return 3;
9667 case OP_SETBIGGROUPNUM: return 2;
9668 default: return 1;
9669 }
9670 }
9671
op_has_longofs(int32_t instruction)9672 bool op_has_longofs(int32_t instruction) {
9673 switch (getop(instruction)) {
9674 case OP_CALL:
9675 case OP_BRANCH:
9676 case OP_CHECKDELIM:
9677 return true;
9678 /* The "tag" instructions only have 8 bytes available for the jump target,
9679 * but that is ok because these opcodes only require short jumps. */
9680 case OP_TAG1:
9681 case OP_TAG2:
9682 case OP_TAGN:
9683 return false;
9684 default:
9685 UPB_ASSERT(false);
9686 return false;
9687 }
9688 }
9689
/* Extracts the signed jump offset stored in "instruction": a sign-extended
 * byte (bits 8..15) for short-offset opcodes, otherwise everything above the
 * opcode byte. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
9697
/* Stores jump offset "ofs" into "*instruction".
 *
 * Long-offset opcodes keep only their opcode byte and receive the offset in
 * all bits above it.  Short-offset opcodes get the low 8 bits of the offset
 * written into bits 8..15, leaving every other bit untouched.  An assertion
 * verifies that the offset reads back unchanged, i.e. that it fit.
 *
 * The shift is done on an unsigned value: backward jumps have a negative
 * "ofs", and left-shifting a negative signed integer is undefined behavior. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  if (op_has_longofs(*instruction)) {
    *instruction = (uint32_t)getop(*instruction) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (*instruction & ~0xff00) | (((uint32_t)ofs & 0xff) << 8);
  }
  UPB_ASSERT(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
9706
/* Returns the compiler's current write position, measured in 32-bit words
 * from the start of the group's bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return c->pc - base;
}
9708
9709 /* Defines a local label at the current PC location. All previous forward
9710 * references are updated to point to this location. The location is noted
9711 * for any future backward references. */
label(compiler * c,unsigned int label)9712 static void label(compiler *c, unsigned int label) {
9713 int val;
9714 uint32_t *codep;
9715
9716 UPB_ASSERT(label < MAXLABEL);
9717 val = c->fwd_labels[label];
9718 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
9719 while (codep) {
9720 int ofs = getofs(*codep);
9721 setofs(codep, c->pc - codep - instruction_len(*codep));
9722 codep = ofs ? codep + ofs : NULL;
9723 }
9724 c->fwd_labels[label] = EMPTYLABEL;
9725 c->back_labels[label] = pcofs(c);
9726 }
9727
9728 /* Creates a reference to a numbered label; either a forward reference
9729 * (positive arg) or backward reference (negative arg). For forward references
9730 * the value returned now is actually a "next" pointer into a linked list of all
9731 * instructions that use this label and will be patched later when the label is
9732 * defined with label().
9733 *
9734 * The returned value is the offset that should be written into the instruction.
9735 */
labelref(compiler * c,int label)9736 static int32_t labelref(compiler *c, int label) {
9737 UPB_ASSERT(label < MAXLABEL);
9738 if (label == LABEL_DISPATCH) {
9739 /* No resolving required. */
9740 return 0;
9741 } else if (label < 0) {
9742 /* Backward local label. Relative to the next instruction. */
9743 uint32_t from = (c->pc + 1) - c->group->bytecode;
9744 return c->back_labels[-label] - from;
9745 } else {
9746 /* Forward local label: prepend to (possibly-empty) linked list. */
9747 int *lptr = &c->fwd_labels[label];
9748 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
9749 *lptr = pcofs(c);
9750 return ret;
9751 }
9752 }
9753
/* Appends one 32-bit word to the group's bytecode and advances the PC.  When
 * the PC has reached the end of the buffer (this includes the initial state
 * in which no buffer exists), the buffer is first grown to twice its size,
 * with a minimum of 64 words, and the PC is re-based onto the new buffer. */
static void put32(compiler *c, uint32_t v) {
  mgroup *group = c->group;

  if (c->pc == group->bytecode_end) {
    int pos = pcofs(c);
    size_t old_words = group->bytecode_end - group->bytecode;
    size_t new_words = UPB_MAX(old_words * 2, 64);

    /* TODO(haberman): handle OOM. */
    group->bytecode = upb_grealloc(group->bytecode,
                                   old_words * sizeof(uint32_t),
                                   new_words * sizeof(uint32_t));
    group->bytecode_end = group->bytecode + new_words;
    c->pc = group->bytecode + pos;
  }

  *c->pc = v;
  c->pc++;
}
9768
/* Emits one instruction with opcode "op" at the current PC.  The variadic
 * arguments depend on the opcode:
 *
 *   OP_SETDISPATCH                  void* (stored inline after the opcode)
 *   STARTMSG/ENDMSG/PUSHLENDELIM/POP/SETDELIM/HALT/RET/DISPATCH
 *                                   none
 *   PARSE_*, START/ENDSEQ, START/ENDSUBMSG, STARTSTR/STRING/ENDSTR,
 *   PUSHTAGDELIM                    upb_selector_t (stored above the opcode)
 *   OP_SETBIGGROUPNUM               int (stored in a second word)
 *   OP_CALL                         upb_pbdecodermethod* (callee)
 *   OP_CHECKDELIM, OP_BRANCH        int label
 *   OP_TAG1, OP_TAG2, OP_TAGN       int label, uint64_t encoded tag
 *
 * Opcodes not listed here emit nothing. */
static void putop(compiler *c, int op, ...) {
  va_list args;
  va_start(args, op);

  switch (op) {
    case OP_SETDISPATCH: {
      /* The pointer takes one or two words depending on pointer size. */
      uintptr_t table = (uintptr_t)va_arg(args, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, table);
      if (sizeof(uintptr_t) > sizeof(uint32_t)) {
        put32(c, (uint64_t)table >> 32);
      }
      break;
    }

    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;

    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM: {
      upb_selector_t sel = va_arg(args, upb_selector_t);
      put32(c, op | sel << 8);
      break;
    }

    case OP_SETBIGGROUPNUM: {
      int groupnum;
      put32(c, op);
      groupnum = va_arg(args, int);
      put32(c, groupnum);
      break;
    }

    case OP_CALL: {
      /* Offset of the callee's code, relative to the word after this one. */
      const upb_pbdecodermethod *callee = va_arg(args, upb_pbdecodermethod *);
      put32(c, op | (callee->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }

    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instr = op;
      int target = va_arg(args, int);
      setofs(&instr, labelref(c, target));
      put32(c, instr);
      break;
    }

    case OP_TAG1:
    case OP_TAG2:
    case OP_TAGN: {
      int target = va_arg(args, int);
      uint64_t tag = va_arg(args, uint64_t);
      uint32_t instr;

      if (op == OP_TAGN) {
        /* The tag itself follows in two extra words; the instruction word
         * only carries the tag's size. */
        instr = op | (upb_value_size(tag) << 16);
      } else {
        /* Short tags live in the upper half of the instruction word. */
        instr = op | (tag << 16);
        UPB_ASSERT(tag <= 0xffff);
      }
      setofs(&instr, labelref(c, target));
      put32(c, instr);
      if (op == OP_TAGN) {
        put32(c, tag);
        put32(c, tag >> 32);
      }
      break;
    }
  }

  va_end(args);
}
9856
#if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)

/* Returns the symbolic name of opcode "op" as a string of the form "OP_...",
 * or "<unknown op>" if "op" matches none of the cases below.  Only compiled
 * when the JIT or bytecode dumping is enabled. */
const char *upb_pbdecoder_getopname(unsigned int op) {
#define QUOTE(x) #x
#define EXPAND_AND_QUOTE(x) QUOTE(x)
#define OPNAME(x) OP_##x
#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
#define T(x) OP(PARSE_##x)
  /* Keep in sync with list in decoder.int.h. */
  switch ((opcode)op) {
    /* Value parsers. */
    T(DOUBLE)
    T(FLOAT)
    T(INT64)
    T(UINT64)
    T(INT32)
    T(FIXED64)
    T(FIXED32)
    T(BOOL)
    T(UINT32)
    T(SFIXED32)
    T(SFIXED64)
    T(SINT32)
    T(SINT64)
    /* Everything else. */
    OP(STARTMSG)
    OP(ENDMSG)
    OP(STARTSEQ)
    OP(ENDSEQ)
    OP(STARTSUBMSG)
    OP(ENDSUBMSG)
    OP(STARTSTR)
    OP(STRING)
    OP(ENDSTR)
    OP(CALL)
    OP(RET)
    OP(PUSHLENDELIM)
    OP(PUSHTAGDELIM)
    OP(SETDELIM)
    OP(CHECKDELIM)
    OP(BRANCH)
    OP(TAG1)
    OP(TAG2)
    OP(TAGN)
    OP(SETDISPATCH)
    OP(POP)
    OP(SETBIGGROUPNUM)
    OP(DISPATCH)
    OP(HALT)
  }
  return "<unknown op>";
#undef OP
#undef T
}

#endif
9881
#ifdef UPB_DUMP_BYTECODE

/* Writes a textual listing of the bytecode in [p, end) to "f", one
 * instruction per line: the instruction's address, its word offset from the
 * start of the range, the opcode name and the opcode-specific operands
 * (message name for SETDISPATCH, selector/number for single-operand opcodes,
 * jump target as a word offset for branching opcodes, tag for TAG*).
 *
 * Every printf argument is converted to the exact type its conversion
 * specifier expects (void* for %p, unsigned for %u/%x, unsigned long long for
 * %llx). */
static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
  uint32_t *begin = p;

  while (p < end) {
    uint32_t instr;
    uint8_t op;

    fprintf(f, "%p %8tx", (void*)p, p - begin);
    instr = *p++;
    op = getop(instr);
    fprintf(f, " %s", upb_pbdecoder_getopname(op));
    switch ((opcode)op) {
      case OP_SETDISPATCH: {
        const upb_inttable *dispatch;
        const upb_pbdecodermethod *method;

        /* The operand is a pointer to the "dispatch" member of a method;
         * step back to the enclosing method to find its message name. */
        memcpy(&dispatch, p, sizeof(void*));
        p += ptr_words;
        method = (const void *)((const char *)dispatch -
                                offsetof(upb_pbdecodermethod, dispatch));
        fprintf(f, " %s", upb_msgdef_fullname(
                              upb_handlers_msgdef(method->dest_handlers_)));
        break;
      }
      case OP_DISPATCH:
      case OP_STARTMSG:
      case OP_ENDMSG:
      case OP_PUSHLENDELIM:
      case OP_POP:
      case OP_SETDELIM:
      case OP_HALT:
      case OP_RET:
        break;
      case OP_PARSE_DOUBLE:
      case OP_PARSE_FLOAT:
      case OP_PARSE_INT64:
      case OP_PARSE_UINT64:
      case OP_PARSE_INT32:
      case OP_PARSE_FIXED64:
      case OP_PARSE_FIXED32:
      case OP_PARSE_BOOL:
      case OP_PARSE_UINT32:
      case OP_PARSE_SFIXED32:
      case OP_PARSE_SFIXED64:
      case OP_PARSE_SINT32:
      case OP_PARSE_SINT64:
      case OP_STARTSEQ:
      case OP_ENDSEQ:
      case OP_STARTSUBMSG:
      case OP_ENDSUBMSG:
      case OP_STARTSTR:
      case OP_STRING:
      case OP_ENDSTR:
      case OP_PUSHTAGDELIM:
        fprintf(f, " %u", (unsigned)(instr >> 8));
        break;
      case OP_SETBIGGROUPNUM:
        fprintf(f, " %u", (unsigned)*p++);
        break;
      case OP_CHECKDELIM:
      case OP_CALL:
      case OP_BRANCH:
        fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        break;
      case OP_TAG1:
      case OP_TAG2: {
        fprintf(f, " tag:0x%x", (unsigned)(instr >> 16));
        if (getofs(instr)) {
          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        }
        break;
      }
      case OP_TAGN: {
        /* The 64-bit tag is stored low word first. */
        uint64_t tag = *p++;
        tag |= (uint64_t)*p++ << 32;
        fprintf(f, " tag:0x%llx", (unsigned long long)tag);
        fprintf(f, " n:%u", (unsigned)(instr >> 16));
        if (getofs(instr)) {
          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        }
        break;
      }
    }
    fputs("\n", f);
  }
}

#endif
9969
/* Builds the tag value for field "f" with the given wire type (field number
 * in the bits above the low three, wire type in the low three) and returns
 * the result of upb_vencode32() on it.  Asserts that the encoded form needs
 * at most five bytes. */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  const uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
  const uint64_t encoded = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  UPB_ASSERT(encoded <= 0xffffffffff);
  return encoded;
}
9977
/* Emits a tag-check instruction for field "f" with wire type "wire_type" and
 * label "dest".  The opcode is chosen by the size of the encoded tag: OP_TAG1
 * for one byte, OP_TAG2 for two bytes and OP_TAGN for anything longer. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  const uint64_t tag = get_encoded_tag(f, wire_type);
  int op;

  switch (upb_value_size(tag)) {
    case 1:
      op = OP_TAG1;
      break;
    case 2:
      op = OP_TAG2;
      break;
    default:
      op = OP_TAGN;
      break;
  }
  putop(c, op, dest, tag);
}
9993
getsel(const upb_fielddef * f,upb_handlertype_t type)9994 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
9995 upb_selector_t selector;
9996 bool ok = upb_handlers_getselector(f, type, &selector);
9997 UPB_ASSERT(ok);
9998 return selector;
9999 }
10000
10001 /* Takes an existing, primary dispatch table entry and repacks it with a
10002 * different alternate wire type. Called when we are inserting a secondary
10003 * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)10004 static uint64_t repack(uint64_t dispatch, int new_wt2) {
10005 uint64_t ofs;
10006 uint8_t wt1;
10007 uint8_t old_wt2;
10008 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
10009 UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
10010 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
10011 }
10012
10013 /* Marks the current bytecode position as the dispatch target for this message,
10014 * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)10015 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
10016 const upb_fielddef *f, int wire_type) {
10017 /* Offset is relative to msg base. */
10018 uint64_t ofs = pcofs(c) - method->code_base.ofs;
10019 uint32_t fn = upb_fielddef_number(f);
10020 upb_inttable *d = &method->dispatch;
10021 upb_value v;
10022 if (upb_inttable_remove(d, fn, &v)) {
10023 /* TODO: prioritize based on packed setting in .proto file. */
10024 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
10025 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
10026 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
10027 } else {
10028 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
10029 upb_inttable_insert(d, fn, upb_value_uint64(val));
10030 }
10031 }
10032
/* Emits the "push" instruction that opens a submessage of field "f".
 *
 * Fields whose descriptor type is MESSAGE get OP_PUSHLENDELIM.  All other
 * fields get OP_PUSHTAGDELIM with the field number as operand.  A field
 * number of 2^24 or more does not fit next to the opcode, so in that case the
 * operand is 0 and the number follows in an OP_SETBIGGROUPNUM instruction. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fieldnum;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fieldnum = upb_fielddef_number(f);
  if (fieldnum < (1 << 24)) {
    putop(c, OP_PUSHTAGDELIM, fieldnum);
    return;
  }

  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fieldnum);
}
10046
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)10047 static upb_pbdecodermethod *find_submethod(const compiler *c,
10048 const upb_pbdecodermethod *method,
10049 const upb_fielddef *f) {
10050 const upb_handlers *sub =
10051 upb_handlers_getsubhandlers(method->dest_handlers_, f);
10052 upb_value v;
10053 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
10054 ? upb_value_getptr(v)
10055 : NULL;
10056 }
10057
/* Emits "op" with selector operand "sel", but only when handlers "h" have a
 * handler registered for that selector; otherwise nothing is emitted. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel)) {
    return;
  }
  putop(c, op, sel);
}
10064
10065 /* Puts an opcode to call a callback, but only if a callback actually exists for
10066 * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)10067 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
10068 const upb_fielddef *f, upb_handlertype_t type) {
10069 putsel(c, op, getsel(f, type), h);
10070 }
10071
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)10072 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
10073 if (!upb_fielddef_lazy(f))
10074 return false;
10075
10076 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
10077 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
10078 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
10079 }
10080
10081
10082 /* bytecode compiler code generation ******************************************/
10083
/* Local label numbers used by the code generators below.  They are passed to
 * label() and, as putop() label operands, referenced forward (positive) or
 * backward (negated). */

/* Top of the loop that parses the elements of a repeated field. */
#define LABEL_LOOPSTART 1
/* First instruction after a repeated-field loop; target for leaving it. */
#define LABEL_LOOPBREAK 2
/* Most recently started field; jumped back to from the end of a message. */
#define LABEL_FIELD     3
/* Position of the end-of-message code (OP_ENDMSG) of the current message. */
#define LABEL_ENDMSG    4
10089
10090 /* Generates bytecode to parse a single non-lazy message field. */
generate_msgfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)10091 static void generate_msgfield(compiler *c, const upb_fielddef *f,
10092 upb_pbdecodermethod *method) {
10093 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
10094 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
10095 int wire_type;
10096
10097 if (!sub_m) {
10098 /* Don't emit any code for this field at all; it will be parsed as an
10099 * unknown field.
10100 *
10101 * TODO(haberman): we should change this to parse it as a string field
10102 * instead. It will probably be faster, but more importantly, once we
10103 * start vending unknown fields, a field shouldn't be treated as unknown
10104 * just because it doesn't have subhandlers registered. */
10105 return;
10106 }
10107
10108 label(c, LABEL_FIELD);
10109
10110 wire_type =
10111 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
10112 ? UPB_WIRE_TYPE_DELIMITED
10113 : UPB_WIRE_TYPE_START_GROUP;
10114
10115 if (upb_fielddef_isseq(f)) {
10116 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10117 putchecktag(c, f, wire_type, LABEL_DISPATCH);
10118 dispatchtarget(c, method, f, wire_type);
10119 putop(c, OP_PUSHTAGDELIM, 0);
10120 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
10121 label(c, LABEL_LOOPSTART);
10122 putpush(c, f);
10123 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
10124 putop(c, OP_CALL, sub_m);
10125 putop(c, OP_POP);
10126 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
10127 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
10128 putop(c, OP_SETDELIM);
10129 }
10130 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
10131 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
10132 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
10133 label(c, LABEL_LOOPBREAK);
10134 putop(c, OP_POP);
10135 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
10136 } else {
10137 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10138 putchecktag(c, f, wire_type, LABEL_DISPATCH);
10139 dispatchtarget(c, method, f, wire_type);
10140 putpush(c, f);
10141 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
10142 putop(c, OP_CALL, sub_m);
10143 putop(c, OP_POP);
10144 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
10145 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
10146 putop(c, OP_SETDELIM);
10147 }
10148 }
10149 }
10150
10151 /* Generates bytecode to parse a single string or lazy submessage field. */
generate_delimfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)10152 static void generate_delimfield(compiler *c, const upb_fielddef *f,
10153 upb_pbdecodermethod *method) {
10154 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
10155
10156 label(c, LABEL_FIELD);
10157 if (upb_fielddef_isseq(f)) {
10158 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10159 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
10160 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
10161 putop(c, OP_PUSHTAGDELIM, 0);
10162 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
10163 label(c, LABEL_LOOPSTART);
10164 putop(c, OP_PUSHLENDELIM);
10165 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
10166 /* Need to emit even if no handler to skip past the string. */
10167 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
10168 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
10169 putop(c, OP_POP);
10170 putop(c, OP_SETDELIM);
10171 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
10172 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
10173 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
10174 label(c, LABEL_LOOPBREAK);
10175 putop(c, OP_POP);
10176 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
10177 } else {
10178 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10179 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
10180 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
10181 putop(c, OP_PUSHLENDELIM);
10182 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
10183 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
10184 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
10185 putop(c, OP_POP);
10186 putop(c, OP_SETDELIM);
10187 }
10188 }
10189
10190 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)10191 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
10192 upb_pbdecodermethod *method) {
10193 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
10194 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
10195 opcode parse_type;
10196 upb_selector_t sel;
10197 int wire_type;
10198
10199 label(c, LABEL_FIELD);
10200
10201 /* From a decoding perspective, ENUM is the same as INT32. */
10202 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
10203 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
10204
10205 parse_type = (opcode)descriptor_type;
10206
10207 /* TODO(haberman): generate packed or non-packed first depending on "packed"
10208 * setting in the fielddef. This will favor (in speed) whichever was
10209 * specified. */
10210
10211 UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
10212 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
10213 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
10214 if (upb_fielddef_isseq(f)) {
10215 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10216 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
10217 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
10218 putop(c, OP_PUSHLENDELIM);
10219 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
10220 label(c, LABEL_LOOPSTART);
10221 putop(c, parse_type, sel);
10222 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
10223 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
10224 dispatchtarget(c, method, f, wire_type);
10225 putop(c, OP_PUSHTAGDELIM, 0);
10226 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
10227 label(c, LABEL_LOOPSTART);
10228 putop(c, parse_type, sel);
10229 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
10230 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
10231 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
10232 label(c, LABEL_LOOPBREAK);
10233 putop(c, OP_POP); /* Packed and non-packed join. */
10234 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
10235 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
10236 } else {
10237 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10238 putchecktag(c, f, wire_type, LABEL_DISPATCH);
10239 dispatchtarget(c, method, f, wire_type);
10240 putop(c, parse_type, sel);
10241 }
10242 }
10243
10244 /* Adds bytecode for parsing the given message to the given decoderplan,
10245 * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)10246 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
10247 const upb_handlers *h;
10248 const upb_msgdef *md;
10249 uint32_t* start_pc;
10250 upb_msg_field_iter i;
10251 upb_value val;
10252
10253 UPB_ASSERT(method);
10254
10255 /* Clear all entries in the dispatch table. */
10256 upb_inttable_uninit(&method->dispatch);
10257 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
10258
10259 h = upb_pbdecodermethod_desthandlers(method);
10260 md = upb_handlers_msgdef(h);
10261
10262 method->code_base.ofs = pcofs(c);
10263 putop(c, OP_SETDISPATCH, &method->dispatch);
10264 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
10265 label(c, LABEL_FIELD);
10266 start_pc = c->pc;
10267 for(upb_msg_field_begin(&i, md);
10268 !upb_msg_field_done(&i);
10269 upb_msg_field_next(&i)) {
10270 const upb_fielddef *f = upb_msg_iter_field(&i);
10271 upb_fieldtype_t type = upb_fielddef_type(f);
10272
10273 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
10274 generate_msgfield(c, f, method);
10275 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
10276 type == UPB_TYPE_MESSAGE) {
10277 generate_delimfield(c, f, method);
10278 } else {
10279 generate_primitivefield(c, f, method);
10280 }
10281 }
10282
10283 /* If there were no fields, or if no handlers were defined, we need to
10284 * generate a non-empty loop body so that we can at least dispatch for unknown
10285 * fields and check for the end of the message. */
10286 if (c->pc == start_pc) {
10287 /* Check for end-of-message. */
10288 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
10289 /* Unconditionally dispatch. */
10290 putop(c, OP_DISPATCH, 0);
10291 }
10292
10293 /* For now we just loop back to the last field of the message (or if none,
10294 * the DISPATCH opcode for the message). */
10295 putop(c, OP_BRANCH, -LABEL_FIELD);
10296
10297 /* Insert both a label and a dispatch table entry for this end-of-msg. */
10298 label(c, LABEL_ENDMSG);
10299 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
10300 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
10301
10302 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
10303 putop(c, OP_RET);
10304
10305 upb_inttable_compact(&method->dispatch);
10306 }
10307
10308 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
10309 * Returns the method for these handlers.
10310 *
10311 * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)10312 static void find_methods(compiler *c, const upb_handlers *h) {
10313 upb_value v;
10314 upb_msg_field_iter i;
10315 const upb_msgdef *md;
10316
10317 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
10318 return;
10319 newmethod(h, c->group);
10320
10321 /* Find submethods. */
10322 md = upb_handlers_msgdef(h);
10323 for(upb_msg_field_begin(&i, md);
10324 !upb_msg_field_done(&i);
10325 upb_msg_field_next(&i)) {
10326 const upb_fielddef *f = upb_msg_iter_field(&i);
10327 const upb_handlers *sub_h;
10328 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
10329 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
10330 /* We only generate a decoder method for submessages with handlers.
10331 * Others will be parsed as unknown fields. */
10332 find_methods(c, sub_h);
10333 }
10334 }
10335 }
10336
10337 /* (Re-)compile bytecode for all messages in "msgs."
10338 * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)10339 static void compile_methods(compiler *c) {
10340 upb_inttable_iter i;
10341
10342 /* Start over at the beginning of the bytecode. */
10343 c->pc = c->group->bytecode;
10344
10345 upb_inttable_begin(&i, &c->group->methods);
10346 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
10347 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
10348 compile_method(c, method);
10349 }
10350 }
10351
/* Points every method of group "g" at the bytecode interpreter.  For each
 * method, code_base is converted from an offset into an address inside
 * g->bytecode, and the method's input bytes handler gets
 * upb_pbdecoder_startbc / upb_pbdecoder_decode / upb_pbdecoder_end installed
 * as its startstr / string / endstr callbacks. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &g->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    /* code_base held an offset up to now; from here on it is an address. */
    method->code_base.ptr = g->bytecode + method->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);

    upb_inttable_next(&it);
  }
}
10366
10367
10368 /* JIT setup. *****************************************************************/
10369
10370 #ifdef UPB_USE_JIT_X64
10371
sethandlers(mgroup * g,bool allowjit)10372 static void sethandlers(mgroup *g, bool allowjit) {
10373 g->jit_code = NULL;
10374 if (allowjit) {
10375 /* Compile byte-code into machine code, create handlers. */
10376 upb_pbdecoder_jit(g);
10377 } else {
10378 set_bytecode_handlers(g);
10379 }
10380 }
10381
10382 #else /* UPB_USE_JIT_X64 */
10383
sethandlers(mgroup * g,bool allowjit)10384 static void sethandlers(mgroup *g, bool allowjit) {
10385 /* No JIT compiled in; use bytecode handlers unconditionally. */
10386 UPB_UNUSED(allowjit);
10387 set_bytecode_handlers(g);
10388 }
10389
10390 #endif /* UPB_USE_JIT_X64 */
10391
10392
10393 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
10394 * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool allowjit,bool lazy,const void * owner)10395 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
10396 const void *owner) {
10397 mgroup *g;
10398 compiler *c;
10399
10400 UPB_UNUSED(allowjit);
10401 UPB_ASSERT(upb_handlers_isfrozen(dest));
10402
10403 g = newgroup(owner);
10404 c = newcompiler(g, lazy);
10405 find_methods(c, dest);
10406
10407 /* We compile in two passes:
10408 * 1. all messages are assigned relative offsets from the beginning of the
10409 * bytecode (saved in method->code_base).
10410 * 2. forwards OP_CALL instructions can be correctly linked since message
10411 * offsets have been previously assigned.
10412 *
10413 * Could avoid the second pass by linking OP_CALL instructions somehow. */
10414 compile_methods(c);
10415 compile_methods(c);
10416 g->bytecode_end = c->pc;
10417 freecompiler(c);
10418
10419 #ifdef UPB_DUMP_BYTECODE
10420 {
10421 FILE *f = fopen("/tmp/upb-bytecode", "w");
10422 UPB_ASSERT(f);
10423 dumpbc(g->bytecode, g->bytecode_end, stderr);
10424 dumpbc(g->bytecode, g->bytecode_end, f);
10425 fclose(f);
10426
10427 f = fopen("/tmp/upb-bytecode.bin", "wb");
10428 UPB_ASSERT(f);
10429 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
10430 fclose(f);
10431 }
10432 #endif
10433
10434 sethandlers(g, allowjit);
10435 return g;
10436 }
10437
10438
10439 /* upb_pbcodecache ************************************************************/
10440
/* Initializes code cache "c": JIT use is allowed by default and the table of
 * groups starts out empty. */
void upb_pbcodecache_init(upb_pbcodecache *c) {
  c->allow_jit_ = true;
  upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
}
10445
/* Tears down code cache "c": drops the reference the cache holds on each of
 * its groups (the cache itself is the owner token) and then frees the table
 * of groups. */
void upb_pbcodecache_uninit(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    const mgroup *owned = upb_value_getconstptr(upb_inttable_iter_value(&it));

    mgroup_unref(owned, c);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&c->groups);
}
10455
upb_pbcodecache_allowjit(const upb_pbcodecache * c)10456 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
10457 return c->allow_jit_;
10458 }
10459
/* Changes the cache's allow-JIT setting.  The setting can only be changed
 * while the cache holds no groups: returns true if it was applied, false (and
 * leaves the setting alone) if the cache already contains a group. */
bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
  const bool is_empty = !(upb_inttable_count(&c->groups) > 0);

  if (is_empty) {
    c->allow_jit_ = allow;
  }
  return is_empty;
}
10466
upb_pbcodecache_getdecodermethod(upb_pbcodecache * c,const upb_pbdecodermethodopts * opts)10467 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
10468 upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
10469 upb_value v;
10470 bool ok;
10471
10472 /* Right now we build a new DecoderMethod every time.
10473 * TODO(haberman): properly cache methods by their true key. */
10474 const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
10475 upb_inttable_push(&c->groups, upb_value_constptr(g));
10476
10477 ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
10478 UPB_ASSERT(ok);
10479 return upb_value_getptr(v);
10480 }
10481
10482
10483 /* upb_pbdecodermethodopts ****************************************************/
10484
/* Fills in "opts" with defaults: the given destination handlers and lazy
 * mode switched off. */
void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
                                  const upb_handlers *h) {
  opts->lazy = false;
  opts->handlers = h;
}
10490
/* Stores the requested "lazy" flag in opts.  The flag is later forwarded to
 * mgroup_new() by upb_pbcodecache_getdecodermethod(). */
void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
  opts->lazy = lazy;
}
10494 /*
10495 ** upb::Decoder (Bytecode Decoder VM)
10496 **
10497 ** Bytecode must previously have been generated using the bytecode compiler in
10498 ** compile_decoder.c. This decoder then walks through the bytecode op-by-op to
10499 ** parse the input.
10500 **
10501 ** Decoding is fully resumable; we just keep a pointer to the current bytecode
10502 ** instruction and resume from there. A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be
** capable of suspending/resuming from any byte in the stream.  This
10505 ** sometimes requires keeping a few trailing bytes from the last buffer around
10506 ** in the "residual" buffer.
10507 */
10508
10509 #include <inttypes.h>
10510 #include <stddef.h>
10511
10512 #ifdef UPB_DUMP_BYTECODE
10513 #include <stdio.h>
10514 #endif
10515
/* Evaluates x; if it is false, suspends the decoder and returns the suspend
 * result from the enclosing function.  Expects a upb_pbdecoder* named "d" to
 * be in scope at the point of use. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);

/* Error messages that are shared between the bytecode and JIT decoders. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
    "Submessage end extends past enclosing submessage.";

/* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";

/* upb_pbdecoder **************************************************************/

/* A lone OP_HALT instruction.  NOTE(review): presumably used as a program
 * counter target when there is no bytecode to run -- confirm against users. */
static opcode halt = OP_HALT;

/* A dummy character we can point to when the user passes us a NULL buffer.
 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
 * behavior, which would invalidate functions like curbufleft(). */
static const char dummy_char;
10534
10535 /* Whether an op consumes any of the input buffer. */
consumes_input(opcode op)10536 static bool consumes_input(opcode op) {
10537 switch (op) {
10538 case OP_SETDISPATCH:
10539 case OP_STARTMSG:
10540 case OP_ENDMSG:
10541 case OP_STARTSEQ:
10542 case OP_ENDSEQ:
10543 case OP_STARTSUBMSG:
10544 case OP_ENDSUBMSG:
10545 case OP_STARTSTR:
10546 case OP_ENDSTR:
10547 case OP_PUSHTAGDELIM:
10548 case OP_POP:
10549 case OP_SETDELIM:
10550 case OP_SETBIGGROUPNUM:
10551 case OP_CHECKDELIM:
10552 case OP_CALL:
10553 case OP_RET:
10554 case OP_BRANCH:
10555 return false;
10556 default:
10557 return true;
10558 }
10559 }
10560
/* Number of bytes needed for a decoder stack of "entries" frames.  "d" is not
 * consulted. */
static size_t stacksize(upb_pbdecoder *d, size_t entries) {
  const size_t frame_bytes = sizeof(upb_pbdecoder_frame);

  UPB_UNUSED(d);
  return frame_bytes * entries;
}
10565
/* Number of bytes needed for a call stack of "entries" entries.
 *
 * Without UPB_USE_JIT_X64 (or for non-native methods) each entry is one
 * uint32_t pointer.  With the JIT and a native method the size is computed
 * from the native frame layout described below. */
static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
  UPB_UNUSED(d);

#ifdef UPB_USE_JIT_X64
  if (d->method_->is_native_) {
    /* Each native stack frame needs two pointers, plus we need a few frames for
     * the enter/exit trampolines (ten extra pointers in total). */
    return (entries * 2 + 10) * sizeof(void*);
  }
#endif

  return sizeof(uint32_t*) * entries;
}
10581
10582
10583 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
10584
10585 /* It's unfortunate that we have to micro-manage the compiler with
10586 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
10587 * specific to one hardware configuration. But empirically on a Core i7,
10588 * performance increases 30-50% with these annotations. Every instance where
10589 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
10590 * benchmarks. */
10591
/* Reports "msg" as an error: wraps it in a freshly initialized upb_status and
 * hands that status to the decoder's environment (d->env). */
static void seterr(upb_pbdecoder *d, const char *msg) {
  upb_status failure = UPB_STATUS_INIT;

  upb_status_seterrmsg(&failure, msg);
  upb_env_reporterror(d->env, &failure);
}
10597
/* Non-static entry point for reporting "msg" as a decoder error; simply
 * forwards to seterr(). */
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
  seterr(d, msg);
}
10601
10602
10603 /* Buffering ******************************************************************/
10604
10605 /* We operate on one buffer at a time, which is either the user's buffer passed
10606 * to our "decode" callback or some residual bytes from the previous buffer. */
10607
10608 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
10609 * or past the current delimited end. */
curbufleft(const upb_pbdecoder * d)10610 static size_t curbufleft(const upb_pbdecoder *d) {
10611 UPB_ASSERT(d->data_end >= d->ptr);
10612 return d->data_end - d->ptr;
10613 }
10614
10615 /* How many bytes are available before end-of-buffer. */
bufleft(const upb_pbdecoder * d)10616 static size_t bufleft(const upb_pbdecoder *d) {
10617 return d->end - d->ptr;
10618 }
10619
10620 /* Overall stream offset of d->ptr. */
offset(const upb_pbdecoder * d)10621 uint64_t offset(const upb_pbdecoder *d) {
10622 return d->bufstart_ofs + (d->ptr - d->buf);
10623 }
10624
10625 /* How many bytes are available before the end of this delimited region. */
delim_remaining(const upb_pbdecoder * d)10626 size_t delim_remaining(const upb_pbdecoder *d) {
10627 return d->top->end_ofs - offset(d);
10628 }
10629
10630 /* Advances d->ptr. */
advance(upb_pbdecoder * d,size_t len)10631 static void advance(upb_pbdecoder *d, size_t len) {
10632 UPB_ASSERT(curbufleft(d) >= len);
10633 d->ptr += len;
10634 }
10635
/* True when p lies in the closed range [buf, end].  Note that "end" itself
 * counts as inside. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) {
    return false;
  }
  return !(p > end);
}
10639
in_residual_buf(const upb_pbdecoder * d,const char * p)10640 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
10641 return in_buf(p, d->residual, d->residual_end);
10642 }
10643
10644 /* Calculates the delim_end value, which is affected by both the current buffer
10645 * and the parsing stack, so must be called whenever either is updated. */
set_delim_end(upb_pbdecoder * d)10646 static void set_delim_end(upb_pbdecoder *d) {
10647 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
10648 if (delim_ofs <= (size_t)(d->end - d->buf)) {
10649 d->delim_end = d->buf + delim_ofs;
10650 d->data_end = d->delim_end;
10651 } else {
10652 d->data_end = d->end;
10653 d->delim_end = NULL;
10654 }
10655 }
10656
/* Makes [buf, end) the decoder's current buffer, positions d->ptr at its
 * start, and refreshes the delimited-end pointers for the new buffer. */
static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
  d->buf = buf;
  d->end = end;
  d->ptr = buf;
  set_delim_end(d);
}
10663
/* Moves on to a new buffer of "len" bytes at "buf" once the current one has
 * been fully consumed (asserted).  The stream offset of the buffer start is
 * bumped by the length of the buffer being left behind. */
static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
  const ptrdiff_t finished = d->end - d->buf;

  UPB_ASSERT(curbufleft(d) == 0);
  d->bufstart_ofs += finished;
  switchtobuf(d, buf, buf + len);
}
10669
/* Records the current read position (d->ptr) as the decoder's checkpoint. */
static void checkpoint(upb_pbdecoder *d) {
  const char *here = d->ptr;

  /* This assertion is about efficiency rather than correctness: it catches
   * callers that checkpoint() again without having made any progress. */
  UPB_ASSERT(here != d->checkpoint);
  d->checkpoint = here;
}
10677
10678 /* Skips "bytes" bytes in the stream, which may be more than available. If we
10679 * skip more bytes than are available, we return a long read count to the caller
10680 * indicating how many bytes can be skipped over before passing actual data
10681 * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they
10682 * won't actually be read.
10683 */
skip(upb_pbdecoder * d,size_t bytes)10684 static int32_t skip(upb_pbdecoder *d, size_t bytes) {
10685 UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0);
10686 UPB_ASSERT(d->skip == 0);
10687 if (bytes > delim_remaining(d)) {
10688 seterr(d, "Skipped value extended beyond enclosing submessage.");
10689 return upb_pbdecoder_suspend(d);
10690 } else if (bufleft(d) >= bytes) {
10691 /* Skipped data is all in current buffer, and more is still available. */
10692 advance(d, bytes);
10693 d->skip = 0;
10694 return DECODE_OK;
10695 } else {
10696 /* Skipped data extends beyond currently available buffers. */
10697 d->pc = d->last;
10698 d->skip = bytes - curbufleft(d);
10699 d->bufstart_ofs += (d->end - d->buf);
10700 d->residual_end = d->residual;
10701 switchtobuf(d, d->residual, d->residual_end);
10702 return d->size_param + d->skip;
10703 }
10704 }
10705
10706
/* Resumes the decoder from an initial state or from a previous suspend.
 *
 * d:      the decoder to resume.
 * p:      unused here.
 * buf:    the caller's next chunk of input; may be NULL only when the whole
 *         chunk is covered by a pending skip.
 * size:   number of bytes in buf.
 * handle: buffer handle, stored in d->handle.
 *
 * Returns DECODE_OK when the decoder is ready to continue.  Otherwise returns
 * the result of upb_pbdecoder_suspend() (NULL buffer over a non-skippable
 * region) or whatever value CHECK_RETURN propagates from skip() /
 * upb_pbdecoder_skipunknown().  NOTE(review): CHECK_RETURN is defined outside
 * this section; presumably it returns early on any non-DECODE_OK status --
 * confirm. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                             size_t size, const upb_bufhandle *handle) {
  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */

  /* d->skip and d->residual_end could probably elegantly be represented
   * as a single variable, to more easily represent this invariant. */
  UPB_ASSERT(!(d->skip && d->residual_end > d->residual));

  /* We need to remember the original size_param, so that the value we return
   * is relative to it, even if we do some skipping first. */
  d->size_param = size;
  d->handle = handle;

  /* Have to handle this case specially (ie. not with skip()) because the user
   * is allowed to pass a NULL buffer here, which won't allow us to safely
   * calculate a d->end or use our normal functions like curbufleft(). */
  if (d->skip && d->skip >= size) {
    /* The pending skip swallows this entire chunk: account for it and carry
     * on with an empty buffer that points at dummy_char. */
    d->skip -= size;
    d->bufstart_ofs += size;
    buf = &dummy_char;
    size = 0;

    /* We can't just return now, because we might need to execute some ops
     * like CHECKDELIM, which could call some callbacks and pop the stack. */
  }

  /* We need to pretend that this was the actual buffer param, since some of the
   * calculations assume that d->ptr/d->buf is relative to this. */
  d->buf_param = buf;

  if (!buf) {
    /* NULL buf is ok if its entire span is covered by the "skip" above, but
     * by this point we know that "skip" doesn't cover the buffer. */
    seterr(d, "Passed NULL buffer over non-skippable region.");
    return upb_pbdecoder_suspend(d);
  }

  if (d->residual_end > d->residual) {
    /* We have residual bytes from the last buffer. */
    UPB_ASSERT(d->ptr == d->residual);
  } else {
    switchtobuf(d, buf, buf + size);
  }

  d->checkpoint = d->ptr;

  /* Handle skips that don't cover the whole buffer (as above). */
  if (d->skip) {
    /* skip() asserts d->skip == 0 on entry, so clear it before calling. */
    size_t skip_bytes = d->skip;
    d->skip = 0;
    CHECK_RETURN(skip(d, skip_bytes));
    checkpoint(d);
  }

  /* If we're inside an unknown group, continue to parse unknown values. */
  if (d->top->groupnum < 0) {
    CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
    checkpoint(d);
  }

  return DECODE_OK;
}
10770
10771 /* Suspends the decoder at the last checkpoint, without saving any residual
10772 * bytes. If there are any unconsumed bytes, returns a short byte count. */
upb_pbdecoder_suspend(upb_pbdecoder * d)10773 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
10774 d->pc = d->last;
10775 if (d->checkpoint == d->residual) {
10776 /* Checkpoint was in residual buf; no user bytes were consumed. */
10777 d->ptr = d->residual;
10778 return 0;
10779 } else {
10780 size_t ret = d->size_param - (d->end - d->checkpoint);
10781 UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
10782 UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char);
10783
10784 d->bufstart_ofs += (d->checkpoint - d->buf);
10785 d->residual_end = d->residual;
10786 switchtobuf(d, d->residual, d->residual_end);
10787 return ret;
10788 }
10789 }
10790
10791 /* Suspends the decoder at the last checkpoint, and saves any unconsumed
10792 * bytes in our residual buffer. This is necessary if we need more user
10793 * bytes to form a complete value, which might not be contiguous in the
10794 * user's buffers. Always consumes all user bytes. */
suspend_save(upb_pbdecoder * d)10795 static size_t suspend_save(upb_pbdecoder *d) {
10796 /* We hit end-of-buffer before we could parse a full value.
10797 * Save any unconsumed bytes (if any) to the residual buffer. */
10798 d->pc = d->last;
10799
10800 if (d->checkpoint == d->residual) {
10801 /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
10802 UPB_ASSERT((d->residual_end - d->residual) + d->size_param <=
10803 sizeof(d->residual));
10804 if (!in_residual_buf(d, d->ptr)) {
10805 d->bufstart_ofs -= (d->residual_end - d->residual);
10806 }
10807 memcpy(d->residual_end, d->buf_param, d->size_param);
10808 d->residual_end += d->size_param;
10809 } else {
10810 /* Checkpoint was in user buf; old residual bytes not needed. */
10811 size_t save;
10812 UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
10813
10814 d->ptr = d->checkpoint;
10815 save = curbufleft(d);
10816 UPB_ASSERT(save <= sizeof(d->residual));
10817 memcpy(d->residual, d->ptr, save);
10818 d->residual_end = d->residual + save;
10819 d->bufstart_ofs = offset(d);
10820 }
10821
10822 switchtobuf(d, d->residual, d->residual_end);
10823 return d->size_param;
10824 }
10825
10826 /* Copies the next "bytes" bytes into "buf" and advances the stream.
10827 * Requires that this many bytes are available in the current buffer. */
consumebytes(upb_pbdecoder * d,void * buf,size_t bytes)10828 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
10829 size_t bytes) {
10830 UPB_ASSERT(bytes <= curbufleft(d));
10831 memcpy(buf, d->ptr, bytes);
10832 advance(d, bytes);
10833 }
10834
10835 /* Slow path for getting the next "bytes" bytes, regardless of whether they are
10836 * available in the current buffer or not. Returns a status code as described
10837 * in decoder.int.h. */
getbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)10838 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
10839 size_t bytes) {
10840 const size_t avail = curbufleft(d);
10841 consumebytes(d, buf, avail);
10842 bytes -= avail;
10843 UPB_ASSERT(bytes > 0);
10844 if (in_residual_buf(d, d->ptr)) {
10845 advancetobuf(d, d->buf_param, d->size_param);
10846 }
10847 if (curbufleft(d) >= bytes) {
10848 consumebytes(d, (char *)buf + avail, bytes);
10849 return DECODE_OK;
10850 } else if (d->data_end == d->delim_end) {
10851 seterr(d, "Submessage ended in the middle of a value or group");
10852 return upb_pbdecoder_suspend(d);
10853 } else {
10854 return suspend_save(d);
10855 }
10856 }
10857
10858 /* Gets the next "bytes" bytes, regardless of whether they are available in the
10859 * current buffer or not. Returns a status code as described in decoder.int.h.
10860 */
getbytes(upb_pbdecoder * d,void * buf,size_t bytes)10861 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
10862 size_t bytes) {
10863 if (curbufleft(d) >= bytes) {
10864 /* Buffer has enough data to satisfy. */
10865 consumebytes(d, buf, bytes);
10866 return DECODE_OK;
10867 } else {
10868 return getbytes_slow(d, buf, bytes);
10869 }
10870 }
10871
peekbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)10872 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
10873 size_t bytes) {
10874 size_t ret = curbufleft(d);
10875 memcpy(buf, d->ptr, ret);
10876 if (in_residual_buf(d, d->ptr)) {
10877 size_t copy = UPB_MIN(bytes - ret, d->size_param);
10878 memcpy((char *)buf + ret, d->buf_param, copy);
10879 ret += copy;
10880 }
10881 return ret;
10882 }
10883
peekbytes(upb_pbdecoder * d,void * buf,size_t bytes)10884 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
10885 size_t bytes) {
10886 if (curbufleft(d) >= bytes) {
10887 memcpy(buf, d->ptr, bytes);
10888 return bytes;
10889 } else {
10890 return peekbytes_slow(d, buf, bytes);
10891 }
10892 }
10893
10894
10895 /* Decoding of wire types *****************************************************/
10896
10897 /* Slow path for decoding a varint from the current buffer position.
10898 * Returns a status code as described in decoder.int.h. */
upb_pbdecoder_decode_varint_slow(upb_pbdecoder * d,uint64_t * u64)10899 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
10900 uint64_t *u64) {
10901 uint8_t byte = 0x80;
10902 int bitpos;
10903 *u64 = 0;
10904 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
10905 CHECK_RETURN(getbytes(d, &byte, 1));
10906 *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
10907 }
10908 if(bitpos == 70 && (byte & 0x80)) {
10909 seterr(d, kUnterminatedVarint);
10910 return upb_pbdecoder_suspend(d);
10911 }
10912 return DECODE_OK;
10913 }
10914
10915 /* Decodes a varint from the current buffer position.
10916 * Returns a status code as described in decoder.int.h. */
decode_varint(upb_pbdecoder * d,uint64_t * u64)10917 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
10918 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
10919 *u64 = *d->ptr;
10920 advance(d, 1);
10921 return DECODE_OK;
10922 } else if (curbufleft(d) >= 10) {
10923 /* Fast case. */
10924 upb_decoderet r = upb_vdecode_fast(d->ptr);
10925 if (r.p == NULL) {
10926 seterr(d, kUnterminatedVarint);
10927 return upb_pbdecoder_suspend(d);
10928 }
10929 advance(d, r.p - d->ptr);
10930 *u64 = r.val;
10931 return DECODE_OK;
10932 } else {
10933 /* Slow case -- varint spans buffer seam. */
10934 return upb_pbdecoder_decode_varint_slow(d, u64);
10935 }
10936 }
10937
10938 /* Decodes a 32-bit varint from the current buffer position.
10939 * Returns a status code as described in decoder.int.h. */
decode_v32(upb_pbdecoder * d,uint32_t * u32)10940 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
10941 uint64_t u64;
10942 int32_t ret = decode_varint(d, &u64);
10943 if (ret >= 0) return ret;
10944 if (u64 > UINT32_MAX) {
10945 seterr(d, "Unterminated 32-bit varint");
10946 /* TODO(haberman) guarantee that this function return is >= 0 somehow,
10947 * so we know this path will always be treated as error by our caller.
10948 * Right now the size_t -> int32_t can overflow and produce negative values.
10949 */
10950 *u32 = 0;
10951 return upb_pbdecoder_suspend(d);
10952 }
10953 *u32 = u64;
10954 return DECODE_OK;
10955 }
10956
10957 /* Decodes a fixed32 from the current buffer position.
10958 * Returns a status code as described in decoder.int.h.
10959 * TODO: proper byte swapping for big-endian machines. */
decode_fixed32(upb_pbdecoder * d,uint32_t * u32)10960 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
10961 return getbytes(d, u32, 4);
10962 }
10963
10964 /* Decodes a fixed64 from the current buffer position.
10965 * Returns a status code as described in decoder.int.h.
10966 * TODO: proper byte swapping for big-endian machines. */
decode_fixed64(upb_pbdecoder * d,uint64_t * u64)10967 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
10968 return getbytes(d, u64, 8);
10969 }
10970
10971 /* Non-static versions of the above functions.
10972 * These are called by the JIT for fallback paths. */
/* Externally visible wrapper around decode_fixed32(); returns its status
 * code unchanged. */
int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
  return decode_fixed32(d, u32);
}
10976
/* Externally visible wrapper around decode_fixed64(); returns its status
 * code unchanged. */
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
  return decode_fixed64(d, u64);
}
10980
/* Reinterprets the 64 bits of n as a double (bit copy, no numeric
 * conversion). */
static double as_double(uint64_t n) {
  double value;
  memcpy(&value, &n, sizeof(n));
  return value;
}
/* Reinterprets the 32 bits of n as a float (bit copy, no numeric
 * conversion). */
static float as_float(uint32_t n) {
  float value;
  memcpy(&value, &n, sizeof(n));
  return value;
}
10983
10984 /* Pushes a frame onto the decoder stack. */
decoder_push(upb_pbdecoder * d,uint64_t end)10985 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
10986 upb_pbdecoder_frame *fr = d->top;
10987
10988 if (end > fr->end_ofs) {
10989 seterr(d, kPbDecoderSubmessageTooLong);
10990 return false;
10991 } else if (fr == d->limit) {
10992 seterr(d, kPbDecoderStackOverflow);
10993 return false;
10994 }
10995
10996 fr++;
10997 fr->end_ofs = end;
10998 fr->dispatch = NULL;
10999 fr->groupnum = 0;
11000 d->top = fr;
11001 return true;
11002 }
11003
/* Pushes a frame for a tag-delimited region and records "arg" as the new
 * frame's group number.  Returns false if the push failed. */
static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
  /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
   * field number) prior to hitting any enclosing submessage end, pushing our
   * existing delim end prevents us from continuing to parse values from a
   * corrupt proto that doesn't give us an END tag in time. */
  const bool pushed = decoder_push(d, d->top->end_ofs);

  if (pushed) {
    d->top->groupnum = arg;
  }
  return pushed;
}
11014
11015 /* Pops a frame from the decoder stack. */
decoder_pop(upb_pbdecoder * d)11016 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
11017
upb_pbdecoder_checktag_slow(upb_pbdecoder * d,uint64_t expected)11018 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
11019 uint64_t expected) {
11020 uint64_t data = 0;
11021 size_t bytes = upb_value_size(expected);
11022 size_t read = peekbytes(d, &data, bytes);
11023 if (read == bytes && data == expected) {
11024 /* Advance past matched bytes. */
11025 int32_t ok = getbytes(d, &data, read);
11026 UPB_ASSERT(ok < 0);
11027 return DECODE_OK;
11028 } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
11029 return suspend_save(d);
11030 } else {
11031 return DECODE_MISMATCH;
11032 }
11033 }
11034
/* Skips over unknown field data.
 *
 * If fieldnum >= 0 the caller has already decoded a tag, and that
 * fieldnum/wire_type pair is processed first.  Otherwise a tag is decoded
 * from the stream.  Unknown groups are tracked by pushing a frame whose
 * groupnum is the negated field number; while the top frame's groupnum is
 * negative the function keeps looping over further unknown fields.
 *
 * Returns DECODE_OK once a value has been skipped outside any unknown group
 * (the skipped bytes are passed to upb_sink_putunknown), DECODE_ENDGROUP when
 * an END_GROUP tag matches the top frame's (non-negated) groupnum, or the
 * status produced by a helper or by upb_pbdecoder_suspend() on error.
 * NOTE(review): CHECK_RETURN is defined outside this section; presumably it
 * returns early on any non-DECODE_OK status -- confirm. */
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type) {
  /* A tag supplied by the caller skips the tag decode of the first
   * iteration by jumping straight into the loop body. */
  if (fieldnum >= 0)
    goto have_tag;

  while (true) {
    uint32_t tag;
    CHECK_RETURN(decode_v32(d, &tag));
    wire_type = tag & 0x7;
    fieldnum = tag >> 3;

have_tag:
    if (fieldnum == 0) {
      seterr(d, "Saw invalid field number (0)");
      return upb_pbdecoder_suspend(d);
    }

    switch (wire_type) {
      case UPB_WIRE_TYPE_32BIT:
        CHECK_RETURN(skip(d, 4));
        break;
      case UPB_WIRE_TYPE_64BIT:
        CHECK_RETURN(skip(d, 8));
        break;
      case UPB_WIRE_TYPE_VARINT: {
        /* The value is decoded only to move past it; it is discarded. */
        uint64_t u64;
        CHECK_RETURN(decode_varint(d, &u64));
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_RETURN(skip(d, len));
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        /* Negative groupnum marks the new frame as an unknown group. */
        CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum == -d->top->groupnum) {
          /* End of the unknown group pushed above. */
          decoder_pop(d);
        } else if (fieldnum == d->top->groupnum) {
          /* End of the group the top frame itself belongs to. */
          return DECODE_ENDGROUP;
        } else {
          seterr(d, "Unmatched ENDGROUP tag.");
          return upb_pbdecoder_suspend(d);
        }
        break;
      default:
        seterr(d, "Invalid wire type");
        return upb_pbdecoder_suspend(d);
    }

    if (d->top->groupnum >= 0) {
      /* TODO: More code needed for handling unknown groups. */
      upb_sink_putunknown(&d->top->sink, d->checkpoint, d->ptr - d->checkpoint);
      return DECODE_OK;
    }

    /* Unknown group -- continue looping over unknown fields. */
    checkpoint(d);
  }
}
11098
/* Points the program counter at the top frame's end-of-message code: looks up
 * the DISPATCH_ENDMSG entry in the frame's dispatch table and adds the stored
 * value to the frame's bytecode base. */
static void goto_endmsg(upb_pbdecoder *d) {
  upb_pbdecoder_frame *fr = d->top;
  upb_value entry;
  bool found = upb_inttable_lookup32(fr->dispatch, DISPATCH_ENDMSG, &entry);

  UPB_ASSERT(found);
  d->pc = fr->base + upb_value_getuint64(entry);
}
11105
11106 /* Parses a tag and jumps to the corresponding bytecode instruction for this
11107 * field.
11108 *
11109 * If the tag is unknown (or the wire type doesn't match), parses the field as
11110 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
11111 * instruction for the end of message. */
dispatch(upb_pbdecoder * d)11112 static int32_t dispatch(upb_pbdecoder *d) {
11113 upb_inttable *dispatch = d->top->dispatch;
11114 uint32_t tag;
11115 uint8_t wire_type;
11116 uint32_t fieldnum;
11117 upb_value val;
11118 int32_t retval;
11119
11120 /* Decode tag. */
11121 CHECK_RETURN(decode_v32(d, &tag));
11122 wire_type = tag & 0x7;
11123 fieldnum = tag >> 3;
11124
11125 /* Lookup tag. Because of packed/non-packed compatibility, we have to
11126 * check the wire type against two possibilities. */
11127 if (fieldnum != DISPATCH_ENDMSG &&
11128 upb_inttable_lookup32(dispatch, fieldnum, &val)) {
11129 uint64_t v = upb_value_getuint64(val);
11130 if (wire_type == (v & 0xff)) {
11131 d->pc = d->top->base + (v >> 16);
11132 return DECODE_OK;
11133 } else if (wire_type == ((v >> 8) & 0xff)) {
11134 bool found =
11135 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
11136 UPB_ASSERT(found);
11137 d->pc = d->top->base + upb_value_getuint64(val);
11138 return DECODE_OK;
11139 }
11140 }
11141
11142 /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG
11143 * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
11144 * we need to back up to, so that when we're done skipping unknown data we
11145 * can re-check the delimited end. */
11146 d->last--; /* Necessary if we get suspended */
11147 d->pc = d->last;
11148 UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM);
11149
11150 /* Unknown field or ENDGROUP. */
11151 retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
11152
11153 CHECK_RETURN(retval);
11154
11155 if (retval == DECODE_ENDGROUP) {
11156 goto_endmsg(d);
11157 return DECODE_OK;
11158 }
11159
11160 return DECODE_OK;
11161 }
11162
11163 /* Callers know that the stack is more than one deep because the opcodes that
11164 * call this only occur after PUSH operations. */
outer_frame(upb_pbdecoder * d)11165 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
11166 UPB_ASSERT(d->top != d->stack);
11167 return d->top - 1;
11168 }
11169
11170
11171 /* The main decoding loop *****************************************************/
11172
11173 /* The main decoder VM function. Uses traditional bytecode dispatch loop with a
11174 * switch() statement. */
run_decoder_vm(upb_pbdecoder * d,const mgroup * group,const upb_bufhandle * handle)11175 size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
11176 const upb_bufhandle* handle) {
11177
11178 #define VMCASE(op, code) \
11179 case op: { code; if (consumes_input(op)) checkpoint(d); break; }
11180 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
11181 VMCASE(OP_PARSE_ ## type, { \
11182 ctype val; \
11183 CHECK_RETURN(decode_ ## wt(d, &val)); \
11184 upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
11185 })
11186
11187 while(1) {
11188 int32_t instruction;
11189 opcode op;
11190 uint32_t arg;
11191 int32_t longofs;
11192
11193 d->last = d->pc;
11194 instruction = *d->pc++;
11195 op = getop(instruction);
11196 arg = instruction >> 8;
11197 longofs = arg;
11198 UPB_ASSERT(d->ptr != d->residual_end);
11199 UPB_UNUSED(group);
11200 #ifdef UPB_DUMP_BYTECODE
11201 fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
11202 "%x %s (%d)\n",
11203 (int)offset(d),
11204 (int)(d->ptr - d->buf),
11205 (int)(d->data_end - d->ptr),
11206 (int)(d->end - d->ptr),
11207 (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
11208 (int)(d->pc - 1 - group->bytecode),
11209 upb_pbdecoder_getopname(op),
11210 arg);
11211 #endif
11212 switch (op) {
11213 /* Technically, we are losing data if we see a 32-bit varint that is not
11214 * properly sign-extended. We could detect this and error about the data
11215 * loss, but proto2 does not do this, so we pass. */
11216 PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
11217 PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
11218 PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
11219 PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t)
11220 PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t)
11221 PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t)
11222 PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t)
11223 PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t)
11224 PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t)
11225 PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t)
11226 PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t)
11227 PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t)
11228 PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t)
11229
11230 VMCASE(OP_SETDISPATCH,
11231 d->top->base = d->pc - 1;
11232 memcpy(&d->top->dispatch, d->pc, sizeof(void*));
11233 d->pc += sizeof(void*) / sizeof(uint32_t);
11234 )
11235 VMCASE(OP_STARTMSG,
11236 CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
11237 )
11238 VMCASE(OP_ENDMSG,
11239 CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
11240 )
11241 VMCASE(OP_STARTSEQ,
11242 upb_pbdecoder_frame *outer = outer_frame(d);
11243 CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
11244 )
11245 VMCASE(OP_ENDSEQ,
11246 CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
11247 )
11248 VMCASE(OP_STARTSUBMSG,
11249 upb_pbdecoder_frame *outer = outer_frame(d);
11250 CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
11251 )
11252 VMCASE(OP_ENDSUBMSG,
11253 CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
11254 )
11255 VMCASE(OP_STARTSTR,
11256 uint32_t len = delim_remaining(d);
11257 upb_pbdecoder_frame *outer = outer_frame(d);
11258 CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
11259 if (len == 0) {
11260 d->pc++; /* Skip OP_STRING. */
11261 }
11262 )
11263 VMCASE(OP_STRING,
11264 uint32_t len = curbufleft(d);
11265 size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
11266 if (n > len) {
11267 if (n > delim_remaining(d)) {
11268 seterr(d, "Tried to skip past end of string.");
11269 return upb_pbdecoder_suspend(d);
11270 } else {
11271 int32_t ret = skip(d, n);
11272 /* This shouldn't return DECODE_OK, because n > len. */
11273 UPB_ASSERT(ret >= 0);
11274 return ret;
11275 }
11276 }
11277 advance(d, n);
11278 if (n < len || d->delim_end == NULL) {
11279 /* We aren't finished with this string yet. */
11280 d->pc--; /* Repeat OP_STRING. */
11281 if (n > 0) checkpoint(d);
11282 return upb_pbdecoder_suspend(d);
11283 }
11284 )
11285 VMCASE(OP_ENDSTR,
11286 CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
11287 )
11288 VMCASE(OP_PUSHTAGDELIM,
11289 CHECK_SUSPEND(pushtagdelim(d, arg));
11290 )
11291 VMCASE(OP_SETBIGGROUPNUM,
11292 d->top->groupnum = *d->pc++;
11293 )
11294 VMCASE(OP_POP,
11295 UPB_ASSERT(d->top > d->stack);
11296 decoder_pop(d);
11297 )
11298 VMCASE(OP_PUSHLENDELIM,
11299 uint32_t len;
11300 CHECK_RETURN(decode_v32(d, &len));
11301 CHECK_SUSPEND(decoder_push(d, offset(d) + len));
11302 set_delim_end(d);
11303 )
11304 VMCASE(OP_SETDELIM,
11305 set_delim_end(d);
11306 )
11307 VMCASE(OP_CHECKDELIM,
11308 /* We are guaranteed of this assert because we never allow ourselves to
11309 * consume bytes beyond data_end, which covers delim_end when non-NULL.
11310 */
11311 UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end));
11312 if (d->ptr == d->delim_end)
11313 d->pc += longofs;
11314 )
11315 VMCASE(OP_CALL,
11316 d->callstack[d->call_len++] = d->pc;
11317 d->pc += longofs;
11318 )
11319 VMCASE(OP_RET,
11320 UPB_ASSERT(d->call_len > 0);
11321 d->pc = d->callstack[--d->call_len];
11322 )
11323 VMCASE(OP_BRANCH,
11324 d->pc += longofs;
11325 )
11326 VMCASE(OP_TAG1,
11327 uint8_t expected;
11328 CHECK_SUSPEND(curbufleft(d) > 0);
11329 expected = (arg >> 8) & 0xff;
11330 if (*d->ptr == expected) {
11331 advance(d, 1);
11332 } else {
11333 int8_t shortofs;
11334 badtag:
11335 shortofs = arg;
11336 if (shortofs == LABEL_DISPATCH) {
11337 CHECK_RETURN(dispatch(d));
11338 } else {
11339 d->pc += shortofs;
11340 break; /* Avoid checkpoint(). */
11341 }
11342 }
11343 )
11344 VMCASE(OP_TAG2,
11345 uint16_t expected;
11346 CHECK_SUSPEND(curbufleft(d) > 0);
11347 expected = (arg >> 8) & 0xffff;
11348 if (curbufleft(d) >= 2) {
11349 uint16_t actual;
11350 memcpy(&actual, d->ptr, 2);
11351 if (expected == actual) {
11352 advance(d, 2);
11353 } else {
11354 goto badtag;
11355 }
11356 } else {
11357 int32_t result = upb_pbdecoder_checktag_slow(d, expected);
11358 if (result == DECODE_MISMATCH) goto badtag;
11359 if (result >= 0) return result;
11360 }
11361 )
11362 VMCASE(OP_TAGN, {
11363 uint64_t expected;
11364 int32_t result;
11365 memcpy(&expected, d->pc, 8);
11366 d->pc += 2;
11367 result = upb_pbdecoder_checktag_slow(d, expected);
11368 if (result == DECODE_MISMATCH) goto badtag;
11369 if (result >= 0) return result;
11370 })
11371 VMCASE(OP_DISPATCH, {
11372 CHECK_RETURN(dispatch(d));
11373 })
11374 VMCASE(OP_HALT, {
11375 return d->size_param;
11376 })
11377 }
11378 }
11379 }
11380
11381
11382 /* BytesHandler handlers ******************************************************/
11383
upb_pbdecoder_startbc(void * closure,const void * pc,size_t size_hint)11384 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
11385 upb_pbdecoder *d = closure;
11386 UPB_UNUSED(size_hint);
11387 d->top->end_ofs = UINT64_MAX;
11388 d->bufstart_ofs = 0;
11389 d->call_len = 1;
11390 d->callstack[0] = &halt;
11391 d->pc = pc;
11392 d->skip = 0;
11393 return d;
11394 }
11395
upb_pbdecoder_startjit(void * closure,const void * hd,size_t size_hint)11396 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
11397 upb_pbdecoder *d = closure;
11398 UPB_UNUSED(hd);
11399 UPB_UNUSED(size_hint);
11400 d->top->end_ofs = UINT64_MAX;
11401 d->bufstart_ofs = 0;
11402 d->call_len = 0;
11403 d->skip = 0;
11404 return d;
11405 }
11406
/* BytesHandler "end" callback: the caller has no more input.
 *
 * Fails (recording a message via seterr() and returning false) if the decoder
 * still holds residual bytes, is in the middle of a skip, has a top frame with
 * a known end offset, or -- after one final zero-length decode pass -- still
 * has entries on its call stack.  Returns true otherwise.
 *
 * |closure| is the upb_pbdecoder; |handler_data| is its upb_pbdecodermethod. */
bool upb_pbdecoder_end(void *closure, const void *handler_data) {
  upb_pbdecoder *d = closure;
  const upb_pbdecodermethod *method = handler_data;
  uint64_t end;
  char dummy;  /* Address handed to the zero-length decode calls below. */

  if (d->residual_end > d->residual) {
    seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
    return false;
  }

  if (d->skip) {
    seterr(d, "Unexpected EOF inside skipped data");
    return false;
  }

  /* UINT64_MAX is the "no known end" value installed by the start callbacks;
   * anything else means a delimited region is still open. */
  if (d->top->end_ofs != UINT64_MAX) {
    seterr(d, "Unexpected EOF inside delimited string");
    return false;
  }

  /* The user's end() call indicates that the message ends here. */
  end = offset(d);
  d->top->end_ofs = end;

#ifdef UPB_USE_JIT_X64
  if (method->is_native_) {
    const mgroup *group = (const mgroup*)method->group;
    if (d->top != d->stack)
      d->stack->end_ofs = 0;
    /* Run the native code once more with an empty buffer. */
    group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
  } else
#endif
  {
    const uint32_t *p = d->pc;
    d->stack->end_ofs = end;
    /* Check the previous bytecode, but guard against beginning. */
    if (p != method->code_base.ptr) p--;
    if (getop(*p) == OP_CHECKDELIM) {
      /* Rewind from OP_TAG* to OP_CHECKDELIM. */
      UPB_ASSERT(getop(*d->pc) == OP_TAG1 ||
                 getop(*d->pc) == OP_TAG2 ||
                 getop(*d->pc) == OP_TAGN ||
                 getop(*d->pc) == OP_DISPATCH);
      d->pc = p;
    }
    /* Run the interpreter once more with an empty buffer; its return value is
     * not inspected here, only the resulting call-stack depth below. */
    upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
  }

  if (d->call_len != 0) {
    seterr(d, "Unexpected EOF inside submessage or group");
    return false;
  }

  return true;
}
11463
upb_pbdecoder_decode(void * decoder,const void * group,const char * buf,size_t size,const upb_bufhandle * handle)11464 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
11465 size_t size, const upb_bufhandle *handle) {
11466 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
11467
11468 if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
11469 CHECK_RETURN(result);
11470
11471 return run_decoder_vm(decoder, group, handle);
11472 }
11473
11474
11475 /* Public API *****************************************************************/
11476
/* Returns the decoder to its initial parse state: the frame stack is emptied
 * (top points at the first frame, whose group number is cleared) and every
 * buffer pointer is aimed at the start of the residual buffer, leaving both
 * the current buffer and the residual region empty. */
void upb_pbdecoder_reset(upb_pbdecoder *d) {
  /* Frame stack. */
  d->top = d->stack;
  d->top->groupnum = 0;

  /* Buffer pointers: buf == ptr == end, and no residual bytes. */
  d->residual_end = d->residual;
  d->buf = d->residual;
  d->ptr = d->residual;
  d->end = d->residual;
}
11485
upb_pbdecoder_create(upb_env * e,const upb_pbdecodermethod * m,upb_sink * sink)11486 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
11487 upb_sink *sink) {
11488 const size_t default_max_nesting = 64;
11489 #ifndef NDEBUG
11490 size_t size_before = upb_env_bytesallocated(e);
11491 #endif
11492
11493 upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
11494 if (!d) return NULL;
11495
11496 d->method_ = m;
11497 d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
11498 d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
11499 if (!d->stack || !d->callstack) {
11500 return NULL;
11501 }
11502
11503 d->env = e;
11504 d->limit = d->stack + default_max_nesting - 1;
11505 d->stack_size = default_max_nesting;
11506 d->status = NULL;
11507
11508 upb_pbdecoder_reset(d);
11509 upb_bytessink_reset(&d->input_, &m->input_handler_, d);
11510
11511 UPB_ASSERT(sink);
11512 if (d->method_->dest_handlers_) {
11513 if (sink->handlers != d->method_->dest_handlers_)
11514 return NULL;
11515 }
11516 upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
11517
11518 /* If this fails, increase the value in decoder.h. */
11519 UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(e) - size_before <=
11520 UPB_PB_DECODER_SIZE);
11521 return d;
11522 }
11523
/* Returns the decoder's current stream offset, as computed by offset(). */
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
  const uint64_t parsed = offset(d);
  return parsed;
}
11527
upb_pbdecoder_method(const upb_pbdecoder * d)11528 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
11529 return d->method_;
11530 }
11531
upb_pbdecoder_input(upb_pbdecoder * d)11532 upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
11533 return &d->input_;
11534 }
11535
upb_pbdecoder_maxnesting(const upb_pbdecoder * d)11536 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
11537 return d->stack_size;
11538 }
11539
/* Sets the maximum nesting depth to |max| frames, growing the frame stack and
 * call stack if |max| exceeds the currently allocated size.
 *
 * Returns false (leaving the decoder usable) if |max| is smaller than the
 * current depth or if a reallocation fails; returns true otherwise.
 *
 * d->top and d->limit point into d->stack, so both are rebased immediately
 * after the stack is reallocated: the allocator is free to move the block,
 * and the later callstack reallocation may still fail and return early. */
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
  size_t depth;

  UPB_ASSERT(d->top >= d->stack);
  depth = (size_t)(d->top - d->stack);

  if (max < depth) {
    /* Can't set a limit smaller than what we are currently at. */
    return false;
  }

  if (max > d->stack_size) {
    /* Need to reallocate stack and callstack to accommodate. */
    ptrdiff_t limit_ofs = d->limit - d->stack;
    size_t old_size = stacksize(d, d->stack_size);
    size_t new_size = stacksize(d, max);
    void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->stack = p;
    /* Re-anchor the pointers that referred to the old stack block. */
    d->top = d->stack + depth;
    d->limit = d->stack + limit_ofs;

    old_size = callstacksize(d, d->stack_size);
    new_size = callstacksize(d, max);
    p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
    if (!p) {
      /* The frame stack is now larger than stack_size records, which is
       * harmless; top and limit are already consistent with it. */
      return false;
    }
    d->callstack = p;

    d->stack_size = max;
  }

  d->limit = d->stack + max - 1;
  return true;
}
11572 /*
11573 ** upb::Encoder
11574 **
11575 ** Since we are implementing pure handlers (ie. without any out-of-band access
11576 ** to pre-computed lengths), we have to buffer all submessages before we can
11577 ** emit even their first byte.
11578 **
11579 ** Not knowing the size of submessages also means we can't write a perfect
11580 ** zero-copy implementation, even with buffering. Lengths are stored as
11581 ** varints, which means that we don't know how many bytes to reserve for the
11582 ** length until we know what the length is.
11583 **
11584 ** This leaves us with three main choices:
11585 **
11586 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
11587 ** once into the output buffer.
11588 **
11589 ** 2. attempt to buffer data directly into the output buffer, estimating how
11590 ** many bytes each length will take. When our guesses are wrong, use
11591 ** memmove() to grow or shrink the allotted space.
11592 **
11593 ** 3. buffer directly into the output buffer, allocating a max length
11594 ** ahead-of-time for each submessage length. If we overallocated, we waste
11595 ** space, but no memcpy() or memmove() is required. This approach requires
11596 ** defining a maximum size for submessages and rejecting submessages that
11597 ** exceed that size.
11598 **
11599 ** (2) and (3) have the potential to have better performance, but they are more
11600 ** complicated and subtle to implement:
11601 **
11602 ** (3) requires making an arbitrary choice of the maximum message size; it
11603 ** wastes space when submessages are shorter than this and fails
11604 ** completely when they are longer. This makes it more finicky and
11605 ** requires configuration based on the input. It also makes it impossible
11606 ** to perfectly match the output of reference encoders that always use the
11607 ** optimal amount of space for each length.
11608 **
**   (2) requires guessing the size upfront, and if multiple lengths are
11610 ** guessed wrong the minimum required number of memmove() operations may
11611 ** be complicated to compute correctly. Implemented properly, it may have
11612 ** a useful amortized or average cost, but more investigation is required
11613 ** to determine this and what the optimal algorithm is to achieve it.
11614 **
11615 ** (1) makes you always pay for exactly one copy, but its implementation is
11616 ** the simplest and its performance is predictable.
11617 **
11618 ** So for now, we implement (1) only. If we wish to optimize later, we should
11619 ** be able to do it without affecting users.
11620 **
11621 ** The strategy is to buffer the segments of data that do *not* depend on
11622 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
11623 ** and lengths. When the top-level submessage ends, we can go beginning to end,
11624 ** alternating the writing of lengths with memcpy() of the rest of the data.
11625 ** At the top level though, no buffering is required.
11626 */
11627
11628
11629
11630 /* The output buffer is divided into segments; a segment is a string of data
11631 * that is "ready to go" -- it does not need any varint lengths inserted into
11632 * the middle. The seams between segments are where varints will be inserted
11633 * once they are known.
11634 *
11635 * We also use the concept of a "run", which is a range of encoded bytes that
11636 * occur at a single submessage level. Every segment contains one or more runs.
11637 *
11638 * A segment can span messages. Consider:
11639 *
11640 * .--Submessage lengths---------.
11641 * | | |
11642 * | V V
11643 * V | |--------------- | |-----------------
11644 * Submessages: | |-----------------------------------------------
11645 * Top-level msg: ------------------------------------------------------------
11646 *
11647 * Segments: ----- ------------------- -----------------
11648 * Runs: *---- *--------------*--- *----------------
11649 * (* marks the start)
11650 *
 * Note that the top-level message is not in any segment because it does not
11652 * have any length preceding it.
11653 *
11654 * A segment is only interrupted when another length needs to be inserted. So
11655 * observe how the second segment spans both the inner submessage and part of
11656 * the next enclosing message. */
/* One entry in the encoder's segment list.  When buffered data is flushed,
 * each entry is emitted as the varint encoding of msglen followed by seglen
 * bytes of buffered data. */
typedef struct {
  uint32_t msglen;  /* The length to varint-encode before this segment. */
  uint32_t seglen;  /* Length of the segment. */
} upb_pb_encoder_segment;
11661
/* State for one protobuf binary encoder instance. */
struct upb_pb_encoder {
  /* Environment used for all of this encoder's allocations. */
  upb_env *env;

  /* Our input and output. */
  upb_sink input_;
  upb_bytessink *output_;

  /* The "subclosure" -- used as the inner closure as part of the bytessink
   * protocol. */
  void *subc;

  /* The output buffer and limit, and our current write position.  "buf" is
   * allocated from the environment when the encoder is created and is
   * reallocated if we need to grow beyond its current size. */
  char *buf, *ptr, *limit;

  /* The beginning of the current run, or undefined if we are at the top
   * level. */
  char *runbegin;

  /* The list of segments we are accumulating. */
  upb_pb_encoder_segment *segbuf, *segptr, *seglimit;

  /* The stack of enclosing submessages.  Each entry in the stack is the index
   * (into segbuf) of the segment where this submessage's length is being
   * accumulated.  "top" is NULL while we are not inside any delimited
   * region. */
  int *stack, *top, *stacklimit;

  /* Depth of startmsg/endmsg calls. */
  int depth;
};
11692
11693 /* low-level buffering ********************************************************/
11694
11695 /* Low-level functions for interacting with the output buffer. */
11696
11697 /* TODO(haberman): handle pushback */
putbuf(upb_pb_encoder * e,const char * buf,size_t len)11698 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
11699 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
11700 UPB_ASSERT(n == len);
11701 }
11702
top(upb_pb_encoder * e)11703 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
11704 return &e->segbuf[*e->top];
11705 }
11706
11707 /* Call to ensure that at least "bytes" bytes are available for writing at
11708 * e->ptr. Returns false if the bytes could not be allocated. */
reserve(upb_pb_encoder * e,size_t bytes)11709 static bool reserve(upb_pb_encoder *e, size_t bytes) {
11710 if ((size_t)(e->limit - e->ptr) < bytes) {
11711 /* Grow buffer. */
11712 char *new_buf;
11713 size_t needed = bytes + (e->ptr - e->buf);
11714 size_t old_size = e->limit - e->buf;
11715
11716 size_t new_size = old_size;
11717
11718 while (new_size < needed) {
11719 new_size *= 2;
11720 }
11721
11722 new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
11723
11724 if (new_buf == NULL) {
11725 return false;
11726 }
11727
11728 e->ptr = new_buf + (e->ptr - e->buf);
11729 e->runbegin = new_buf + (e->runbegin - e->buf);
11730 e->limit = new_buf + new_size;
11731 e->buf = new_buf;
11732 }
11733
11734 return true;
11735 }
11736
11737 /* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have
11738 * previously called reserve() with at least this many bytes. */
encoder_advance(upb_pb_encoder * e,size_t bytes)11739 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
11740 UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);
11741 e->ptr += bytes;
11742 }
11743
11744 /* Call when all of the bytes for a handler have been written. Flushes the
11745 * bytes if possible and necessary, returning false if this failed. */
commit(upb_pb_encoder * e)11746 static bool commit(upb_pb_encoder *e) {
11747 if (!e->top) {
11748 /* We aren't inside a delimited region. Flush our accumulated bytes to
11749 * the output.
11750 *
11751 * TODO(haberman): in the future we may want to delay flushing for
11752 * efficiency reasons. */
11753 putbuf(e, e->buf, e->ptr - e->buf);
11754 e->ptr = e->buf;
11755 }
11756
11757 return true;
11758 }
11759
11760 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_pb_encoder * e,const void * data,size_t len)11761 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
11762 if (!reserve(e, len)) {
11763 return false;
11764 }
11765
11766 memcpy(e->ptr, data, len);
11767 encoder_advance(e, len);
11768 return true;
11769 }
11770
11771 /* Finish the current run by adding the run totals to the segment and message
11772 * length. */
accumulate(upb_pb_encoder * e)11773 static void accumulate(upb_pb_encoder *e) {
11774 size_t run_len;
11775 UPB_ASSERT(e->ptr >= e->runbegin);
11776 run_len = e->ptr - e->runbegin;
11777 e->segptr->seglen += run_len;
11778 top(e)->msglen += run_len;
11779 e->runbegin = e->ptr;
11780 }
11781
11782 /* Call to indicate the start of delimited region for which the full length is
11783 * not yet known. All data will be buffered until the length is known.
11784 * Delimited regions may be nested; their lengths will all be tracked properly. */
start_delim(upb_pb_encoder * e)11785 static bool start_delim(upb_pb_encoder *e) {
11786 if (e->top) {
11787 /* We are already buffering, advance to the next segment and push it on the
11788 * stack. */
11789 accumulate(e);
11790
11791 if (++e->top == e->stacklimit) {
11792 /* TODO(haberman): grow stack? */
11793 return false;
11794 }
11795
11796 if (++e->segptr == e->seglimit) {
11797 /* Grow segment buffer. */
11798 size_t old_size =
11799 (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
11800 size_t new_size = old_size * 2;
11801 upb_pb_encoder_segment *new_buf =
11802 upb_env_realloc(e->env, e->segbuf, old_size, new_size);
11803
11804 if (new_buf == NULL) {
11805 return false;
11806 }
11807
11808 e->segptr = new_buf + (e->segptr - e->segbuf);
11809 e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
11810 e->segbuf = new_buf;
11811 }
11812 } else {
11813 /* We were previously at the top level, start buffering. */
11814 e->segptr = e->segbuf;
11815 e->top = e->stack;
11816 e->runbegin = e->ptr;
11817 }
11818
11819 *e->top = e->segptr - e->segbuf;
11820 e->segptr->seglen = 0;
11821 e->segptr->msglen = 0;
11822
11823 return true;
11824 }
11825
11826 /* Call to indicate the end of a delimited region. We now know the length of
11827 * the delimited region. If we are not nested inside any other delimited
11828 * regions, we can now emit all of the buffered data we accumulated. */
end_delim(upb_pb_encoder * e)11829 static bool end_delim(upb_pb_encoder *e) {
11830 size_t msglen;
11831 accumulate(e);
11832 msglen = top(e)->msglen;
11833
11834 if (e->top == e->stack) {
11835 /* All lengths are now available, emit all buffered data. */
11836 char buf[UPB_PB_VARINT_MAX_LEN];
11837 upb_pb_encoder_segment *s;
11838 const char *ptr = e->buf;
11839 for (s = e->segbuf; s <= e->segptr; s++) {
11840 size_t lenbytes = upb_vencode64(s->msglen, buf);
11841 putbuf(e, buf, lenbytes);
11842 putbuf(e, ptr, s->seglen);
11843 ptr += s->seglen;
11844 }
11845
11846 e->ptr = e->buf;
11847 e->top = NULL;
11848 } else {
11849 /* Need to keep buffering; propagate length info into enclosing
11850 * submessages. */
11851 --e->top;
11852 top(e)->msglen += msglen + upb_varint_size(msglen);
11853 }
11854
11855 return true;
11856 }
11857
11858
11859 /* tag_t **********************************************************************/
11860
11861 /* A precomputed (pre-encoded) tag and length. */
11862
typedef struct {
  uint8_t bytes;  /* Number of valid bytes in "tag". */
  char tag[7];    /* Varint-encoded (field_number << 3) | wire_type. */
} tag_t;
11867
11868 /* Allocates a new tag for this field, and sets it in these handlerattr. */
new_tag(upb_handlers * h,const upb_fielddef * f,upb_wiretype_t wt,upb_handlerattr * attr)11869 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
11870 upb_handlerattr *attr) {
11871 uint32_t n = upb_fielddef_number(f);
11872
11873 tag_t *tag = upb_gmalloc(sizeof(tag_t));
11874 tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
11875
11876 upb_handlerattr_init(attr);
11877 upb_handlerattr_sethandlerdata(attr, tag);
11878 upb_handlers_addcleanup(h, tag, upb_gfree);
11879 }
11880
/* Appends the pre-encoded bytes of |tag| to the buffer.  Returns false if
 * buffer space could not be reserved. */
static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
  const char *encoded = tag->tag;
  size_t encoded_len = tag->bytes;
  return encode_bytes(e, encoded, encoded_len);
}
11884
11885
11886 /* encoding of wire types *****************************************************/
11887
/* Appends |val| as a protobuf fixed64: eight bytes, least-significant byte
 * first.  The bytes are produced with shifts rather than by copying the
 * in-memory representation, so the output is the same on little- and
 * big-endian hosts.  Returns false if buffer space could not be reserved. */
static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
  unsigned char le[8];
  size_t i;

  for (i = 0; i < sizeof(le); i++) {
    le[i] = (unsigned char)(val >> (8 * i));
  }

  return encode_bytes(e, le, sizeof(le));
}
11892
/* Appends |val| as a protobuf fixed32: four bytes, least-significant byte
 * first.  The bytes are produced with shifts rather than by copying the
 * in-memory representation, so the output is the same on little- and
 * big-endian hosts.  Returns false if buffer space could not be reserved. */
static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
  unsigned char le[4];
  size_t i;

  for (i = 0; i < sizeof(le); i++) {
    le[i] = (unsigned char)(val >> (8 * i));
  }

  return encode_bytes(e, le, sizeof(le));
}
11897
/* Appends |val| as a varint.  Space for the longest possible varint is
 * reserved up front; the write position then advances by the number of bytes
 * upb_vencode64() actually produced.  Returns false if the reservation
 * fails. */
static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
  size_t written;

  if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    return false;
  }

  written = upb_vencode64(val, e->ptr);
  encoder_advance(e, written);
  return true;
}
11906
/* Returns the object representation of |d| reinterpreted as a uint64_t.  The
 * copy goes through memcpy() so no aliasing rules are violated. */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
11912
/* Returns the object representation of |d| reinterpreted as a uint32_t.  The
 * copy goes through memcpy() so no aliasing rules are violated. */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
11918
11919
11920 /* encoding of proto types ****************************************************/
11921
startmsg(void * c,const void * hd)11922 static bool startmsg(void *c, const void *hd) {
11923 upb_pb_encoder *e = c;
11924 UPB_UNUSED(hd);
11925 if (e->depth++ == 0) {
11926 upb_bytessink_start(e->output_, 0, &e->subc);
11927 }
11928 return true;
11929 }
11930
/* End-of-message handler.  Decrements the message depth; when the outermost
 * message ends it also ends the output bytes sink.  |status| is unused.
 * Always returns true. */
static bool endmsg(void *c, const void *hd, upb_status *status) {
  upb_pb_encoder *enc = c;
  UPB_UNUSED(hd);
  UPB_UNUSED(status);

  enc->depth--;
  if (enc->depth == 0) {
    upb_bytessink_end(enc->output_);
  }
  return true;
}
11940
encode_startdelimfield(void * c,const void * hd)11941 static void *encode_startdelimfield(void *c, const void *hd) {
11942 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
11943 return ok ? c : UPB_BREAK;
11944 }
11945
/* Unknown-field handler: copies the |len| raw bytes at |buf| into the output
 * unchanged and commits them.  Returns false if either step fails. */
static bool encode_unknown(void *c, const void *hd, const char *buf,
                           size_t len) {
  UPB_UNUSED(hd);

  if (!encode_bytes(c, buf, len)) {
    return false;
  }
  return commit(c);
}
11951
/* End handler for a length-delimited field: closes the delimited region that
 * the matching start handler opened.  Returns end_delim()'s result. */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool closed;
  UPB_UNUSED(hd);

  closed = end_delim(c);
  return closed;
}
11956
encode_startgroup(void * c,const void * hd)11957 static void *encode_startgroup(void *c, const void *hd) {
11958 return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
11959 }
11960
/* End handler for a group field: writes and commits the tag in |hd|.
 * Returns false if either step fails. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
11964
/* Start handler for string/bytes fields.  Identical to the generic
 * delimited-field start handler; |size_hint| is not used. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure;
  UPB_UNUSED(size_hint);

  subclosure = encode_startdelimfield(c, hd);
  return subclosure;
}
11969
/* String-data handler: appends the |len| bytes at |buf| to the buffer.
 * Returns |len| when the bytes were stored and 0 when they could not be. */
static size_t encode_strbuf(void *c, const void *hd, const char *buf,
                            size_t len, const upb_bufhandle *h) {
  UPB_UNUSED(hd);
  UPB_UNUSED(h);

  if (!encode_bytes(c, buf, len)) {
    return 0;
  }
  return len;
}
11976
/* T(type, ctype, convert, encode) generates the two value handlers for one
 * proto scalar type:
 *
 *   encode_scalar_<type>: writes the field's tag (from |hd|), then the value,
 *                         then commits.
 *   encode_packed_<type>: writes only the value; |hd| is unused.
 *
 * |ctype| is the C type of the handler's value parameter.  |convert| is
 * applied as (convert)(val), so it may be either a function name (a call) or a
 * type name (a cast).  |encode| is the wire-level writer to use. */
#define T(type, ctype, convert, encode)                                  \
  static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
    return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
  }                                                                      \
  static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
    UPB_UNUSED(hd);                                                      \
    return encode(e, (convert)(val));                                    \
  }

T(double,   double,   dbl2uint64,   encode_fixed64)
T(float,    float,    flt2uint32,   encode_fixed32)
T(int64,    int64_t,  uint64_t,     encode_varint)
/* int32 is cast to int64_t before being passed on as a varint. */
T(int32,    int32_t,  int64_t,      encode_varint)
T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
T(bool,     bool,     bool,         encode_varint)
T(uint32,   uint32_t, uint32_t,     encode_varint)
T(uint64,   uint64_t, uint64_t,     encode_varint)
/* NOTE(review): enum is cast to uint32_t, unlike int32 above -- confirm this
 * is the intended encoding for negative enum values. */
T(enum,     int32_t,  uint32_t,     encode_varint)
T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
T(sint64,   int64_t,  upb_zzenc_64, encode_varint)

#undef T
12002
12003
12004 /* code to build the handlers *************************************************/
12005
/* Callback passed to upb_handlers_newfrozen(): installs the encoder's
 * handlers on |h| for every field of the message type |h| is bound to.
 * |closure| is unused.
 *
 * For each field a tag is pre-encoded and attached as handler data.  Packed
 * repeated primitive fields use the DELIMITED wire type and get sequence
 * start/end handlers that open/close a delimited region; their element
 * handlers are the tag-less encode_packed_* variants. */
static void newhandlers_callback(const void *closure, upb_handlers *h) {
  const upb_msgdef *m;
  upb_msg_field_iter i;

  UPB_UNUSED(closure);

  upb_handlers_setstartmsg(h, startmsg, NULL);
  upb_handlers_setendmsg(h, endmsg, NULL);
  upb_handlers_setunknown(h, encode_unknown, NULL);

  m = upb_handlers_msgdef(h);
  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
                  upb_fielddef_packed(f);
    upb_handlerattr attr;
    upb_wiretype_t wt =
        packed ? UPB_WIRE_TYPE_DELIMITED
               : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];

    /* Pre-encode the tag for this field. */
    new_tag(h, f, wt, &attr);

    if (packed) {
      upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
      upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
    }

/* T(upper, lower, upbtype): switch case for descriptor type |upper| that
 * registers encode_packed_<lower> or encode_scalar_<lower> through
 * upb_handlers_set<upbtype>(). */
#define T(upper, lower, upbtype)                                     \
  case UPB_DESCRIPTOR_TYPE_##upper:                                  \
    if (packed) {                                                    \
      upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
    } else {                                                         \
      upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
    }                                                                \
    break;

    switch (upb_fielddef_descriptortype(f)) {
      T(DOUBLE,   double,   double);
      T(FLOAT,    float,    float);
      T(INT64,    int64,    int64);
      T(INT32,    int32,    int32);
      T(FIXED64,  fixed64,  uint64);
      T(FIXED32,  fixed32,  uint32);
      T(BOOL,     bool,     bool);
      T(UINT32,   uint32,   uint32);
      T(UINT64,   uint64,   uint64);
      T(ENUM,     enum,     int32);
      T(SFIXED32, sfixed32, int32);
      T(SFIXED64, sfixed64, int64);
      T(SINT32,   sint32,   int32);
      T(SINT64,   sint64,   int64);
      case UPB_DESCRIPTOR_TYPE_STRING:
      case UPB_DESCRIPTOR_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, encode_startstr, &attr);
        upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
        upb_handlers_setstring(h, f, encode_strbuf, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_MESSAGE:
        upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
        upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_GROUP: {
        /* Endgroup takes a different tag (wire_type = END_GROUP). */
        upb_handlerattr attr2;
        new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);

        upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
        upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);

        upb_handlerattr_uninit(&attr2);
        break;
      }
    }

#undef T

    upb_handlerattr_uninit(&attr);
  }
}
12088
/* Returns the encoder to its idle state: message depth zero and no delimited
 * region open (both the region stack top and the current segment are
 * cleared).  The buffers themselves are left untouched. */
void upb_pb_encoder_reset(upb_pb_encoder *e) {
  e->depth = 0;
  e->top = NULL;
  e->segptr = NULL;
}
12094
12095
12096 /* public API *****************************************************************/
12097
upb_pb_encoder_newhandlers(const upb_msgdef * m,const void * owner)12098 const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
12099 const void *owner) {
12100 return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
12101 }
12102
upb_pb_encoder_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)12103 upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
12104 upb_bytessink *output) {
12105 const size_t initial_bufsize = 256;
12106 const size_t initial_segbufsize = 16;
12107 /* TODO(haberman): make this configurable. */
12108 const size_t stack_size = 64;
12109 #ifndef NDEBUG
12110 const size_t size_before = upb_env_bytesallocated(env);
12111 #endif
12112
12113 upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
12114 if (!e) return NULL;
12115
12116 e->buf = upb_env_malloc(env, initial_bufsize);
12117 e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
12118 e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
12119
12120 if (!e->buf || !e->segbuf || !e->stack) {
12121 return NULL;
12122 }
12123
12124 e->limit = e->buf + initial_bufsize;
12125 e->seglimit = e->segbuf + initial_segbufsize;
12126 e->stacklimit = e->stack + stack_size;
12127
12128 upb_pb_encoder_reset(e);
12129 upb_sink_reset(&e->input_, h, e);
12130
12131 e->env = env;
12132 e->output_ = output;
12133 e->subc = output->closure;
12134 e->ptr = e->buf;
12135
12136 /* If this fails, increase the value in encoder.h. */
12137 UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <=
12138 UPB_PB_ENCODER_SIZE);
12139 return e;
12140 }
12141
upb_pb_encoder_input(upb_pb_encoder * e)12142 upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
12143
12144
12145
upb_loaddescriptor(const char * buf,size_t n,const void * owner,upb_status * status)12146 upb_filedef **upb_loaddescriptor(const char *buf, size_t n, const void *owner,
12147 upb_status *status) {
12148 /* Create handlers. */
12149 const upb_pbdecodermethod *decoder_m;
12150 const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
12151 upb_env env;
12152 upb_pbdecodermethodopts opts;
12153 upb_pbdecoder *decoder;
12154 upb_descreader *reader;
12155 bool ok;
12156 size_t i;
12157 upb_filedef **ret = NULL;
12158
12159 upb_pbdecodermethodopts_init(&opts, reader_h);
12160 decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
12161
12162 upb_env_init(&env);
12163 upb_env_reporterrorsto(&env, status);
12164
12165 reader = upb_descreader_create(&env, reader_h);
12166 decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
12167
12168 /* Push input data. */
12169 ok = upb_bufsrc_putbuf(buf, n, upb_pbdecoder_input(decoder));
12170
12171 if (!ok) {
12172 goto cleanup;
12173 }
12174
12175 ret = upb_gmalloc(sizeof (*ret) * (upb_descreader_filecount(reader) + 1));
12176
12177 if (!ret) {
12178 goto cleanup;
12179 }
12180
12181 for (i = 0; i < upb_descreader_filecount(reader); i++) {
12182 ret[i] = upb_descreader_file(reader, i);
12183 upb_filedef_ref(ret[i], owner);
12184 }
12185
12186 ret[i] = NULL;
12187
12188 cleanup:
12189 upb_env_uninit(&env);
12190 upb_handlers_unref(reader_h, &reader_h);
12191 upb_pbdecodermethod_unref(decoder_m, &decoder_m);
12192 return ret;
12193 }
12194 /*
12195 * upb::pb::TextPrinter
12196 *
12197 * OPT: This is not optimized at all. It uses printf() which parses the format
12198 * string every time, and it allocates memory for every put.
12199 */
12200
12201
12202 #include <ctype.h>
12203 #include <float.h>
12204 #include <inttypes.h>
12205 #include <stdarg.h>
12206 #include <stdio.h>
12207 #include <string.h>
12208
12209
12210 struct upb_textprinter {
12211 upb_sink input_;
12212 upb_bytessink *output_;
12213 int indent_depth_;
12214 bool single_line_;
12215 void *subc;
12216 };
12217
/* Jumps to the enclosing function's "err" label when |x| evaluates to a
 * negative value.  Wrapped in do/while(0) so that "CHECK(x);" is exactly one
 * statement and can be used safely as the body of an if/else. */
#define CHECK(x) do { if ((x) < 0) goto err; } while (0)
12219
/* Returns the part of |longname| that follows its last '.', or |longname|
 * itself when it contains no '.'.  The result points into |longname|. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
12224
/* Writes the indentation for the current nesting level: two spaces per level
 * of indent_depth_.  Nothing is written in single-line mode.
 *
 * The literal passed to putbuf must really be two characters long, since a
 * length of 2 is passed; a one-character literal would emit its terminating
 * NUL byte into the output.
 *
 * Always returns 0 (the putbuf results are not inspected). */
static int indent(upb_textprinter *p) {
  int level;
  if (p->single_line_) {
    return 0;
  }
  for (level = 0; level < p->indent_depth_; level++) {
    upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
  }
  return 0;
}
12232
/* Terminates a field: writes a single ' ' in single-line mode, otherwise a
 * single '\n'.  Always returns 0. */
static int endfield(upb_textprinter *p) {
  char sep = '\n';
  if (p->single_line_) {
    sep = ' ';
  }
  upb_bytessink_putbuf(p->output_, p->subc, &sep, 1, NULL);
  return 0;
}
12238
/* Writes the |len| bytes at |buf| to the printer's output with C-style
 * escaping.  Based on CEscapeInternal() from Google's protobuf release.
 *
 * Newline, carriage return, tab, both quote characters and backslash get
 * their two-character escapes.  Any other byte that is not printable is
 * written as a three-digit octal escape.  When |preserve_utf8| is true, bytes
 * >= 0x80 are copied through unchanged instead of being escaped.
 *
 * Output is staged in a stack buffer and flushed whenever fewer than four
 * bytes (the longest escape) remain.  Escapes are written digit by digit
 * rather than with sprintf(), because sprintf() also stores a terminating NUL
 * and would write one byte past the buffer when exactly four bytes are free.
 *
 * Always returns 0 (the putbuf results are not inspected). */
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                      bool preserve_utf8) {
  static const char hexdigits[] = "0123456789abcdef";
  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
  const char *end = buf + len;

  /* I think hex is prettier and more useful, but proto2 uses octal; should
   * investigate whether it can parse hex also. */
  const bool use_hex = false;
  bool last_hex_escape = false; /* true if last output char was \xNN */

  for (; buf < end; buf++) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    const unsigned char ch = (unsigned char)*buf;
    bool is_hex_escape = false;

    if (dstend - dst < 4) {
      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
      dst = dstbuf;
    }

    switch (ch) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
        /* Note that if we emit \xNN and the buf character after that is a hex
         * digit then that digit must be escaped too to prevent it being
         * interpreted as part of the character code by C. */
        if ((!preserve_utf8 || ch < 0x80) &&
            (!isprint(ch) || (last_hex_escape && isxdigit(ch)))) {
          *(dst++) = '\\';
          if (use_hex) {
            *(dst++) = 'x';
            *(dst++) = hexdigits[ch >> 4];
            *(dst++) = hexdigits[ch & 0xf];
            is_hex_escape = true;
          } else {
            *(dst++) = (char)('0' + (ch >> 6));
            *(dst++) = (char)('0' + ((ch >> 3) & 7));
            *(dst++) = (char)('0' + (ch & 7));
          }
        } else {
          *(dst++) = *buf;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }

  /* Flush remaining data. */
  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
  return 0;
}
12285
/* printf-style output: formats |fmt| and the variadic arguments into a
 * temporary upb_gmalloc()'d string and writes it to the printer's output.
 *
 * Returns false if the formatted length cannot be determined or the
 * temporary string cannot be allocated; otherwise returns the result of the
 * putbuf call converted to bool. */
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_list args_copy;
  char *str;
  int written;
  int len;
  bool ok;

  va_start(args, fmt);

  /* Run once to get the length of the string. */
  _upb_va_copy(args_copy, args);
  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  /* A negative length signals a formatting error. */
  if (len < 0) {
    va_end(args);
    return false;
  }

  /* + 1 for the NUL terminator (the formatter writes it even though we do not
   * need it). */
  str = upb_gmalloc((size_t)len + 1);
  if (!str) {
    /* Every va_start() must be matched by va_end() before returning. */
    va_end(args);
    return false;
  }

  /* Bounded by the size computed above. */
  written = _upb_vsnprintf(str, (size_t)len + 1, fmt, args);
  va_end(args);
  UPB_ASSERT(written == len);

  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
  upb_gfree(str);
  return ok;
}
12312
12313
12314 /* handlers *******************************************************************/
12315
textprinter_startmsg(void * c,const void * hd)12316 static bool textprinter_startmsg(void *c, const void *hd) {
12317 upb_textprinter *p = c;
12318 UPB_UNUSED(hd);
12319 if (p->indent_depth_ == 0) {
12320 upb_bytessink_start(p->output_, 0, &p->subc);
12321 }
12322 return true;
12323 }
12324
/* End-of-message handler.  For the top-level message (indent depth 0) it
 * ends the output byte sink.  |hd| and |s| are unused.  Always returns
 * true. */
static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
  upb_textprinter *printer = c;
  const bool is_top_level = (printer->indent_depth_ == 0);
  UPB_UNUSED(hd);
  UPB_UNUSED(s);
  if (is_top_level) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
12334
/* Defines textprinter_put<name>(), the value handler for fields whose C type
 * is |ctype|.  The handler prints "<field name>: <value>" using the printf
 * conversion |fmt|, preceded by indentation and followed by the field
 * terminator.  It returns false if indent() or endfield() returns a negative
 * value; the result of putf() is not inspected. */
#define TYPE(name, ctype, fmt)                                                 \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *printer = closure;                                        \
    const upb_fielddef *field = handler_data;                                  \
    if (indent(printer) < 0) return false;                                     \
    putf(printer, "%s: " fmt, upb_fielddef_name(field), val);                  \
    if (endfield(printer) < 0) return false;                                   \
    return true;                                                               \
  }
12347
textprinter_putbool(void * closure,const void * handler_data,bool val)12348 static bool textprinter_putbool(void *closure, const void *handler_data,
12349 bool val) {
12350 upb_textprinter *p = closure;
12351 const upb_fielddef *f = handler_data;
12352 CHECK(indent(p));
12353 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
12354 CHECK(endfield(p));
12355 return true;
12356 err:
12357 return false;
12358 }
12359
/* STRINGIFY_HELPER turns its argument into a string literal without
 * expanding it; STRINGIFY_MACROVAL expands the argument first, so a macro's
 * value (rather than its name) is stringified. */
#define STRINGIFY_HELPER(text) #text
#define STRINGIFY_MACROVAL(macro) STRINGIFY_HELPER(macro)
12362
12363 TYPE(int32, int32_t, "%" PRId32)
12364 TYPE(int64, int64_t, "%" PRId64)
12365 TYPE(uint32, uint32_t, "%" PRIu32)
12366 TYPE(uint64, uint64_t, "%" PRIu64)
STRINGIFY_MACROVAL(FLT_DIG)12367 TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
12368 TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
12369
12370 #undef TYPE
12371
12372 /* Output a symbolic value from the enum if found, else just print as int32. */
12373 static bool textprinter_putenum(void *closure, const void *handler_data,
12374 int32_t val) {
12375 upb_textprinter *p = closure;
12376 const upb_fielddef *f = handler_data;
12377 const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
12378 const char *label = upb_enumdef_iton(enum_def, val);
12379 if (label) {
12380 indent(p);
12381 putf(p, "%s: %s", upb_fielddef_name(f), label);
12382 endfield(p);
12383 } else {
12384 if (!textprinter_putint32(closure, handler_data, val))
12385 return false;
12386 }
12387 return true;
12388 }
12389
textprinter_startstr(void * closure,const void * handler_data,size_t size_hint)12390 static void *textprinter_startstr(void *closure, const void *handler_data,
12391 size_t size_hint) {
12392 upb_textprinter *p = closure;
12393 const upb_fielddef *f = handler_data;
12394 UPB_UNUSED(size_hint);
12395 indent(p);
12396 putf(p, "%s: \"", upb_fielddef_name(f));
12397 return p;
12398 }
12399
textprinter_endstr(void * closure,const void * handler_data)12400 static bool textprinter_endstr(void *closure, const void *handler_data) {
12401 upb_textprinter *p = closure;
12402 UPB_UNUSED(handler_data);
12403 putf(p, "\"");
12404 endfield(p);
12405 return true;
12406 }
12407
textprinter_putstr(void * closure,const void * hd,const char * buf,size_t len,const upb_bufhandle * handle)12408 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
12409 size_t len, const upb_bufhandle *handle) {
12410 upb_textprinter *p = closure;
12411 const upb_fielddef *f = hd;
12412 UPB_UNUSED(handle);
12413 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
12414 return len;
12415 err:
12416 return 0;
12417 }
12418
textprinter_startsubmsg(void * closure,const void * handler_data)12419 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
12420 upb_textprinter *p = closure;
12421 const char *name = handler_data;
12422 CHECK(indent(p));
12423 putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
12424 p->indent_depth_++;
12425 return p;
12426 err:
12427 return UPB_BREAK;
12428 }
12429
textprinter_endsubmsg(void * closure,const void * handler_data)12430 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
12431 upb_textprinter *p = closure;
12432 UPB_UNUSED(handler_data);
12433 p->indent_depth_--;
12434 CHECK(indent(p));
12435 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
12436 CHECK(endfield(p));
12437 return true;
12438 err:
12439 return false;
12440 }
12441
/* Handlers-registration callback: installs the text printer's handlers on
 * |h| for the message definition attached to it.
 *
 * The start/end-message handlers are always set.  Each field then gets the
 * handlers matching its type, with the fielddef as handler data -- except
 * for submessage fields, whose handler data is the name to print: the short
 * name of the submessage type when upb_fielddef_istagdelim() is true,
 * otherwise the field name. */
static void onmreg(const void *c, upb_handlers *h) {
  const upb_msgdef *msgdef = upb_handlers_msgdef(h);
  upb_msg_field_iter it;
  UPB_UNUSED(c);

  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);

  upb_msg_field_begin(&it, msgdef);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *f = upb_msg_iter_field(&it);
    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&attr, f);

    switch (upb_fielddef_type(f)) {
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, f, textprinter_putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, f, textprinter_putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, f, textprinter_putbool, &attr);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
        upb_handlers_setstring(h, f, textprinter_putstr, &attr);
        upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
        break;
      case UPB_TYPE_MESSAGE: {
        const char *label;
        if (upb_fielddef_istagdelim(f)) {
          label = shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)));
        } else {
          label = upb_fielddef_name(f);
        }
        /* Replace the fielddef with the name to print. */
        upb_handlerattr_sethandlerdata(&attr, label);
        upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
        break;
      }
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, f, textprinter_putenum, &attr);
        break;
    }

    upb_msg_field_next(&it);
  }
}
12500
/* Resets the printer's formatting state: indent depth back to 0 and the
 * single-line flag set to |single_line|. */
static void textprinter_reset(upb_textprinter *p, bool single_line) {
  p->indent_depth_ = 0;
  p->single_line_ = single_line;
}
12505
12506
12507 /* Public API *****************************************************************/
12508
upb_textprinter_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)12509 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
12510 upb_bytessink *output) {
12511 upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
12512 if (!p) return NULL;
12513
12514 p->output_ = output;
12515 upb_sink_reset(&p->input_, h, p);
12516 textprinter_reset(p, false);
12517
12518 return p;
12519 }
12520
upb_textprinter_newhandlers(const upb_msgdef * m,const void * owner)12521 const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
12522 const void *owner) {
12523 return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
12524 }
12525
upb_textprinter_input(upb_textprinter * p)12526 upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
12527
/* Sets the printer's single-line flag.  Unlike textprinter_reset(), the
 * indent depth is left untouched. */
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  upb_textprinter *printer = p;
  printer->single_line_ = single_line;
}
12531
12532
12533 /* Index is descriptor type. */
12534 const uint8_t upb_pb_native_wire_types[] = {
12535 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
12536 UPB_WIRE_TYPE_64BIT, /* DOUBLE */
12537 UPB_WIRE_TYPE_32BIT, /* FLOAT */
12538 UPB_WIRE_TYPE_VARINT, /* INT64 */
12539 UPB_WIRE_TYPE_VARINT, /* UINT64 */
12540 UPB_WIRE_TYPE_VARINT, /* INT32 */
12541 UPB_WIRE_TYPE_64BIT, /* FIXED64 */
12542 UPB_WIRE_TYPE_32BIT, /* FIXED32 */
12543 UPB_WIRE_TYPE_VARINT, /* BOOL */
12544 UPB_WIRE_TYPE_DELIMITED, /* STRING */
12545 UPB_WIRE_TYPE_START_GROUP, /* GROUP */
12546 UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */
12547 UPB_WIRE_TYPE_DELIMITED, /* BYTES */
12548 UPB_WIRE_TYPE_VARINT, /* UINT32 */
12549 UPB_WIRE_TYPE_VARINT, /* ENUM */
12550 UPB_WIRE_TYPE_32BIT, /* SFIXED32 */
12551 UPB_WIRE_TYPE_64BIT, /* SFIXED64 */
12552 UPB_WIRE_TYPE_VARINT, /* SINT32 */
12553 UPB_WIRE_TYPE_VARINT, /* SINT64 */
12554 };
12555
12556 /* A basic branch-based decoder, uses 32-bit values to get good performance
12557 * on 32-bit architectures (but performs well on 64-bits also).
12558 * This scheme comes from the original Google Protobuf implementation
12559 * (proto2). */
upb_vdecode_max8_branch32(upb_decoderet r)12560 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
12561 upb_decoderet err = {NULL, 0};
12562 const char *p = r.p;
12563 uint32_t low = (uint32_t)r.val;
12564 uint32_t high = 0;
12565 uint32_t b;
12566 b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
12567 b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
12568 b = *(p++); low |= (b & 0x7fU) << 28;
12569 high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done;
12570 b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done;
12571 b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
12572 b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
12573 b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
12574 b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
12575 return err;
12576
12577 done:
12578 r.val = ((uint64_t)high << 32) | low;
12579 r.p = p;
12580 return r;
12581 }
12582
12583 /* Like the previous, but uses 64-bit values. */
upb_vdecode_max8_branch64(upb_decoderet r)12584 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
12585 const char *p = r.p;
12586 uint64_t val = r.val;
12587 uint64_t b;
12588 upb_decoderet err = {NULL, 0};
12589 b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
12590 b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
12591 b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
12592 b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
12593 b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
12594 b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
12595 b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
12596 b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
12597 return err;
12598
12599 done:
12600 r.val = val;
12601 r.p = p;
12602 return r;
12603 }
12604
12605 #line 1 "upb/json/parser.rl"
12606 /*
12607 ** upb::json::Parser (upb_json_parser)
12608 **
12609 ** A parser that uses the Ragel State Machine Compiler to generate
12610 ** the finite automata.
12611 **
12612 ** Ragel only natively handles regular languages, but we can manually
12613 ** program it a bit to handle context-free languages like JSON, by using
12614 ** the "fcall" and "fret" constructs.
12615 **
12616 ** This parser can handle the basics, but needs several things to be fleshed
12617 ** out:
12618 **
12619 ** - handling of unicode escape sequences (including high surrogate pairs).
12620 ** - properly check and report errors for unknown fields, stack overflow,
12621 ** improper array nesting (or lack of nesting).
12622 ** - handling of base64 sequences with padding characters.
12623 ** - handling of push-back (non-success returns from sink functions).
12624 ** - handling of keys/escape-sequences/etc that span input buffers.
12625 */
12626
12627 #include <ctype.h>
12628 #include <errno.h>
12629 #include <float.h>
12630 #include <math.h>
12631 #include <stdint.h>
12632 #include <stdio.h>
12633 #include <stdlib.h>
12634 #include <string.h>
12635
12636 #include <time.h>
12637
12638
/* Number of entries in both the parser's frame stack and its Ragel
 * parser_stack. */
#define UPB_JSON_MAX_DEPTH 64
12640
/* Kinds of value message.  The enumerators are numbered consecutively from
 * 0 in declaration order. */
enum {
  VALUE_NULLVALUE = 0,
  VALUE_NUMBERVALUE,
  VALUE_STRINGVALUE,
  VALUE_BOOLVALUE,
  VALUE_STRUCTVALUE,
  VALUE_LISTVALUE
};
12650
12651 /* Forward declare */
12652 static bool is_top_level(upb_json_parser *p);
12653 static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type);
12654 static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type);
12655
12656 static bool is_number_wrapper_object(upb_json_parser *p);
12657 static bool does_number_wrapper_start(upb_json_parser *p);
12658 static bool does_number_wrapper_end(upb_json_parser *p);
12659
12660 static bool is_string_wrapper_object(upb_json_parser *p);
12661 static bool does_string_wrapper_start(upb_json_parser *p);
12662 static bool does_string_wrapper_end(upb_json_parser *p);
12663
12664 static bool is_fieldmask_object(upb_json_parser *p);
12665 static bool does_fieldmask_start(upb_json_parser *p);
12666 static bool does_fieldmask_end(upb_json_parser *p);
12667 static void start_fieldmask_object(upb_json_parser *p);
12668 static void end_fieldmask_object(upb_json_parser *p);
12669
12670 static void start_wrapper_object(upb_json_parser *p);
12671 static void end_wrapper_object(upb_json_parser *p);
12672
12673 static void start_value_object(upb_json_parser *p, int value_type);
12674 static void end_value_object(upb_json_parser *p);
12675
12676 static void start_listvalue_object(upb_json_parser *p);
12677 static void end_listvalue_object(upb_json_parser *p);
12678
12679 static void start_structvalue_object(upb_json_parser *p);
12680 static void end_structvalue_object(upb_json_parser *p);
12681
12682 static void start_object(upb_json_parser *p);
12683 static void end_object(upb_json_parser *p);
12684
12685 static void start_any_object(upb_json_parser *p, const char *ptr);
12686 static bool end_any_object(upb_json_parser *p, const char *ptr);
12687
12688 static bool start_subobject(upb_json_parser *p);
12689 static void end_subobject(upb_json_parser *p);
12690
12691 static void start_member(upb_json_parser *p);
12692 static void end_member(upb_json_parser *p);
12693 static bool end_membername(upb_json_parser *p);
12694
12695 static void start_any_member(upb_json_parser *p, const char *ptr);
12696 static void end_any_member(upb_json_parser *p, const char *ptr);
12697 static bool end_any_membername(upb_json_parser *p);
12698
12699 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
12700 const upb_bufhandle *handle);
12701 static bool end(void *closure, const void *hd);
12702
12703 static const char eof_ch = 'e';
12704
12705 /* stringsink */
12706 typedef struct {
12707 upb_byteshandler handler;
12708 upb_bytessink sink;
12709 char *ptr;
12710 size_t len, size;
12711 } upb_stringsink;
12712
12713
stringsink_start(void * _sink,const void * hd,size_t size_hint)12714 static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
12715 upb_stringsink *sink = _sink;
12716 sink->len = 0;
12717 UPB_UNUSED(hd);
12718 UPB_UNUSED(size_hint);
12719 return sink;
12720 }
12721
stringsink_string(void * _sink,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)12722 static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
12723 size_t len, const upb_bufhandle *handle) {
12724 upb_stringsink *sink = _sink;
12725 size_t new_size = sink->size;
12726
12727 UPB_UNUSED(hd);
12728 UPB_UNUSED(handle);
12729
12730 while (sink->len + len > new_size) {
12731 new_size *= 2;
12732 }
12733
12734 if (new_size != sink->size) {
12735 sink->ptr = realloc(sink->ptr, new_size);
12736 sink->size = new_size;
12737 }
12738
12739 memcpy(sink->ptr + sink->len, ptr, len);
12740 sink->len += len;
12741
12742 return len;
12743 }
12744
/* Initializes |sink|: allocates an initial 32-byte buffer, sets the stored
 * length to 0, installs the start/string handlers and binds the embedded
 * bytes sink to them.  The result of malloc() is not checked here. */
void upb_stringsink_init(upb_stringsink *sink) {
  const size_t initial_size = 32;

  /* Handlers first, then the sink that dispatches to them. */
  upb_byteshandler_init(&sink->handler);
  upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
  upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
  upb_bytessink_reset(&sink->sink, &sink->handler, sink);

  /* Empty buffer with the initial capacity. */
  sink->size = initial_size;
  sink->ptr = malloc(sink->size);
  sink->len = 0;
}
12756
/* Releases the buffer owned by |sink|.  The sink's fields are not reset. */
void upb_stringsink_uninit(upb_stringsink *sink) {
  char *buffer = sink->ptr;
  free(buffer);
}
12758
12759 typedef struct {
12760 /* For encoding Any value field in binary format. */
12761 const upb_handlers *encoder_handlers;
12762 upb_pb_encoder *encoder;
12763 upb_stringsink stringsink;
12764
12765 /* For decoding Any value field in json format. */
12766 upb_json_parsermethod *parser_method;
12767 upb_json_parser* parser;
12768 upb_sink sink;
12769
12770 /* Mark the range of uninterpreted values in json input before type url. */
12771 const char *before_type_url_start;
12772 const char *before_type_url_end;
12773
12774 /* Mark the range of uninterpreted values in json input after type url. */
12775 const char *after_type_url_start;
12776 } upb_jsonparser_any_frame;
12777
12778 typedef struct {
12779 upb_sink sink;
12780
12781 /* The current message in which we're parsing, and the field whose value we're
12782 * expecting next. */
12783 const upb_msgdef *m;
12784 const upb_fielddef *f;
12785
12786 /* The table mapping json name to fielddef for this message. */
12787 upb_strtable *name_table;
12788
12789 /* We are in a repeated-field context. We need this flag to decide whether to
12790 * handle the array as a normal repeated field or a
12791 * google.protobuf.ListValue/google.protobuf.Value. */
12792 bool is_repeated;
12793
12794 /* We are in a repeated-field context, ready to emit mapentries as
12795 * submessages. This flag alters the start-of-object (open-brace) behavior to
12796 * begin a sequence of mapentry messages rather than a single submessage. */
12797 bool is_map;
12798
12799 /* We are in a map-entry message context. This flag is set when parsing the
12800 * value field of a single map entry and indicates to all value-field parsers
12801 * (subobjects, strings, numbers, and bools) that the map-entry submessage
12802 * should end as soon as the value is parsed. */
12803 bool is_mapentry;
12804
12805 /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
12806 * message's map field that we're currently parsing. This differs from |f|
12807 * because |f| is the field in the *current* message (i.e., the map-entry
12808 * message itself), not the parent's field that leads to this map. */
12809 const upb_fielddef *mapfield;
12810
12811 /* We are in an Any message context. This flag is set when parsing the Any
12812 * message and indicates to all field parsers (subobjects, strings, numbers,
12813 * and bools) that the parsed field should be serialized as binary data or
12814 * cached (type url not found yet). */
12815 bool is_any;
12816
12817 /* The type of packed message in Any. */
12818 upb_jsonparser_any_frame *any_frame;
12819
12820 /* True if the field to be parsed is unknown. */
12821 bool is_unknown_field;
12822 } upb_jsonparser_frame;
12823
/* Puts |frame| into its empty state: every pointer member NULL and every
 * flag false.  The frame's sink member is not touched. */
static void init_frame(upb_jsonparser_frame* frame) {
  /* Pointers. */
  frame->m = NULL;
  frame->f = NULL;
  frame->name_table = NULL;
  frame->mapfield = NULL;
  frame->any_frame = NULL;

  /* Flags. */
  frame->is_repeated = false;
  frame->is_map = false;
  frame->is_mapentry = false;
  frame->is_any = false;
  frame->is_unknown_field = false;
}
12836
12837 struct upb_json_parser {
12838 upb_env *env;
12839 const upb_json_parsermethod *method;
12840 upb_bytessink input_;
12841
12842 /* Stack to track the JSON scopes we are in. */
12843 upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
12844 upb_jsonparser_frame *top;
12845 upb_jsonparser_frame *limit;
12846
12847 upb_status status;
12848
12849 /* Ragel's internal parsing stack for the parsing state machine. */
12850 int current_state;
12851 int parser_stack[UPB_JSON_MAX_DEPTH];
12852 int parser_top;
12853
12854 /* The handle for the current buffer. */
12855 const upb_bufhandle *handle;
12856
12857 /* Accumulate buffer. See details in parser.rl. */
12858 const char *accumulated;
12859 size_t accumulated_len;
12860 char *accumulate_buf;
12861 size_t accumulate_buf_size;
12862
12863 /* Multi-part text data. See details in parser.rl. */
12864 int multipart_state;
12865 upb_selector_t string_selector;
12866
12867 /* Input capture. See details in parser.rl. */
12868 const char *capture;
12869
12870 /* Intermediate result of parsing a unicode escape sequence. */
12871 uint32_t digit;
12872
12873 /* For resolve type url in Any. */
12874 const upb_symtab *symtab;
12875
12876 /* Whether to proceed if unknown field is met. */
12877 bool ignore_json_unknown;
12878
12879 /* Cache for parsing timestamp due to base and zone are handled in different
12880 * handlers. */
12881 struct tm tm;
12882 };
12883
start_jsonparser_frame(upb_json_parser * p)12884 static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) {
12885 upb_jsonparser_frame *inner;
12886 inner = p->top + 1;
12887 init_frame(inner);
12888 return inner;
12889 }
12890
12891 struct upb_json_parsermethod {
12892 upb_refcounted base;
12893
12894 upb_byteshandler input_handler_;
12895
12896 /* Mainly for the purposes of refcounting, so all the fielddefs we point
12897 * to stay alive. */
12898 const upb_msgdef *msg;
12899
12900 /* Keys are upb_msgdef*, values are upb_strtable (json_name -> fielddef) */
12901 upb_inttable name_tables;
12902 };
12903
/* Returns false from the enclosing function when |x| evaluates to false.
 * Wrapped in do/while(0) so that "PARSER_CHECK_RETURN(x);" is exactly one
 * statement and cannot capture a following "else". */
#define PARSER_CHECK_RETURN(x) do { if (!(x)) return false; } while (0)
12905
/* Sets every pointer member of |frame| to NULL.  The embedded stringsink and
 * sink members are left untouched. */
static void json_parser_any_frame_reset(upb_jsonparser_any_frame *frame) {
  /* Binary encoding side. */
  frame->encoder_handlers = NULL;
  frame->encoder = NULL;

  /* JSON decoding side. */
  frame->parser_method = NULL;
  frame->parser = NULL;

  /* Markers for uninterpreted input. */
  frame->before_type_url_start = NULL;
  frame->before_type_url_end = NULL;
  frame->after_type_url_start = NULL;
}
12915
/* Builds the encoder/parser chain for an Any payload of type |payload_type|:
 * a pb encoder that writes into the frame's string sink, and a nested JSON
 * parser (sharing |p|'s env, symtab and unknown-field policy) whose output
 * is fed to that encoder. */
static void json_parser_any_frame_set_payload_type(
    upb_json_parser *p,
    upb_jsonparser_any_frame *frame,
    const upb_msgdef *payload_type) {
  /* Encoder: handlers, output buffer, then the encoder itself. */
  frame->encoder_handlers =
      upb_pb_encoder_newhandlers(payload_type, &frame->encoder_handlers);
  upb_stringsink_init(&frame->stringsink);
  frame->encoder = upb_pb_encoder_create(p->env, frame->encoder_handlers,
                                         &frame->stringsink.sink);

  /* Parser: method, a sink that targets the encoder, then the parser. */
  frame->parser_method =
      upb_json_parsermethod_new(payload_type, &frame->parser_method);
  upb_sink_reset(&frame->sink, frame->encoder_handlers, frame->encoder);
  frame->parser = upb_json_parser_create(p->env, frame->parser_method,
                                         p->symtab, &frame->sink,
                                         p->ignore_json_unknown);
}
12937
/* Drops the frame's references to its encoder handlers and parser method
 * and releases the string sink's buffer.  The encoder and parser objects
 * themselves are not released here. */
static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) {
  const void *handlers_owner = &frame->encoder_handlers;
  const void *method_owner = &frame->parser_method;

  upb_handlers_unref(frame->encoder_handlers, handlers_owner);
  upb_json_parsermethod_unref(frame->parser_method, method_owner);
  upb_stringsink_uninit(&frame->stringsink);
}
12945
/* True once the frame has an encoder, i.e. frame->encoder is non-NULL. */
static bool json_parser_any_frame_has_type_url(
    upb_jsonparser_any_frame *frame) {
  if (frame->encoder == NULL) {
    return false;
  }
  return true;
}
12950
/* True if the range recorded before the type url is non-empty, i.e. its
 * start and end markers differ. */
static bool json_parser_any_frame_has_value_before_type_url(
    upb_jsonparser_any_frame *frame) {
  const char *range_start = frame->before_type_url_start;
  const char *range_end = frame->before_type_url_end;
  return range_start != range_end;
}
12955
/* True if a start marker for input after the type url has been recorded. */
static bool json_parser_any_frame_has_value_after_type_url(
    upb_jsonparser_any_frame *frame) {
  if (frame->after_type_url_start == NULL) {
    return false;
  }
  return true;
}
12960
/* True if the frame has recorded uninterpreted input either before or after
 * the type url. */
static bool json_parser_any_frame_has_value(
    upb_jsonparser_any_frame *frame) {
  if (json_parser_any_frame_has_value_before_type_url(frame)) {
    return true;
  }
  return json_parser_any_frame_has_value_after_type_url(frame);
}
12966
/* Records |ptr| as the end of the input seen before the type url.  Has no
 * effect once the frame has an encoder. */
static void json_parser_any_frame_set_before_type_url_end(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (frame->encoder != NULL) {
    return;
  }
  frame->before_type_url_end = ptr;
}
12974
/* Records |ptr| as the start of the input that follows the type url.  Only
 * the first call made after the type url is known has an effect; calls made
 * before that, or after a marker has been recorded, do nothing. */
static void json_parser_any_frame_set_after_type_url_start_once(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (!json_parser_any_frame_has_type_url(frame)) {
    return;
  }
  if (frame->after_type_url_start != NULL) {
    return;
  }
  frame->after_type_url_start = ptr;
}
12983
12984 /* Used to signal that a capture has been suspended. */
12985 static char suspend_capture;
12986
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)12987 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
12988 upb_handlertype_t type) {
12989 upb_selector_t sel;
12990 bool ok = upb_handlers_getselector(p->top->f, type, &sel);
12991 UPB_ASSERT(ok);
12992 return sel;
12993 }
12994
parser_getsel(upb_json_parser * p)12995 static upb_selector_t parser_getsel(upb_json_parser *p) {
12996 return getsel_for_handlertype(
12997 p, upb_handlers_getprimitivehandlertype(p->top->f));
12998 }
12999
/* Returns true if another frame can be pushed.  If the slot above the top
 * frame is the stack limit, sets the "Nesting too deep" error on the
 * parser's status, reports it to the env and returns false. */
static bool check_stack(upb_json_parser *p) {
  if ((p->top + 1) != p->limit) {
    return true;
  }

  upb_status_seterrmsg(&p->status, "Nesting too deep");
  upb_env_reporterror(p->env, &p->status);
  return false;
}
13009
/* Points frame->name_table at the name table registered in the parser
 * method for the frame's message.  The lookup is asserted to succeed. */
static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
  upb_value entry;
  bool found = upb_inttable_lookupptr(&p->method->name_tables, frame->m,
                                      &entry);
  UPB_ASSERT(found);
  frame->name_table = upb_value_getptr(entry);
}
13016
13017 /* There are GCC/Clang built-ins for overflow checking which we could start
13018 * using if there was any performance benefit to it. */
13019
/* Stores a + b in *c and returns true, unless the sum does not fit in a
 * size_t, in which case it returns false and leaves *c untouched. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;
  if (b > headroom) {
    return false;
  }
  *c = a + b;
  return true;
}
13025
/* Returns a * b, clamped to SIZE_MAX when the product does not fit in a
 * size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  /* The product overflows exactly when a exceeds SIZE_MAX / b. */
  if (b != 0 && a > SIZE_MAX / b) {
    return SIZE_MAX;
  }
  return a * b;
}
13034
13035
13036 /* Base64 decoding ************************************************************/
13037
13038 /* TODO(haberman): make this streaming. */
13039
/* Base64 decoding table with one entry per possible byte value (256 in
 * total).  Entries for 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/' hold the
 * 6-bit value of that character; every other entry is -1.  Each row below
 * covers 16 byte values, starting at the value given in the row comment. */
static const signed char b64table[] = {
  /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
  /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
  /* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
  /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
13074
13075 /* Returns the table value sign-extended to 32 bits. Knowing that the upper
13076 * bits will be 1 for unrecognized characters makes it easier to check for
13077 * this error condition later (see below). */
b64lookup(unsigned char ch)13078 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
13079
/* Returns true if |ch| is neither a character of the base64 alphabet nor the
 * padding character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
13083
/* Decodes |len| bytes of base64 text at |ptr| and pushes the decoded bytes to
 * the current frame's sink with selector |sel|.
 *
 * Input is consumed in groups of four characters, each yielding three bytes.
 * A group that contains '=' is treated as the last one: it yields one or two
 * bytes and the function returns without looking at any input after it.
 *
 * Returns true on success.  On malformed input (a trailing group shorter than
 * four characters, characters outside the alphabet, or misplaced padding) an
 * error naming the field is stored in p->status, reported through p->env, and
 * false is returned.
 *
 * Each lookup is converted to uint32_t before shifting: b64lookup() returns -1
 * for unrecognized characters and left-shifting a negative signed value is
 * undefined behavior.  As an unsigned value, -1 still sets the top bit of the
 * combined word, which is what the fast-path validity check relies on. */
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
                        size_t len) {
  const char *limit = ptr + len;
  for (; ptr < limit; ptr += 4) {
    uint32_t val;
    char output[3];

    if (limit - ptr < 4) {
      upb_status_seterrf(&p->status,
                         "Base64 input for bytes field not a multiple of 4: %s",
                         upb_fielddef_name(p->top->f));
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6  |
          (uint32_t)b64lookup(ptr[3]);

    /* Test the upper bit; it is set if any of the characters returned -1. */
    if (val & 0x80000000) {
      goto otherchar;
    }

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    output[2] = val & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
  }
  return true;

otherchar:
  /* At least one character of this group is not in the alphabet: it is either
   * an invalid character or padding. */
  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
      nonbase64(ptr[3])) {
    upb_status_seterrf(&p->status,
                       "Non-base64 characters in bytes field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (ptr[2] == '=') {
    uint32_t val;
    char output;

    /* Last group contains only two input bytes, one output byte. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12;

    UPB_ASSERT(!(val & 0x80000000));
    output = val >> 16;
    upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
    return true;
  } else {
    uint32_t val;
    char output[2];

    /* Last group contains only three input bytes, two output bytes. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6;

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
    return true;
  }

badpadding:
  /* The message includes the four characters of the offending group. */
  upb_status_seterrf(&p->status,
                     "Incorrect base64 padding for field: %s (%.*s)",
                     upb_fielddef_name(p->top->f),
                     4, ptr);
  upb_env_reporterror(p->env, &p->status);
  return false;
}
13167
13168
13169 /* Accumulate buffer **********************************************************/
13170
13171 /* Functionality for accumulating a buffer.
13172 *
13173 * Some parts of the parser need an entire value as a contiguous string. For
13174 * example, to look up a member name in a hash table, or to turn a string into
13175 * a number, the relevant library routines need the input string to be in
13176 * contiguous memory, even if the value spanned two or more buffers in the
13177 * input. These routines handle that.
13178 *
13179 * In the common case we can just point to the input buffer to get this
13180 * contiguous string and avoid any actual copy. So we optimistically begin
13181 * this way. But there are a few cases where we must instead copy into a
13182 * separate buffer:
13183 *
13184 * 1. The string was not contiguous in the input (it spanned buffers).
13185 *
13186 * 2. The string included escape sequences that need to be interpreted to get
13187 * the true value in a contiguous buffer. */
13188
/* Debug check that no accumulated data is pending: the accumulated pointer
 * must be NULL and the accumulated length must be zero. */
static void assert_accumulate_empty(upb_json_parser *p) {
  UPB_ASSERT(p->accumulated == NULL);
  UPB_ASSERT(p->accumulated_len == 0);
}
13193
/* Forgets the accumulated data by resetting its length and pointer.  The
 * accumulate buffer itself (p->accumulate_buf) is not touched. */
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
13198
13199 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)13200 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
13201 void *mem;
13202 size_t old_size = p->accumulate_buf_size;
13203 size_t new_size = UPB_MAX(old_size, 128);
13204 while (new_size < need) {
13205 new_size = saturating_multiply(new_size, 2);
13206 }
13207
13208 mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
13209 if (!mem) {
13210 upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
13211 upb_env_reporterror(p->env, &p->status);
13212 return false;
13213 }
13214
13215 p->accumulate_buf = mem;
13216 p->accumulate_buf_size = new_size;
13217 return true;
13218 }
13219
13220 /* Logically appends the given data to the append buffer.
13221 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
13222 * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)13223 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
13224 bool can_alias) {
13225 size_t need;
13226
13227 if (!p->accumulated && can_alias) {
13228 p->accumulated = buf;
13229 p->accumulated_len = len;
13230 return true;
13231 }
13232
13233 if (!checked_add(p->accumulated_len, len, &need)) {
13234 upb_status_seterrmsg(&p->status, "Integer overflow.");
13235 upb_env_reporterror(p->env, &p->status);
13236 return false;
13237 }
13238
13239 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
13240 return false;
13241 }
13242
13243 if (p->accumulated != p->accumulate_buf) {
13244 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
13245 p->accumulated = p->accumulate_buf;
13246 }
13247
13248 memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
13249 p->accumulated_len += len;
13250 return true;
13251 }
13252
13253 /* Returns a pointer to the data accumulated since the last accumulate_clear()
13254 * call, and writes the length to *len. This with point either to the input
13255 * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)13256 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
13257 UPB_ASSERT(p->accumulated);
13258 *len = p->accumulated_len;
13259 return p->accumulated;
13260 }
13261
13262
/* Multi-part text data *******************************************************/
13264
13265 /* When we have text data in the input, it can often come in multiple segments.
13266 * For example, there may be some raw string data followed by an escape
13267 * sequence. The two segments are processed with different logic. Also buffer
13268 * seams in the input can cause multiple segments.
13269 *
13270 * As we see segments, there are two main cases for how we want to process them:
13271 *
13272 * 1. we want to push the captured input directly to string handlers.
13273 *
13274 * 2. we need to accumulate all the parts into a contiguous buffer for further
13275 * processing (field name lookup, string->number conversion, etc). */
13276
/* This is the set of states for p->multipart_state.  The state is chosen by
 * multipart_startaccum() / multipart_start(), consulted by multipart_text()
 * for every text segment, and reset by multipart_end(). */
enum {
  /* We are not currently processing multipart data.  multipart_text() treats
   * a call in this state as an internal error. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
13290
13291 /* Start a multi-part text value where we accumulate the data for processing at
13292 * the end. */
multipart_startaccum(upb_json_parser * p)13293 static void multipart_startaccum(upb_json_parser *p) {
13294 assert_accumulate_empty(p);
13295 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
13296 p->multipart_state = MULTIPART_ACCUMULATE;
13297 }
13298
13299 /* Start a multi-part text value where we immediately push text data to a string
13300 * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)13301 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
13302 assert_accumulate_empty(p);
13303 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
13304 p->multipart_state = MULTIPART_PUSHEAGERLY;
13305 p->string_selector = sel;
13306 }
13307
/* Handles one segment (|buf|, |len|) of the multi-part text value in progress,
 * according to p->multipart_state:
 *
 *   MULTIPART_ACCUMULATE:  the segment is appended to the accumulate buffer;
 *                          the result of accumulate_append() is returned.
 *   MULTIPART_PUSHEAGERLY: the segment is pushed to the current sink with
 *                          p->string_selector; p->handle is passed along only
 *                          when |can_alias| is true.
 *   MULTIPART_INACTIVE:    an internal error is recorded and false returned.
 *
 * Returns true in every other case. */
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  if (p->multipart_state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (p->multipart_state == MULTIPART_PUSHEAGERLY) {
    const upb_bufhandle *handle = NULL;
    if (can_alias) {
      handle = p->handle;
    }
    upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
    return true;
  }

  if (p->multipart_state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  return true;
}
13332
13333 /* Note: this invalidates the accumulate buffer! Call only after reading its
13334 * contents. */
multipart_end(upb_json_parser * p)13335 static void multipart_end(upb_json_parser *p) {
13336 UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
13337 p->multipart_state = MULTIPART_INACTIVE;
13338 accumulate_clear(p);
13339 }
13340
13341
13342 /* Input capture **************************************************************/
13343
13344 /* Functionality for capturing a region of the input as text. Gracefully
13345 * handles the case where a buffer seam occurs in the middle of the captured
13346 * region. */
13347
/* Marks |ptr| as the start of a captured input region.  A multipart value
 * must be active and no capture may already be in progress (both asserted). */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
  UPB_ASSERT(p->capture == NULL);

  p->capture = ptr;
}
13353
/* Ends the capture in progress at |ptr| and hands the region from the capture
 * start up to |ptr| to multipart_text() as aliasable data.  On success the
 * capture is cleared and true is returned; on failure the capture is left in
 * place and false is returned. */
static bool capture_end(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->capture);

  if (!multipart_text(p, p->capture, ptr - p->capture, true)) {
    return false;
  }

  p->capture = NULL;
  return true;
}
13363
13364 /* This is called at the end of each input buffer (ie. when we have hit a
13365 * buffer seam). If we are in the middle of capturing the input, this
13366 * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)13367 static void capture_suspend(upb_json_parser *p, const char **ptr) {
13368 if (!p->capture) return;
13369
13370 if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
13371 /* We use this as a signal that we were in the middle of capturing, and
13372 * that capturing should resume at the beginning of the next buffer.
13373 *
13374 * We can't use *ptr here, because we have no guarantee that this pointer
13375 * will be valid when we resume (if the underlying memory is freed, then
13376 * using the pointer at all, even to compare to NULL, is likely undefined
13377 * behavior). */
13378 p->capture = &suspend_capture;
13379 } else {
13380 /* Need to back up the pointer to the beginning of the capture, since
13381 * we were not able to actually preserve it. */
13382 *ptr = p->capture;
13383 }
13384 }
13385
/* Counterpart of capture_suspend().  If a capture was suspended (p->capture
 * holds the &suspend_capture marker), it is restarted at |ptr|.  Does nothing
 * when no capture is pending. */
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (p->capture == NULL) {
    return;
  }

  UPB_ASSERT(p->capture == &suspend_capture);
  p->capture = ptr;
}
13392
13393
13394 /* Callbacks from the parser **************************************************/
13395
13396 /* These are the functions called directly from the parser itself.
13397 * We define these in the same order as their declarations in the parser. */
13398
/* Maps the character that follows a backslash in an escape sequence to the
 * character the escape stands for.  Any other input trips an assertion (and
 * yields 'x' when assertions are disabled). */
static char escape_char(char in) {
  switch (in) {
    /* Escapes that stand for themselves. */
    case '"':  return '"';
    case '\\': return '\\';
    case '/':  return '/';

    /* Control characters. */
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';

    default:
      UPB_ASSERT(0);
      return 'x';
  }
}
13414
/* Translates the escape character at *ptr and feeds the resulting single
 * character to multipart_text() as non-aliasable data.  Returns that call's
 * result. */
static bool escape(upb_json_parser *p, const char *ptr) {
  char unescaped = escape_char(*ptr);
  return multipart_text(p, &unescaped, 1, false);
}
13419
/* Resets the hex accumulator (p->digit) to zero before the digits of an
 * escape are fed to hexdigit(). */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
13423
/* Shifts p->digit left by one hex digit and adds the value of the hex digit at
 * *ptr.  The character must be one of 0-9, a-f or A-F (asserted for the
 * upper-case range, which is the fall-through case). */
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char ch = *ptr;
  int nibble;

  if (ch >= '0' && ch <= '9') {
    nibble = ch - '0';
  } else if (ch >= 'a' && ch <= 'f') {
    nibble = (ch - 'a') + 10;
  } else {
    UPB_ASSERT(ch >= 'A' && ch <= 'F');
    nibble = (ch - 'A') + 10;
  }

  p->digit <<= 4;
  p->digit += nibble;
}
13438
/* Emits the code point collected in p->digit as UTF-8 by passing the encoded
 * bytes to multipart_text() as non-aliasable data; returns that call's result.
 *
 * Only one-, two- and three-byte encodings are produced, which is enough for
 * \u0000 through \uFFFF. */
static bool end_hex(upb_json_parser *p) {
  uint32_t codepoint = p->digit;
  char utf8[3];
  int length;

  if (codepoint <= 0x7F) {
    utf8[0] = codepoint;
    length = 1;
  } else if (codepoint <= 0x07FF) {
    utf8[0] = ((codepoint >> 6) & 0x1F) | 0xC0;
    utf8[1] = (codepoint & 0x3F) | 0x80;
    length = 2;
  } else /* codepoint <= 0xFFFF */ {
    utf8[0] = ((codepoint >> 12) & 0x0F) | 0xE0;
    utf8[1] = ((codepoint >> 6) & 0x3F) | 0x80;
    utf8[2] = (codepoint & 0x3F) | 0x80;
    length = 3;
  }
  /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
   * we have to wait for the next escape to get the full code point). */

  return multipart_text(p, utf8, length, false);
}
13466
/* Parser callback: a run of plain text starts at |ptr|; begin capturing. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
13470
/* Parser callback: the run of plain text ends at |ptr|.  Finishes the capture
 * and returns the result of capture_end(). */
static bool end_text(upb_json_parser *p, const char *ptr) {
  bool ok = capture_end(p, ptr);
  return ok;
}
13474
/* Parser callback: a number starts at |ptr|.
 *
 * Depending on where the number appears, first opens the wrapper or Value
 * object that will hold it (starting a sub-object for the non-top-level
 * cases), then begins accumulating and capturing the number text.  Returns
 * false if the number appears at top level in a message that is neither a
 * number wrapper nor a Value, or if a sub-object cannot be started. */
static bool start_number(upb_json_parser *p, const char *ptr) {
  if (is_top_level(p)) {
    if (is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (!is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      return false;
    } else {
      start_value_object(p, VALUE_NUMBERVALUE);
    }
  } else if (does_number_wrapper_start(p)) {
    if (!start_subobject(p)) return false;
    start_wrapper_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_NUMBERVALUE);
  }

  /* The number text is collected in full and converted in end_number(). */
  multipart_startaccum(p);
  capture_begin(p, ptr);
  return true;
}
13500
13501 static bool parse_number(upb_json_parser *p, bool is_quoted);
13502
/* Ends the capture of a number at |ptr| and converts the accumulated text.
 *
 * When the current frame has no field (p->top->f == NULL) the text is simply
 * discarded.  Otherwise the result of parse_number() for an unquoted number
 * is returned.  Returns false if the capture cannot be completed. */
static bool end_number_nontop(upb_json_parser *p, const char *ptr) {
  if (!capture_end(p, ptr)) {
    return false;
  }

  if (p->top->f != NULL) {
    return parse_number(p, false);
  }

  multipart_end(p);
  return true;
}
13515
/* Parser callback: a number ends at |ptr|.
 *
 * Converts the number via end_number_nontop(), then closes the wrapper or
 * Value object that start_number() may have opened, including the enclosing
 * sub-object when not at top level.  Returns false only if the conversion
 * fails. */
static bool end_number(upb_json_parser *p, const char *ptr) {
  if (!end_number_nontop(p, ptr)) {
    return false;
  }

  if (does_number_wrapper_end(p)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    /* Plain numeric field: nothing to close. */
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
13539
13540 /* |buf| is NULL-terminated. |buf| itself will never include quotes;
13541 * |is_quoted| tells us whether this text originally appeared inside quotes. */
parse_number_from_buffer(upb_json_parser * p,const char * buf,bool is_quoted)13542 static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
13543 bool is_quoted) {
13544 size_t len = strlen(buf);
13545 const char *bufend = buf + len;
13546 char *end;
13547 upb_fieldtype_t type = upb_fielddef_type(p->top->f);
13548 double val;
13549 double dummy;
13550 double inf = 1.0 / 0.0; /* C89 does not have an INFINITY macro. */
13551
13552 errno = 0;
13553
13554 if (len == 0 || buf[0] == ' ') {
13555 return false;
13556 }
13557
13558 /* For integer types, first try parsing with integer-specific routines.
13559 * If these succeed, they will be more accurate for int64/uint64 than
13560 * strtod().
13561 */
13562 switch (type) {
13563 case UPB_TYPE_ENUM:
13564 case UPB_TYPE_INT32: {
13565 long val = strtol(buf, &end, 0);
13566 if (errno == ERANGE || end != bufend) {
13567 break;
13568 } else if (val > INT32_MAX || val < INT32_MIN) {
13569 return false;
13570 } else {
13571 upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
13572 return true;
13573 }
13574 }
13575 case UPB_TYPE_UINT32: {
13576 unsigned long val = strtoul(buf, &end, 0);
13577 if (end != bufend) {
13578 break;
13579 } else if (val > UINT32_MAX || errno == ERANGE) {
13580 return false;
13581 } else {
13582 upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
13583 return true;
13584 }
13585 }
13586 /* XXX: We can't handle [u]int64 properly on 32-bit machines because
13587 * strto[u]ll isn't in C89. */
13588 case UPB_TYPE_INT64: {
13589 long val = strtol(buf, &end, 0);
13590 if (errno == ERANGE || end != bufend) {
13591 break;
13592 } else {
13593 upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
13594 return true;
13595 }
13596 }
13597 case UPB_TYPE_UINT64: {
13598 unsigned long val = strtoul(p->accumulated, &end, 0);
13599 if (end != bufend) {
13600 break;
13601 } else if (errno == ERANGE) {
13602 return false;
13603 } else {
13604 upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
13605 return true;
13606 }
13607 }
13608 default:
13609 break;
13610 }
13611
13612 if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
13613 /* Quoted numbers for integer types are not allowed to be in double form. */
13614 return false;
13615 }
13616
13617 if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
13618 /* C89 does not have an INFINITY macro. */
13619 val = inf;
13620 } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
13621 val = -inf;
13622 } else {
13623 val = strtod(buf, &end);
13624 if (errno == ERANGE || end != bufend) {
13625 return false;
13626 }
13627 }
13628
13629 switch (type) {
13630 #define CASE(capitaltype, smalltype, ctype, min, max) \
13631 case UPB_TYPE_ ## capitaltype: { \
13632 if (modf(val, &dummy) != 0 || val > max || val < min) { \
13633 return false; \
13634 } else { \
13635 upb_sink_put ## smalltype(&p->top->sink, parser_getsel(p), \
13636 (ctype)val); \
13637 return true; \
13638 } \
13639 break; \
13640 }
13641 case UPB_TYPE_ENUM:
13642 CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
13643 CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
13644 CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
13645 CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
13646 #undef CASE
13647
13648 case UPB_TYPE_DOUBLE:
13649 upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
13650 return true;
13651 case UPB_TYPE_FLOAT:
13652 if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
13653 return false;
13654 } else {
13655 upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
13656 return true;
13657 }
13658 default:
13659 return false;
13660 }
13661 }
13662
/* Converts the accumulated number text for the current field and ends the
 * multi-part value.
 *
 * Returns true if the value was parsed and pushed.  If the text cannot be
 * parsed, stores an "error parsing number" message in p->status, reports it
 * through p->env and returns false.  Also returns false (without ending the
 * multi-part value) if the terminating NUL cannot be appended. */
static bool parse_number(upb_json_parser *p, bool is_quoted) {
  size_t len;
  const char *buf;
  bool ok;

  /* strtol() and friends unfortunately do not support specifying the length of
   * the input string, so we need to force a copy into a NULL-terminated buffer. */
  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);
  ok = parse_number_from_buffer(p, buf, is_quoted);

  if (!ok) {
    /* Report before multipart_end(), which clears the accumulated data that
     * |buf| refers to. */
    upb_status_seterrf(&p->status, "error parsing number: %s", buf);
    upb_env_reporterror(p->env, &p->status);
  }

  multipart_end(p);
  return ok;
}
13685
/* Pushes the boolean |val| to the current field's sink.
 *
 * Returns true without doing anything when the current frame has no field.
 * Returns false, with an error stored in p->status and reported through
 * p->env, when the current field is not of bool type. */
static bool parser_putbool(upb_json_parser *p, bool val) {
  const upb_fielddef *field = p->top->f;
  bool ok;

  if (field == NULL) {
    return true;
  }

  if (upb_fielddef_type(field) != UPB_TYPE_BOOL) {
    upb_status_seterrf(&p->status,
                       "Boolean value specified for non-bool field: %s",
                       upb_fielddef_name(field));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
  UPB_ASSERT(ok);

  return true;
}
13706
/* Parser callback: a boolean literal with value |val| has been read.
 *
 * Opens the BoolValue wrapper or Value object that is to hold the value when
 * the context calls for one (starting a sub-object when not at top level),
 * pushes the value, and then closes whatever was opened.  Values for unknown
 * fields are ignored.  Returns false if the boolean appears at top level in a
 * message that is neither a BoolValue nor a Value, if a sub-object cannot be
 * started, or if the value cannot be stored. */
static bool end_bool(upb_json_parser *p, bool val) {
  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
      start_wrapper_object(p);
    } else if (!is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      return false;
    } else {
      start_value_object(p, VALUE_BOOLVALUE);
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) {
    if (!start_subobject(p)) return false;
    start_wrapper_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_BOOLVALUE);
  }

  if (p->top->is_unknown_field) {
    return true;
  }

  if (!parser_putbool(p, val)) {
    return false;
  }

  if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    /* Plain bool field: nothing to close. */
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
13754
/* Parser callback: a null literal has been read.
 *
 * Null is only materialized for Value messages/fields: a Value object is
 * opened, its null_value field is filled by parsing the literal text "0" as a
 * number, and the object is closed again.  In every other context the null is
 * ignored.  Returns false only if a sub-object for the Value cannot be
 * started. */
static bool end_null(upb_json_parser *p) {
  const char *zero_ptr = "0";

  if (is_top_level(p)) {
    if (!is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      return true;
    }
    start_value_object(p, VALUE_NULLVALUE);
  } else {
    if (!is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      return true;
    }
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_NULLVALUE);
  }

  /* Fill null_value field.  The results of these calls are not checked. */
  multipart_startaccum(p);
  capture_begin(p, zero_ptr);
  capture_end(p, zero_ptr + 1);
  parse_number(p, false);

  end_value_object(p);
  if (!is_top_level(p)) {
    end_subobject(p);
  }

  return true;
}
13786
/* Begins a string value inside an Any frame: the text is accumulated so it
 * can be examined as a whole by end_any_stringval().  Always returns true. */
static bool start_any_stringval(upb_json_parser *p) {
  multipart_startaccum(p);
  return true;
}
13791
/* Parser callback: a string value is starting.
 *
 * First opens whatever object the context calls for (string wrapper,
 * FieldMask, Timestamp/Duration, or Value), starting a sub-object when not at
 * top level.  Then decides how the text will be processed: string fields get
 * a new parser frame and are pushed eagerly, while bytes fields, quoted
 * numbers, enum names, Any strings and values with no field are accumulated.
 *
 * Returns false if the string appears at top level in a message that accepts
 * none of the above, if a sub-object or frame cannot be started, or if the
 * field is a bool or submessage (an error is recorded in that last case). */
static bool start_stringval(upb_json_parser *p) {
  if (is_top_level(p)) {
    if (is_string_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      start_fieldmask_object(p);
      return true;
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
      start_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_STRINGVALUE);
    } else {
      return false;
    }
  } else if (does_string_wrapper_start(p)) {
    if (!start_subobject(p)) return false;
    start_wrapper_object(p);
  } else if (does_fieldmask_start(p)) {
    if (!start_subobject(p)) return false;
    start_fieldmask_object(p);
    return true;
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) ||
             is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) {
    if (!start_subobject(p)) return false;
    start_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_STRINGVALUE);
  }

  if (p->top->f == NULL) {
    multipart_startaccum(p);
    return true;
  }

  if (p->top->is_any) {
    return start_any_stringval(p);
  }

  if (upb_fielddef_isstring(p->top->f)) {
    upb_jsonparser_frame *inner;
    upb_selector_t sel;

    if (!check_stack(p)) return false;

    /* Start a new parser frame: parser frames correspond one-to-one with
     * handler frames, and string events occur in a sub-frame. */
    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
    upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
    inner->m = p->top->m;
    inner->f = p->top->f;
    p->top = inner;

    if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
      /* For STRING fields we push data directly to the handlers as it is
       * parsed.  We don't do this yet for BYTES fields, because our base64
       * decoder is not streaming.
       *
       * TODO(haberman): make base64 decoding streaming also. */
      multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
    } else {
      multipart_startaccum(p);
    }
    return true;
  }

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL ||
      upb_fielddef_type(p->top->f) == UPB_TYPE_MESSAGE) {
    upb_status_seterrf(&p->status,
                       "String specified for bool or submessage field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* No need to push a frame -- numeric values in quotes remain in the
   * current parser frame.  These values must accumulate so we can convert
   * them all at once at the end. */
  multipart_startaccum(p);
  return true;
}
13882
/* Ends a string value inside an Any frame.  The accumulated text is taken to
 * be the type URL: it is pushed to the sink as a string and then resolved to
 * a message type.
 *
 * The URL must consist of the prefix "type.googleapis.com/" followed by a
 * non-empty type name that is found in p->symtab; the resulting msgdef is
 * recorded as the payload type of the current Any frame.  Returns false if
 * the parser stack is full, or (with an error stored in p->status and
 * reported through p->env) if the URL is malformed or the type is unknown.
 *
 * The length is checked before the prefix is compared: the accumulated text
 * is not NUL-terminated, so comparing first could read bytes beyond it when
 * the value is shorter than the prefix. */
static bool end_any_stringval(upb_json_parser *p) {
  static const char url_prefix[] = "type.googleapis.com/";
  const size_t prefix_len = sizeof(url_prefix) - 1;

  size_t len;
  const char *buf = accumulate_getptr(p, &len);

  /* Set type_url */
  upb_selector_t sel;
  upb_jsonparser_frame *inner;
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(&inner->sink, sel, buf, len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(&inner->sink, sel);

  /* This resets the parser's accumulate bookkeeping only (see
   * accumulate_clear()); |buf| and |len| are our own copies and are still
   * used below. */
  multipart_end(p);

  /* Resolve type url */
  if (len > prefix_len && strncmp(buf, url_prefix, prefix_len) == 0) {
    const upb_msgdef *payload_type = NULL;
    buf += prefix_len;
    len -= prefix_len;

    payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len);
    if (payload_type == NULL) {
      upb_status_seterrf(
          &p->status, "Cannot find packed type: %.*s\n", (int)len, buf);
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type);

    return true;
  } else {
    upb_status_seterrf(
        &p->status, "Invalid type url: %.*s\n", (int)len, buf);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }
}
13926
/* Finishes a string value according to the current frame and field type.
 *
 *   - Timestamp/Duration messages and frames without a field: the multi-part
 *     value is simply ended.
 *   - Any frames: delegated to end_any_stringval().
 *   - BYTES: the accumulated text is base64-decoded and pushed, then handled
 *     like STRING.
 *   - STRING: the string is ended on the sink and the string frame is popped.
 *   - ENUM: the accumulated text is resolved as an enum value name and pushed
 *     as int32; an unknown name records an error.
 *   - Numeric types: the accumulated text is parsed as a quoted number.
 *
 * Returns true on success and false on failure.  Note that a base64 failure
 * returns immediately, without ending the multi-part value. */
static bool end_stringval_nontop(upb_json_parser *p) {
  bool ok = true;

  if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
      is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
    multipart_end(p);
    return true;
  }

  if (p->top->f == NULL) {
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_stringval(p);
  }

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      /* Fall through. */

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(&p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      /* Resolve enum symbolic name to integer value. */
      const upb_enumdef *enumdef =
          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(&p->top->sink, sel, int_val);
      } else {
        /* The precision argument of "%.*s" must be an int; passing the
         * size_t directly is undefined behavior where the two types differ
         * in size. */
        upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'",
                           (int)len, buf);
        upb_env_reporterror(p->env, &p->status);
      }

      break;
    }

    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      ok = parse_number(p, true);
      break;

    default:
      UPB_ASSERT(false);
      upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
      upb_env_reporterror(p->env, &p->status);
      ok = false;
      break;
  }

  multipart_end(p);

  return ok;
}
14003
/* Parser callback: a string value has ended.
 *
 * Finishes the value itself (except for FieldMask, see below) and then closes
 * the wrapper, Value, Timestamp/Duration or FieldMask object it belongs to,
 * including the enclosing sub-object when not at top level.  Returns false
 * only if end_stringval_nontop() fails. */
static bool end_stringval(upb_json_parser *p) {
  if (does_fieldmask_end(p)) {
    /* FieldMask's stringvals have been ended when handling them.  Only need
     * to close FieldMask here. */
    end_fieldmask_object(p);
  } else {
    if (!end_stringval_nontop(p)) {
      return false;
    }

    if (does_string_wrapper_end(p)) {
      end_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      end_value_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      end_object(p);
    } else {
      /* Ordinary field: nothing further to close. */
      return true;
    }
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
14047
/* Opens a capture at |ptr|; end_duration_base() closes it and parses the
 * captured text. */
static void start_duration_base(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14051
/* Closes the capture opened by start_duration_base() and emits the "seconds"
 * and "nanos" members parsed from the captured text.
 *
 * The text is split at the first '.': everything before it is parsed as a
 * base-10 integer number of seconds, everything from the '.' onward as a
 * fraction that is scaled to nanoseconds.  The maximum duration is
 * 315576000000, which cannot be represented by double together with a
 * fraction without losing precision, so base and fraction are handled
 * separately.
 *
 * Both parts are length-checked before being copied into the fixed-size
 * scratch buffers.  nanos takes the sign of the text (a leading '-') rather
 * than the sign of the parsed seconds, so that e.g. "-0.5" yields
 * seconds == 0, nanos == -500000000.
 *
 * Returns false, after reporting through p->env, when the capture cannot be
 * ended, the text does not parse, the seconds are outside
 * [-315576000000, 315576000000], or the fraction has more than 9 digits. */
static bool end_duration_base(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char seconds_buf[14];
  char nanos_buf[12];
  char *end;
  int64_t seconds = 0;
  int32_t nanos = 0;
  double val = 0.0;
  const char *seconds_membername = "seconds";
  const char *nanos_membername = "nanos";
  size_t fraction_start;
  size_t fraction_len;
  bool negative;
  bool base_too_long;

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  memset(seconds_buf, 0, sizeof(seconds_buf));
  memset(nanos_buf, 0, sizeof(nanos_buf));

  negative = len > 0 && buf[0] == '-';

  /* Find out where the base ends and the fraction (if any) begins. */
  for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.';
       fraction_start++);
  fraction_len = len - fraction_start;

  /* Parse base.  A base that does not fit seconds_buf (sign plus 12 digits
   * plus the terminator at most) is necessarily out of range, so it is
   * reported as such instead of being copied. */
  base_too_long = fraction_start >= sizeof(seconds_buf);
  if (!base_too_long) {
    memcpy(seconds_buf, buf, fraction_start);
    errno = 0;  /* A stale ERANGE must not fail a valid number. */
    /* strtoll, not strtol: long may be too narrow for 315576000000. */
    seconds = strtoll(seconds_buf, &end, 10);
    if (errno == ERANGE || end != seconds_buf + fraction_start) {
      upb_status_seterrf(&p->status, "error parsing duration: %s",
                         seconds_buf);
      upb_env_reporterror(p->env, &p->status);
      return false;
    }
  }

  if (base_too_long ? !negative : seconds > 315576000000) {
    upb_status_seterrf(&p->status, "error parsing duration: "
                                   "maximum acceptable value is "
                                   "315576000000");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (base_too_long ? negative : seconds < -315576000000) {
    upb_status_seterrf(&p->status, "error parsing duration: "
                                   "minimum acceptable value is "
                                   "-315576000000");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Parse fraction.  nanos_buf holds a leading '0', the '.', up to nine
   * digits and the terminator. */
  if (fraction_len > sizeof(nanos_buf) - 2) {
    upb_status_seterrf(&p->status,
                       "error parsing duration: at most 9-digit fraction.");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf + fraction_start, fraction_len);
  errno = 0;
  val = strtod(nanos_buf, &end);
  if (errno == ERANGE || end != nanos_buf + fraction_len + 1) {
    upb_status_seterrf(&p->status, "error parsing duration: %s",
                       nanos_buf);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  nanos = val * 1000000000;
  if (negative) nanos = -nanos;

  /* Clean up buffer */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(&p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(&p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
14144
/* Converts the currently accumulated text to an int and restarts
 * accumulation for the next timestamp component.
 *
 * atoi() and friends unfortunately do not support specifying the length of
 * the input string, so a NUL byte is appended to the accumulated text first.
 * If that append fails the text is not read at all (it would not be
 * terminated) and 0 is returned. */
static int parse_timestamp_number(upb_json_parser *p) {
  size_t len;
  const char *buf;
  int val = 0;

  if (multipart_text(p, "\0", 1, false)) {
    buf = accumulate_getptr(p, &len);
    val = atoi(buf);
  }

  multipart_end(p);
  multipart_startaccum(p);

  return val;
}
14162
/* Opens a capture at |ptr| for the year; end_year() closes it. */
static void start_year(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14166
/* Closes the year capture and stores the parsed number, minus 1900, in
 * p->tm.tm_year.  Returns false if the capture cannot be closed. */
static bool end_year(upb_json_parser *p, const char *ptr) {
  int year;

  if (!capture_end(p, ptr)) return false;

  year = parse_timestamp_number(p);
  p->tm.tm_year = year - 1900;
  return true;
}
14174
/* Opens a capture at |ptr| for the month; end_month() closes it. */
static void start_month(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14178
/* Closes the month capture and stores the parsed number, minus one, in
 * p->tm.tm_mon.  Returns false if the capture cannot be closed. */
static bool end_month(upb_json_parser *p, const char *ptr) {
  int month;

  if (!capture_end(p, ptr)) return false;

  month = parse_timestamp_number(p);
  p->tm.tm_mon = month - 1;
  return true;
}
14186
/* Opens a capture at |ptr| for the day of month; end_day() closes it. */
static void start_day(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14190
/* Closes the day capture and stores the parsed number in p->tm.tm_mday.
 * Returns false if the capture cannot be closed. */
static bool end_day(upb_json_parser *p, const char *ptr) {
  int day;

  if (!capture_end(p, ptr)) return false;

  day = parse_timestamp_number(p);
  p->tm.tm_mday = day;
  return true;
}
14198
/* Opens a capture at |ptr| for the hour; end_hour() closes it. */
static void start_hour(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14202
/* Closes the hour capture and stores the parsed number in p->tm.tm_hour.
 * Returns false if the capture cannot be closed. */
static bool end_hour(upb_json_parser *p, const char *ptr) {
  int hour;

  if (!capture_end(p, ptr)) return false;

  hour = parse_timestamp_number(p);
  p->tm.tm_hour = hour;
  return true;
}
14210
/* Opens a capture at |ptr| for the minute; end_minute() closes it. */
static void start_minute(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14214
/* Closes the minute capture and stores the parsed number in p->tm.tm_min.
 * Returns false if the capture cannot be closed. */
static bool end_minute(upb_json_parser *p, const char *ptr) {
  int minute;

  if (!capture_end(p, ptr)) return false;

  minute = parse_timestamp_number(p);
  p->tm.tm_min = minute;
  return true;
}
14222
/* Opens a capture at |ptr| for the second; end_second() closes it. */
static void start_second(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14226
/* Closes the second capture and stores the parsed number in p->tm.tm_sec.
 * Returns false if the capture cannot be closed. */
static bool end_second(upb_json_parser *p, const char *ptr) {
  int second;

  if (!capture_end(p, ptr)) return false;

  second = parse_timestamp_number(p);
  p->tm.tm_sec = second;
  return true;
}
14234
/* Zeroes the broken-down time in p->tm before the timestamp components are
 * stored into it. */
static void start_timestamp_base(upb_json_parser *p) {
  memset(&p->tm, 0, sizeof(p->tm));
}
14238
/* Opens a capture at |ptr| for the fractional seconds;
 * end_timestamp_fraction() closes it. */
static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14242
/* Closes the fraction capture and emits the "nanos" member.
 *
 * The captured text (at most 10 characters are accepted) is prefixed with
 * '0', parsed with strtod() and scaled by 1e9.  errno is cleared before the
 * conversion so that an ERANGE left behind by an earlier call cannot make a
 * valid fraction fail.
 *
 * Returns false, after reporting through p->env, when the capture cannot be
 * closed, the text is longer than 10 characters, or it does not parse
 * completely. */
static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char nanos_buf[12];
  char *end;
  double val = 0.0;
  int32_t nanos;
  const char *nanos_membername = "nanos";

  memset(nanos_buf, 0, sizeof(nanos_buf));

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  /* nanos_buf holds the leading '0', up to 10 captured characters and the
   * terminator. */
  if (len > 10) {
    upb_status_seterrf(&p->status,
        "error parsing timestamp: at most 9-digit fraction.");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Parse nanos */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf, len);
  errno = 0;
  val = strtod(nanos_buf, &end);

  if (errno == ERANGE || end != nanos_buf + len + 1) {
    upb_status_seterrf(&p->status, "error parsing timestamp nanos: %s",
                       nanos_buf);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  nanos = val * 1000000000;

  /* Clean up previous environment */
  multipart_end(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(&p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
14297
/* Opens a capture at |ptr| for the zone designator; end_timestamp_zone()
 * closes it. */
static void start_timestamp_zone(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14301
/* Year of the Unix epoch. */
#define EPOCH_YEAR 1970
/* Offset added to struct tm's tm_year to obtain the calendar year. */
#define TM_YEAR_BASE 1900

/* Returns true when |year| is a leap year: divisible by 4, and either not
 * divisible by 100 or divisible by 400. */
static bool isleap(int year) {
  if (year % 4 != 0) {
    return false;
  }
  if (year % 100 != 0) {
    return true;
  }
  return year % 400 == 0;
}
14308
/* Number of days elapsed before the first day of each month, for normal
 * years (row 0) and leap years (row 1).  Entry 12 is the length of the year.
 *
 * The table has internal linkage: identifiers beginning with two underscores
 * are reserved for the implementation, so exporting this one from the object
 * file risks clashing with a symbol of the same name in the C library.
 * NOTE(review): the name itself is kept because upb_mktime() refers to it. */
static const unsigned short int __mon_yday[2][13] = {
    /* Normal years. */
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
    /* Leap years. */
    { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
};
14315
/* Returns the number of seconds between 1970-01-01T00:00:00 and the given
 * instant in the proleptic Gregorian calendar.
 *
 *   year  calendar year (e.g. 2021)
 *   yday  zero-based day of the year (0 is January 1st)
 *   hour, min, sec  time of day
 *
 * Leap days are counted on the calendar itself: the number of leap years
 * among the years that precede |year|, minus the same count for 1970.
 * Counting them relative to 1970 ((year - 1970) / 4 and so on) is wrong
 * because the leap-year cycle is not aligned with 1970; it loses one day for
 * e.g. 1973 and 2021.
 *
 * Valid for year >= -4799. */
int64_t epoch(int year, int yday, int hour, int min, int sec) {
  /* Completed years before |year| and before 1970, both shifted by 4800 (a
   * multiple of the 400-year Gregorian cycle) so that the truncating
   * divisions below act on non-negative values. */
  const int64_t prev = (int64_t)year + 4799;
  const int64_t epoch_prev = 1970 + 4799;

  const int64_t leap_days =
      (prev / 4 - prev / 100 + prev / 400) -
      (epoch_prev / 4 - epoch_prev / 100 + epoch_prev / 400);

  const int64_t days = (prev - epoch_prev) * 365 + leap_days + yday;
  const int64_t hours = days * 24 + hour;
  const int64_t mins = hours * 60 + min;

  return mins * 60 + sec;
}
14327
upb_mktime(const struct tm * tp)14328 static int64_t upb_mktime(const struct tm *tp) {
14329 int sec = tp->tm_sec;
14330 int min = tp->tm_min;
14331 int hour = tp->tm_hour;
14332 int mday = tp->tm_mday;
14333 int mon = tp->tm_mon;
14334 int year = tp->tm_year + TM_YEAR_BASE;
14335
14336 /* Calculate day of year from year, month, and day of month. */
14337 int mon_yday = ((__mon_yday[isleap(year)][mon]) - 1);
14338 int yday = mon_yday + mday;
14339
14340 return epoch(year, yday, hour, min, sec);
14341 }
14342
/* Closes the zone capture, converts the accumulated broken-down time plus
 * zone offset to seconds since the epoch, and emits the "seconds" member.
 *
 * The zone is either 'Z' or a sign followed by a two-digit hour offset; a
 * '+' offset is subtracted and a '-' offset is added to obtain UTC.
 *
 * Returns false, after reporting through p->env, when the capture cannot be
 * closed, the offset does not parse, or the result lies outside
 * 0001-01-01T00:00:00Z .. 9999-12-31T23:59:59Z. */
static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  int hours = 0;
  int64_t seconds;
  const char *seconds_membername = "seconds";

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  if (buf[0] != 'Z') {
    /* NOTE(review): relies on the text after the sign being readable by
     * sscanf up to the first non-matching character -- confirm the buffer is
     * suitably delimited. */
    if (sscanf(buf + 1, "%2d:00", &hours) != 1) {
      upb_status_seterrf(&p->status, "error parsing timestamp offset");
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    if (buf[0] == '+') {
      hours = -hours;
    }
  }

  /* Normalize tm */
  seconds = upb_mktime(&p->tm);
  seconds += 3600 * hours;

  /* Check timestamp boundary */
  if (seconds < -62135596800) {
    upb_status_seterrf(&p->status, "error parsing timestamp: "
                                   "minimum acceptable value is "
                                   "0001-01-01T00:00:00Z");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (seconds > 253402300799) {
    upb_status_seterrf(&p->status, "error parsing timestamp: "
                                   "maximum acceptable value is "
                                   "9999-12-31T23:59:59Z");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Clean up previous environment */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(&p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
14397
/* Opens a capture at |ptr| for the text of one FieldMask path;
 * end_fieldmask_path_text() closes it. */
static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
14401
/* Closes the capture opened by start_fieldmask_path_text().  Returns the
 * result of capture_end(): true on success, false on failure.  (Previously
 * the success path fell off the end of the function without returning a
 * value.) */
static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr);
}
14407
/* Begins one FieldMask path string.
 *
 * Parser frames correspond one-to-one with handler frames and string events
 * occur in a sub-frame, so a new frame is pushed, a STARTSTR event is sent on
 * the current sink, and the new frame inherits the current message and field.
 * Text accumulation is then started.  Returns false if check_stack() fails. */
static bool start_fieldmask_path(upb_json_parser *p) {
  upb_jsonparser_frame *str_frame;
  upb_selector_t startstr_sel;

  if (!check_stack(p)) {
    return false;
  }

  str_frame = start_jsonparser_frame(p);
  startstr_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, startstr_sel, 0, &str_frame->sink);

  str_frame->f = p->top->f;
  str_frame->m = p->top->m;
  p->top = str_frame;

  multipart_startaccum(p);
  return true;
}
14426
/* Pushes the |len| bytes at |ptr| to the current sink one character at a
 * time using selector |sel|.  Every 'A'..'Z' character other than the first
 * character is replaced by '_' followed by its lowercase form; all other
 * characters are pushed unchanged.  Always returns true. */
static bool lower_camel_push(
    upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) {
  size_t i;

  for (i = 0; i < len; i++) {
    const char ch = ptr[i];
    const bool is_upper = ch >= 'A' && ch <= 'Z';

    if (is_upper && i != 0) {
      char lowered = tolower(ch);
      upb_sink_putstring(&p->top->sink, sel, "_", 1, NULL);
      upb_sink_putstring(&p->top->sink, sel, &lowered, 1, NULL);
    } else {
      upb_sink_putstring(&p->top->sink, sel, ptr + i, 1, NULL);
    }
  }

  return true;
}
14443
/* Ends one FieldMask path string: pushes the accumulated text through
 * lower_camel_push(), sends ENDSTR, pops the string frame and ends the
 * accumulation.  Returns false if lower_camel_push() fails. */
static bool end_fieldmask_path(upb_json_parser *p) {
  upb_selector_t string_sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_selector_t endstr_sel;

  if (!lower_camel_push(p, string_sel, p->accumulated, p->accumulated_len)) {
    return false;
  }

  endstr_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(&p->top->sink, endstr_sel);
  p->top--;

  multipart_end(p);
  return true;
}
14460
/* Begins an object member: asserts that the current frame has no field
 * selected yet and starts accumulating text (the member name). */
static void start_member(upb_json_parser *p) {
  UPB_ASSERT(!p->top->f);
  multipart_startaccum(p);
}
14465
14466 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
14467 * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)14468 static bool parse_mapentry_key(upb_json_parser *p) {
14469
14470 size_t len;
14471 const char *buf = accumulate_getptr(p, &len);
14472
14473 /* Emit the key field. We do a bit of ad-hoc parsing here because the
14474 * parser state machine has already decided that this is a string field
14475 * name, and we are reinterpreting it as some arbitrary key type. In
14476 * particular, integer and bool keys are quoted, so we need to parse the
14477 * quoted string contents here. */
14478
14479 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
14480 if (p->top->f == NULL) {
14481 upb_status_seterrmsg(&p->status, "mapentry message has no key");
14482 upb_env_reporterror(p->env, &p->status);
14483 return false;
14484 }
14485 switch (upb_fielddef_type(p->top->f)) {
14486 case UPB_TYPE_INT32:
14487 case UPB_TYPE_INT64:
14488 case UPB_TYPE_UINT32:
14489 case UPB_TYPE_UINT64:
14490 /* Invoke end_number. The accum buffer has the number's text already. */
14491 if (!parse_number(p, true)) {
14492 return false;
14493 }
14494 break;
14495 case UPB_TYPE_BOOL:
14496 if (len == 4 && !strncmp(buf, "true", 4)) {
14497 if (!parser_putbool(p, true)) {
14498 return false;
14499 }
14500 } else if (len == 5 && !strncmp(buf, "false", 5)) {
14501 if (!parser_putbool(p, false)) {
14502 return false;
14503 }
14504 } else {
14505 upb_status_seterrmsg(&p->status,
14506 "Map bool key not 'true' or 'false'");
14507 upb_env_reporterror(p->env, &p->status);
14508 return false;
14509 }
14510 multipart_end(p);
14511 break;
14512 case UPB_TYPE_STRING:
14513 case UPB_TYPE_BYTES: {
14514 upb_sink subsink;
14515 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
14516 upb_sink_startstr(&p->top->sink, sel, len, &subsink);
14517 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
14518 upb_sink_putstring(&subsink, sel, buf, len, NULL);
14519 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
14520 upb_sink_endstr(&subsink, sel);
14521 multipart_end(p);
14522 break;
14523 }
14524 default:
14525 upb_status_seterrmsg(&p->status, "Invalid field type for map key");
14526 upb_env_reporterror(p->env, &p->status);
14527 return false;
14528 }
14529
14530 return true;
14531 }
14532
14533 /* Helper: emit one map entry (as a submessage in the map field sequence). This
14534 * is invoked from end_membername(), at the end of the map entry's key string,
14535 * with the map key in the accumulate buffer. It parses the key from that
14536 * buffer, emits the handler calls to start the mapentry submessage (setting up
14537 * its subframe in the process), and sets up state in the subframe so that the
14538 * value parser (invoked next) will emit the mapentry's value field and then
14539 * end the mapentry message. */
14540
handle_mapentry(upb_json_parser * p)14541 static bool handle_mapentry(upb_json_parser *p) {
14542 const upb_fielddef *mapfield;
14543 const upb_msgdef *mapentrymsg;
14544 upb_jsonparser_frame *inner;
14545 upb_selector_t sel;
14546
14547 /* Map entry: p->top->sink is the seq frame, so we need to start a frame
14548 * for the mapentry itself, and then set |f| in that frame so that the map
14549 * value field is parsed, and also set a flag to end the frame after the
14550 * map-entry value is parsed. */
14551 if (!check_stack(p)) return false;
14552
14553 mapfield = p->top->mapfield;
14554 mapentrymsg = upb_fielddef_msgsubdef(mapfield);
14555
14556 inner = start_jsonparser_frame(p);
14557 p->top->f = mapfield;
14558 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
14559 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
14560 inner->m = mapentrymsg;
14561 inner->mapfield = mapfield;
14562
14563 /* Don't set this to true *yet* -- we reuse parsing handlers below to push
14564 * the key field value to the sink, and these handlers will pop the frame
14565 * if they see is_mapentry (when invoked by the parser state machine, they
14566 * would have just seen the map-entry value, not key). */
14567 inner->is_mapentry = false;
14568 p->top = inner;
14569
14570 /* send STARTMSG in submsg frame. */
14571 upb_sink_startmsg(&p->top->sink);
14572
14573 parse_mapentry_key(p);
14574
14575 /* Set up the value field to receive the map-entry value. */
14576 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
14577 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */
14578 p->top->mapfield = mapfield;
14579 if (p->top->f == NULL) {
14580 upb_status_seterrmsg(&p->status, "mapentry message has no value");
14581 upb_env_reporterror(p->env, &p->status);
14582 return false;
14583 }
14584
14585 return true;
14586 }
14587
/* Called at the end of a member name, with the name in the accumulate
 * buffer.
 *
 *  - Frame without a message: the member is flagged as an unknown field.
 *  - Any frame: delegated to end_any_membername().
 *  - Map frame: delegated to handle_mapentry().
 *  - Otherwise the name is looked up in the frame's name table and the
 *    matching field becomes p->top->f.  An unmatched name is flagged as an
 *    unknown field when p->ignore_json_unknown is set, and is an error
 *    otherwise.
 *
 * Returns false on error (reported through p->env). */
static bool end_membername(upb_json_parser *p) {
  size_t name_len;
  const char *name;
  upb_value v;

  UPB_ASSERT(!p->top->f);

  if (!p->top->m) {
    p->top->is_unknown_field = true;
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_membername(p);
  }

  if (p->top->is_map) {
    return handle_mapentry(p);
  }

  name = accumulate_getptr(p, &name_len);

  if (upb_strtable_lookup2(p->top->name_table, name, name_len, &v)) {
    p->top->f = upb_value_getconstptr(v);
  } else if (p->ignore_json_unknown) {
    p->top->is_unknown_field = true;
  } else {
    upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)name_len,
                       name);
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  multipart_end(p);
  return true;
}
14622
/* Handles the end of a member name inside an Any frame.
 *
 * The name "@type" selects the field registered as "type_url" in the frame's
 * name table; every other name is flagged as an unknown field.
 *
 * Returns false, after reporting through p->env, if the name table has no
 * "type_url" entry; the lookup result used to be ignored, which would read
 * an uninitialised upb_value in that case. */
static bool end_any_membername(upb_json_parser *p) {
  size_t len;
  const char *buf = accumulate_getptr(p, &len);
  upb_value v;

  if (len == 5 && strncmp(buf, "@type", len) == 0) {
    if (!upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v)) {
      upb_status_seterrmsg(&p->status, "Any message has no type_url field");
      upb_env_reporterror(p->env, &p->status);
      return false;
    }
    p->top->f = upb_value_getconstptr(v);
  } else {
    p->top->is_unknown_field = true;
  }

  multipart_end(p);
  return true;
}
14639
/* Ends the current object member.
 *
 * If the member was a map-entry value, the mapentry frame is closed first:
 * ENDMSG is sent on the entry's sink, the frame is popped, and ENDSUBMSG for
 * the map field is sent on the frame below.  In all cases the current
 * frame's field selection and unknown-field flag are then cleared. */
static void end_member(upb_json_parser *p) {
  if (p->top->is_mapentry) {
    upb_status endmsg_status = UPB_STATUS_INIT;
    upb_selector_t endsubmsg_sel;
    bool have_sel;
    const upb_fielddef *mapfield;

    UPB_ASSERT(p->top > p->stack);

    /* ENDMSG goes to the mapentry submessage itself. */
    upb_sink_endmsg(&p->top->sink, &endmsg_status);
    mapfield = p->top->mapfield;

    /* ENDSUBMSG goes to the repeated-field-of-mapentries frame below. */
    p->top--;
    have_sel = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG,
                                        &endsubmsg_sel);
    UPB_ASSERT(have_sel);
    upb_sink_endsubmsg(&p->top->sink, endsubmsg_sel);
  }

  p->top->is_unknown_field = false;
  p->top->f = NULL;
}
14663
/* Begins a member of an Any object: performs the regular start_member() and
 * then passes |ptr| to
 * json_parser_any_frame_set_after_type_url_start_once() for the current
 * frame's any_frame. */
static void start_any_member(upb_json_parser *p, const char *ptr) {
  start_member(p);
  json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr);
}
14668
/* Ends a member of an Any object: passes |ptr| to
 * json_parser_any_frame_set_before_type_url_end() for the current frame's
 * any_frame and then performs the regular end_member(). */
static void end_any_member(upb_json_parser *p, const char *ptr) {
  json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr);
  end_member(p);
}
14673
/* Pushes a parser frame for a JSON object that is the value of the current
 * member.
 *
 *  - Unknown field: an empty frame is pushed so nesting stays balanced.
 *  - Map field: STARTSEQ is sent and a map frame is pushed.
 *  - Submessage field: STARTSUBMSG is sent and a message frame is pushed;
 *    for google.protobuf.Any an any_frame is allocated and reset as well.
 *  - Anything else is an error.
 *
 * Returns false, after reporting through p->env where a status is set, if
 * the frame stack check fails, the any_frame cannot be allocated, or the
 * field cannot hold an object. */
static bool start_subobject(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (p->top->is_unknown_field) {
    if (!check_stack(p)) return false;

    p->top = start_jsonparser_frame(p);
    return true;
  }

  if (upb_fielddef_ismap(p->top->f)) {
    /* Beginning of a map. Start a new parser frame in a repeated-field
     * context. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(&p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(p->top->f);
    inner->mapfield = p->top->f;
    inner->is_map = true;
    p->top = inner;

    return true;
  } else if (upb_fielddef_issubmsg(p->top->f)) {
    /* Beginning of a subobject. Start a new parser frame in the submsg
     * context. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
    upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
    inner->m = upb_fielddef_msgsubdef(p->top->f);
    set_name_table(p, inner);
    p->top = inner;

    if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
      p->top->any_frame =
          upb_env_malloc(p->env, sizeof(upb_jsonparser_any_frame));
      if (p->top->any_frame == NULL) {
        /* Leave the frame in its non-Any state rather than resetting
         * through a null pointer. */
        p->top->is_any = false;
        upb_status_seterrmsg(&p->status, "Out of memory");
        upb_env_reporterror(p->env, &p->status);
        return false;
      }
      p->top->is_any = true;
      json_parser_any_frame_reset(p->top->any_frame);
    } else {
      p->top->is_any = false;
      p->top->any_frame = NULL;
    }

    return true;
  } else {
    upb_status_seterrf(&p->status,
                       "Object specified for non-message/group field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }
}
14734
/* Starts a JSON object value, first opening whatever wrapping the well-known
 * types Struct and Value need.
 *
 *  - Top level, Value message: a Value object holding a struct value is
 *    started, a subobject is opened for it, and the Struct object is
 *    started.
 *  - Top level, Struct message: only the Struct object is started.
 *  - Top level, any other message: nothing to do; returns true.
 *  - Struct field: a subobject is opened and the Struct object is started.
 *  - Value field: a subobject is opened, the Value object is started, a
 *    second subobject is opened and the Struct object is started.
 *
 * Except for the early return, a final subobject is opened in every case and
 * its result is returned.  Returns false as soon as any start_subobject()
 * call fails. */
static bool start_subobject_full(upb_json_parser *p) {
  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_STRUCTVALUE);
      if (!start_subobject(p)) return false;
    } else if (!is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
      return true;
    }
    start_structvalue_object(p);
  } else {
    const bool struct_field = is_wellknown_field(p, UPB_WELLKNOWN_STRUCT);

    if (struct_field || is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      if (!start_subobject(p)) return false;
      if (!struct_field) {
        /* A Value field needs the Value wrapper around the Struct. */
        start_value_object(p, VALUE_STRUCTVALUE);
        if (!start_subobject(p)) return false;
      }
      start_structvalue_object(p);
    }
  }

  return start_subobject(p);
}
14758
/* Pops the frame of the subobject that just ended and notifies the parent
 * sink.  Does nothing at the top level.
 *
 * A map frame is closed with ENDSEQ.  Any other frame is closed with
 * ENDSUBMSG, unless it had no message (an unknown field), in which case no
 * event is sent. */
static void end_subobject(upb_json_parser *p) {
  bool was_map;
  bool was_unknown;
  upb_selector_t sel;

  if (is_top_level(p)) {
    return;
  }

  /* Sample the frame before it is popped. */
  was_map = p->top->is_map;
  was_unknown = p->top->m == NULL;
  p->top--;

  if (was_map) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(&p->top->sink, sel);
  } else if (!was_unknown) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
    upb_sink_endsubmsg(&p->top->sink, sel);
  }
}
14779
/* Ends a JSON object value and then unwinds the Struct and Value wrappers
 * around it, in that order: whenever the current message is a Struct (and
 * then a Value), the corresponding object is ended and, below the top level,
 * its enclosing subobject is ended too. */
static void end_subobject_full(upb_json_parser *p) {
  end_subobject(p);

  if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
    end_structvalue_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }

  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }
}
14797
/* Starts a JSON array value.
 *
 * The wrapping needed by the well-known types ListValue and Value is opened
 * first (at the top level only those two message types may be an array).
 * Then:
 *  - for an unknown field, an unknown-field frame is pushed;
 *  - for a repeated field, STARTSEQ is sent and a sequence frame is pushed;
 *  - for any other field an error is reported.
 *
 * Returns false on error.  The frame stack is checked before every push,
 * including the unknown-field one, which previously pushed unchecked. */
static bool start_array(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_LISTVALUE);
      if (!start_subobject(p)) return false;
      start_listvalue_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      start_listvalue_object(p);
    } else {
      /* NOTE(review): no status message is set on this path. */
      return false;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_LISTVALUE);
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  }

  if (p->top->is_unknown_field) {
    /* Nested arrays in an unknown field come straight from the input, so
     * the depth must be bounded like every other frame push. */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    inner->is_unknown_field = true;
    p->top = inner;

    return true;
  }

  if (!upb_fielddef_isseq(p->top->f)) {
    upb_status_seterrf(&p->status,
                       "Array specified for non-repeated field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (!check_stack(p)) return false;

  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
  upb_sink_startseq(&p->top->sink, sel, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  inner->is_repeated = true;
  p->top = inner;

  return true;
}
14854
/* Ends a JSON array value: pops the array's frame and, unless the parent is
 * an unknown field, sends ENDSEQ on the parent sink.  The ListValue and
 * Value wrappers are then unwound, in that order, whenever the current
 * message is of that type; below the top level the enclosing subobject is
 * ended as well. */
static void end_array(upb_json_parser *p) {
  upb_selector_t endseq_sel;

  UPB_ASSERT(p->top > p->stack);
  p->top--;

  if (p->top->is_unknown_field) {
    return;
  }

  endseq_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
  upb_sink_endseq(&p->top->sink, endseq_sel);

  if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
    end_listvalue_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }

  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }
}
14883
/* Sends STARTMSG on the current frame's sink.  Map frames and frames without
 * a message are skipped. */
static void start_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) {
    return;
  }

  upb_sink_startmsg(&p->top->sink);
}
14889
/* Sends ENDMSG on the current frame's sink and reports a failing status
 * through p->env.  Map frames and frames without a message are skipped. */
static void end_object(upb_json_parser *p) {
  upb_status endmsg_status;

  if (p->top->is_map || p->top->m == NULL) {
    return;
  }

  upb_status_clear(&endmsg_status);
  upb_sink_endmsg(&p->top->sink, &endmsg_status);
  if (!upb_ok(&endmsg_status)) {
    upb_env_reporterror(p->env, &endmsg_status);
  }
}
14900
/* Starts an Any object: performs the regular start_object() and initialises
 * both ends of the any_frame's before-type-url range to |ptr|, i.e. to an
 * empty range. */
static void start_any_object(upb_json_parser *p, const char *ptr) {
  start_object(p);
  p->top->any_frame->before_type_url_end = ptr;
  p->top->any_frame->before_type_url_start = ptr;
}
14906
/* Ends an Any object.
 *
 * The JSON text seen before and after the "@type" member was recorded in the
 * frame's any_frame.  That text is fed to the any_frame's own parser, and
 * the bytes collected in the any_frame's string sink are emitted as the Any
 * message's "value" field.
 *
 * For a payload that is itself a well-known type, only the text following
 * the first ':' of each recorded range is fed (and, for the range after the
 * type url, the byte at |ptr| is left out).  For any other payload the
 * ranges are fed as recorded, joined with "," when both exist, and a missing
 * range is replaced by "{" or "}" respectively.
 *
 * Returns false on error: value data without a type url, a well-known
 * payload range without ':', a failure of the nested parse, or a failed
 * frame stack check. */
static bool end_any_object(upb_json_parser *p, const char *ptr) {
  const char *value_membername = "value";
  bool is_well_known_packed = false;
  const char *packed_end = ptr + 1;
  upb_selector_t sel;
  upb_jsonparser_frame *inner;
  /* p->top is not reassigned while the nested parser is being driven. */
  upb_jsonparser_any_frame *any = p->top->any_frame;

  if (json_parser_any_frame_has_value(any) &&
      !json_parser_any_frame_has_type_url(any)) {
    upb_status_seterrmsg(&p->status, "No valid type url");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* Well known types data is represented as value field. */
  if (upb_msgdef_wellknowntype(any->parser->top->m) !=
      UPB_WELLKNOWN_UNSPECIFIED) {
    is_well_known_packed = true;

    if (json_parser_any_frame_has_value_before_type_url(any)) {
      any->before_type_url_start =
          memchr(any->before_type_url_start, ':',
                 any->before_type_url_end - any->before_type_url_start);
      if (any->before_type_url_start == NULL) {
        upb_status_seterrmsg(&p->status, "invalid data for well known type.");
        upb_env_reporterror(p->env, &p->status);
        return false;
      }
      any->before_type_url_start++;
    }

    if (json_parser_any_frame_has_value_after_type_url(any)) {
      any->after_type_url_start =
          memchr(any->after_type_url_start, ':',
                 (ptr + 1) - any->after_type_url_start);
      if (any->after_type_url_start == NULL) {
        upb_status_seterrmsg(&p->status, "Invalid data for well known type.");
        upb_env_reporterror(p->env, &p->status);
        return false;
      }
      any->after_type_url_start++;
      packed_end = ptr;
    }
  }

  /* Leading part: the text before "@type", or an opening brace. */
  if (json_parser_any_frame_has_value_before_type_url(any)) {
    if (!parse(any->parser, NULL, any->before_type_url_start,
               any->before_type_url_end - any->before_type_url_start,
               NULL)) {
      return false;
    }
  } else if (!is_well_known_packed &&
             !parse(any->parser, NULL, "{", 1, NULL)) {
    return false;
  }

  /* Separator between the two parts when both are present. */
  if (json_parser_any_frame_has_value_before_type_url(any) &&
      json_parser_any_frame_has_value_after_type_url(any)) {
    if (!parse(any->parser, NULL, ",", 1, NULL)) {
      return false;
    }
  }

  /* Trailing part: the text after "@type", or a closing brace. */
  if (json_parser_any_frame_has_value_after_type_url(any)) {
    if (!parse(any->parser, NULL, any->after_type_url_start,
               packed_end - any->after_type_url_start, NULL)) {
      return false;
    }
  } else if (!is_well_known_packed &&
             !parse(any->parser, NULL, "}", 1, NULL)) {
    return false;
  }

  if (!end(any->parser, NULL)) {
    return false;
  }

  p->top->is_any = false;

  /* Set value */
  start_member(p);
  capture_begin(p, value_membername);
  capture_end(p, value_membername + 5);
  end_membername(p);

  if (!check_stack(p)) return false;
  /* The slot above the current frame only lends its sink for the string. */
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(&inner->sink, sel, p->top->any_frame->stringsink.ptr,
                     p->top->any_frame->stringsink.len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(&inner->sink, sel);

  end_member(p);

  end_object(p);

  /* Deallocate any parse frame. */
  json_parser_any_frame_free(p->top->any_frame);
  upb_env_free(p->env, p->top->any_frame);

  return true;
}
15023
is_string_wrapper(const upb_msgdef * m)15024 static bool is_string_wrapper(const upb_msgdef *m) {
15025 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
15026 return type == UPB_WELLKNOWN_STRINGVALUE ||
15027 type == UPB_WELLKNOWN_BYTESVALUE;
15028 }
15029
is_fieldmask(const upb_msgdef * m)15030 static bool is_fieldmask(const upb_msgdef *m) {
15031 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
15032 return type == UPB_WELLKNOWN_FIELDMASK;
15033 }
15034
/* Opens the synthetic structure used for a field mask: starts an object,
 * starts a member named by the literal "paths", then starts an array.
 *
 * NOTE(review): parse() checks the results of end_membername() and
 * start_array() with CHECK_RETURN_TOP; here both results are ignored. */
static void start_fieldmask_object(upb_json_parser *p) {
  /* Static storage, so the captured range stays valid after we return. */
  static const char paths_name[] = "paths";

  start_object(p);

  /* Name the member by capturing the literal's bytes [begin, end). */
  start_member(p);
  capture_begin(p, paths_name);
  capture_end(p, paths_name + (sizeof(paths_name) - 1));
  end_membername(p);

  start_array(p);
}
15048
/* Closes, innermost first, what start_fieldmask_object() opened: the array,
 * then the member, then the object. */
static void end_fieldmask_object(upb_json_parser *p) {
  end_array(p);   /* the array started last */
  end_member(p);  /* the member that held it */
  end_object(p);  /* the enclosing object */
}
15054
/* Opens a synthetic object and starts a member named by the literal "value",
 * leaving the parser positioned to receive that member's value.
 *
 * NOTE(review): the result of end_membername() is ignored here, although
 * parse() checks it with CHECK_RETURN_TOP. */
static void start_wrapper_object(upb_json_parser *p) {
  /* Static storage, so the captured range stays valid after we return. */
  static const char value_name[] = "value";

  start_object(p);

  /* Name the member by capturing the literal's bytes [begin, end). */
  start_member(p);
  capture_begin(p, value_name);
  capture_end(p, value_name + (sizeof(value_name) - 1));
  end_membername(p);
}
15066
/* Closes what start_wrapper_object() opened: first the member, then the
 * object. */
static void end_wrapper_object(upb_json_parser *p) {
  end_member(p);  /* the "value" member */
  end_object(p);  /* the enclosing object */
}
15071
/* Opens a synthetic object and starts the member selected by |value_type|
 * (one of the VALUE_* constants). Any other |value_type| selects the empty
 * member name "".
 *
 * NOTE(review): the result of end_membername() is ignored here, although
 * parse() checks it with CHECK_RETURN_TOP. */
static void start_value_object(upb_json_parser *p, int value_type) {
  const char *name;

  switch (value_type) {
    case VALUE_NULLVALUE:   name = "null_value";   break;
    case VALUE_NUMBERVALUE: name = "number_value"; break;
    case VALUE_STRINGVALUE: name = "string_value"; break;
    case VALUE_BOOLVALUE:   name = "bool_value";   break;
    case VALUE_STRUCTVALUE: name = "struct_value"; break;
    case VALUE_LISTVALUE:   name = "list_value";   break;
    default:                name = "";             break;
  }

  start_object(p);

  /* Name the member by capturing the chosen literal's bytes [begin, end). */
  start_member(p);
  capture_begin(p, name);
  capture_end(p, name + strlen(name));
  end_membername(p);
}
15110
/* Closes what start_value_object() opened: first the member, then the
 * object. */
static void end_value_object(upb_json_parser *p) {
  end_member(p);  /* the *_value member */
  end_object(p);  /* the enclosing object */
}
15115
/* Opens a synthetic object and starts a member named by the literal
 * "values".
 *
 * NOTE(review): the result of end_membername() is ignored here, although
 * parse() checks it with CHECK_RETURN_TOP. */
static void start_listvalue_object(upb_json_parser *p) {
  /* Static storage, so the captured range stays valid after we return. */
  static const char values_name[] = "values";

  start_object(p);

  /* Name the member by capturing the literal's bytes [begin, end). */
  start_member(p);
  capture_begin(p, values_name);
  capture_end(p, values_name + (sizeof(values_name) - 1));
  end_membername(p);
}
15127
/* Closes what start_listvalue_object() opened: first the member, then the
 * object. */
static void end_listvalue_object(upb_json_parser *p) {
  end_member(p);  /* the "values" member */
  end_object(p);  /* the enclosing object */
}
15132
/* Opens a synthetic object and starts a member named by the literal
 * "fields".
 *
 * NOTE(review): the result of end_membername() is ignored here, although
 * parse() checks it with CHECK_RETURN_TOP. */
static void start_structvalue_object(upb_json_parser *p) {
  /* Static storage, so the captured range stays valid after we return. */
  static const char fields_name[] = "fields";

  start_object(p);

  /* Name the member by capturing the literal's bytes [begin, end). */
  start_member(p);
  capture_begin(p, fields_name);
  capture_end(p, fields_name + (sizeof(fields_name) - 1));
  end_membername(p);
}
15144
/* Closes what start_structvalue_object() opened: first the member, then the
 * object. */
static void end_structvalue_object(upb_json_parser *p) {
  end_member(p);  /* the "fields" member */
  end_object(p);  /* the enclosing object */
}
15149
/* Returns true when the current frame is the bottom of the frame stack, has
 * no field selected, and is not flagged as an unknown field. */
static bool is_top_level(upb_json_parser *p) {
  if (p->top != p->stack) return false;
  if (p->top->f != NULL) return false;
  return !p->top->is_unknown_field;
}
15153
/* Returns true when the current frame has a message definition and that
 * definition's well-known type equals |type|. */
static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return upb_msgdef_wellknowntype(m) == type;
}
15157
/* Returns true when the current frame's field is set, is a submessage field,
 * and its submessage definition has well-known type |type|. */
static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL) return false;
  if (!upb_fielddef_issubmsg(f)) return false;
  return upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) == type;
}
15164
/* Returns true when the current frame's field is set, is a submessage field,
 * and upb_msgdef_isnumberwrapper() accepts its submessage definition. */
static bool does_number_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL) return false;
  if (!upb_fielddef_issubmsg(f)) return false;
  return upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(f));
}
15170
/* Returns true when the current frame has a message definition and
 * upb_msgdef_isnumberwrapper() accepts it. */
static bool does_number_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return upb_msgdef_isnumberwrapper(m);
}
15174
/* Returns true when the current frame's message definition is set and
 * upb_msgdef_isnumberwrapper() accepts it. */
static bool is_number_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return upb_msgdef_isnumberwrapper(m);
}
15178
/* Returns true when the current frame's field is set, is a submessage field,
 * and is_string_wrapper() accepts its submessage definition. */
static bool does_string_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL) return false;
  if (!upb_fielddef_issubmsg(f)) return false;
  return is_string_wrapper(upb_fielddef_msgsubdef(f));
}
15184
/* Returns true when the current frame has a message definition and
 * is_string_wrapper() accepts it. */
static bool does_string_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_string_wrapper(m);
}
15188
/* Returns true when the current frame's message definition is set and
 * is_string_wrapper() accepts it. */
static bool is_string_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_string_wrapper(m);
}
15192
/* Returns true when the current frame's field is set, is a submessage field,
 * and is_fieldmask() accepts its submessage definition. */
static bool does_fieldmask_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL) return false;
  if (!upb_fielddef_issubmsg(f)) return false;
  return is_fieldmask(upb_fielddef_msgsubdef(f));
}
15198
/* Returns true when the current frame has a message definition and
 * is_fieldmask() accepts it. */
static bool does_fieldmask_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_fieldmask(m);
}
15202
/* Returns true when the current frame's message definition is set and
 * is_fieldmask() accepts it. */
static bool is_fieldmask_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) return false;
  return is_fieldmask(m);
}
15206
/* Jumps to the enclosing function's `error` label when |x| evaluates to
 * false. The do/while(0) wrapper makes the expansion a single statement, so
 * an `else` written after an unbraced use binds to the caller's `if` rather
 * than to the `if` hidden inside the macro. Every use must end with ';'. */
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
15208
15209
15210 /* The actual parser **********************************************************/
15211
15212 /* What follows is the Ragel parser itself. The language is specified in Ragel
15213 * and the actions call our C functions above.
15214 *
15215 * Ragel has an extensive set of functionality, and we use only a small part of
15216 * it. There are many action types but we only use a few:
15217 *
15218 * ">" -- transition into a machine
15219 * "%" -- transition out of a machine
15220 * "@" -- transition into a final state of a machine.
15221 *
15222 * "@" transitions are tricky because a machine can transition into a final
15223 * state repeatedly. But in some cases we know this can't happen, for example
15224 * a string which is delimited by a final '"' can only transition into its
15225 * final state once, when the closing '"' is seen. */
15226
15227
15228 #line 2789 "upb/json/parser.rl"
15229
15230
15231
15232 #line 2592 "upb/json/parser.c"
/* Packed action lists for the generated machine. parse() enters this table at
 * an offset taken from _json_trans_actions[] or _json_eof_actions[]; the
 * element at that offset is a count, followed by that many action ids which
 * are dispatched through the `switch` statements in parse(). */
15233 static const char _json_actions[] = {
15234 0, 1, 0, 1, 1, 1, 3, 1,
15235 4, 1, 6, 1, 7, 1, 8, 1,
15236 9, 1, 11, 1, 12, 1, 13, 1,
15237 14, 1, 15, 1, 16, 1, 17, 1,
15238 18, 1, 19, 1, 20, 1, 22, 1,
15239 23, 1, 24, 1, 35, 1, 37, 1,
15240 39, 1, 40, 1, 42, 1, 43, 1,
15241 44, 1, 46, 1, 48, 1, 49, 1,
15242 50, 1, 51, 1, 53, 1, 54, 2,
15243 4, 9, 2, 5, 6, 2, 7, 3,
15244 2, 7, 9, 2, 21, 26, 2, 25,
15245 10, 2, 27, 28, 2, 29, 30, 2,
15246 32, 34, 2, 33, 31, 2, 38, 36,
15247 2, 40, 42, 2, 45, 2, 2, 46,
15248 54, 2, 47, 36, 2, 49, 54, 2,
15249 50, 54, 2, 51, 54, 2, 52, 41,
15250 2, 53, 54, 3, 32, 34, 35, 4,
15251 21, 26, 27, 28
15252 };
15253
/* Indexed by state: offset into _json_trans_keys[] at which that state's
 * keys begin (parse() computes `_json_trans_keys + _json_key_offsets[cs]`). */
15254 static const short _json_key_offsets[] = {
15255 0, 0, 12, 13, 18, 23, 28, 29,
15256 30, 31, 32, 33, 34, 35, 36, 37,
15257 38, 43, 44, 48, 53, 58, 63, 67,
15258 71, 74, 77, 79, 83, 87, 89, 91,
15259 96, 98, 100, 109, 115, 121, 127, 133,
15260 135, 139, 142, 144, 146, 149, 150, 154,
15261 156, 158, 160, 162, 163, 165, 167, 168,
15262 170, 172, 173, 175, 177, 178, 180, 182,
15263 183, 185, 187, 191, 193, 195, 196, 197,
15264 198, 199, 201, 206, 208, 210, 212, 221,
15265 222, 222, 222, 227, 232, 237, 238, 239,
15266 240, 241, 241, 242, 243, 244, 244, 245,
15267 246, 247, 247, 252, 253, 257, 262, 267,
15268 272, 276, 276, 279, 282, 285, 288, 291,
15269 294, 294, 294, 294, 294, 294
15270 };
15271
/* Transition keys (input byte values) for every state, concatenated. For each
 * state parse() first binary-searches _json_single_lengths[cs] single keys,
 * then _json_range_lengths[cs] (low, high) pairs that follow them. */
15272 static const char _json_trans_keys[] = {
15273 32, 34, 45, 91, 102, 110, 116, 123,
15274 9, 13, 48, 57, 34, 32, 93, 125,
15275 9, 13, 32, 44, 93, 9, 13, 32,
15276 93, 125, 9, 13, 97, 108, 115, 101,
15277 117, 108, 108, 114, 117, 101, 32, 34,
15278 125, 9, 13, 34, 32, 58, 9, 13,
15279 32, 93, 125, 9, 13, 32, 44, 125,
15280 9, 13, 32, 44, 125, 9, 13, 32,
15281 34, 9, 13, 45, 48, 49, 57, 48,
15282 49, 57, 46, 69, 101, 48, 57, 69,
15283 101, 48, 57, 43, 45, 48, 57, 48,
15284 57, 48, 57, 46, 69, 101, 48, 57,
15285 34, 92, 34, 92, 34, 47, 92, 98,
15286 102, 110, 114, 116, 117, 48, 57, 65,
15287 70, 97, 102, 48, 57, 65, 70, 97,
15288 102, 48, 57, 65, 70, 97, 102, 48,
15289 57, 65, 70, 97, 102, 34, 92, 45,
15290 48, 49, 57, 48, 49, 57, 46, 115,
15291 48, 57, 115, 48, 57, 34, 46, 115,
15292 48, 57, 48, 57, 48, 57, 48, 57,
15293 48, 57, 45, 48, 57, 48, 57, 45,
15294 48, 57, 48, 57, 84, 48, 57, 48,
15295 57, 58, 48, 57, 48, 57, 58, 48,
15296 57, 48, 57, 43, 45, 46, 90, 48,
15297 57, 48, 57, 58, 48, 48, 34, 48,
15298 57, 43, 45, 90, 48, 57, 34, 44,
15299 34, 44, 34, 44, 34, 45, 91, 102,
15300 110, 116, 123, 48, 57, 34, 32, 93,
15301 125, 9, 13, 32, 44, 93, 9, 13,
15302 32, 93, 125, 9, 13, 97, 108, 115,
15303 101, 117, 108, 108, 114, 117, 101, 32,
15304 34, 125, 9, 13, 34, 32, 58, 9,
15305 13, 32, 93, 125, 9, 13, 32, 44,
15306 125, 9, 13, 32, 44, 125, 9, 13,
15307 32, 34, 9, 13, 32, 9, 13, 32,
15308 9, 13, 32, 9, 13, 32, 9, 13,
15309 32, 9, 13, 32, 9, 13, 0
15310 };
15311
/* Indexed by state: number of single-byte keys that state has in
 * _json_trans_keys[] (the bound of the first binary search in parse()). */
15312 static const char _json_single_lengths[] = {
15313 0, 8, 1, 3, 3, 3, 1, 1,
15314 1, 1, 1, 1, 1, 1, 1, 1,
15315 3, 1, 2, 3, 3, 3, 2, 2,
15316 1, 3, 0, 2, 2, 0, 0, 3,
15317 2, 2, 9, 0, 0, 0, 0, 2,
15318 2, 1, 2, 0, 1, 1, 2, 0,
15319 0, 0, 0, 1, 0, 0, 1, 0,
15320 0, 1, 0, 0, 1, 0, 0, 1,
15321 0, 0, 4, 0, 0, 1, 1, 1,
15322 1, 0, 3, 2, 2, 2, 7, 1,
15323 0, 0, 3, 3, 3, 1, 1, 1,
15324 1, 0, 1, 1, 1, 0, 1, 1,
15325 1, 0, 3, 1, 2, 3, 3, 3,
15326 2, 0, 1, 1, 1, 1, 1, 1,
15327 0, 0, 0, 0, 0, 0
15328 };
15329
/* Indexed by state: number of (low, high) key ranges that state has in
 * _json_trans_keys[]; parse() doubles it to get the number of key bytes. */
15330 static const char _json_range_lengths[] = {
15331 0, 2, 0, 1, 1, 1, 0, 0,
15332 0, 0, 0, 0, 0, 0, 0, 0,
15333 1, 0, 1, 1, 1, 1, 1, 1,
15334 1, 0, 1, 1, 1, 1, 1, 1,
15335 0, 0, 0, 3, 3, 3, 3, 0,
15336 1, 1, 0, 1, 1, 0, 1, 1,
15337 1, 1, 1, 0, 1, 1, 0, 1,
15338 1, 0, 1, 1, 0, 1, 1, 0,
15339 1, 1, 0, 1, 1, 0, 0, 0,
15340 0, 1, 1, 0, 0, 0, 1, 0,
15341 0, 0, 1, 1, 1, 0, 0, 0,
15342 0, 0, 0, 0, 0, 0, 0, 0,
15343 0, 0, 1, 0, 1, 1, 1, 1,
15344 1, 0, 1, 1, 1, 1, 1, 1,
15345 0, 0, 0, 0, 0, 0
15346 };
15347
/* Indexed by state: base position of that state's entries in
 * _json_indicies[]. parse() adds the position of the matched key (or the
 * total key count when nothing matched) to this base. */
15348 static const short _json_index_offsets[] = {
15349 0, 0, 11, 13, 18, 23, 28, 30,
15350 32, 34, 36, 38, 40, 42, 44, 46,
15351 48, 53, 55, 59, 64, 69, 74, 78,
15352 82, 85, 89, 91, 95, 99, 101, 103,
15353 108, 111, 114, 124, 128, 132, 136, 140,
15354 143, 147, 150, 153, 155, 158, 160, 164,
15355 166, 168, 170, 172, 174, 176, 178, 180,
15356 182, 184, 186, 188, 190, 192, 194, 196,
15357 198, 200, 202, 207, 209, 211, 213, 215,
15358 217, 219, 221, 226, 229, 232, 235, 244,
15359 246, 247, 248, 253, 258, 263, 265, 267,
15360 269, 271, 272, 274, 276, 278, 279, 281,
15361 283, 285, 286, 291, 293, 297, 302, 307,
15362 312, 316, 317, 320, 323, 326, 329, 332,
15363 335, 336, 337, 338, 339, 340
15364 };
15365
/* Maps a per-state key slot (see _json_index_offsets[]) to a transition id,
 * which parse() then uses to index _json_trans_targs[] and
 * _json_trans_actions[]. */
15366 static const unsigned char _json_indicies[] = {
15367 0, 2, 3, 4, 5, 6, 7, 8,
15368 0, 3, 1, 9, 1, 11, 12, 1,
15369 11, 10, 13, 14, 12, 13, 1, 14,
15370 1, 1, 14, 10, 15, 1, 16, 1,
15371 17, 1, 18, 1, 19, 1, 20, 1,
15372 21, 1, 22, 1, 23, 1, 24, 1,
15373 25, 26, 27, 25, 1, 28, 1, 29,
15374 30, 29, 1, 30, 1, 1, 30, 31,
15375 32, 33, 34, 32, 1, 35, 36, 27,
15376 35, 1, 36, 26, 36, 1, 37, 38,
15377 39, 1, 38, 39, 1, 41, 42, 42,
15378 40, 43, 1, 42, 42, 43, 40, 44,
15379 44, 45, 1, 45, 1, 45, 40, 41,
15380 42, 42, 39, 40, 47, 48, 46, 50,
15381 51, 49, 52, 52, 52, 52, 52, 52,
15382 52, 52, 53, 1, 54, 54, 54, 1,
15383 55, 55, 55, 1, 56, 56, 56, 1,
15384 57, 57, 57, 1, 59, 60, 58, 61,
15385 62, 63, 1, 64, 65, 1, 66, 67,
15386 1, 68, 1, 67, 68, 1, 69, 1,
15387 66, 67, 65, 1, 70, 1, 71, 1,
15388 72, 1, 73, 1, 74, 1, 75, 1,
15389 76, 1, 77, 1, 78, 1, 79, 1,
15390 80, 1, 81, 1, 82, 1, 83, 1,
15391 84, 1, 85, 1, 86, 1, 87, 1,
15392 88, 1, 89, 89, 90, 91, 1, 92,
15393 1, 93, 1, 94, 1, 95, 1, 96,
15394 1, 97, 1, 98, 1, 99, 99, 100,
15395 98, 1, 102, 1, 101, 104, 105, 103,
15396 1, 1, 101, 106, 107, 108, 109, 110,
15397 111, 112, 107, 1, 113, 1, 114, 115,
15398 117, 118, 1, 117, 116, 119, 120, 118,
15399 119, 1, 120, 1, 1, 120, 116, 121,
15400 1, 122, 1, 123, 1, 124, 1, 125,
15401 126, 1, 127, 1, 128, 1, 129, 130,
15402 1, 131, 1, 132, 1, 133, 134, 135,
15403 136, 134, 1, 137, 1, 138, 139, 138,
15404 1, 139, 1, 1, 139, 140, 141, 142,
15405 143, 141, 1, 144, 145, 136, 144, 1,
15406 145, 135, 145, 1, 146, 147, 147, 1,
15407 148, 148, 1, 149, 149, 1, 150, 150,
15408 1, 151, 151, 1, 152, 152, 1, 1,
15409 1, 1, 1, 1, 1, 0
15410 };
15411
/* Indexed by transition id: the state parse() moves to when it takes that
 * transition. parse() stops the machine when the state becomes 0. */
15412 static const char _json_trans_targs[] = {
15413 1, 0, 2, 107, 3, 6, 10, 13,
15414 16, 106, 4, 3, 106, 4, 5, 7,
15415 8, 9, 108, 11, 12, 109, 14, 15,
15416 110, 16, 17, 111, 18, 18, 19, 20,
15417 21, 22, 111, 21, 22, 24, 25, 31,
15418 112, 26, 28, 27, 29, 30, 33, 113,
15419 34, 33, 113, 34, 32, 35, 36, 37,
15420 38, 39, 33, 113, 34, 41, 42, 46,
15421 42, 46, 43, 45, 44, 114, 48, 49,
15422 50, 51, 52, 53, 54, 55, 56, 57,
15423 58, 59, 60, 61, 62, 63, 64, 65,
15424 66, 67, 73, 72, 68, 69, 70, 71,
15425 72, 115, 74, 67, 72, 76, 116, 76,
15426 116, 77, 79, 81, 82, 85, 90, 94,
15427 98, 80, 117, 117, 83, 82, 80, 83,
15428 84, 86, 87, 88, 89, 117, 91, 92,
15429 93, 117, 95, 96, 97, 117, 98, 99,
15430 105, 100, 100, 101, 102, 103, 104, 105,
15431 103, 104, 117, 106, 106, 106, 106, 106,
15432 106
15433 };
15434
/* Indexed by transition id: offset into _json_actions[] of the action list to
 * run when that transition is taken. An entry of 0 means no actions. */
15435 static const unsigned char _json_trans_actions[] = {
15436 0, 0, 113, 107, 53, 0, 0, 0,
15437 125, 59, 45, 0, 55, 0, 0, 0,
15438 0, 0, 0, 0, 0, 0, 0, 0,
15439 0, 0, 101, 51, 47, 0, 0, 45,
15440 49, 49, 104, 0, 0, 0, 0, 0,
15441 3, 0, 0, 0, 0, 0, 5, 15,
15442 0, 0, 71, 7, 13, 0, 74, 9,
15443 9, 9, 77, 80, 11, 37, 37, 37,
15444 0, 0, 0, 39, 0, 41, 86, 0,
15445 0, 0, 17, 19, 0, 21, 23, 0,
15446 25, 27, 0, 29, 31, 0, 33, 35,
15447 0, 135, 83, 135, 0, 0, 0, 0,
15448 0, 92, 0, 89, 89, 98, 43, 0,
15449 131, 95, 113, 107, 53, 0, 0, 0,
15450 125, 59, 69, 110, 45, 0, 55, 0,
15451 0, 0, 0, 0, 0, 119, 0, 0,
15452 0, 122, 0, 0, 0, 116, 0, 101,
15453 51, 47, 0, 0, 45, 49, 49, 104,
15454 0, 0, 128, 0, 57, 63, 65, 61,
15455 67
15456 };
15457
/* Indexed by state: offset into _json_actions[] of the action list parse()
 * runs when it reaches end of input (p == eof) in that state. Offset 0 holds
 * a count of 0, i.e. no actions. */
15458 static const unsigned char _json_eof_actions[] = {
15459 0, 0, 0, 0, 0, 0, 0, 0,
15460 0, 0, 0, 0, 0, 0, 0, 0,
15461 0, 0, 0, 0, 0, 0, 0, 0,
15462 0, 1, 0, 1, 0, 0, 1, 1,
15463 0, 0, 0, 0, 0, 0, 0, 0,
15464 0, 0, 0, 0, 0, 0, 0, 0,
15465 0, 0, 0, 0, 0, 0, 0, 0,
15466 0, 0, 0, 0, 0, 0, 0, 0,
15467 0, 0, 0, 0, 0, 0, 0, 0,
15468 0, 0, 0, 0, 0, 0, 0, 0,
15469 0, 0, 0, 0, 0, 0, 0, 0,
15470 0, 0, 0, 0, 0, 0, 0, 0,
15471 0, 0, 0, 0, 0, 0, 0, 0,
15472 0, 0, 0, 57, 63, 65, 61, 67,
15473 0, 0, 0, 0, 0, 0
15474 };
15475
/* State numbers of the generated machine. json_start is the state that
 * json_parser_reset() installs. The json_en_* values equal the literal states
 * parse() jumps to when it calls a sub-machine (23, 32, 40, 47, 75, 78);
 * parse() uses the literals, so these names exist only for reference and are
 * marked UPB_UNUSED in end(). */
15476 static const int json_start = 1;
15477
15478 static const int json_en_number_machine = 23;
15479 static const int json_en_string_machine = 32;
15480 static const int json_en_duration_machine = 40;
15481 static const int json_en_timestamp_machine = 47;
15482 static const int json_en_fieldmask_machine = 75;
15483 static const int json_en_value_machine = 78;
15484 static const int json_en_main = 1;
15485
15486
15487 #line 2792 "upb/json/parser.rl"
15488
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)15489 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
15490 const upb_bufhandle *handle) {
15491 upb_json_parser *parser = closure;
15492
15493 /* Variables used by Ragel's generated code. */
15494 int cs = parser->current_state;
15495 int *stack = parser->parser_stack;
15496 int top = parser->parser_top;
15497
15498 const char *p = buf;
15499 const char *pe = buf + size;
15500 const char *eof = &eof_ch;
15501
15502 parser->handle = handle;
15503
15504 UPB_UNUSED(hd);
15505 UPB_UNUSED(handle);
15506
15507 capture_resume(parser, buf);
15508
15509
15510 #line 2870 "upb/json/parser.c"
15511 {
15512 int _klen;
15513 unsigned int _trans;
15514 const char *_acts;
15515 unsigned int _nacts;
15516 const char *_keys;
15517
15518 if ( p == pe )
15519 goto _test_eof;
15520 if ( cs == 0 )
15521 goto _out;
15522 _resume:
15523 _keys = _json_trans_keys + _json_key_offsets[cs];
15524 _trans = _json_index_offsets[cs];
15525
15526 _klen = _json_single_lengths[cs];
15527 if ( _klen > 0 ) {
15528 const char *_lower = _keys;
15529 const char *_mid;
15530 const char *_upper = _keys + _klen - 1;
15531 while (1) {
15532 if ( _upper < _lower )
15533 break;
15534
15535 _mid = _lower + ((_upper-_lower) >> 1);
15536 if ( (*p) < *_mid )
15537 _upper = _mid - 1;
15538 else if ( (*p) > *_mid )
15539 _lower = _mid + 1;
15540 else {
15541 _trans += (unsigned int)(_mid - _keys);
15542 goto _match;
15543 }
15544 }
15545 _keys += _klen;
15546 _trans += _klen;
15547 }
15548
15549 _klen = _json_range_lengths[cs];
15550 if ( _klen > 0 ) {
15551 const char *_lower = _keys;
15552 const char *_mid;
15553 const char *_upper = _keys + (_klen<<1) - 2;
15554 while (1) {
15555 if ( _upper < _lower )
15556 break;
15557
15558 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
15559 if ( (*p) < _mid[0] )
15560 _upper = _mid - 2;
15561 else if ( (*p) > _mid[1] )
15562 _lower = _mid + 2;
15563 else {
15564 _trans += (unsigned int)((_mid - _keys)>>1);
15565 goto _match;
15566 }
15567 }
15568 _trans += _klen;
15569 }
15570
15571 _match:
15572 _trans = _json_indicies[_trans];
15573 cs = _json_trans_targs[_trans];
15574
15575 if ( _json_trans_actions[_trans] == 0 )
15576 goto _again;
15577
15578 _acts = _json_actions + _json_trans_actions[_trans];
15579 _nacts = (unsigned int) *_acts++;
15580 while ( _nacts-- > 0 )
15581 {
15582 switch ( *_acts++ )
15583 {
15584 case 1:
15585 #line 2597 "upb/json/parser.rl"
15586 { p--; {cs = stack[--top]; goto _again;} }
15587 break;
15588 case 2:
15589 #line 2599 "upb/json/parser.rl"
15590 { p--; {stack[top++] = cs; cs = 23;goto _again;} }
15591 break;
15592 case 3:
15593 #line 2603 "upb/json/parser.rl"
15594 { start_text(parser, p); }
15595 break;
15596 case 4:
15597 #line 2604 "upb/json/parser.rl"
15598 { CHECK_RETURN_TOP(end_text(parser, p)); }
15599 break;
15600 case 5:
15601 #line 2610 "upb/json/parser.rl"
15602 { start_hex(parser); }
15603 break;
15604 case 6:
15605 #line 2611 "upb/json/parser.rl"
15606 { hexdigit(parser, p); }
15607 break;
15608 case 7:
15609 #line 2612 "upb/json/parser.rl"
15610 { CHECK_RETURN_TOP(end_hex(parser)); }
15611 break;
15612 case 8:
15613 #line 2618 "upb/json/parser.rl"
15614 { CHECK_RETURN_TOP(escape(parser, p)); }
15615 break;
15616 case 9:
15617 #line 2624 "upb/json/parser.rl"
15618 { p--; {cs = stack[--top]; goto _again;} }
15619 break;
15620 case 10:
15621 #line 2629 "upb/json/parser.rl"
15622 { start_year(parser, p); }
15623 break;
15624 case 11:
15625 #line 2630 "upb/json/parser.rl"
15626 { CHECK_RETURN_TOP(end_year(parser, p)); }
15627 break;
15628 case 12:
15629 #line 2634 "upb/json/parser.rl"
15630 { start_month(parser, p); }
15631 break;
15632 case 13:
15633 #line 2635 "upb/json/parser.rl"
15634 { CHECK_RETURN_TOP(end_month(parser, p)); }
15635 break;
15636 case 14:
15637 #line 2639 "upb/json/parser.rl"
15638 { start_day(parser, p); }
15639 break;
15640 case 15:
15641 #line 2640 "upb/json/parser.rl"
15642 { CHECK_RETURN_TOP(end_day(parser, p)); }
15643 break;
15644 case 16:
15645 #line 2644 "upb/json/parser.rl"
15646 { start_hour(parser, p); }
15647 break;
15648 case 17:
15649 #line 2645 "upb/json/parser.rl"
15650 { CHECK_RETURN_TOP(end_hour(parser, p)); }
15651 break;
15652 case 18:
15653 #line 2649 "upb/json/parser.rl"
15654 { start_minute(parser, p); }
15655 break;
15656 case 19:
15657 #line 2650 "upb/json/parser.rl"
15658 { CHECK_RETURN_TOP(end_minute(parser, p)); }
15659 break;
15660 case 20:
15661 #line 2654 "upb/json/parser.rl"
15662 { start_second(parser, p); }
15663 break;
15664 case 21:
15665 #line 2655 "upb/json/parser.rl"
15666 { CHECK_RETURN_TOP(end_second(parser, p)); }
15667 break;
15668 case 22:
15669 #line 2660 "upb/json/parser.rl"
15670 { start_duration_base(parser, p); }
15671 break;
15672 case 23:
15673 #line 2661 "upb/json/parser.rl"
15674 { CHECK_RETURN_TOP(end_duration_base(parser, p)); }
15675 break;
15676 case 24:
15677 #line 2663 "upb/json/parser.rl"
15678 { p--; {cs = stack[--top]; goto _again;} }
15679 break;
15680 case 25:
15681 #line 2668 "upb/json/parser.rl"
15682 { start_timestamp_base(parser); }
15683 break;
15684 case 26:
15685 #line 2670 "upb/json/parser.rl"
15686 { start_timestamp_fraction(parser, p); }
15687 break;
15688 case 27:
15689 #line 2671 "upb/json/parser.rl"
15690 { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); }
15691 break;
15692 case 28:
15693 #line 2673 "upb/json/parser.rl"
15694 { start_timestamp_zone(parser, p); }
15695 break;
15696 case 29:
15697 #line 2674 "upb/json/parser.rl"
15698 { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); }
15699 break;
15700 case 30:
15701 #line 2676 "upb/json/parser.rl"
15702 { p--; {cs = stack[--top]; goto _again;} }
15703 break;
15704 case 31:
15705 #line 2681 "upb/json/parser.rl"
15706 { start_fieldmask_path_text(parser, p); }
15707 break;
15708 case 32:
15709 #line 2682 "upb/json/parser.rl"
15710 { end_fieldmask_path_text(parser, p); }
15711 break;
15712 case 33:
15713 #line 2687 "upb/json/parser.rl"
15714 { start_fieldmask_path(parser); }
15715 break;
15716 case 34:
15717 #line 2688 "upb/json/parser.rl"
15718 { end_fieldmask_path(parser); }
15719 break;
15720 case 35:
15721 #line 2694 "upb/json/parser.rl"
15722 { p--; {cs = stack[--top]; goto _again;} }
15723 break;
15724 case 36:
15725 #line 2699 "upb/json/parser.rl"
15726 {
15727 if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) {
15728 {stack[top++] = cs; cs = 47;goto _again;}
15729 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) {
15730 {stack[top++] = cs; cs = 40;goto _again;}
15731 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) {
15732 {stack[top++] = cs; cs = 75;goto _again;}
15733 } else {
15734 {stack[top++] = cs; cs = 32;goto _again;}
15735 }
15736 }
15737 break;
15738 case 37:
15739 #line 2712 "upb/json/parser.rl"
15740 { p--; {stack[top++] = cs; cs = 78;goto _again;} }
15741 break;
15742 case 38:
15743 #line 2717 "upb/json/parser.rl"
15744 {
15745 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
15746 start_any_member(parser, p);
15747 } else {
15748 start_member(parser);
15749 }
15750 }
15751 break;
15752 case 39:
15753 #line 2724 "upb/json/parser.rl"
15754 { CHECK_RETURN_TOP(end_membername(parser)); }
15755 break;
15756 case 40:
15757 #line 2727 "upb/json/parser.rl"
15758 {
15759 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
15760 end_any_member(parser, p);
15761 } else {
15762 end_member(parser);
15763 }
15764 }
15765 break;
15766 case 41:
15767 #line 2738 "upb/json/parser.rl"
15768 {
15769 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
15770 start_any_object(parser, p);
15771 } else {
15772 start_object(parser);
15773 }
15774 }
15775 break;
15776 case 42:
15777 #line 2747 "upb/json/parser.rl"
15778 {
15779 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
15780 CHECK_RETURN_TOP(end_any_object(parser, p));
15781 } else {
15782 end_object(parser);
15783 }
15784 }
15785 break;
15786 case 43:
15787 #line 2759 "upb/json/parser.rl"
15788 { CHECK_RETURN_TOP(start_array(parser)); }
15789 break;
15790 case 44:
15791 #line 2763 "upb/json/parser.rl"
15792 { end_array(parser); }
15793 break;
15794 case 45:
15795 #line 2768 "upb/json/parser.rl"
15796 { CHECK_RETURN_TOP(start_number(parser, p)); }
15797 break;
15798 case 46:
15799 #line 2769 "upb/json/parser.rl"
15800 { CHECK_RETURN_TOP(end_number(parser, p)); }
15801 break;
15802 case 47:
15803 #line 2771 "upb/json/parser.rl"
15804 { CHECK_RETURN_TOP(start_stringval(parser)); }
15805 break;
15806 case 48:
15807 #line 2772 "upb/json/parser.rl"
15808 { CHECK_RETURN_TOP(end_stringval(parser)); }
15809 break;
15810 case 49:
15811 #line 2774 "upb/json/parser.rl"
15812 { CHECK_RETURN_TOP(end_bool(parser, true)); }
15813 break;
15814 case 50:
15815 #line 2776 "upb/json/parser.rl"
15816 { CHECK_RETURN_TOP(end_bool(parser, false)); }
15817 break;
15818 case 51:
15819 #line 2778 "upb/json/parser.rl"
15820 { CHECK_RETURN_TOP(end_null(parser)); }
15821 break;
15822 case 52:
15823 #line 2780 "upb/json/parser.rl"
15824 { CHECK_RETURN_TOP(start_subobject_full(parser)); }
15825 break;
15826 case 53:
15827 #line 2781 "upb/json/parser.rl"
15828 { end_subobject_full(parser); }
15829 break;
15830 case 54:
15831 #line 2786 "upb/json/parser.rl"
15832 { p--; {cs = stack[--top]; goto _again;} }
15833 break;
15834 #line 3194 "upb/json/parser.c"
15835 }
15836 }
15837
15838 _again:
15839 if ( cs == 0 )
15840 goto _out;
15841 if ( ++p != pe )
15842 goto _resume;
15843 _test_eof: {}
15844 if ( p == eof )
15845 {
15846 const char *__acts = _json_actions + _json_eof_actions[cs];
15847 unsigned int __nacts = (unsigned int) *__acts++;
15848 while ( __nacts-- > 0 ) {
15849 switch ( *__acts++ ) {
15850 case 0:
15851 #line 2595 "upb/json/parser.rl"
15852 { p--; {cs = stack[--top]; if ( p == pe )
15853 goto _test_eof;
15854 goto _again;} }
15855 break;
15856 case 46:
15857 #line 2769 "upb/json/parser.rl"
15858 { CHECK_RETURN_TOP(end_number(parser, p)); }
15859 break;
15860 case 49:
15861 #line 2774 "upb/json/parser.rl"
15862 { CHECK_RETURN_TOP(end_bool(parser, true)); }
15863 break;
15864 case 50:
15865 #line 2776 "upb/json/parser.rl"
15866 { CHECK_RETURN_TOP(end_bool(parser, false)); }
15867 break;
15868 case 51:
15869 #line 2778 "upb/json/parser.rl"
15870 { CHECK_RETURN_TOP(end_null(parser)); }
15871 break;
15872 case 53:
15873 #line 2781 "upb/json/parser.rl"
15874 { end_subobject_full(parser); }
15875 break;
15876 #line 3236 "upb/json/parser.c"
15877 }
15878 }
15879 }
15880
15881 _out: {}
15882 }
15883
15884 #line 2814 "upb/json/parser.rl"
15885
15886 if (p != pe) {
15887 upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p);
15888 upb_env_reporterror(parser->env, &parser->status);
15889 } else {
15890 capture_suspend(parser, &p);
15891 }
15892
15893 error:
15894 /* Save parsing state back to parser. */
15895 parser->current_state = cs;
15896 parser->parser_top = top;
15897
15898 return p - buf;
15899 }
15900
end(void * closure,const void * hd)15901 static bool end(void *closure, const void *hd) {
15902 upb_json_parser *parser = closure;
15903
15904 /* Prevent compile warning on unused static constants. */
15905 UPB_UNUSED(json_start);
15906 UPB_UNUSED(json_en_duration_machine);
15907 UPB_UNUSED(json_en_fieldmask_machine);
15908 UPB_UNUSED(json_en_number_machine);
15909 UPB_UNUSED(json_en_string_machine);
15910 UPB_UNUSED(json_en_timestamp_machine);
15911 UPB_UNUSED(json_en_value_machine);
15912 UPB_UNUSED(json_en_main);
15913
15914 parse(parser, hd, &eof_ch, 0, NULL);
15915
15916 return parser->current_state >= 106;
15917 }
15918
/* Puts |p| back into its initial parsing state: the frame stack is rewound to
 * its first frame (which is re-initialised), the generated machine is set to
 * json_start with an empty call stack, accumulated/captured text is dropped,
 * and the status is cleared. */
static void json_parser_reset(upb_json_parser *p) {
  int initial_state;
  int initial_top;

  p->top = p->stack;
  init_frame(p->top);

  /* Ragel's initialization of the machine variables. */

#line 3288 "upb/json/parser.c"
  {
  initial_state = json_start;
  initial_top = 0;
  }

#line 2857 "upb/json/parser.rl"
  p->current_state = initial_state;
  p->parser_top = initial_top;

  accumulate_clear(p);
  p->multipart_state = MULTIPART_INACTIVE;
  p->capture = NULL;
  p->accumulated = NULL;

  upb_status_clear(&p->status);
}
15943
/* Refcount visitor for a upb_json_parsermethod: calls |visit| once, for the
 * method's message definition. */
static void visit_json_parsermethod(const upb_refcounted *r,
                                    upb_refcounted_visit *visit,
                                    void *closure) {
  const upb_json_parsermethod *self = (const upb_json_parsermethod*)r;

  visit(r, upb_msgdef_upcast2(self->msg), closure);
}
15950
/* Destroys a upb_json_parsermethod: uninitialises and frees every string
 * table stored in name_tables, then the table of tables, then |r| itself. */
static void free_json_parsermethod(upb_refcounted *r) {
  upb_json_parsermethod *self = (upb_json_parsermethod*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &self->name_tables);
  while (!upb_inttable_done(&it)) {
    /* Each value is a upb_strtable allocated with upb_gmalloc() in
     * add_jsonname_table(). */
    upb_strtable *names = upb_value_getptr(upb_inttable_iter_value(&it));

    upb_strtable_uninit(names);
    upb_gfree(names);

    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&self->name_tables);

  upb_gfree(r);
}
15967
/* Builds the name -> field lookup table for message |md| and stores it in
 * m->name_tables keyed by |md|. Every field is entered under its JSON name
 * and, when that differs from the field's regular name, under the regular
 * name too. Recurses into the message types of submessage fields; a message
 * that already has a table is skipped, which also ends the recursion for
 * self-referential types. */
static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
  upb_msg_field_iter it;
  upb_strtable *names;

  /* Heap scratch buffer for JSON names: protobuf puts no reasonable upper
   * bound on field-name length, so a fixed stack buffer is not an option. */
  char *name_buf = NULL;
  size_t name_cap = 0;

  if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) {
    return;
  }

  /* TODO(haberman): handle malloc failure. */
  names = upb_gmalloc(sizeof(*names));
  upb_strtable_init(names, UPB_CTYPE_CONSTPTR);
  upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(names));

  upb_msg_field_begin(&it, md);
  while (!upb_msg_field_done(&it)) {
    const upb_fielddef *f = upb_msg_iter_field(&it);
    const char *raw_name;

    /* Fetch the JSON name; grow the buffer and fetch again if it did not
     * fit. */
    size_t needed = upb_fielddef_getjsonname(f, name_buf, name_cap);
    if (needed > name_cap) {
      size_t written;
      name_buf = upb_grealloc(name_buf, 0, needed);
      name_cap = needed;
      written = upb_fielddef_getjsonname(f, name_buf, name_cap);
      UPB_ASSERT(name_cap == written);
    }
    upb_strtable_insert(names, name_buf, upb_value_constptr(f));

    raw_name = upb_fielddef_name(f);
    if (strcmp(name_buf, raw_name) != 0) {
      /* The JSON name differs from the regular field name, so register the
       * raw name as well (compliant proto3 JSON parsers must accept both). */
      upb_strtable_insert(names, raw_name, upb_value_constptr(f));
    }

    if (upb_fielddef_issubmsg(f)) {
      add_jsonname_table(m, upb_fielddef_msgsubdef(f));
    }

    upb_msg_field_next(&it);
  }

  upb_gfree(name_buf);
}
16016
16017 /* Public API *****************************************************************/
16018
upb_json_parser_create(upb_env * env,const upb_json_parsermethod * method,const upb_symtab * symtab,upb_sink * output,bool ignore_json_unknown)16019 upb_json_parser *upb_json_parser_create(upb_env *env,
16020 const upb_json_parsermethod *method,
16021 const upb_symtab* symtab,
16022 upb_sink *output,
16023 bool ignore_json_unknown) {
16024 #ifndef NDEBUG
16025 const size_t size_before = upb_env_bytesallocated(env);
16026 #endif
16027 upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
16028 if (!p) return false;
16029
16030 p->env = env;
16031 p->method = method;
16032 p->limit = p->stack + UPB_JSON_MAX_DEPTH;
16033 p->accumulate_buf = NULL;
16034 p->accumulate_buf_size = 0;
16035 upb_bytessink_reset(&p->input_, &method->input_handler_, p);
16036
16037 json_parser_reset(p);
16038 upb_sink_reset(&p->top->sink, output->handlers, output->closure);
16039 p->top->m = upb_handlers_msgdef(output->handlers);
16040 if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
16041 p->top->is_any = true;
16042 p->top->any_frame =
16043 upb_env_malloc(p->env, sizeof(upb_jsonparser_any_frame));
16044 json_parser_any_frame_reset(p->top->any_frame);
16045 } else {
16046 p->top->is_any = false;
16047 p->top->any_frame = NULL;
16048 }
16049 set_name_table(p, p->top);
16050 p->symtab = symtab;
16051
16052 p->ignore_json_unknown = ignore_json_unknown;
16053
16054 /* If this fails, uncomment and increase the value in parser.h. */
16055 /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
16056 UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <=
16057 UPB_JSON_PARSER_SIZE);
16058 return p;
16059 }
16060
upb_json_parser_input(upb_json_parser * p)16061 upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
16062 return &p->input_;
16063 }
16064
upb_json_parsermethod_new(const upb_msgdef * md,const void * owner)16065 upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md,
16066 const void* owner) {
16067 static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod,
16068 free_json_parsermethod};
16069 upb_json_parsermethod *ret = upb_gmalloc(sizeof(*ret));
16070 upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner);
16071
16072 ret->msg = md;
16073 upb_ref2(md, ret);
16074
16075 upb_byteshandler_init(&ret->input_handler_);
16076 upb_byteshandler_setstring(&ret->input_handler_, parse, ret);
16077 upb_byteshandler_setendstr(&ret->input_handler_, end, ret);
16078
16079 upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR);
16080
16081 add_jsonname_table(ret, md);
16082
16083 return ret;
16084 }
16085
upb_json_parsermethod_inputhandler(const upb_json_parsermethod * m)16086 const upb_byteshandler *upb_json_parsermethod_inputhandler(
16087 const upb_json_parsermethod *m) {
16088 return &m->input_handler_;
16089 }
16090 /*
16091 ** This currently uses snprintf() to format primitives, and could be optimized
16092 ** further.
16093 */
16094
16095
16096 #include <string.h>
16097 #include <stdint.h>
16098 #include <time.h>
16099
/* State of one JSON printer: where output bytes go, the current nesting
 * depth, per-depth comma bookkeeping, and cached Timestamp components. */
struct upb_json_printer {
  /* NOTE(review): presumably the sink through which message events reach the
   * printer; it is not referenced in this part of the file -- confirm. */
  upb_sink input_;
  /* BytesSink closure. */
  void *subc_;
  /* Destination for all printed bytes (see print_data). */
  upb_bytessink *output_;

  /* We track the depth so that we know when to emit startstr/endstr on the
   * output. */
  int depth_;

  /* Have we emitted the first element? This state is necessary to emit commas
   * without leaving a trailing comma in arrays/maps. We keep this state per
   * frame depth.
   *
   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
   * We count frames (contexts in which we separate elements by commas) as both
   * repeated fields and messages (maps), and the worst case is a
   * message->repeated field->submessage->repeated field->... nesting. */
  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];

  /* To print timestamp, printer needs to cache its seconds and nanos values
   * and convert them when ending timestamp message. See comments of
   * printer_sethandlers_timestamp for more detail. */
  int64_t seconds;
  int32_t nanos;
};
16126
/* StringPiece; a pointer plus a length.  Instances created by newstrpc() and
 * newstrpc_str() own |ptr| and are released by freestrpc(); |len| excludes the
 * terminating NUL. */
typedef struct {
  char *ptr;
  size_t len;
} strpc;
16132
freestrpc(void * ptr)16133 void freestrpc(void *ptr) {
16134 strpc *pc = ptr;
16135 upb_gfree(pc->ptr);
16136 upb_gfree(pc);
16137 }
16138
16139 /* Convert fielddef name to JSON name and return as a string piece. */
newstrpc(upb_handlers * h,const upb_fielddef * f,bool preserve_fieldnames)16140 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f,
16141 bool preserve_fieldnames) {
16142 /* TODO(haberman): handle malloc failure. */
16143 strpc *ret = upb_gmalloc(sizeof(*ret));
16144 if (preserve_fieldnames) {
16145 ret->ptr = upb_gstrdup(upb_fielddef_name(f));
16146 ret->len = strlen(ret->ptr);
16147 } else {
16148 size_t len;
16149 ret->len = upb_fielddef_getjsonname(f, NULL, 0);
16150 ret->ptr = upb_gmalloc(ret->len);
16151 len = upb_fielddef_getjsonname(f, ret->ptr, ret->len);
16152 UPB_ASSERT(len == ret->len);
16153 ret->len--; /* NULL */
16154 }
16155
16156 upb_handlers_addcleanup(h, ret, freestrpc);
16157 return ret;
16158 }
16159
16160 /* Convert a null-terminated const char* to a string piece. */
newstrpc_str(upb_handlers * h,const char * str)16161 strpc *newstrpc_str(upb_handlers *h, const char * str) {
16162 strpc * ret = upb_gmalloc(sizeof(*ret));
16163 ret->ptr = upb_gstrdup(str);
16164 ret->len = strlen(str);
16165 upb_handlers_addcleanup(h, ret, freestrpc);
16166 return ret;
16167 }
16168
16169 /* ------------ JSON string printing: values, maps, arrays ------------------ */
16170
/* Writes |len| raw bytes from |buf| to the printer's output sink, asserting
 * that the sink accepted all of them. */
static void print_data(
    upb_json_printer *p, const char *buf, unsigned int len) {
  size_t accepted;
  /* TODO: Will need to change if we support pushback from the sink. */
  accepted = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
  UPB_ASSERT(accepted == len);
}
16177
/* Emits a separating "," unless the next element is the first one of the
 * current frame, and records that the frame now has an element. */
static void print_comma(upb_json_printer *p) {
  bool *is_first = &p->first_elem_[p->depth_];
  if (*is_first) {
    *is_first = false;
    return;
  }
  print_data(p, ",", 1);
}
16184
16185 /* Helpers that print properly formatted elements to the JSON output stream. */
16186
16187 /* Used for escaping control chars in strings. */
16188 static const char kControlCharLimit = 0x20;
16189
is_json_escaped(char c)16190 UPB_INLINE bool is_json_escaped(char c) {
16191 /* See RFC 4627. */
16192 unsigned char uc = (unsigned char)c;
16193 return uc < kControlCharLimit || uc == '"' || uc == '\\';
16194 }
16195
json_nice_escape(char c)16196 UPB_INLINE const char* json_nice_escape(char c) {
16197 switch (c) {
16198 case '"': return "\\\"";
16199 case '\\': return "\\\\";
16200 case '\b': return "\\b";
16201 case '\f': return "\\f";
16202 case '\n': return "\\n";
16203 case '\r': return "\\r";
16204 case '\t': return "\\t";
16205 default: return NULL;
16206 }
16207 }
16208
16209 /* Write a properly escaped string chunk. The surrounding quotes are *not*
16210 * printed; this is so that the caller has the option of emitting the string
16211 * content in chunks. */
putstring(upb_json_printer * p,const char * buf,unsigned int len)16212 static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
16213 const char* unescaped_run = NULL;
16214 unsigned int i;
16215 for (i = 0; i < len; i++) {
16216 char c = buf[i];
16217 /* Handle escaping. */
16218 if (is_json_escaped(c)) {
16219 /* Use a "nice" escape, like \n, if one exists for this character. */
16220 const char* escape = json_nice_escape(c);
16221 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
16222 * escape. */
16223 char escape_buf[8];
16224 if (!escape) {
16225 unsigned char byte = (unsigned char)c;
16226 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
16227 escape = escape_buf;
16228 }
16229
16230 /* N.B. that we assume that the input encoding is equal to the output
16231 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
16232 * can simply pass the bytes through. */
16233
16234 /* If there's a current run of unescaped chars, print that run first. */
16235 if (unescaped_run) {
16236 print_data(p, unescaped_run, &buf[i] - unescaped_run);
16237 unescaped_run = NULL;
16238 }
16239 /* Then print the escape code. */
16240 print_data(p, escape, strlen(escape));
16241 } else {
16242 /* Add to the current unescaped run of characters. */
16243 if (unescaped_run == NULL) {
16244 unescaped_run = &buf[i];
16245 }
16246 }
16247 }
16248
16249 /* If the string ended in a run of unescaped characters, print that last run. */
16250 if (unescaped_run) {
16251 print_data(p, unescaped_run, &buf[len] - unescaped_run);
16252 }
16253 }
16254
/* Makes the enclosing function return -1 when condition (x) is false.  The
 * fmt_* helpers below return size_t, so the caller sees (size_t)-1, which is
 * the value CHKFMT tests for.  The expansion already ends in ';'. */
#define CHKLENGTH(x) if (!(x)) return -1;

/* Helpers that format floating point values according to our custom formats.
 * Right now we use %.8g and %.17g for float/double, respectively, to match
 * proto2::util::JsonFormat's defaults.  May want to change this later. */

/* Quoted JSON string spellings used for infinite values. */
const char neginf[] = "\"-Infinity\"";
const char inf[] = "\"Infinity\"";
16263
fmt_double(double val,char * buf,size_t length)16264 static size_t fmt_double(double val, char* buf, size_t length) {
16265 if (val == (1.0 / 0.0)) {
16266 CHKLENGTH(length >= strlen(inf));
16267 strcpy(buf, inf);
16268 return strlen(inf);
16269 } else if (val == (-1.0 / 0.0)) {
16270 CHKLENGTH(length >= strlen(neginf));
16271 strcpy(buf, neginf);
16272 return strlen(neginf);
16273 } else {
16274 size_t n = _upb_snprintf(buf, length, "%.17g", val);
16275 CHKLENGTH(n > 0 && n < length);
16276 return n;
16277 }
16278 }
16279
fmt_float(float val,char * buf,size_t length)16280 static size_t fmt_float(float val, char* buf, size_t length) {
16281 size_t n = _upb_snprintf(buf, length, "%.8g", val);
16282 CHKLENGTH(n > 0 && n < length);
16283 return n;
16284 }
16285
/* Writes "true" or "false" into |buf| (capacity |length|).  Returns the number
 * of characters written, or (size_t)-1 if the text does not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *text = val ? "true" : "false";
  size_t n = _upb_snprintf(buf, length, "%s", text);
  if (n == 0 || n >= length) return (size_t)-1;
  return n;
}
16291
/* Writes |val| as a bare decimal number into |buf| (capacity |length|).
 * Returns the number of characters written, or (size_t)-1 if it does not
 * fit. */
static size_t fmt_int64_as_number(long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "%lld", val);
  if (n == 0 || n >= length) return (size_t)-1;
  return n;
}
16297
/* Writes |val| as a bare unsigned decimal number into |buf| (capacity
 * |length|).  Returns the number of characters written, or (size_t)-1 if it
 * does not fit. */
static size_t fmt_uint64_as_number(
    unsigned long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "%llu", val);
  if (n == 0 || n >= length) return (size_t)-1;
  return n;
}
16304
/* Writes |val| as a double-quoted decimal string into |buf| (capacity
 * |length|).  Returns the number of characters written, or (size_t)-1 if it
 * does not fit. */
static size_t fmt_int64_as_string(long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "\"%lld\"", val);
  if (n == 0 || n >= length) return (size_t)-1;
  return n;
}
16310
/* Writes |val| as a double-quoted unsigned decimal string into |buf|
 * (capacity |length|).  Returns the number of characters written, or
 * (size_t)-1 if it does not fit. */
static size_t fmt_uint64_as_string(
    unsigned long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "\"%llu\"", val);
  if (n == 0 || n >= length) return (size_t)-1;
  return n;
}
16317
16318 /* Print a map key given a field name. Called by scalar field handlers and by
16319 * startseq for repeated fields. */
putkey(void * closure,const void * handler_data)16320 static bool putkey(void *closure, const void *handler_data) {
16321 upb_json_printer *p = closure;
16322 const strpc *key = handler_data;
16323 print_comma(p);
16324 print_data(p, "\"", 1);
16325 putstring(p, key->ptr, key->len);
16326 print_data(p, "\":", 2);
16327 return true;
16328 }
16329
/* CHKFMT: return false from the enclosing function when a fmt_* helper
 * reported failure, i.e. returned (size_t)-1.
 * CHK: return false from the enclosing function when |val| is falsy.  Note
 * that in a function returning size_t or a pointer the "false" converts to
 * 0 / a null pointer.  Both expansions already end in ';'. */
#define CHKFMT(val) if ((val) == (size_t)-1) return false;
#define CHK(val) if (!(val)) return false;
16332
/* Defines three value handlers for primitive type |type|, all formatting
 * through |fmt_func| into a 64-byte stack buffer:
 *   put<type>       prints just the value; fails if formatting fails.
 *   scalar_<type>   prints the key (from handler_data), then the value.
 *   repeated_<type> prints a separating comma when needed, then the value. */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *printer = closure;                                     \
    char text[64];                                                           \
    size_t text_len = fmt_func(val, text, sizeof(text));                     \
    UPB_UNUSED(handler_data);                                                \
    if (text_len == (size_t)-1) return false;                                \
    print_data(printer, text, text_len);                                     \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    if (!putkey(closure, handler_data)) return false;                        \
    return put##type(closure, handler_data, val);                            \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    upb_json_printer *printer = closure;                                     \
    print_comma(printer);                                                    \
    return put##type(closure, handler_data, val);                            \
  }
16356
/* Defines putmapkey_<type>, which prints a primitive map key as a quoted JSON
 * object key followed by ':'.  The value is formatted by |fmt_func| into a
 * 64-byte stack buffer.  As in TYPE_HANDLERS, a formatting failure makes the
 * handler return false; without that check the (size_t)-1 failure value would
 * be handed to print_data() as a length. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                               type val) {                                   \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, "\"", 1);                                                  \
    print_data(p, data, length);                                             \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
16369
/* Value handlers for each primitive type.  32-bit integers are printed as
 * bare numbers; 64-bit integers use the *_as_string formatters and are
 * therefore printed inside double quotes. */
TYPE_HANDLERS(double, fmt_double)
TYPE_HANDLERS(float, fmt_float)
TYPE_HANDLERS(bool, fmt_bool)
TYPE_HANDLERS(int32_t, fmt_int64_as_number)
TYPE_HANDLERS(uint32_t, fmt_int64_as_number)
TYPE_HANDLERS(int64_t, fmt_int64_as_string)
TYPE_HANDLERS(uint64_t, fmt_uint64_as_string)

/* double and float are not allowed to be map keys.  The macro adds the
 * surrounding quotes itself, so the 64-bit key handlers use the *_as_number
 * formatters here. */
TYPE_HANDLERS_MAPKEY(bool, fmt_bool)
TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64_as_number)
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64_as_number)

#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
16387
/* Handler data attached to enum field handlers (built by set_enum_hd). */
typedef struct {
  void *keyname;              /* strpc* key name, as returned by newstrpc(). */
  const upb_enumdef *enumdef; /* Used to map numbers to symbolic names. */
} EnumHandlerData;
16392
scalar_enum(void * closure,const void * handler_data,int32_t val)16393 static bool scalar_enum(void *closure, const void *handler_data,
16394 int32_t val) {
16395 const EnumHandlerData *hd = handler_data;
16396 upb_json_printer *p = closure;
16397 const char *symbolic_name;
16398
16399 CHK(putkey(closure, hd->keyname));
16400
16401 symbolic_name = upb_enumdef_iton(hd->enumdef, val);
16402 if (symbolic_name) {
16403 print_data(p, "\"", 1);
16404 putstring(p, symbolic_name, strlen(symbolic_name));
16405 print_data(p, "\"", 1);
16406 } else {
16407 putint32_t(closure, NULL, val);
16408 }
16409
16410 return true;
16411 }
16412
/* Prints enum value |val| of |def| as its quoted symbolic name, or as a bare
 * number when |def| has no name for it. */
static void print_enum_symbolic_name(upb_json_printer *p,
                                     const upb_enumdef *def,
                                     int32_t val) {
  const char *name = upb_enumdef_iton(def, val);

  if (name == NULL) {
    putint32_t(p, NULL, val);
    return;
  }

  print_data(p, "\"", 1);
  putstring(p, name, strlen(name));
  print_data(p, "\"", 1);
}
16425
repeated_enum(void * closure,const void * handler_data,int32_t val)16426 static bool repeated_enum(void *closure, const void *handler_data,
16427 int32_t val) {
16428 const EnumHandlerData *hd = handler_data;
16429 upb_json_printer *p = closure;
16430 print_comma(p);
16431
16432 print_enum_symbolic_name(p, hd->enumdef, val);
16433
16434 return true;
16435 }
16436
mapvalue_enum(void * closure,const void * handler_data,int32_t val)16437 static bool mapvalue_enum(void *closure, const void *handler_data,
16438 int32_t val) {
16439 const EnumHandlerData *hd = handler_data;
16440 upb_json_printer *p = closure;
16441
16442 print_enum_symbolic_name(p, hd->enumdef, val);
16443
16444 return true;
16445 }
16446
scalar_startsubmsg(void * closure,const void * handler_data)16447 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
16448 return putkey(closure, handler_data) ? closure : UPB_BREAK;
16449 }
16450
repeated_startsubmsg(void * closure,const void * handler_data)16451 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
16452 upb_json_printer *p = closure;
16453 UPB_UNUSED(handler_data);
16454 print_comma(p);
16455 return closure;
16456 }
16457
/* Opens a JSON object: enters a new depth level, marks it as not yet having
 * any element, and prints '{'. */
static void start_frame(upb_json_printer *p) {
  p->depth_ += 1;
  p->first_elem_[p->depth_] = true;
  print_data(p, "{", 1);
}
16463
/* Closes a JSON object: prints '}' and leaves the current depth level. */
static void end_frame(upb_json_printer *p) {
  print_data(p, "}", 1);
  p->depth_ -= 1;
}
16468
printer_startmsg(void * closure,const void * handler_data)16469 static bool printer_startmsg(void *closure, const void *handler_data) {
16470 upb_json_printer *p = closure;
16471 UPB_UNUSED(handler_data);
16472 if (p->depth_ == 0) {
16473 upb_bytessink_start(p->output_, 0, &p->subc_);
16474 }
16475 start_frame(p);
16476 return true;
16477 }
16478
/* End-of-message handler: closes the object frame and, once the depth is back
 * to 0 (outermost message finished), ends the output sink.  Always returns
 * true. */
static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  end_frame(printer);
  if (printer->depth_ != 0) {
    return true;
  }
  upb_bytessink_end(printer->output_);
  return true;
}
16489
startseq(void * closure,const void * handler_data)16490 static void *startseq(void *closure, const void *handler_data) {
16491 upb_json_printer *p = closure;
16492 CHK(putkey(closure, handler_data));
16493 p->depth_++;
16494 p->first_elem_[p->depth_] = true;
16495 print_data(p, "[", 1);
16496 return closure;
16497 }
16498
endseq(void * closure,const void * handler_data)16499 static bool endseq(void *closure, const void *handler_data) {
16500 upb_json_printer *p = closure;
16501 UPB_UNUSED(handler_data);
16502 print_data(p, "]", 1);
16503 p->depth_--;
16504 return true;
16505 }
16506
startmap(void * closure,const void * handler_data)16507 static void *startmap(void *closure, const void *handler_data) {
16508 upb_json_printer *p = closure;
16509 CHK(putkey(closure, handler_data));
16510 p->depth_++;
16511 p->first_elem_[p->depth_] = true;
16512 print_data(p, "{", 1);
16513 return closure;
16514 }
16515
endmap(void * closure,const void * handler_data)16516 static bool endmap(void *closure, const void *handler_data) {
16517 upb_json_printer *p = closure;
16518 UPB_UNUSED(handler_data);
16519 print_data(p, "}", 1);
16520 p->depth_--;
16521 return true;
16522 }
16523
putstr(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16524 static size_t putstr(void *closure, const void *handler_data, const char *str,
16525 size_t len, const upb_bufhandle *handle) {
16526 upb_json_printer *p = closure;
16527 UPB_UNUSED(handler_data);
16528 UPB_UNUSED(handle);
16529 putstring(p, str, len);
16530 return len;
16531 }
16532
16533 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
putbytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16534 static size_t putbytes(void *closure, const void *handler_data, const char *str,
16535 size_t len, const upb_bufhandle *handle) {
16536 upb_json_printer *p = closure;
16537
16538 /* This is the regular base64, not the "web-safe" version. */
16539 static const char base64[] =
16540 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
16541
16542 /* Base64-encode. */
16543 char data[16000];
16544 const char *limit = data + sizeof(data);
16545 const unsigned char *from = (const unsigned char*)str;
16546 char *to = data;
16547 size_t remaining = len;
16548 size_t bytes;
16549
16550 UPB_UNUSED(handler_data);
16551 UPB_UNUSED(handle);
16552
16553 print_data(p, "\"", 1);
16554
16555 while (remaining > 2) {
16556 if (limit - to < 4) {
16557 bytes = to - data;
16558 putstring(p, data, bytes);
16559 to = data;
16560 }
16561
16562 to[0] = base64[from[0] >> 2];
16563 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
16564 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
16565 to[3] = base64[from[2] & 0x3f];
16566
16567 remaining -= 3;
16568 to += 4;
16569 from += 3;
16570 }
16571
16572 switch (remaining) {
16573 case 2:
16574 to[0] = base64[from[0] >> 2];
16575 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
16576 to[2] = base64[(from[1] & 0xf) << 2];
16577 to[3] = '=';
16578 to += 4;
16579 from += 2;
16580 break;
16581 case 1:
16582 to[0] = base64[from[0] >> 2];
16583 to[1] = base64[((from[0] & 0x3) << 4)];
16584 to[2] = '=';
16585 to[3] = '=';
16586 to += 4;
16587 from += 1;
16588 break;
16589 }
16590
16591 bytes = to - data;
16592 putstring(p, data, bytes);
16593 print_data(p, "\"", 1);
16594 return len;
16595 }
16596
scalar_startstr(void * closure,const void * handler_data,size_t size_hint)16597 static void *scalar_startstr(void *closure, const void *handler_data,
16598 size_t size_hint) {
16599 upb_json_printer *p = closure;
16600 UPB_UNUSED(handler_data);
16601 UPB_UNUSED(size_hint);
16602 CHK(putkey(closure, handler_data));
16603 print_data(p, "\"", 1);
16604 return p;
16605 }
16606
/* String-chunk handler for a singular string field: forwards to putstr().
 * Returns |len|, or 0 when putstr() reports 0 bytes. */
static size_t scalar_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  if (!putstr(closure, handler_data, str, len, handle)) {
    return 0;
  }
  return len;
}
16613
scalar_endstr(void * closure,const void * handler_data)16614 static bool scalar_endstr(void *closure, const void *handler_data) {
16615 upb_json_printer *p = closure;
16616 UPB_UNUSED(handler_data);
16617 print_data(p, "\"", 1);
16618 return true;
16619 }
16620
repeated_startstr(void * closure,const void * handler_data,size_t size_hint)16621 static void *repeated_startstr(void *closure, const void *handler_data,
16622 size_t size_hint) {
16623 upb_json_printer *p = closure;
16624 UPB_UNUSED(handler_data);
16625 UPB_UNUSED(size_hint);
16626 print_comma(p);
16627 print_data(p, "\"", 1);
16628 return p;
16629 }
16630
/* String-chunk handler for a repeated string element: forwards to putstr().
 * Returns |len|, or 0 when putstr() reports 0 bytes. */
static size_t repeated_str(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  if (!putstr(closure, handler_data, str, len, handle)) {
    return 0;
  }
  return len;
}
16637
repeated_endstr(void * closure,const void * handler_data)16638 static bool repeated_endstr(void *closure, const void *handler_data) {
16639 upb_json_printer *p = closure;
16640 UPB_UNUSED(handler_data);
16641 print_data(p, "\"", 1);
16642 return true;
16643 }
16644
mapkeyval_startstr(void * closure,const void * handler_data,size_t size_hint)16645 static void *mapkeyval_startstr(void *closure, const void *handler_data,
16646 size_t size_hint) {
16647 upb_json_printer *p = closure;
16648 UPB_UNUSED(handler_data);
16649 UPB_UNUSED(size_hint);
16650 print_data(p, "\"", 1);
16651 return p;
16652 }
16653
/* String-chunk handler for a string map key: forwards to putstr().  Returns
 * |len|, or 0 when putstr() reports 0 bytes. */
static size_t mapkey_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  if (!putstr(closure, handler_data, str, len, handle)) {
    return 0;
  }
  return len;
}
16660
mapkey_endstr(void * closure,const void * handler_data)16661 static bool mapkey_endstr(void *closure, const void *handler_data) {
16662 upb_json_printer *p = closure;
16663 UPB_UNUSED(handler_data);
16664 print_data(p, "\":", 2);
16665 return true;
16666 }
16667
mapvalue_endstr(void * closure,const void * handler_data)16668 static bool mapvalue_endstr(void *closure, const void *handler_data) {
16669 upb_json_printer *p = closure;
16670 UPB_UNUSED(handler_data);
16671 print_data(p, "\"", 1);
16672 return true;
16673 }
16674
/* Handler for a singular bytes field: prints the key, then the Base64-encoded
 * value in quotes.  Returns |len|, or 0 if putkey() fails or putbytes()
 * reports 0 bytes. */
static size_t scalar_bytes(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  if (!putkey(closure, handler_data)) {
    return 0;
  }
  if (!putbytes(closure, handler_data, str, len, handle)) {
    return 0;
  }
  return len;
}
16682
repeated_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16683 static size_t repeated_bytes(void *closure, const void *handler_data,
16684 const char *str, size_t len,
16685 const upb_bufhandle *handle) {
16686 upb_json_printer *p = closure;
16687 print_comma(p);
16688 CHK(putbytes(closure, handler_data, str, len, handle));
16689 return len;
16690 }
16691
mapkey_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16692 static size_t mapkey_bytes(void *closure, const void *handler_data,
16693 const char *str, size_t len,
16694 const upb_bufhandle *handle) {
16695 upb_json_printer *p = closure;
16696 CHK(putbytes(closure, handler_data, str, len, handle));
16697 print_data(p, ":", 1);
16698 return len;
16699 }
16700
/* Allocates the EnumHandlerData for enum field |f| (its enumdef plus the key
 * name to print), registers it for cleanup with |h|, and installs it as the
 * handler data of |attr|. */
static void set_enum_hd(upb_handlers *h,
                        const upb_fielddef *f,
                        bool preserve_fieldnames,
                        upb_handlerattr *attr) {
  EnumHandlerData *enum_hd = upb_gmalloc(sizeof(EnumHandlerData));

  enum_hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
  enum_hd->keyname = newstrpc(h, f, preserve_fieldnames);

  upb_handlers_addcleanup(h, enum_hd, upb_gfree);
  upb_handlerattr_sethandlerdata(attr, enum_hd);
}
16711
16712 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
16713 * in a map).
16714 *
16715 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
16716 * key or value cases properly. The right way to do this is to allocate a
16717 * temporary structure at the start of a mapentry submessage, store key and
16718 * value data in it as key and value handlers are called, and then print the
16719 * key/value pair once at the end of the submessage. If we don't do this, we
16720 * should at least detect the case and throw an error. However, so far all of
16721 * our sources that emit mapentry messages do so canonically (with one key
16722 * field, and then one value field), so this is not a pressing concern at the
16723 * moment. */
printer_sethandlers_mapentry(const void * closure,bool preserve_fieldnames,upb_handlers * h)16724 void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames,
16725 upb_handlers *h) {
16726 const upb_msgdef *md = upb_handlers_msgdef(h);
16727
16728 /* A mapentry message is printed simply as '"key": value'. Rather than
16729 * special-case key and value for every type below, we just handle both
16730 * fields explicitly here. */
16731 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
16732 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
16733
16734 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
16735
16736 UPB_UNUSED(closure);
16737
16738 switch (upb_fielddef_type(key_field)) {
16739 case UPB_TYPE_INT32:
16740 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
16741 break;
16742 case UPB_TYPE_INT64:
16743 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
16744 break;
16745 case UPB_TYPE_UINT32:
16746 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
16747 break;
16748 case UPB_TYPE_UINT64:
16749 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
16750 break;
16751 case UPB_TYPE_BOOL:
16752 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
16753 break;
16754 case UPB_TYPE_STRING:
16755 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
16756 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
16757 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
16758 break;
16759 case UPB_TYPE_BYTES:
16760 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
16761 break;
16762 default:
16763 UPB_ASSERT(false);
16764 break;
16765 }
16766
16767 switch (upb_fielddef_type(value_field)) {
16768 case UPB_TYPE_INT32:
16769 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
16770 break;
16771 case UPB_TYPE_INT64:
16772 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
16773 break;
16774 case UPB_TYPE_UINT32:
16775 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
16776 break;
16777 case UPB_TYPE_UINT64:
16778 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
16779 break;
16780 case UPB_TYPE_BOOL:
16781 upb_handlers_setbool(h, value_field, putbool, &empty_attr);
16782 break;
16783 case UPB_TYPE_FLOAT:
16784 upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
16785 break;
16786 case UPB_TYPE_DOUBLE:
16787 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
16788 break;
16789 case UPB_TYPE_STRING:
16790 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
16791 upb_handlers_setstring(h, value_field, putstr, &empty_attr);
16792 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
16793 break;
16794 case UPB_TYPE_BYTES:
16795 upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
16796 break;
16797 case UPB_TYPE_ENUM: {
16798 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
16799 set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr);
16800 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
16801 upb_handlerattr_uninit(&enum_attr);
16802 break;
16803 }
16804 case UPB_TYPE_MESSAGE:
16805 /* No handler necessary -- the submsg handlers will print the message
16806 * as appropriate. */
16807 break;
16808 }
16809
16810 upb_handlerattr_uninit(&empty_attr);
16811 }
16812
putseconds(void * closure,const void * handler_data,int64_t seconds)16813 static bool putseconds(void *closure, const void *handler_data,
16814 int64_t seconds) {
16815 upb_json_printer *p = closure;
16816 p->seconds = seconds;
16817 UPB_UNUSED(handler_data);
16818 return true;
16819 }
16820
putnanos(void * closure,const void * handler_data,int32_t nanos)16821 static bool putnanos(void *closure, const void *handler_data,
16822 int32_t nanos) {
16823 upb_json_printer *p = closure;
16824 p->nanos = nanos;
16825 UPB_UNUSED(handler_data);
16826 return true;
16827 }
16828
scalar_startstr_nokey(void * closure,const void * handler_data,size_t size_hint)16829 static void *scalar_startstr_nokey(void *closure, const void *handler_data,
16830 size_t size_hint) {
16831 upb_json_printer *p = closure;
16832 UPB_UNUSED(handler_data);
16833 UPB_UNUSED(size_hint);
16834 print_data(p, "\"", 1);
16835 return p;
16836 }
16837
putstr_nokey(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)16838 static size_t putstr_nokey(void *closure, const void *handler_data,
16839 const char *str, size_t len,
16840 const upb_bufhandle *handle) {
16841 upb_json_printer *p = closure;
16842 UPB_UNUSED(handler_data);
16843 UPB_UNUSED(handle);
16844 print_data(p, "\"", 1);
16845 putstring(p, str, len);
16846 print_data(p, "\"", 1);
16847 return len + 2;
16848 }
16849
startseq_nokey(void * closure,const void * handler_data)16850 static void *startseq_nokey(void *closure, const void *handler_data) {
16851 upb_json_printer *p = closure;
16852 UPB_UNUSED(handler_data);
16853 p->depth_++;
16854 p->first_elem_[p->depth_] = true;
16855 print_data(p, "[", 1);
16856 return closure;
16857 }
16858
startseq_fieldmask(void * closure,const void * handler_data)16859 static void *startseq_fieldmask(void *closure, const void *handler_data) {
16860 upb_json_printer *p = closure;
16861 UPB_UNUSED(handler_data);
16862 p->depth_++;
16863 p->first_elem_[p->depth_] = true;
16864 return closure;
16865 }
16866
endseq_fieldmask(void * closure,const void * handler_data)16867 static bool endseq_fieldmask(void *closure, const void *handler_data) {
16868 upb_json_printer *p = closure;
16869 UPB_UNUSED(handler_data);
16870 p->depth_--;
16871 return true;
16872 }
16873
repeated_startstr_fieldmask(void * closure,const void * handler_data,size_t size_hint)16874 static void *repeated_startstr_fieldmask(
16875 void *closure, const void *handler_data,
16876 size_t size_hint) {
16877 upb_json_printer *p = closure;
16878 UPB_UNUSED(handler_data);
16879 UPB_UNUSED(size_hint);
16880 print_comma(p);
16881 return p;
16882 }
16883
/* String handler for one element of the field-mask sequence.
 *
 * Rewrites the chunk from snake_case to lowerCamelCase while forwarding it to
 * putstr() one byte at a time: every '_' is dropped, and a lowercase ASCII
 * letter that directly follows a '_' is emitted in upper case. Any other byte
 * is forwarded unchanged (so "a_1" becomes "a1").
 *
 * The upper-casing is done with ASCII arithmetic rather than toupper(), so the
 * output does not depend on the process locale.
 *
 * Returns `len` once every byte has been handled; CHK() bails out early if
 * putstr() reports failure.
 * NOTE(review): returning `len` follows the upb string-handler convention
 * that the return value is the number of input bytes consumed; a smaller
 * value (as the count of emitted bytes would be whenever '_' occurs) reads as
 * a partial consume -- confirm against upb's handlers.h. */
static size_t repeated_str_fieldmask(
    void *closure, const void *handler_data,
    const char *str, size_t len,
    const upb_bufhandle *handle) {
  const char *limit = str + len;
  bool upper = false;
  for (; str < limit; str++) {
    if (*str == '_') {
      upper = true;
      continue;
    }
    if (upper && *str >= 'a' && *str <= 'z') {
      /* The range check above guarantees an ASCII lowercase letter. */
      char upper_char = (char)(*str - 'a' + 'A');
      CHK(putstr(closure, handler_data, &upper_char, 1, handle));
    } else {
      CHK(putstr(closure, handler_data, str, 1, handle));
    }
    upper = false;
  }
  return len;
}
16907
startmap_nokey(void * closure,const void * handler_data)16908 static void *startmap_nokey(void *closure, const void *handler_data) {
16909 upb_json_printer *p = closure;
16910 UPB_UNUSED(handler_data);
16911 p->depth_++;
16912 p->first_elem_[p->depth_] = true;
16913 print_data(p, "{", 1);
16914 return closure;
16915 }
16916
putnull(void * closure,const void * handler_data,int32_t null)16917 static bool putnull(void *closure, const void *handler_data,
16918 int32_t null) {
16919 upb_json_printer *p = closure;
16920 print_data(p, "null", 4);
16921 UPB_UNUSED(handler_data);
16922 UPB_UNUSED(null);
16923 return true;
16924 }
16925
printer_startdurationmsg(void * closure,const void * handler_data)16926 static bool printer_startdurationmsg(void *closure, const void *handler_data) {
16927 upb_json_printer *p = closure;
16928 UPB_UNUSED(handler_data);
16929 if (p->depth_ == 0) {
16930 upb_bytessink_start(p->output_, 0, &p->subc_);
16931 }
16932 return true;
16933 }
16934
16935 #define UPB_DURATION_MAX_JSON_LEN 23
16936 #define UPB_DURATION_MAX_NANO_LEN 9
16937
printer_enddurationmsg(void * closure,const void * handler_data,upb_status * s)16938 static bool printer_enddurationmsg(void *closure, const void *handler_data,
16939 upb_status *s) {
16940 upb_json_printer *p = closure;
16941 char buffer[UPB_DURATION_MAX_JSON_LEN];
16942 size_t base_len;
16943 size_t curr;
16944 size_t i;
16945
16946 memset(buffer, 0, UPB_DURATION_MAX_JSON_LEN);
16947
16948 if (p->seconds < -315576000000) {
16949 upb_status_seterrf(s, "error parsing duration: "
16950 "minimum acceptable value is "
16951 "-315576000000");
16952 return false;
16953 }
16954
16955 if (p->seconds > 315576000000) {
16956 upb_status_seterrf(s, "error serializing duration: "
16957 "maximum acceptable value is "
16958 "315576000000");
16959 return false;
16960 }
16961
16962 _upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
16963 base_len = strlen(buffer);
16964
16965 if (p->nanos != 0) {
16966 char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3];
16967 _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
16968 p->nanos / 1000000000.0);
16969 /* Remove trailing 0. */
16970 for (i = UPB_DURATION_MAX_NANO_LEN + 2;
16971 nanos_buffer[i] == '0'; i--) {
16972 nanos_buffer[i] = 0;
16973 }
16974 strcpy(buffer + base_len, nanos_buffer + 1);
16975 }
16976
16977 curr = strlen(buffer);
16978 strcpy(buffer + curr, "s");
16979
16980 p->seconds = 0;
16981 p->nanos = 0;
16982
16983 print_data(p, "\"", 1);
16984 print_data(p, buffer, strlen(buffer));
16985 print_data(p, "\"", 1);
16986
16987 if (p->depth_ == 0) {
16988 upb_bytessink_end(p->output_);
16989 }
16990
16991 UPB_UNUSED(handler_data);
16992 return true;
16993 }
16994
printer_starttimestampmsg(void * closure,const void * handler_data)16995 static bool printer_starttimestampmsg(void *closure, const void *handler_data) {
16996 upb_json_printer *p = closure;
16997 UPB_UNUSED(handler_data);
16998 if (p->depth_ == 0) {
16999 upb_bytessink_start(p->output_, 0, &p->subc_);
17000 }
17001 return true;
17002 }
17003
17004 #define UPB_TIMESTAMP_MAX_JSON_LEN 31
17005 #define UPB_TIMESTAMP_BEFORE_NANO_LEN 19
17006 #define UPB_TIMESTAMP_MAX_NANO_LEN 9
17007
printer_endtimestampmsg(void * closure,const void * handler_data,upb_status * s)17008 static bool printer_endtimestampmsg(void *closure, const void *handler_data,
17009 upb_status *s) {
17010 upb_json_printer *p = closure;
17011 char buffer[UPB_TIMESTAMP_MAX_JSON_LEN];
17012 time_t time = p->seconds;
17013 size_t curr;
17014 size_t i;
17015 size_t year_length =
17016 strftime(buffer, UPB_TIMESTAMP_MAX_JSON_LEN, "%Y", gmtime(&time));
17017
17018 if (p->seconds < -62135596800) {
17019 upb_status_seterrf(s, "error parsing timestamp: "
17020 "minimum acceptable value is "
17021 "0001-01-01T00:00:00Z");
17022 return false;
17023 }
17024
17025 if (p->seconds > 253402300799) {
17026 upb_status_seterrf(s, "error parsing timestamp: "
17027 "maximum acceptable value is "
17028 "9999-12-31T23:59:59Z");
17029 return false;
17030 }
17031
17032 /* strftime doesn't guarantee 4 digits for year. Prepend 0 by ourselves. */
17033 for (i = 0; i < 4 - year_length; i++) {
17034 buffer[i] = '0';
17035 }
17036
17037 strftime(buffer + (4 - year_length), UPB_TIMESTAMP_MAX_JSON_LEN,
17038 "%Y-%m-%dT%H:%M:%S", gmtime(&time));
17039 if (p->nanos != 0) {
17040 char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3];
17041 _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
17042 p->nanos / 1000000000.0);
17043 /* Remove trailing 0. */
17044 for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2;
17045 nanos_buffer[i] == '0'; i--) {
17046 nanos_buffer[i] = 0;
17047 }
17048 strcpy(buffer + UPB_TIMESTAMP_BEFORE_NANO_LEN, nanos_buffer + 1);
17049 }
17050
17051 curr = strlen(buffer);
17052 strcpy(buffer + curr, "Z");
17053
17054 p->seconds = 0;
17055 p->nanos = 0;
17056
17057 print_data(p, "\"", 1);
17058 print_data(p, buffer, strlen(buffer));
17059 print_data(p, "\"", 1);
17060
17061 if (p->depth_ == 0) {
17062 upb_bytessink_end(p->output_);
17063 }
17064
17065 UPB_UNUSED(handler_data);
17066 UPB_UNUSED(s);
17067 return true;
17068 }
17069
printer_startmsg_noframe(void * closure,const void * handler_data)17070 static bool printer_startmsg_noframe(void *closure, const void *handler_data) {
17071 upb_json_printer *p = closure;
17072 UPB_UNUSED(handler_data);
17073 if (p->depth_ == 0) {
17074 upb_bytessink_start(p->output_, 0, &p->subc_);
17075 }
17076 return true;
17077 }
17078
/* End-of-message counterpart of printer_startmsg_noframe(). Prints nothing;
 * when the message is the outermost one (depth 0) it ends the output byte
 * sink. Always returns true and never touches `s`. */
static bool printer_endmsg_noframe(
    void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);
  if (printer->depth_ != 0) {
    return true;
  }
  upb_bytessink_end(printer->output_);
  return true;
}
17089
printer_startmsg_fieldmask(void * closure,const void * handler_data)17090 static bool printer_startmsg_fieldmask(
17091 void *closure, const void *handler_data) {
17092 upb_json_printer *p = closure;
17093 UPB_UNUSED(handler_data);
17094 if (p->depth_ == 0) {
17095 upb_bytessink_start(p->output_, 0, &p->subc_);
17096 }
17097 print_data(p, "\"", 1);
17098 return true;
17099 }
17100
/* End-of-message handler for the field-mask handlers: emits the closing
 * double quote, then ends the output byte sink when the message is the
 * outermost one (depth 0). Always returns true and never touches `s`. */
static bool printer_endmsg_fieldmask(
    void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);
  print_data(printer, "\"", 1);
  if (printer->depth_ != 0) {
    return true;
  }
  upb_bytessink_end(printer->output_);
  return true;
}
17112
scalar_startstr_onlykey(void * closure,const void * handler_data,size_t size_hint)17113 static void *scalar_startstr_onlykey(
17114 void *closure, const void *handler_data, size_t size_hint) {
17115 upb_json_printer *p = closure;
17116 UPB_UNUSED(size_hint);
17117 CHK(putkey(closure, handler_data));
17118 return p;
17119 }
17120
17121 /* Set up handlers for an Any submessage. */
printer_sethandlers_any(const void * closure,upb_handlers * h)17122 void printer_sethandlers_any(const void *closure, upb_handlers *h) {
17123 const upb_msgdef *md = upb_handlers_msgdef(h);
17124
17125 const upb_fielddef* type_field = upb_msgdef_itof(md, UPB_ANY_TYPE);
17126 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_ANY_VALUE);
17127
17128 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17129
17130 /* type_url's json name is "@type" */
17131 upb_handlerattr type_name_attr = UPB_HANDLERATTR_INITIALIZER;
17132 upb_handlerattr value_name_attr = UPB_HANDLERATTR_INITIALIZER;
17133 strpc *type_url_json_name = newstrpc_str(h, "@type");
17134 strpc *value_json_name = newstrpc_str(h, "value");
17135
17136 upb_handlerattr_sethandlerdata(&type_name_attr, type_url_json_name);
17137 upb_handlerattr_sethandlerdata(&value_name_attr, value_json_name);
17138
17139 /* Set up handlers. */
17140 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
17141 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
17142
17143 upb_handlers_setstartstr(h, type_field, scalar_startstr, &type_name_attr);
17144 upb_handlers_setstring(h, type_field, scalar_str, &empty_attr);
17145 upb_handlers_setendstr(h, type_field, scalar_endstr, &empty_attr);
17146
17147 /* This is not the full and correct JSON encoding for the Any value field. It
17148 * requires further processing by the wrapper code based on the type URL.
17149 */
17150 upb_handlers_setstartstr(h, value_field, scalar_startstr_onlykey,
17151 &value_name_attr);
17152
17153 UPB_UNUSED(closure);
17154 }
17155
17156 /* Set up handlers for a fieldmask submessage. */
printer_sethandlers_fieldmask(const void * closure,upb_handlers * h)17157 void printer_sethandlers_fieldmask(const void *closure, upb_handlers *h) {
17158 const upb_msgdef *md = upb_handlers_msgdef(h);
17159 const upb_fielddef* f = upb_msgdef_itof(md, 1);
17160
17161 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17162
17163 upb_handlers_setstartseq(h, f, startseq_fieldmask, &empty_attr);
17164 upb_handlers_setendseq(h, f, endseq_fieldmask, &empty_attr);
17165
17166 upb_handlers_setstartmsg(h, printer_startmsg_fieldmask, &empty_attr);
17167 upb_handlers_setendmsg(h, printer_endmsg_fieldmask, &empty_attr);
17168
17169 upb_handlers_setstartstr(h, f, repeated_startstr_fieldmask, &empty_attr);
17170 upb_handlers_setstring(h, f, repeated_str_fieldmask, &empty_attr);
17171
17172 UPB_UNUSED(closure);
17173 }
17174
17175 /* Set up handlers for a duration submessage. */
printer_sethandlers_duration(const void * closure,upb_handlers * h)17176 void printer_sethandlers_duration(const void *closure, upb_handlers *h) {
17177 const upb_msgdef *md = upb_handlers_msgdef(h);
17178
17179 const upb_fielddef* seconds_field =
17180 upb_msgdef_itof(md, UPB_DURATION_SECONDS);
17181 const upb_fielddef* nanos_field =
17182 upb_msgdef_itof(md, UPB_DURATION_NANOS);
17183
17184 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17185
17186 upb_handlers_setstartmsg(h, printer_startdurationmsg, &empty_attr);
17187 upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
17188 upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
17189 upb_handlers_setendmsg(h, printer_enddurationmsg, &empty_attr);
17190
17191 UPB_UNUSED(closure);
17192 }
17193
17194 /* Set up handlers for a timestamp submessage. Instead of printing fields
17195 * separately, the json representation of timestamp follows RFC 3339 */
printer_sethandlers_timestamp(const void * closure,upb_handlers * h)17196 void printer_sethandlers_timestamp(const void *closure, upb_handlers *h) {
17197 const upb_msgdef *md = upb_handlers_msgdef(h);
17198
17199 const upb_fielddef* seconds_field =
17200 upb_msgdef_itof(md, UPB_TIMESTAMP_SECONDS);
17201 const upb_fielddef* nanos_field =
17202 upb_msgdef_itof(md, UPB_TIMESTAMP_NANOS);
17203
17204 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17205
17206 upb_handlers_setstartmsg(h, printer_starttimestampmsg, &empty_attr);
17207 upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
17208 upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
17209 upb_handlers_setendmsg(h, printer_endtimestampmsg, &empty_attr);
17210
17211 UPB_UNUSED(closure);
17212 }
17213
/* Set up handlers for a Value submessage.
 *
 * The message itself is printed without a frame. Each field prints its value
 * with no key, chosen by field type: enum -> the literal null, double ->
 * putdouble, string -> a quoted string, bool -> putbool. Message-typed fields
 * get no handler here. Any other field type trips UPB_ASSERT.
 *
 * The upb_handlerattr initialised here is released again with
 * upb_handlerattr_uninit() before returning. `closure` is unused. */
void printer_sethandlers_value(const void *closure, upb_handlers *h) {
  const upb_msgdef *md = upb_handlers_msgdef(h);
  upb_msg_field_iter i;

  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;

  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
  upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);

  upb_msg_field_begin(&i, md);
  for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);

    switch (upb_fielddef_type(f)) {
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, f, putnull, &empty_attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, f, putdouble, &empty_attr);
        break;
      case UPB_TYPE_STRING:
        upb_handlers_setstartstr(h, f, scalar_startstr_nokey, &empty_attr);
        upb_handlers_setstring(h, f, scalar_str, &empty_attr);
        upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, f, putbool, &empty_attr);
        break;
      case UPB_TYPE_MESSAGE:
        break;
      default:
        UPB_ASSERT(false);
        break;
    }
  }

  upb_handlerattr_uninit(&empty_attr);

  UPB_UNUSED(closure);
}
17252
17253 #define WRAPPER_SETHANDLERS(wrapper, type, putmethod) \
17254 void printer_sethandlers_##wrapper(const void *closure, upb_handlers *h) { \
17255 const upb_msgdef *md = upb_handlers_msgdef(h); \
17256 const upb_fielddef* f = upb_msgdef_itof(md, 1); \
17257 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER; \
17258 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); \
17259 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); \
17260 upb_handlers_set##type(h, f, putmethod, &empty_attr); \
17261 UPB_UNUSED(closure); \
17262 }
17263
WRAPPER_SETHANDLERS(doublevalue,double,putdouble)17264 WRAPPER_SETHANDLERS(doublevalue, double, putdouble)
17265 WRAPPER_SETHANDLERS(floatvalue, float, putfloat)
17266 WRAPPER_SETHANDLERS(int64value, int64, putint64_t)
17267 WRAPPER_SETHANDLERS(uint64value, uint64, putuint64_t)
17268 WRAPPER_SETHANDLERS(int32value, int32, putint32_t)
17269 WRAPPER_SETHANDLERS(uint32value, uint32, putuint32_t)
17270 WRAPPER_SETHANDLERS(boolvalue, bool, putbool)
17271 WRAPPER_SETHANDLERS(stringvalue, string, putstr_nokey)
17272 WRAPPER_SETHANDLERS(bytesvalue, string, putbytes)
17273
17274 #undef WRAPPER_SETHANDLERS
17275
17276 void printer_sethandlers_listvalue(const void *closure, upb_handlers *h) {
17277 const upb_msgdef *md = upb_handlers_msgdef(h);
17278 const upb_fielddef* f = upb_msgdef_itof(md, 1);
17279
17280 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
17281
17282 upb_handlers_setstartseq(h, f, startseq_nokey, &empty_attr);
17283 upb_handlers_setendseq(h, f, endseq, &empty_attr);
17284
17285 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
17286 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
17287
17288 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr);
17289
17290 UPB_UNUSED(closure);
17291 }
17292
/* Set up handlers for a Struct submessage.
 *
 * The message is printed without a frame; the sequence handlers on field
 * number 1 open the JSON object with no key (startmap_nokey) and close it
 * with endmap, and each entry goes through repeated_startsubmsg.
 *
 * The upb_handlerattr initialised here is released again with
 * upb_handlerattr_uninit() before returning. `closure` is unused. */
void printer_sethandlers_structvalue(const void *closure, upb_handlers *h) {
  const upb_msgdef *md = upb_handlers_msgdef(h);
  const upb_fielddef* f = upb_msgdef_itof(md, 1);

  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;

  upb_handlers_setstartseq(h, f, startmap_nokey, &empty_attr);
  upb_handlers_setendseq(h, f, endmap, &empty_attr);

  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
  upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);

  upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr);

  upb_handlerattr_uninit(&empty_attr);

  UPB_UNUSED(closure);
}
17309
/* Registers the JSON-printing handlers for the message type behind `h`.
 *
 * `closure` points to a bool, preserve_fieldnames, which is forwarded to the
 * helpers that build field keys and enum handler data.
 *
 * Dispatch order:
 *   1. map-entry messages are delegated to printer_sethandlers_mapentry();
 *   2. well-known types are delegated to their dedicated
 *      printer_sethandlers_<type>() function;
 *   3. every other message is framed with printer_startmsg/printer_endmsg and
 *      gets one set of handlers per field, chosen by field type and by
 *      whether the field is a map, a sequence or a scalar.
 *
 * Every upb_handlerattr initialised here is released with
 * upb_handlerattr_uninit() on every path, including the delegating ones. */
void printer_sethandlers(const void *closure, upb_handlers *h) {
  const upb_msgdef *md = upb_handlers_msgdef(h);
  bool is_mapentry = upb_msgdef_mapentry(md);
  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
  upb_msg_field_iter i;
  const bool *preserve_fieldnames_ptr = closure;
  const bool preserve_fieldnames = *preserve_fieldnames_ptr;
  /* Set by the switch below when the message is a well-known type. */
  void (*wellknown_sethandlers)(const void *, upb_handlers *) = NULL;

  if (is_mapentry) {
    /* mapentry messages are sufficiently different that we handle them
     * separately. */
    printer_sethandlers_mapentry(closure, preserve_fieldnames, h);
    upb_handlerattr_uninit(&empty_attr);
    return;
  }

  switch (upb_msgdef_wellknowntype(md)) {
    case UPB_WELLKNOWN_UNSPECIFIED:
      break;
    case UPB_WELLKNOWN_ANY:
      wellknown_sethandlers = printer_sethandlers_any;
      break;
    case UPB_WELLKNOWN_FIELDMASK:
      wellknown_sethandlers = printer_sethandlers_fieldmask;
      break;
    case UPB_WELLKNOWN_DURATION:
      wellknown_sethandlers = printer_sethandlers_duration;
      break;
    case UPB_WELLKNOWN_TIMESTAMP:
      wellknown_sethandlers = printer_sethandlers_timestamp;
      break;
    case UPB_WELLKNOWN_VALUE:
      wellknown_sethandlers = printer_sethandlers_value;
      break;
    case UPB_WELLKNOWN_LISTVALUE:
      wellknown_sethandlers = printer_sethandlers_listvalue;
      break;
    case UPB_WELLKNOWN_STRUCT:
      wellknown_sethandlers = printer_sethandlers_structvalue;
      break;
#define WRAPPER(wellknowntype, name)                    \
  case wellknowntype:                                   \
    wellknown_sethandlers = printer_sethandlers_##name; \
    break;

    WRAPPER(UPB_WELLKNOWN_DOUBLEVALUE, doublevalue)
    WRAPPER(UPB_WELLKNOWN_FLOATVALUE, floatvalue)
    WRAPPER(UPB_WELLKNOWN_INT64VALUE, int64value)
    WRAPPER(UPB_WELLKNOWN_UINT64VALUE, uint64value)
    WRAPPER(UPB_WELLKNOWN_INT32VALUE, int32value)
    WRAPPER(UPB_WELLKNOWN_UINT32VALUE, uint32value)
    WRAPPER(UPB_WELLKNOWN_BOOLVALUE, boolvalue)
    WRAPPER(UPB_WELLKNOWN_STRINGVALUE, stringvalue)
    WRAPPER(UPB_WELLKNOWN_BYTESVALUE, bytesvalue)

#undef WRAPPER
  }

  if (wellknown_sethandlers != NULL) {
    wellknown_sethandlers(closure, h);
    upb_handlerattr_uninit(&empty_attr);
    return;
  }

  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);

  /* Numeric/bool fields: the repeated handler takes no key, the scalar one
   * receives the field name as handler data. */
#define TYPE(type, name, ctype)                                               \
  case type:                                                                  \
    if (upb_fielddef_isseq(f)) {                                              \
      upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
    } else {                                                                  \
      upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
    }                                                                         \
    break;

  upb_msg_field_begin(&i, md);
  for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);

    upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&name_attr,
                                   newstrpc(h, f, preserve_fieldnames));

    if (upb_fielddef_ismap(f)) {
      upb_handlers_setstartseq(h, f, startmap, &name_attr);
      upb_handlers_setendseq(h, f, endmap, &name_attr);
    } else if (upb_fielddef_isseq(f)) {
      upb_handlers_setstartseq(h, f, startseq, &name_attr);
      upb_handlers_setendseq(h, f, endseq, &empty_attr);
    }

    switch (upb_fielddef_type(f)) {
      TYPE(UPB_TYPE_FLOAT,  float,  float);
      TYPE(UPB_TYPE_DOUBLE, double, double);
      TYPE(UPB_TYPE_BOOL,   bool,   bool);
      TYPE(UPB_TYPE_INT32,  int32,  int32_t);
      TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
      TYPE(UPB_TYPE_INT64,  int64,  int64_t);
      TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
      case UPB_TYPE_ENUM: {
        /* For now, we always emit symbolic names for enums. We may want an
         * option later to control this behavior, but we will wait for a real
         * need first. */
        upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
        set_enum_hd(h, f, preserve_fieldnames, &enum_attr);

        if (upb_fielddef_isseq(f)) {
          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
        } else {
          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
        }

        upb_handlerattr_uninit(&enum_attr);
        break;
      }
      case UPB_TYPE_STRING:
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
        } else {
          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
        }
        break;
      case UPB_TYPE_BYTES:
        /* XXX: this doesn't support strings that span buffers yet. The base64
         * encoder will need to be made resumable for this to work properly. */
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
        } else {
          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
        }
        break;
      case UPB_TYPE_MESSAGE:
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
        } else {
          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
        }
        break;
    }

    upb_handlerattr_uninit(&name_attr);
  }

  upb_handlerattr_uninit(&empty_attr);
#undef TYPE
}
17454
/* Puts the printer back at nesting depth zero, i.e. outside any message. */
static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
}
17458
17459
17460 /* Public API *****************************************************************/
17461
upb_json_printer_create(upb_env * e,const upb_handlers * h,upb_bytessink * output)17462 upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
17463 upb_bytessink *output) {
17464 #ifndef NDEBUG
17465 size_t size_before = upb_env_bytesallocated(e);
17466 #endif
17467
17468 upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
17469 if (!p) return NULL;
17470
17471 p->output_ = output;
17472 json_printer_reset(p);
17473 upb_sink_reset(&p->input_, h, p);
17474 p->seconds = 0;
17475 p->nanos = 0;
17476
17477 /* If this fails, increase the value in printer.h. */
17478 UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(e) - size_before <=
17479 UPB_JSON_PRINTER_SIZE);
17480 return p;
17481 }
17482
upb_json_printer_input(upb_json_printer * p)17483 upb_sink *upb_json_printer_input(upb_json_printer *p) {
17484 return &p->input_;
17485 }
17486
upb_json_printer_newhandlers(const upb_msgdef * md,bool preserve_fieldnames,const void * owner)17487 const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
17488 bool preserve_fieldnames,
17489 const void *owner) {
17490 return upb_handlers_newfrozen(
17491 md, owner, printer_sethandlers, &preserve_fieldnames);
17492 }
17493
17494 #undef UPB_SIZE
17495 #undef UPB_FIELD_AT
17496 #undef UPB_READ_ONEOF
17497 #undef UPB_WRITE_ONEOF
17498