1 /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
2
3 #include "upb/encode.h"
4
5 #include <setjmp.h>
6 #include <string.h>
7
8 #include "upb/msg.h"
9 #include "upb/upb.h"
10
11 /* Must be last. */
12 #include "upb/port_def.inc"
13
14 #define UPB_PB_VARINT_MAX_LEN 10
15
/* Writes "val" to "buf" as a base-128 varint, least-significant group first.
 * Every byte except the last has its high bit set.  Returns the number of
 * bytes written (always at least one); "buf" must be large enough to hold
 * them. */
UPB_NOINLINE
static size_t encode_varint64(uint64_t val, char *buf) {
  size_t len = 0;
  for (;;) {
    uint8_t group = (uint8_t)(val & 0x7fU);
    val >>= 7;
    if (val == 0) {
      /* Final group: continuation bit stays clear. */
      buf[len++] = group;
      return len;
    }
    group |= 0x80U;
    buf[len++] = group;
  }
}
27
/* ZigZag-encodes a signed 32-bit value: 0, -1, 1, -2, ... map to 0, 1, 2, 3,
 * ...  The sign mask is derived with unsigned arithmetic only, because
 * right-shifting a negative signed integer is implementation-defined. */
static uint32_t encode_zz32(int32_t n) {
  uint32_t bits = (uint32_t)n;
  uint32_t sign_mask = (uint32_t)0 - (bits >> 31); /* all ones iff n < 0 */
  return (uint32_t)(bits << 1) ^ sign_mask;
}
/* ZigZag-encodes a signed 64-bit value: 0, -1, 1, -2, ... map to 0, 1, 2, 3,
 * ...  The sign mask is derived with unsigned arithmetic only, because
 * right-shifting a negative signed integer is implementation-defined. */
static uint64_t encode_zz64(int64_t n) {
  uint64_t bits = (uint64_t)n;
  uint64_t sign_mask = (uint64_t)0 - (bits >> 63); /* all ones iff n < 0 */
  return (bits << 1) ^ sign_mask;
}
30
31 typedef struct {
32 jmp_buf err;
33 upb_alloc *alloc;
34 char *buf, *ptr, *limit;
35 int options;
36 int depth;
37 _upb_mapsorter sorter;
38 } upb_encstate;
39
/* Returns the smallest power of two that is >= "bytes", with a floor of 128.
 *
 * If no power of two >= "bytes" is representable in size_t, returns "bytes"
 * unchanged instead of letting the doubling wrap around to zero (which would
 * loop forever).  The result is therefore always >= "bytes". */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t ret = 128;
  while (ret < bytes) {
    if (ret > SIZE_MAX / 2) {
      return bytes; /* Doubling again would overflow. */
    }
    ret *= 2;
  }
  return ret;
}
47
encode_err(upb_encstate * e)48 UPB_NORETURN static void encode_err(upb_encstate *e) {
49 UPB_LONGJMP(e->err, 1);
50 }
51
52 UPB_NOINLINE
encode_growbuffer(upb_encstate * e,size_t bytes)53 static void encode_growbuffer(upb_encstate *e, size_t bytes) {
54 size_t old_size = e->limit - e->buf;
55 size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
56 char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
57
58 if (!new_buf) encode_err(e);
59
60 /* We want previous data at the end, realloc() put it at the beginning. */
61 if (old_size > 0) {
62 memmove(new_buf + new_size - old_size, e->buf, old_size);
63 }
64
65 e->ptr = new_buf + new_size - (e->limit - e->ptr);
66 e->limit = new_buf + new_size;
67 e->buf = new_buf;
68
69 e->ptr -= bytes;
70 }
71
72 /* Call to ensure that at least "bytes" bytes are available for writing at
73 * e->ptr. Returns false if the bytes could not be allocated. */
74 UPB_FORCEINLINE
encode_reserve(upb_encstate * e,size_t bytes)75 static void encode_reserve(upb_encstate *e, size_t bytes) {
76 if ((size_t)(e->ptr - e->buf) < bytes) {
77 encode_growbuffer(e, bytes);
78 return;
79 }
80
81 e->ptr -= bytes;
82 }
83
84 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_encstate * e,const void * data,size_t len)85 static void encode_bytes(upb_encstate *e, const void *data, size_t len) {
86 if (len == 0) return; /* memcpy() with zero size is UB */
87 encode_reserve(e, len);
88 memcpy(e->ptr, data, len);
89 }
90
/* Prepends a 64-bit fixed-width value to the output.  The value is passed
 * through _upb_be_swap64() first (presumably a byte swap on big-endian hosts
 * only -- confirm in upb.h) and the resulting 8 bytes are copied verbatim. */
static void encode_fixed64(upb_encstate *e, uint64_t val) {
  const uint64_t wire = _upb_be_swap64(val);
  encode_bytes(e, &wire, sizeof(wire));
}
95
/* Prepends a 32-bit fixed-width value to the output.  The value is passed
 * through _upb_be_swap32() first (presumably a byte swap on big-endian hosts
 * only -- confirm in upb.h) and the resulting 4 bytes are copied verbatim. */
static void encode_fixed32(upb_encstate *e, uint32_t val) {
  const uint32_t wire = _upb_be_swap32(val);
  encode_bytes(e, &wire, sizeof(wire));
}
100
101 UPB_NOINLINE
encode_longvarint(upb_encstate * e,uint64_t val)102 static void encode_longvarint(upb_encstate *e, uint64_t val) {
103 size_t len;
104 char *start;
105
106 encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
107 len = encode_varint64(val, e->ptr);
108 start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
109 memmove(start, e->ptr, len);
110 e->ptr = start;
111 }
112
113 UPB_FORCEINLINE
encode_varint(upb_encstate * e,uint64_t val)114 static void encode_varint(upb_encstate *e, uint64_t val) {
115 if (val < 128 && e->ptr != e->buf) {
116 --e->ptr;
117 *e->ptr = val;
118 } else {
119 encode_longvarint(e, val);
120 }
121 }
122
/* Prepends a double to the output as a fixed 64-bit value carrying the same
 * object representation (copied with memcpy to avoid type punning). */
static void encode_double(upb_encstate *e, double d) {
  uint64_t bits;
  UPB_ASSERT(sizeof(d) == sizeof(bits));
  memcpy(&bits, &d, sizeof(bits));
  encode_fixed64(e, bits);
}
129
/* Prepends a float to the output as a fixed 32-bit value carrying the same
 * object representation (copied with memcpy to avoid type punning). */
static void encode_float(upb_encstate *e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(d) == sizeof(bits));
  memcpy(&bits, &d, sizeof(bits));
  encode_fixed32(e, bits);
}
136
/* Prepends a field tag: the field number shifted left by three bits, OR'd
 * with the wire type, written as a varint. */
static void encode_tag(upb_encstate *e, uint32_t field_number,
                       uint8_t wire_type) {
  uint32_t tag = (field_number << 3) | wire_type;
  encode_varint(e, tag);
}
141
/* Prepends every element of a fixed-width (4- or 8-byte) array to the output,
 * last element first so that the final output is in array order.
 *
 *   elem_size  size of one element in bytes (4 or 8)
 *   tag        if nonzero, written in front of each element (non-packed
 *              encoding); if zero, only the raw elements are written and the
 *              caller adds the packed header.
 *
 * Elements go through encode_fixed32()/encode_fixed64() so that arrays get
 * the same byte-order conversion as scalar fixed fields.  The single bulk
 * copy is kept only for the packed case when that conversion is a no-op.
 * An empty array writes nothing. */
static void encode_fixedarray(upb_encstate *e, const upb_array *arr,
                              size_t elem_size, uint32_t tag) {
  size_t bytes = arr->len * elem_size;
  const char *data = _upb_array_constptr(arr);
  const char *ptr;
  /* True when _upb_be_swap32() leaves values untouched, i.e. in-memory
   * elements can be copied to the output as they are. */
  const bool raw_copy_ok = _upb_be_swap32(1) == 1;

  if (bytes == 0) return;

  if (tag == 0 && raw_copy_ok) {
    encode_bytes(e, data, bytes);
    return;
  }

  ptr = data + bytes;
  do {
    ptr -= elem_size;
    if (elem_size == sizeof(uint32_t)) {
      uint32_t val;
      memcpy(&val, ptr, sizeof(val));
      encode_fixed32(e, val);
    } else {
      uint64_t val;
      UPB_ASSERT(elem_size == sizeof(uint64_t));
      memcpy(&val, ptr, sizeof(val));
      encode_fixed64(e, val);
    }
    if (tag) encode_varint(e, tag);
  } while (ptr != data);
}
158
159 static void encode_message(upb_encstate *e, const char *msg,
160 const upb_msglayout *m, size_t *size);
161
/* Encodes one singular (non-repeated, non-map) field value and then its tag.
 * Because the buffer is filled back to front, the value is written first and
 * the tag second, so the tag precedes the value in the final output.
 *
 *   _field_mem       address of the field's storage
 *   m                layout of the containing message (used to look up the
 *                    sub-layout of message/group fields)
 *   f                layout of the field itself
 *   skip_zero_value  if true, write nothing when the value is the zero value
 *                    (numeric zero or an empty string/bytes view)
 *
 * For float/double only +0.0 counts as the zero value; -0.0 is a distinct
 * value and is still written.  NULL sub-messages are always skipped.  Each
 * sub-message or group consumes one level of e->depth while it is being
 * encoded; running out of depth aborts via encode_err(). */
static void encode_scalar(upb_encstate *e, const void *_field_mem,
                          const upb_msglayout *m, const upb_msglayout_field *f,
                          bool skip_zero_value) {
  const char *field_mem = _field_mem;
  int wire_type;

/* Integer-like types: the zero value is simply val == 0. */
#define CASE(ctype, type, wtype, encodeval) \
  {                                         \
    ctype val = *(const ctype *)field_mem;  \
    if (skip_zero_value && val == 0) {      \
      return;                               \
    }                                       \
    encode_##type(e, encodeval);            \
    wire_type = wtype;                      \
    break;                                  \
  }

/* Floating-point types: compare representations against +0.0 so that -0.0
 * (which compares equal to 0 with ==) is not mistaken for the zero value. */
#define FP_CASE(ctype, type, wtype)                                       \
  {                                                                       \
    const ctype pos_zero = 0;                                             \
    ctype val = *(const ctype *)field_mem;                                \
    if (skip_zero_value && memcmp(&val, &pos_zero, sizeof(val)) == 0) {   \
      return;                                                             \
    }                                                                     \
    encode_##type(e, val);                                                \
    wire_type = wtype;                                                    \
    break;                                                                \
  }

  switch (f->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      FP_CASE(double, double, UPB_WIRE_TYPE_64BIT);
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      FP_CASE(float, float, UPB_WIRE_TYPE_32BIT);
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Sign-extend so negative values become 10-byte varints. */
      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz32(val));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz64(val));
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      upb_strview view = *(const upb_strview *)field_mem;
      if (skip_zero_value && view.size == 0) {
        return;
      }
      /* Payload first, then its length, so the length precedes it. */
      encode_bytes(e, view.data, view.size);
      encode_varint(e, view.size);
      wire_type = UPB_WIRE_TYPE_DELIMITED;
      break;
    }
    case UPB_DESCRIPTOR_TYPE_GROUP: {
      size_t size;
      void *submsg = *(void *const *)field_mem;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      if (submsg == NULL) {
        return;
      }
      if (--e->depth == 0) encode_err(e);
      /* END_GROUP is written before the body so it follows it in output. */
      encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
      encode_message(e, submsg, subm, &size);
      wire_type = UPB_WIRE_TYPE_START_GROUP;
      e->depth++;
      break;
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      size_t size;
      void *submsg = *(void *const *)field_mem;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      if (submsg == NULL) {
        return;
      }
      if (--e->depth == 0) encode_err(e);
      encode_message(e, submsg, subm, &size);
      encode_varint(e, size);
      wire_type = UPB_WIRE_TYPE_DELIMITED;
      e->depth++;
      break;
    }
    default:
      UPB_UNREACHABLE();
  }
#undef CASE
#undef FP_CASE

  encode_tag(e, f->number, wire_type);
}
250
/* Encodes a repeated field.  field_mem points at the field's storage, which
 * holds a pointer to the array; a NULL or empty array writes nothing.
 *
 * Elements are visited from last to first because the buffer is filled back
 * to front.  Numeric types are written either packed (elements only, then one
 * length + DELIMITED tag for the whole run) or non-packed (one tag per
 * element), depending on f->label.  Strings, bytes, groups and messages
 * always get one tag per element.  Groups and messages consume one level of
 * e->depth for the duration of the array. */
static void encode_array(upb_encstate *e, const char *field_mem,
                         const upb_msglayout *m, const upb_msglayout_field *f) {
  const upb_array *arr = *(const upb_array *const *)field_mem;
  bool packed = f->label == _UPB_LABEL_PACKED;
  size_t pre_len = e->limit - e->ptr;

  if (arr == NULL || arr->len == 0) {
    return;
  }

/* Writes each element as a varint; a zero tag means "packed, no per-element
 * tag". */
#define VARINT_CASE(ctype, encode)                                       \
  {                                                                      \
    const ctype *start = _upb_array_constptr(arr);                       \
    const ctype *ptr = start + arr->len;                                 \
    uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \
    do {                                                                 \
      ptr--;                                                             \
      encode_varint(e, encode);                                          \
      if (tag) encode_varint(e, tag);                                    \
    } while (ptr != start);                                              \
  }                                                                      \
  break;

/* Per-element tag for fixed-width types, or 0 when packed. */
#define TAG(wire_type) (packed ? 0 : ((f->number << 3) | (wire_type)))

  switch (f->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      encode_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT));
      break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      encode_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT));
      break;
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      encode_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT));
      break;
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      encode_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT));
      break;
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      VARINT_CASE(uint64_t, *ptr);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      VARINT_CASE(uint32_t, *ptr);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      VARINT_CASE(int32_t, (int64_t)*ptr);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      VARINT_CASE(bool, *ptr);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      VARINT_CASE(int32_t, encode_zz32(*ptr));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      VARINT_CASE(int64_t, encode_zz64(*ptr));
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      const upb_strview *start = _upb_array_constptr(arr);
      const upb_strview *ptr = start + arr->len;
      do {
        ptr--;
        encode_bytes(e, ptr->data, ptr->size);
        encode_varint(e, ptr->size);
        encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
      } while (ptr != start);
      return; /* Never packed. */
    }
    case UPB_DESCRIPTOR_TYPE_GROUP: {
      const void *const *start = _upb_array_constptr(arr);
      const void *const *ptr = start + arr->len;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
        encode_message(e, *ptr, subm, &size);
        encode_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
      } while (ptr != start);
      e->depth++;
      return; /* Never packed. */
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      const void *const *start = _upb_array_constptr(arr);
      const void *const *ptr = start + arr->len;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        encode_message(e, *ptr, subm, &size);
        encode_varint(e, size);
        encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
      } while (ptr != start);
      e->depth++;
      return; /* Never packed. */
    }
  }
#undef VARINT_CASE
#undef TAG

  if (packed) {
    /* Length of everything this call wrote, then the field's single tag. */
    encode_varint(e, e->limit - e->ptr - pre_len);
    encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
  }
}
355
/* Encodes a single map entry as a length-delimited field numbered "number".
 * "layout" describes the entry: fields[0] is the key and fields[1] the value.
 * Both are always written, even when they hold a zero value. */
static void encode_mapentry(upb_encstate *e, uint32_t number,
                            const upb_msglayout *layout,
                            const upb_map_entry *ent) {
  const upb_msglayout_field *fields = layout->fields;
  const size_t before = e->limit - e->ptr;
  size_t entry_len;

  /* Value before key: the buffer is filled back to front, so the key ends up
   * first in the output. */
  encode_scalar(e, &ent->v, layout, &fields[1], false);
  encode_scalar(e, &ent->k, layout, &fields[0], false);

  entry_len = (e->limit - e->ptr) - before;
  encode_varint(e, entry_len);
  encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED);
}
369
/* Encodes a map field as a sequence of map entries.  field_mem points at the
 * field's storage, which holds a pointer to the map; a NULL map writes
 * nothing.  With UPB_ENCODE_DETERMINISTIC set, entries are visited in the
 * order produced by the map sorter; otherwise in hash-table iteration
 * order. */
static void encode_map(upb_encstate *e, const char *field_mem,
                       const upb_msglayout *m, const upb_msglayout_field *f) {
  const upb_map *map = *(const upb_map *const *)field_mem;
  const upb_msglayout *layout = m->submsgs[f->submsg_index];
  UPB_ASSERT(layout->field_count == 2);

  if (map == NULL) return;

  if ((e->options & UPB_ENCODE_DETERMINISTIC) == 0) {
    upb_strtable_iter it;
    upb_strtable_begin(&it, &map->table);
    while (!upb_strtable_done(&it)) {
      upb_strview key = upb_strtable_iter_key(&it);
      const upb_value val = upb_strtable_iter_value(&it);
      upb_map_entry ent;
      /* Rebuild a typed entry from the table's raw key/value storage. */
      _upb_map_fromkey(key, &ent.k, map->key_size);
      _upb_map_fromvalue(val, &ent.v, map->val_size);
      encode_mapentry(e, f->number, layout, &ent);
      upb_strtable_next(&it);
    }
    return;
  }

  {
    _upb_sortedmap sorted;
    upb_map_entry ent;
    _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
                           &sorted);
    while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
      encode_mapentry(e, f->number, layout, &ent);
    }
    _upb_mapsorter_popmap(&e->sorter, &sorted);
  }
}
400
/* Decides whether singular field "f" of "msg" is present and, if so, encodes
 * it.  f->presence selects the rule:
 *   == 0  no explicit presence (proto3): always attempt, but let
 *         encode_scalar() drop zero values;
 *    > 0  hasbit (proto2): encode only if the hasbit is set;
 *    < 0  oneof member: encode only if the oneof case equals this field's
 *         number. */
static void encode_scalarfield(upb_encstate *e, const char *msg,
                               const upb_msglayout *m,
                               const upb_msglayout_field *f) {
  const bool implicit_presence = (f->presence == 0);

  if (!implicit_presence) {
    bool present;
    if (f->presence > 0) {
      present = _upb_hasbit_field(msg, f);
    } else {
      present = (_upb_getoneofcase_field(msg, f) == f->number);
    }
    if (!present) return;
  }

  encode_scalar(e, msg + f->offset, m, f, implicit_presence);
}
417
/* Encodes every field of "msg" according to layout "m" and stores the number
 * of bytes produced in *size.
 *
 * Unknown-field bytes (unless UPB_ENCODE_SKIPUNKNOWN is set) are written
 * first and the fields are then walked from the last layout entry to the
 * first; since the buffer is filled back to front, the final output has the
 * fields in layout order followed by the unknown bytes. */
static void encode_message(upb_encstate *e, const char *msg,
                           const upb_msglayout *m, size_t *size) {
  const size_t start_len = e->limit - e->ptr;
  const upb_msglayout_field *fields = &m->fields[0];
  size_t remaining;

  if (!(e->options & UPB_ENCODE_SKIPUNKNOWN)) {
    size_t unknown_size;
    const char *unknown = upb_msg_getunknown(msg, &unknown_size);
    if (unknown != NULL) {
      encode_bytes(e, unknown, unknown_size);
    }
  }

  for (remaining = m->field_count; remaining > 0; remaining--) {
    const upb_msglayout_field *f = &fields[remaining - 1];
    const char *field_mem = msg + f->offset;
    if (_upb_isrepeated(f)) {
      encode_array(e, field_mem, m, f);
    } else if (f->label == _UPB_LABEL_MAP) {
      encode_map(e, field_mem, m, f);
    } else {
      encode_scalarfield(e, msg, m, f);
    }
  }

  *size = (e->limit - e->ptr) - start_len;
}
446
/* Serializes "msg" (described by layout "m") into memory obtained from
 * "arena".
 *
 *   options  low bits: UPB_ENCODE_* flags; bits 16 and up: maximum nesting
 *            depth, where 0 selects the default of 64.
 *   size     receives the encoded length, or 0 on failure.
 *
 * Returns a pointer to the first encoded byte, or NULL if encoding failed
 * (allocation failure or nesting depth exceeded).  An empty encoding is
 * reported as a non-NULL pointer to a static dummy byte with *size == 0. */
char *upb_encode_ex(const void *msg, const upb_msglayout *m, int options,
                    upb_arena *arena, size_t *size) {
  upb_encstate e;
  unsigned depth = (unsigned)options >> 16;
  char *ret = NULL;

  e.alloc = upb_arena_alloc(arena);
  e.buf = NULL;
  e.ptr = NULL;
  e.limit = NULL;
  e.options = options;
  e.depth = depth ? depth : 64;
  _upb_mapsorter_init(&e.sorter);

  if (UPB_SETJMP(e.err)) {
    /* Reached through encode_err(). */
    *size = 0;
    ret = NULL;
  } else {
    encode_message(&e, msg, m, size);
    *size = e.limit - e.ptr;
    if (*size != 0) {
      UPB_ASSERT(e.ptr);
      ret = e.ptr;
    } else {
      /* Nothing was written, so no buffer exists; still return non-NULL so
       * callers can tell success from failure. */
      static char ch;
      ret = &ch;
    }
  }

  _upb_mapsorter_destroy(&e.sorter);
  return ret;
}
479