• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2014 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #include "protobuf.h"
32 
// Creates an instance of `klass` by calling its Ruby-level `new` method with
// zero arguments, and returns the new object.
VALUE initialize_rb_class_with_no_args(VALUE klass) {
  VALUE instance = rb_funcall(klass, rb_intern("new"), 0);
  return instance;
}
36 
37 // This function is equivalent to rb_str_cat(), but unlike the real
38 // rb_str_cat(), it doesn't leak memory in some versions of Ruby.
39 // For more information, see:
40 //   https://bugs.ruby-lang.org/issues/11328
noleak_rb_str_cat(VALUE rb_str,const char * str,long len)41 VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) {
42   char *p;
43   size_t oldlen = RSTRING_LEN(rb_str);
44   rb_str_modify_expand(rb_str, len);
45   p = RSTRING_PTR(rb_str);
46   memcpy(p + oldlen, str, len);
47   rb_str_set_len(rb_str, oldlen + len);
48   return rb_str;
49 }
50 
is_wrapper(const upb_msgdef * m)51 bool is_wrapper(const upb_msgdef* m) {
52   switch (upb_msgdef_wellknowntype(m)) {
53     case UPB_WELLKNOWN_DOUBLEVALUE:
54     case UPB_WELLKNOWN_FLOATVALUE:
55     case UPB_WELLKNOWN_INT64VALUE:
56     case UPB_WELLKNOWN_UINT64VALUE:
57     case UPB_WELLKNOWN_INT32VALUE:
58     case UPB_WELLKNOWN_UINT32VALUE:
59     case UPB_WELLKNOWN_STRINGVALUE:
60     case UPB_WELLKNOWN_BYTESVALUE:
61     case UPB_WELLKNOWN_BOOLVALUE:
62       return true;
63     default:
64       return false;
65   }
66 }
67 
68 // The code below also comes from upb's prototype Ruby binding, developed by
69 // haberman@.
70 
71 /* stringsink *****************************************************************/
72 
stringsink_start(void * _sink,const void * hd,size_t size_hint)73 static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
74   stringsink *sink = _sink;
75   sink->len = 0;
76   return sink;
77 }
78 
stringsink_string(void * _sink,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)79 static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
80                                 size_t len, const upb_bufhandle *handle) {
81   stringsink *sink = _sink;
82   size_t new_size = sink->size;
83 
84   UPB_UNUSED(hd);
85   UPB_UNUSED(handle);
86 
87   while (sink->len + len > new_size) {
88     new_size *= 2;
89   }
90 
91   if (new_size != sink->size) {
92     sink->ptr = realloc(sink->ptr, new_size);
93     sink->size = new_size;
94   }
95 
96   memcpy(sink->ptr + sink->len, ptr, len);
97   sink->len += len;
98 
99   return len;
100 }
101 
// Prepares `sink` for use: allocates an initial 32-byte buffer with no data in
// it and wires the sink's bytes handler to stringsink_start /
// stringsink_string, with the sink itself as the closure.
// NOTE(review): the malloc() result is not checked here.
void stringsink_init(stringsink *sink) {
  // Empty buffer with a small starting capacity.
  sink->len = 0;
  sink->size = 32;
  sink->ptr = malloc(sink->size);

  // Register the callbacks, then bind handler and closure to the bytes sink.
  upb_byteshandler_init(&sink->handler);
  upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
  upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
  upb_bytessink_reset(&sink->sink, &sink->handler, sink);
}
113 
// Releases the buffer held in `sink->ptr`. The sink structure itself is not
// freed and its fields are left untouched.
void stringsink_uninit(stringsink *sink) {
  free(sink->ptr);
}
117 
118 // -----------------------------------------------------------------------------
119 // Parsing.
120 // -----------------------------------------------------------------------------
121 
// Yields an lvalue of type `type` located `ofs` bytes past the address `msg`.
// Every argument and the whole expansion are parenthesized so the macro
// composes correctly with expression arguments (e.g. `p + 1`) and with
// surrounding operators.
#define DEREF(msg, ofs, type) (*(type*)(((uint8_t *)(msg)) + (ofs)))
123 
// Handler data for fields that only need a storage location and a hasbit.
// `ofs` must stay the first member: startseq_handler reads its handler data
// through a `const size_t*`.
typedef struct {
  // Byte offset of the field's slot, applied to the handler's closure.
  size_t ofs;
  // Bit index passed to set_hasbit(); -1 is used when there is no hasbit.
  int32_t hasbit;
} field_handlerdata_t;
128 
129 // Creates a handlerdata that contains the offset and the hasbit for the field
newhandlerdata(upb_handlers * h,uint32_t ofs,int32_t hasbit)130 static const void* newhandlerdata(upb_handlers* h, uint32_t ofs, int32_t hasbit) {
131   field_handlerdata_t *hd = ALLOC(field_handlerdata_t);
132   hd->ofs = ofs;
133   hd->hasbit = hasbit;
134   upb_handlers_addcleanup(h, hd, xfree);
135   return hd;
136 }
137 
138 typedef struct {
139   size_t ofs;
140   int32_t hasbit;
141   upb_fieldtype_t wrapped_type;  // Only for wrappers.
142   VALUE subklass;
143 } submsg_handlerdata_t;
144 
145 // Creates a handlerdata that contains offset and submessage type information.
newsubmsghandlerdata(upb_handlers * h,const upb_fielddef * f,uint32_t ofs,int32_t hasbit,VALUE subklass)146 static const void *newsubmsghandlerdata(upb_handlers* h,
147                                         const upb_fielddef *f,
148                                         uint32_t ofs,
149                                         int32_t hasbit,
150                                         VALUE subklass) {
151   submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t);
152   const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
153   hd->ofs = ofs;
154   hd->hasbit = hasbit;
155   hd->subklass = subklass;
156   upb_handlers_addcleanup(h, hd, xfree);
157   if (is_wrapper(subm)) {
158     const upb_fielddef *value_f = upb_msgdef_itof(subm, 1);
159     hd->wrapped_type = upb_fielddef_type(value_f);
160   }
161   return hd;
162 }
163 
164 typedef struct {
165   size_t ofs;              // union data slot
166   size_t case_ofs;         // oneof_case field
167   uint32_t oneof_case_num; // oneof-case number to place in oneof_case field
168   VALUE subklass;
169 } oneof_handlerdata_t;
170 
// Allocates an oneof_handlerdata_t for oneof member `f`, recording the data
// slot offset, the case slot offset, the case number and the field's class.
// The allocation is registered with `h` for cleanup via xfree.
static const void *newoneofhandlerdata(upb_handlers *h,
                                       uint32_t ofs,
                                       uint32_t case_ofs,
                                       const upb_fielddef *f,
                                       const Descriptor* desc) {
  oneof_handlerdata_t *data = ALLOC(oneof_handlerdata_t);
  // We reuse the field tag number as a oneof union discriminant tag. Note that
  // we don't expose these numbers to the user, so the only requirement is that
  // we have some unique ID for each union case/possibility. The field tag
  // numbers are already present and are easy to use so there's no reason to
  // create a separate ID space. In addition, using the field tag number here
  // lets us easily look up the field in the oneof accessor.
  uint32_t case_num = upb_fielddef_number(f);

  // Value fields additionally carry ONEOF_CASE_MASK in their case number.
  if (is_value_field(f)) {
    case_num |= ONEOF_CASE_MASK;
  }

  data->ofs = ofs;
  data->case_ofs = case_ofs;
  data->oneof_case_num = case_num;
  data->subklass = field_type_class(desc->layout, f);
  upb_handlers_addcleanup(h, data, xfree);
  return data;
}
193 
194 // A handler that starts a repeated field.  Gets the Repeated*Field instance for
195 // this field (such an instance always exists even in an empty message).
startseq_handler(void * closure,const void * hd)196 static void *startseq_handler(void* closure, const void* hd) {
197   MessageHeader* msg = closure;
198   const size_t *ofs = hd;
199   return (void*)DEREF(msg, *ofs, VALUE);
200 }
201 
202 // Handlers that append primitive values to a repeated field.
203 #define DEFINE_APPEND_HANDLER(type, ctype)                 \
204   static bool append##type##_handler(void *closure, const void *hd, \
205                                      ctype val) {                   \
206     VALUE ary = (VALUE)closure;                                     \
207     RepeatedField_push_native(ary, &val);                           \
208     return true;                                                    \
209   }
210 
DEFINE_APPEND_HANDLER(bool,bool)211 DEFINE_APPEND_HANDLER(bool,   bool)
212 DEFINE_APPEND_HANDLER(int32,  int32_t)
213 DEFINE_APPEND_HANDLER(uint32, uint32_t)
214 DEFINE_APPEND_HANDLER(float,  float)
215 DEFINE_APPEND_HANDLER(int64,  int64_t)
216 DEFINE_APPEND_HANDLER(uint64, uint64_t)
217 DEFINE_APPEND_HANDLER(double, double)
218 
219 // Appends a string to a repeated field.
220 static void* appendstr_handler(void *closure,
221                                const void *hd,
222                                size_t size_hint) {
223   VALUE ary = (VALUE)closure;
224   VALUE str = rb_str_new2("");
225   rb_enc_associate(str, kRubyStringUtf8Encoding);
226   RepeatedField_push_native(ary, &str);
227   return (void*)str;
228 }
229 
// Sets bit number `hasbit` in the byte array starting at `closure` (bit 0 is
// the least significant bit of the first byte). Does nothing when `hasbit` is
// zero or negative.
static void set_hasbit(void *closure, int32_t hasbit) {
  uint8_t *bytes;

  if (hasbit <= 0) {
    return;
  }

  bytes = closure;
  bytes[hasbit / 8] |= 1 << (hasbit % 8);
}
236 
237 // Appends a 'bytes' string to a repeated field.
appendbytes_handler(void * closure,const void * hd,size_t size_hint)238 static void* appendbytes_handler(void *closure,
239                                  const void *hd,
240                                  size_t size_hint) {
241   VALUE ary = (VALUE)closure;
242   VALUE str = rb_str_new2("");
243   rb_enc_associate(str, kRubyString8bitEncoding);
244   RepeatedField_push_native(ary, &str);
245   return (void*)str;
246 }
247 
248 // Sets a non-repeated string field in a message.
str_handler(void * closure,const void * hd,size_t size_hint)249 static void* str_handler(void *closure,
250                          const void *hd,
251                          size_t size_hint) {
252   MessageHeader* msg = closure;
253   const field_handlerdata_t *fieldhandler = hd;
254 
255   VALUE str = rb_str_new2("");
256   rb_enc_associate(str, kRubyStringUtf8Encoding);
257   DEREF(msg, fieldhandler->ofs, VALUE) = str;
258   set_hasbit(closure, fieldhandler->hasbit);
259   return (void*)str;
260 }
261 
262 // Sets a non-repeated 'bytes' field in a message.
bytes_handler(void * closure,const void * hd,size_t size_hint)263 static void* bytes_handler(void *closure,
264                            const void *hd,
265                            size_t size_hint) {
266   MessageHeader* msg = closure;
267   const field_handlerdata_t *fieldhandler = hd;
268 
269   VALUE str = rb_str_new2("");
270   rb_enc_associate(str, kRubyString8bitEncoding);
271   DEREF(msg, fieldhandler->ofs, VALUE) = str;
272   set_hasbit(closure, fieldhandler->hasbit);
273   return (void*)str;
274 }
275 
stringdata_handler(void * closure,const void * hd,const char * str,size_t len,const upb_bufhandle * handle)276 static size_t stringdata_handler(void* closure, const void* hd,
277                                  const char* str, size_t len,
278                                  const upb_bufhandle* handle) {
279   VALUE rb_str = (VALUE)closure;
280   noleak_rb_str_cat(rb_str, str, len);
281   return len;
282 }
283 
stringdata_end_handler(void * closure,const void * hd)284 static bool stringdata_end_handler(void* closure, const void* hd) {
285   VALUE rb_str = (VALUE)closure;
286   rb_obj_freeze(rb_str);
287   return true;
288 }
289 
appendstring_end_handler(void * closure,const void * hd)290 static bool appendstring_end_handler(void* closure, const void* hd) {
291   VALUE rb_str = (VALUE)closure;
292   rb_obj_freeze(rb_str);
293   return true;
294 }
295 
296 // Appends a submessage to a repeated field (a regular Ruby array for now).
appendsubmsg_handler(void * closure,const void * hd)297 static void *appendsubmsg_handler(void *closure, const void *hd) {
298   VALUE ary = (VALUE)closure;
299   const submsg_handlerdata_t *submsgdata = hd;
300   MessageHeader* submsg;
301 
302   VALUE submsg_rb = initialize_rb_class_with_no_args(submsgdata->subklass);
303   RepeatedField_push(ary, submsg_rb);
304 
305   TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
306   return submsg;
307 }
308 
309 // Appends a wrapper to a repeated field (a regular Ruby array for now).
appendwrapper_handler(void * closure,const void * hd)310 static void *appendwrapper_handler(void *closure, const void *hd) {
311   VALUE ary = (VALUE)closure;
312   int size = RepeatedField_size(ary);
313   (void)hd;
314 
315   RepeatedField_push(ary, Qnil);
316 
317   return RepeatedField_index_native(ary, size);
318 }
319 
320 // Sets a non-repeated submessage field in a message.
submsg_handler(void * closure,const void * hd)321 static void *submsg_handler(void *closure, const void *hd) {
322   MessageHeader* msg = closure;
323   const submsg_handlerdata_t* submsgdata = hd;
324   VALUE submsg_rb;
325   MessageHeader* submsg;
326 
327   if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) {
328     DEREF(msg, submsgdata->ofs, VALUE) =
329         initialize_rb_class_with_no_args(submsgdata->subklass);
330   }
331 
332   set_hasbit(closure, submsgdata->hasbit);
333 
334   submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
335   TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
336 
337   return submsg;
338 }
339 
startwrapper(void * closure,const void * hd)340 static void* startwrapper(void* closure, const void* hd) {
341   const submsg_handlerdata_t* submsgdata = hd;
342   char* msg = closure;
343   VALUE* field = (VALUE*)(msg + submsgdata->ofs);
344 
345   set_hasbit(closure, submsgdata->hasbit);
346 
347   switch (submsgdata->wrapped_type) {
348     case UPB_TYPE_FLOAT:
349     case UPB_TYPE_DOUBLE:
350       *field = DBL2NUM(0);
351       break;
352     case UPB_TYPE_BOOL:
353       *field = Qfalse;
354       break;
355     case UPB_TYPE_STRING:
356       *field = get_frozen_string(NULL, 0, false);
357       break;
358     case UPB_TYPE_BYTES:
359       *field = get_frozen_string(NULL, 0, true);
360       break;
361     case UPB_TYPE_ENUM:
362     case UPB_TYPE_INT32:
363     case UPB_TYPE_INT64:
364     case UPB_TYPE_UINT32:
365     case UPB_TYPE_UINT64:
366       *field = INT2NUM(0);
367       break;
368     case UPB_TYPE_MESSAGE:
369       rb_raise(rb_eRuntimeError,
370                "Internal logic error with well-known types.");
371   }
372 
373   return field;
374 }
375 
376 // Handler data for startmap/endmap handlers.
377 typedef struct {
378   size_t ofs;
379   upb_fieldtype_t key_field_type;
380   upb_fieldtype_t value_field_type;
381   VALUE subklass;
382 } map_handlerdata_t;
383 
384 // Temporary frame for map parsing: at the beginning of a map entry message, a
385 // submsg handler allocates a frame to hold (i) a reference to the Map object
386 // into which this message will be inserted and (ii) storage slots to
387 // temporarily hold the key and value for this map entry until the end of the
388 // submessage. When the submessage ends, another handler is called to insert the
389 // value into the map.
390 typedef struct {
391   VALUE map;
392   const map_handlerdata_t* handlerdata;
393   char key_storage[NATIVE_SLOT_MAX_SIZE];
394   char value_storage[NATIVE_SLOT_MAX_SIZE];
395 } map_parse_frame_t;
396 
MapParseFrame_mark(void * _self)397 static void MapParseFrame_mark(void* _self) {
398   map_parse_frame_t* frame = _self;
399 
400   // This shouldn't strictly be necessary since this should be rooted by the
401   // message itself, but it can't hurt.
402   rb_gc_mark(frame->map);
403 
404   native_slot_mark(frame->handlerdata->key_field_type, &frame->key_storage);
405   native_slot_mark(frame->handlerdata->value_field_type, &frame->value_storage);
406 }
407 
// Free callback for a wrapped map_parse_frame_t: releases the frame with
// xfree().
void MapParseFrame_free(void* self) {
  xfree(self);
}
411 
412 rb_data_type_t MapParseFrame_type = {
413   "MapParseFrame",
414   { MapParseFrame_mark, MapParseFrame_free, NULL },
415 };
416 
417 // Handler to begin a map entry: allocates a temporary frame. This is the
418 // 'startsubmsg' handler on the msgdef that contains the map field.
startmap_handler(void * closure,const void * hd)419 static void *startmap_handler(void *closure, const void *hd) {
420   MessageHeader* msg = closure;
421   const map_handlerdata_t* mapdata = hd;
422   map_parse_frame_t* frame = ALLOC(map_parse_frame_t);
423   VALUE map_rb = DEREF(msg, mapdata->ofs, VALUE);
424 
425   frame->handlerdata = mapdata;
426   frame->map = map_rb;
427   native_slot_init(mapdata->key_field_type, &frame->key_storage);
428   native_slot_init(mapdata->value_field_type, &frame->value_storage);
429 
430   Map_set_frame(map_rb,
431                 TypedData_Wrap_Struct(rb_cObject, &MapParseFrame_type, frame));
432 
433   return frame;
434 }
435 
endmap_handler(void * closure,const void * hd)436 static bool endmap_handler(void *closure, const void *hd) {
437   map_parse_frame_t* frame = closure;
438   Map_set_frame(frame->map, Qnil);
439   return true;
440 }
441 
442 // Handler to end a map entry: inserts the value defined during the message into
443 // the map. This is the 'endmsg' handler on the map entry msgdef.
endmapentry_handler(void * closure,const void * hd,upb_status * s)444 static bool endmapentry_handler(void* closure, const void* hd, upb_status* s) {
445   map_parse_frame_t* frame = closure;
446   const map_handlerdata_t* mapdata = hd;
447 
448   VALUE key = native_slot_get(
449       mapdata->key_field_type, Qnil,
450       &frame->key_storage);
451 
452   VALUE value = native_slot_get(
453       mapdata->value_field_type, mapdata->subklass,
454       &frame->value_storage);
455 
456   Map_index_set(frame->map, key, value);
457 
458   return true;
459 }
460 
461 // Allocates a new map_handlerdata_t given the map entry message definition. If
462 // the offset of the field within the parent message is also given, that is
463 // added to the handler data as well. Note that this is called *twice* per map
464 // field: once in the parent message handler setup when setting the startsubmsg
465 // handler and once in the map entry message handler setup when setting the
466 // key/value and endmsg handlers. The reason is that there is no easy way to
467 // pass the handlerdata down to the sub-message handler setup.
new_map_handlerdata(size_t ofs,const upb_msgdef * mapentry_def,const Descriptor * desc)468 static map_handlerdata_t* new_map_handlerdata(
469     size_t ofs,
470     const upb_msgdef* mapentry_def,
471     const Descriptor* desc) {
472   const upb_fielddef* key_field;
473   const upb_fielddef* value_field;
474   map_handlerdata_t* hd = ALLOC(map_handlerdata_t);
475   hd->ofs = ofs;
476   key_field = upb_msgdef_itof(mapentry_def, MAP_KEY_FIELD);
477   assert(key_field != NULL);
478   hd->key_field_type = upb_fielddef_type(key_field);
479   value_field = upb_msgdef_itof(mapentry_def, MAP_VALUE_FIELD);
480   assert(value_field != NULL);
481   hd->value_field_type = upb_fielddef_type(value_field);
482   hd->subklass = field_type_class(desc->layout, value_field);
483 
484   return hd;
485 }
486 
487 // Handlers that set primitive values in oneofs.
488 #define DEFINE_ONEOF_HANDLER(type, ctype)                           \
489   static bool oneof##type##_handler(void *closure, const void *hd,  \
490                                      ctype val) {                   \
491     const oneof_handlerdata_t *oneofdata = hd;                      \
492     DEREF(closure, oneofdata->case_ofs, uint32_t) =                 \
493         oneofdata->oneof_case_num;                                  \
494     DEREF(closure, oneofdata->ofs, ctype) = val;                    \
495     return true;                                                    \
496   }
497 
DEFINE_ONEOF_HANDLER(bool,bool)498 DEFINE_ONEOF_HANDLER(bool,   bool)
499 DEFINE_ONEOF_HANDLER(int32,  int32_t)
500 DEFINE_ONEOF_HANDLER(uint32, uint32_t)
501 DEFINE_ONEOF_HANDLER(float,  float)
502 DEFINE_ONEOF_HANDLER(int64,  int64_t)
503 DEFINE_ONEOF_HANDLER(uint64, uint64_t)
504 DEFINE_ONEOF_HANDLER(double, double)
505 
506 #undef DEFINE_ONEOF_HANDLER
507 
508 // Handlers for strings in a oneof.
509 static void *oneofstr_handler(void *closure,
510                               const void *hd,
511                               size_t size_hint) {
512   MessageHeader* msg = closure;
513   const oneof_handlerdata_t *oneofdata = hd;
514   VALUE str = rb_str_new2("");
515   rb_enc_associate(str, kRubyStringUtf8Encoding);
516   DEREF(msg, oneofdata->case_ofs, uint32_t) =
517       oneofdata->oneof_case_num;
518   DEREF(msg, oneofdata->ofs, VALUE) = str;
519   return (void*)str;
520 }
521 
oneofbytes_handler(void * closure,const void * hd,size_t size_hint)522 static void *oneofbytes_handler(void *closure,
523                                 const void *hd,
524                                 size_t size_hint) {
525   MessageHeader* msg = closure;
526   const oneof_handlerdata_t *oneofdata = hd;
527   VALUE str = rb_str_new2("");
528   rb_enc_associate(str, kRubyString8bitEncoding);
529   DEREF(msg, oneofdata->case_ofs, uint32_t) =
530       oneofdata->oneof_case_num;
531   DEREF(msg, oneofdata->ofs, VALUE) = str;
532   return (void*)str;
533 }
534 
oneofstring_end_handler(void * closure,const void * hd)535 static bool oneofstring_end_handler(void* closure, const void* hd) {
536   VALUE rb_str = rb_str_new2("");
537   rb_obj_freeze(rb_str);
538   return true;
539 }
540 
541 // Handler for a submessage field in a oneof.
oneofsubmsg_handler(void * closure,const void * hd)542 static void *oneofsubmsg_handler(void *closure,
543                                  const void *hd) {
544   MessageHeader* msg = closure;
545   const oneof_handlerdata_t *oneofdata = hd;
546   uint32_t oldcase = DEREF(msg, oneofdata->case_ofs, uint32_t);
547 
548   VALUE submsg_rb;
549   MessageHeader* submsg;
550 
551   if (oldcase != oneofdata->oneof_case_num ||
552       DEREF(msg, oneofdata->ofs, VALUE) == Qnil) {
553     DEREF(msg, oneofdata->ofs, VALUE) =
554         initialize_rb_class_with_no_args(oneofdata->subklass);
555   }
556   // Set the oneof case *after* allocating the new class instance -- otherwise,
557   // if the Ruby GC is invoked as part of a call into the VM, it might invoke
558   // our mark routines, and our mark routines might see the case value
559   // indicating a VALUE is present and expect a valid VALUE. See comment in
560   // layout_set() for more detail: basically, the change to the value and the
561   // case must be atomic w.r.t. the Ruby VM.
562   DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->oneof_case_num;
563 
564   submsg_rb = DEREF(msg, oneofdata->ofs, VALUE);
565   TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
566   return submsg;
567 }
568 
oneof_startwrapper(void * closure,const void * hd)569 static void* oneof_startwrapper(void* closure, const void* hd) {
570   char* msg = closure;
571   const oneof_handlerdata_t *oneofdata = hd;
572 
573   DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->oneof_case_num;
574 
575   return msg + oneofdata->ofs;
576 }
577 
578 // Set up handlers for a repeated field.
add_handlers_for_repeated_field(upb_handlers * h,const Descriptor * desc,const upb_fielddef * f,size_t offset)579 static void add_handlers_for_repeated_field(upb_handlers *h,
580                                             const Descriptor* desc,
581                                             const upb_fielddef *f,
582                                             size_t offset) {
583   upb_handlerattr attr = UPB_HANDLERATTR_INIT;
584   attr.handler_data = newhandlerdata(h, offset, -1);
585   upb_handlers_setstartseq(h, f, startseq_handler, &attr);
586 
587   switch (upb_fielddef_type(f)) {
588 
589 #define SET_HANDLER(utype, ltype)                                 \
590   case utype:                                                     \
591     upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
592     break;
593 
594     SET_HANDLER(UPB_TYPE_BOOL,   bool);
595     SET_HANDLER(UPB_TYPE_INT32,  int32);
596     SET_HANDLER(UPB_TYPE_UINT32, uint32);
597     SET_HANDLER(UPB_TYPE_ENUM,   int32);
598     SET_HANDLER(UPB_TYPE_FLOAT,  float);
599     SET_HANDLER(UPB_TYPE_INT64,  int64);
600     SET_HANDLER(UPB_TYPE_UINT64, uint64);
601     SET_HANDLER(UPB_TYPE_DOUBLE, double);
602 
603 #undef SET_HANDLER
604 
605     case UPB_TYPE_STRING:
606     case UPB_TYPE_BYTES: {
607       bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
608       upb_handlers_setstartstr(h, f, is_bytes ?
609                                appendbytes_handler : appendstr_handler,
610                                NULL);
611       upb_handlers_setstring(h, f, stringdata_handler, NULL);
612       upb_handlers_setendstr(h, f, appendstring_end_handler, NULL);
613       break;
614     }
615     case UPB_TYPE_MESSAGE: {
616       VALUE subklass = field_type_class(desc->layout, f);
617       upb_handlerattr attr = UPB_HANDLERATTR_INIT;
618       attr.handler_data = newsubmsghandlerdata(h, f, 0, -1, subklass);
619       if (is_wrapper(upb_fielddef_msgsubdef(f))) {
620         upb_handlers_setstartsubmsg(h, f, appendwrapper_handler, &attr);
621       } else {
622         upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
623       }
624       break;
625     }
626   }
627 }
628 
doublewrapper_handler(void * closure,const void * hd,double val)629 static bool doublewrapper_handler(void* closure, const void* hd, double val) {
630   VALUE* rbval = closure;
631   *rbval = DBL2NUM(val);
632   return true;
633 }
634 
floatwrapper_handler(void * closure,const void * hd,float val)635 static bool floatwrapper_handler(void* closure, const void* hd, float val) {
636   VALUE* rbval = closure;
637   *rbval = DBL2NUM(val);
638   return true;
639 }
640 
int64wrapper_handler(void * closure,const void * hd,int64_t val)641 static bool int64wrapper_handler(void* closure, const void* hd, int64_t val) {
642   VALUE* rbval = closure;
643   *rbval = LL2NUM(val);
644   return true;
645 }
646 
uint64wrapper_handler(void * closure,const void * hd,uint64_t val)647 static bool uint64wrapper_handler(void* closure, const void* hd, uint64_t val) {
648   VALUE* rbval = closure;
649   *rbval = ULL2NUM(val);
650   return true;
651 }
652 
int32wrapper_handler(void * closure,const void * hd,int32_t val)653 static bool int32wrapper_handler(void* closure, const void* hd, int32_t val) {
654   VALUE* rbval = closure;
655   *rbval = INT2NUM(val);
656   return true;
657 }
658 
uint32wrapper_handler(void * closure,const void * hd,uint32_t val)659 static bool uint32wrapper_handler(void* closure, const void* hd, uint32_t val) {
660   VALUE* rbval = closure;
661   *rbval = UINT2NUM(val);
662   return true;
663 }
664 
startstringwrapper_handler(void * closure,const void * hd,size_t size_hint)665 static void* startstringwrapper_handler(void* closure, const void* hd,
666                                         size_t size_hint) {
667   VALUE* rbval = closure;
668   (void)size_hint;
669   *rbval = rb_str_new(NULL, 0);
670   rb_enc_associate(*rbval, kRubyStringUtf8Encoding);
671   return closure;
672 }
673 
stringwrapper_handler(void * closure,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)674 static size_t stringwrapper_handler(void* closure, const void* hd,
675                                     const char* ptr, size_t len,
676                                     const upb_bufhandle* handle) {
677   VALUE* rbval = closure;
678   *rbval = noleak_rb_str_cat(*rbval, ptr, len);
679   return len;
680 }
681 
startbyteswrapper_handler(void * closure,const void * hd,size_t size_hint)682 static void* startbyteswrapper_handler(void* closure, const void* hd,
683                                        size_t size_hint) {
684   VALUE* rbval = closure;
685   (void)size_hint;
686   *rbval = rb_str_new(NULL, 0);
687   rb_enc_associate(*rbval, kRubyString8bitEncoding);
688   return closure;
689 }
690 
byteswrapper_handler(void * closure,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)691 static size_t byteswrapper_handler(void* closure, const void* hd,
692                                    const char* ptr, size_t len,
693                                    const upb_bufhandle* handle) {
694   VALUE* rbval = closure;
695   *rbval = noleak_rb_str_cat(*rbval, ptr, len);
696   return len;
697 }
698 
boolwrapper_handler(void * closure,const void * hd,bool val)699 static bool boolwrapper_handler(void* closure, const void* hd, bool val) {
700   VALUE* rbval = closure;
701   if (val) {
702     *rbval = Qtrue;
703   } else {
704     *rbval = Qfalse;
705   }
706   return true;
707 }
708 
709 // Set up handlers for a singular field.
add_handlers_for_singular_field(const Descriptor * desc,upb_handlers * h,const upb_fielddef * f,size_t offset,size_t hasbit_off)710 static void add_handlers_for_singular_field(const Descriptor* desc,
711                                             upb_handlers* h,
712                                             const upb_fielddef* f,
713                                             size_t offset, size_t hasbit_off) {
714   // The offset we pass to UPB points to the start of the Message,
715   // rather than the start of where our data is stored.
716   int32_t hasbit = -1;
717   if (hasbit_off != MESSAGE_FIELD_NO_HASBIT) {
718     hasbit = hasbit_off + sizeof(MessageHeader) * 8;
719   }
720 
721   switch (upb_fielddef_type(f)) {
722     case UPB_TYPE_BOOL:
723     case UPB_TYPE_INT32:
724     case UPB_TYPE_UINT32:
725     case UPB_TYPE_ENUM:
726     case UPB_TYPE_FLOAT:
727     case UPB_TYPE_INT64:
728     case UPB_TYPE_UINT64:
729     case UPB_TYPE_DOUBLE:
730       upb_msg_setscalarhandler(h, f, offset, hasbit);
731       break;
732     case UPB_TYPE_STRING:
733     case UPB_TYPE_BYTES: {
734       bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
735       upb_handlerattr attr = UPB_HANDLERATTR_INIT;
736       attr.handler_data = newhandlerdata(h, offset, hasbit);
737       upb_handlers_setstartstr(h, f,
738                                is_bytes ? bytes_handler : str_handler,
739                                &attr);
740       upb_handlers_setstring(h, f, stringdata_handler, &attr);
741       upb_handlers_setendstr(h, f, stringdata_end_handler, &attr);
742       break;
743     }
744     case UPB_TYPE_MESSAGE: {
745       upb_handlerattr attr = UPB_HANDLERATTR_INIT;
746       attr.handler_data = newsubmsghandlerdata(
747           h, f, offset, hasbit, field_type_class(desc->layout, f));
748       if (is_wrapper(upb_fielddef_msgsubdef(f))) {
749         upb_handlers_setstartsubmsg(h, f, startwrapper, &attr);
750       } else {
751         upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
752       }
753     }
754   }
755 }
756 
757 // Adds handlers to a map field.
add_handlers_for_mapfield(upb_handlers * h,const upb_fielddef * fielddef,size_t offset,const Descriptor * desc)758 static void add_handlers_for_mapfield(upb_handlers* h,
759                                       const upb_fielddef* fielddef,
760                                       size_t offset,
761                                       const Descriptor* desc) {
762   const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef);
763   map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc);
764   upb_handlerattr attr = UPB_HANDLERATTR_INIT;
765 
766   upb_handlers_addcleanup(h, hd, xfree);
767   attr.handler_data = hd;
768   upb_handlers_setstartsubmsg(h, fielddef, startmap_handler, &attr);
769   upb_handlers_setendsubmsg(h, fielddef, endmap_handler, &attr);
770 }
771 
772 // Adds handlers to a map-entry msgdef.
add_handlers_for_mapentry(const upb_msgdef * msgdef,upb_handlers * h,const Descriptor * desc)773 static void add_handlers_for_mapentry(const upb_msgdef* msgdef, upb_handlers* h,
774                                       const Descriptor* desc) {
775   const upb_fielddef* key_field = map_entry_key(msgdef);
776   const upb_fielddef* value_field = map_entry_value(msgdef);
777   map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc);
778   upb_handlerattr attr = UPB_HANDLERATTR_INIT;
779 
780   upb_handlers_addcleanup(h, hd, xfree);
781   attr.handler_data = hd;
782   upb_handlers_setendmsg(h, endmapentry_handler, &attr);
783 
784   add_handlers_for_singular_field(
785       desc, h, key_field,
786       offsetof(map_parse_frame_t, key_storage),
787       MESSAGE_FIELD_NO_HASBIT);
788   add_handlers_for_singular_field(
789       desc, h, value_field,
790       offsetof(map_parse_frame_t, value_storage),
791       MESSAGE_FIELD_NO_HASBIT);
792 }
793 
// Installs the value handler(s) for a wrapper message type.
//
// The handlers are attached to the field with number 1 and are chosen by the
// msgdef's well-known type. No handler data is passed (NULL attr). Raises a
// Ruby RuntimeError when the well-known type is not one of the wrapper types
// handled below.
static void add_handlers_for_wrapper(const upb_msgdef* msgdef,
                                     upb_handlers* h) {
  const upb_fielddef* value_f = upb_msgdef_itof(msgdef, 1);

  switch (upb_msgdef_wellknowntype(msgdef)) {
    // Floating-point wrappers.
    case UPB_WELLKNOWN_DOUBLEVALUE:
      upb_handlers_setdouble(h, value_f, doublewrapper_handler, NULL);
      break;
    case UPB_WELLKNOWN_FLOATVALUE:
      upb_handlers_setfloat(h, value_f, floatwrapper_handler, NULL);
      break;

    // Integer wrappers.
    case UPB_WELLKNOWN_INT64VALUE:
      upb_handlers_setint64(h, value_f, int64wrapper_handler, NULL);
      break;
    case UPB_WELLKNOWN_UINT64VALUE:
      upb_handlers_setuint64(h, value_f, uint64wrapper_handler, NULL);
      break;
    case UPB_WELLKNOWN_INT32VALUE:
      upb_handlers_setint32(h, value_f, int32wrapper_handler, NULL);
      break;
    case UPB_WELLKNOWN_UINT32VALUE:
      upb_handlers_setuint32(h, value_f, uint32wrapper_handler, NULL);
      break;

    // String-like wrappers need both a start-string and a data handler.
    case UPB_WELLKNOWN_STRINGVALUE:
      upb_handlers_setstartstr(h, value_f, startstringwrapper_handler, NULL);
      upb_handlers_setstring(h, value_f, stringwrapper_handler, NULL);
      break;
    case UPB_WELLKNOWN_BYTESVALUE:
      upb_handlers_setstartstr(h, value_f, startbyteswrapper_handler, NULL);
      upb_handlers_setstring(h, value_f, byteswrapper_handler, NULL);
      break;

    case UPB_WELLKNOWN_BOOLVALUE:
      upb_handlers_setbool(h, value_f, boolwrapper_handler, NULL);
      break;

    default:
      rb_raise(rb_eRuntimeError,
               "Internal logic error with well-known types.");
  }
}
832 
833 // Set up handlers for a oneof field.
add_handlers_for_oneof_field(upb_handlers * h,const upb_fielddef * f,size_t offset,size_t oneof_case_offset,const Descriptor * desc)834 static void add_handlers_for_oneof_field(upb_handlers *h,
835                                          const upb_fielddef *f,
836                                          size_t offset,
837                                          size_t oneof_case_offset,
838                                          const Descriptor* desc) {
839   upb_handlerattr attr = UPB_HANDLERATTR_INIT;
840   attr.handler_data =
841       newoneofhandlerdata(h, offset, oneof_case_offset, f, desc);
842 
843   switch (upb_fielddef_type(f)) {
844 
845 #define SET_HANDLER(utype, ltype)                                 \
846   case utype:                                                     \
847     upb_handlers_set##ltype(h, f, oneof##ltype##_handler, &attr); \
848     break;
849 
850     SET_HANDLER(UPB_TYPE_BOOL,   bool);
851     SET_HANDLER(UPB_TYPE_INT32,  int32);
852     SET_HANDLER(UPB_TYPE_UINT32, uint32);
853     SET_HANDLER(UPB_TYPE_ENUM,   int32);
854     SET_HANDLER(UPB_TYPE_FLOAT,  float);
855     SET_HANDLER(UPB_TYPE_INT64,  int64);
856     SET_HANDLER(UPB_TYPE_UINT64, uint64);
857     SET_HANDLER(UPB_TYPE_DOUBLE, double);
858 
859 #undef SET_HANDLER
860 
861     case UPB_TYPE_STRING:
862     case UPB_TYPE_BYTES: {
863       bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
864       upb_handlers_setstartstr(h, f, is_bytes ?
865                                oneofbytes_handler : oneofstr_handler,
866                                &attr);
867       upb_handlers_setstring(h, f, stringdata_handler, NULL);
868       upb_handlers_setendstr(h, f, oneofstring_end_handler, &attr);
869       break;
870     }
871     case UPB_TYPE_MESSAGE: {
872       if (is_wrapper(upb_fielddef_msgsubdef(f))) {
873         upb_handlers_setstartsubmsg(h, f, oneof_startwrapper, &attr);
874       } else {
875         upb_handlers_setstartsubmsg(h, f, oneofsubmsg_handler, &attr);
876       }
877       break;
878     }
879   }
880 }
881 
unknown_field_handler(void * closure,const void * hd,const char * buf,size_t size)882 static bool unknown_field_handler(void* closure, const void* hd,
883                                   const char* buf, size_t size) {
884   MessageHeader* msg = (MessageHeader*)closure;
885   UPB_UNUSED(hd);
886 
887   if (msg->unknown_fields == NULL) {
888     msg->unknown_fields = malloc(sizeof(stringsink));
889     stringsink_init(msg->unknown_fields);
890   }
891 
892   stringsink_string(msg->unknown_fields, NULL, buf, size, NULL);
893 
894   return true;
895 }
896 
// Returns the offset of field `f` measured from the start of the
// MessageHeader: the field's layout offset plus sizeof(MessageHeader).
size_t get_field_offset(MessageLayout* layout, const upb_fielddef* f) {
  const size_t layout_offset = layout->fields[upb_fielddef_index(f)].offset;
  return layout_offset + sizeof(MessageHeader);
}
900 
// Populates `h` with the parse ("fill") handlers for the message type that `h`
// is bound to.
//
// closure: the descriptor pool (a Ruby VALUE passed through a void pointer)
//          used to look up the Descriptor for the msgdef.
//
// Map-entry and wrapper message types get their own dedicated handler sets.
// For every other message an unknown-field handler is installed and each field
// receives oneof, map, repeated or singular handlers as appropriate.
void add_handlers_for_message(const void *closure, upb_handlers *h) {
  const VALUE descriptor_pool = (VALUE)closure;
  const upb_msgdef* msgdef = upb_handlers_msgdef(h);
  Descriptor* desc =
      ruby_to_Descriptor(get_msgdef_obj(descriptor_pool, msgdef));
  upb_msg_field_iter it;

  // The layout is created on demand: handlers may be requested for a message
  // that is a submsg of another type before its own class has been built.
  if (desc->layout == NULL) {
    create_layout(desc);
  }

  // Map-entry types use a special handler set instead of the normal
  // (user-defined) message handling.
  if (upb_msgdef_mapentry(msgdef)) {
    add_handlers_for_mapentry(msgdef, h, desc);
    return;
  }

  // Wrapper types likewise use their own handlers.
  if (is_wrapper(msgdef)) {
    add_handlers_for_wrapper(msgdef, h);
    return;
  }

  {
    upb_handlerattr unknown_attr = UPB_HANDLERATTR_INIT;
    upb_handlers_setunknown(h, unknown_field_handler, &unknown_attr);
  }

  upb_msg_field_begin(&it, desc->msgdef);
  while (!upb_msg_field_done(&it)) {
    const upb_fielddef *field = upb_msg_iter_field(&it);
    const upb_oneofdef* containing_oneof =
        upb_fielddef_realcontainingoneof(field);
    size_t field_offset = get_field_offset(desc->layout, field);

    if (containing_oneof) {
      // The case offset, like the field offset, is relative to the start of
      // the MessageHeader.
      size_t case_offset =
          desc->layout->oneofs[upb_oneofdef_index(containing_oneof)]
              .case_offset +
          sizeof(MessageHeader);
      add_handlers_for_oneof_field(h, field, field_offset, case_offset, desc);
    } else if (is_map_field(field)) {
      add_handlers_for_mapfield(h, field, field_offset, desc);
    } else if (upb_fielddef_isseq(field)) {
      add_handlers_for_repeated_field(h, desc, field, field_offset);
    } else {
      add_handlers_for_singular_field(
          desc, h, field, field_offset,
          desc->layout->fields[upb_fielddef_index(field)].hasbit);
    }

    upb_msg_field_next(&it);
  }
}
955 
956 // Constructs the handlers for filling a message's data into an in-memory
957 // object.
get_fill_handlers(Descriptor * desc)958 const upb_handlers* get_fill_handlers(Descriptor* desc) {
959   DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool);
960   return upb_handlercache_get(pool->fill_handler_cache, desc->msgdef);
961 }
962 
msgdef_decodermethod(Descriptor * desc)963 static const upb_pbdecodermethod *msgdef_decodermethod(Descriptor* desc) {
964   DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool);
965   return upb_pbcodecache_get(pool->fill_method_cache, desc->msgdef);
966 }
967 
msgdef_jsonparsermethod(Descriptor * desc)968 static const upb_json_parsermethod *msgdef_jsonparsermethod(Descriptor* desc) {
969   DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool);
970   return upb_json_codecache_get(pool->json_fill_method_cache, desc->msgdef);
971 }
972 
msgdef_pb_serialize_handlers(Descriptor * desc)973 static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) {
974   DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool);
975   return upb_handlercache_get(pool->pb_serialize_handler_cache, desc->msgdef);
976 }
977 
msgdef_json_serialize_handlers(Descriptor * desc,bool preserve_proto_fieldnames)978 static const upb_handlers* msgdef_json_serialize_handlers(
979     Descriptor* desc, bool preserve_proto_fieldnames) {
980   DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool);
981   if (preserve_proto_fieldnames) {
982     return upb_handlercache_get(pool->json_serialize_handler_preserve_cache,
983                                 desc->msgdef);
984   } else {
985     return upb_handlercache_get(pool->json_serialize_handler_cache,
986                                 desc->msgdef);
987   }
988 }
989 
990 
991 // Stack-allocated context during an encode/decode operation. Contains the upb
992 // environment and its stack-based allocator, an initial buffer for allocations
993 // to avoid malloc() when possible, and a template for Ruby exception messages
994 // if any error occurs.
995 #define STACK_ENV_STACKBYTES 4096
996 typedef struct {
997   upb_arena *arena;
998   upb_status status;
999   const char* ruby_error_template;
1000   char allocbuf[STACK_ENV_STACKBYTES];
1001 } stackenv;
1002 
1003 static void stackenv_init(stackenv* se, const char* errmsg);
1004 static void stackenv_uninit(stackenv* se);
1005 
// Prepares `se` for use: clears its status, records `errmsg` as the template
// for a later Ruby exception message, and creates the arena over the embedded
// buffer with upb_alloc_global as the backing allocator.
static void stackenv_init(stackenv* se, const char* errmsg) {
  upb_status_clear(&se->status);
  se->ruby_error_template = errmsg;
  se->arena =
      upb_arena_init(se->allocbuf, sizeof(se->allocbuf), &upb_alloc_global);
}
1012 
// Frees the arena of `se` and, if the recorded status is not OK, raises a Ruby
// ParseError whose message is built from the error template and the status
// error message. The arena is freed before the raise.
static void stackenv_uninit(stackenv* se) {
  upb_arena_free(se->arena);

  if (upb_ok(&se->status)) {
    return;
  }

  // TODO(haberman): have a way to verify that this is actually a parse error,
  // instead of just throwing "parse error" unconditionally.
  rb_raise(cParseError, se->ruby_error_template,
           rb_str_new2(upb_status_errmsg(&se->status)));
}
1023 
1024 /*
1025  * call-seq:
1026  *     MessageClass.decode(data) => message
1027  *
1028  * Decodes the given data (as a string containing bytes in protocol buffers wire
1029  * format) under the interpretration given by this message class's definition
1030  * and returns a message object with the corresponding field values.
1031  */
Message_decode(VALUE klass,VALUE data)1032 VALUE Message_decode(VALUE klass, VALUE data) {
1033   VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
1034   Descriptor* desc = ruby_to_Descriptor(descriptor);
1035   VALUE msgklass = Descriptor_msgclass(descriptor);
1036   VALUE msg_rb;
1037   MessageHeader* msg;
1038 
1039   if (TYPE(data) != T_STRING) {
1040     rb_raise(rb_eArgError, "Expected string for binary protobuf data.");
1041   }
1042 
1043   msg_rb = initialize_rb_class_with_no_args(msgklass);
1044   TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1045 
1046   {
1047     const upb_pbdecodermethod* method = msgdef_decodermethod(desc);
1048     const upb_handlers* h = upb_pbdecodermethod_desthandlers(method);
1049     const upb_msgdef* m = upb_handlers_msgdef(h);
1050     VALUE wrapper = Qnil;
1051     void* ptr = msg;
1052     stackenv se;
1053     upb_sink sink;
1054     upb_pbdecoder* decoder;
1055     stackenv_init(&se, "Error occurred during parsing: %" PRIsVALUE);
1056 
1057     if (is_wrapper(m)) {
1058       ptr = &wrapper;
1059     }
1060 
1061     upb_sink_reset(&sink, h, ptr);
1062     decoder = upb_pbdecoder_create(se.arena, method, sink, &se.status);
1063     upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
1064                       upb_pbdecoder_input(decoder));
1065 
1066     stackenv_uninit(&se);
1067 
1068     if (is_wrapper(m)) {
1069       msg_rb = ruby_wrapper_type(msgklass, wrapper);
1070     }
1071   }
1072 
1073   return msg_rb;
1074 }
1075 
1076 /*
1077  * call-seq:
1078  *     MessageClass.decode_json(data, options = {}) => message
1079  *
1080  * Decodes the given data (as a string containing bytes in protocol buffers wire
1081  * format) under the interpretration given by this message class's definition
1082  * and returns a message object with the corresponding field values.
1083  *
1084  *  @param options [Hash] options for the decoder
1085  *   ignore_unknown_fields: set true to ignore unknown fields (default is to
1086  *   raise an error)
1087  */
Message_decode_json(int argc,VALUE * argv,VALUE klass)1088 VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) {
1089   VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
1090   Descriptor* desc = ruby_to_Descriptor(descriptor);
1091   VALUE msgklass = Descriptor_msgclass(descriptor);
1092   VALUE msg_rb;
1093   VALUE data = argv[0];
1094   VALUE ignore_unknown_fields = Qfalse;
1095   MessageHeader* msg;
1096 
1097   if (argc < 1 || argc > 2) {
1098     rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
1099   }
1100 
1101   if (argc == 2) {
1102     VALUE hash_args = argv[1];
1103     if (TYPE(hash_args) != T_HASH) {
1104       rb_raise(rb_eArgError, "Expected hash arguments.");
1105     }
1106 
1107     ignore_unknown_fields = rb_hash_lookup2(
1108         hash_args, ID2SYM(rb_intern("ignore_unknown_fields")), Qfalse);
1109   }
1110 
1111   if (TYPE(data) != T_STRING) {
1112     rb_raise(rb_eArgError, "Expected string for JSON data.");
1113   }
1114 
1115   // TODO(cfallin): Check and respect string encoding. If not UTF-8, we need to
1116   // convert, because string handlers pass data directly to message string
1117   // fields.
1118 
1119   msg_rb = initialize_rb_class_with_no_args(msgklass);
1120   TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1121 
1122   {
1123     const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc);
1124     const upb_handlers* h = get_fill_handlers(desc);
1125     const upb_msgdef* m = upb_handlers_msgdef(h);
1126     stackenv se;
1127     upb_sink sink;
1128     upb_json_parser* parser;
1129     DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool);
1130     stackenv_init(&se, "Error occurred during parsing: %" PRIsVALUE);
1131 
1132     if (is_wrapper(m)) {
1133       rb_raise(
1134           rb_eRuntimeError,
1135           "Parsing a wrapper type from JSON at the top level does not work.");
1136     }
1137 
1138     upb_sink_reset(&sink, h, msg);
1139     parser = upb_json_parser_create(se.arena, method, pool->symtab, sink,
1140                                     &se.status, RTEST(ignore_unknown_fields));
1141     upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
1142                       upb_json_parser_input(parser));
1143 
1144     stackenv_uninit(&se);
1145   }
1146 
1147   return msg_rb;
1148 }
1149 
1150 // -----------------------------------------------------------------------------
1151 // Serializing.
1152 // -----------------------------------------------------------------------------
1153 
1154 /* msgvisitor *****************************************************************/
1155 
1156 static void putmsg(VALUE msg, const Descriptor* desc, upb_sink sink, int depth,
1157                    bool emit_defaults, bool is_json, bool open_msg);
1158 
getsel(const upb_fielddef * f,upb_handlertype_t type)1159 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
1160   upb_selector_t ret;
1161   bool ok = upb_handlers_getselector(f, type, &ret);
1162   UPB_ASSERT(ok);
1163   return ret;
1164 }
1165 
// Emits the Ruby string `str` for field `f` to `sink` as a
// start-string / string-data / end-string sequence. A nil `str` emits
// nothing.
static void putstr(VALUE str, const upb_fielddef *f, upb_sink sink) {
  upb_sink str_sink;

  if (str == Qnil) {
    return;
  }

  assert(BUILTIN_TYPE(str) == RUBY_T_STRING);

  // We should be guaranteed that the string has the correct encoding because
  // we ensured this at assignment time and then froze the string.
  if (upb_fielddef_type(f) != UPB_TYPE_STRING) {
    assert(rb_enc_from_index(ENCODING_GET(str)) == kRubyString8bitEncoding);
  } else {
    assert(rb_enc_from_index(ENCODING_GET(str)) == kRubyStringUtf8Encoding);
  }

  upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str),
                    &str_sink);
  // The whole string is delivered in a single chunk.
  upb_sink_putstring(str_sink, getsel(f, UPB_HANDLER_STRING),
                     RSTRING_PTR(str), RSTRING_LEN(str), NULL);
  upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR));
}
1187 
// Emits the sub-message `submsg` for field `f` to `sink`, bracketed by the
// field's start/end sub-message events. The sub-message's own descriptor is
// read from its instance variable, and its contents are written by putmsg()
// at depth + 1. A nil `submsg` emits nothing.
static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink sink,
                      int depth, bool emit_defaults, bool is_json) {
  upb_sink child_sink;
  Descriptor* child_desc;

  if (submsg == Qnil) {
    return;
  }

  child_desc = ruby_to_Descriptor(
      rb_ivar_get(submsg, descriptor_instancevar_interned));

  upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &child_sink);
  putmsg(submsg, child_desc, child_sink, depth + 1, emit_defaults, is_json,
         true);
  upb_sink_endsubmsg(sink, child_sink, getsel(f, UPB_HANDLER_ENDSUBMSG));
}
1203 
// Emits the repeated field `ary` for field `f` to `sink` as a
// start-seq / elements / end-seq sequence.
//
// Nothing is emitted when `ary` is nil, or when it is empty and
// emit_defaults is false. Primitive elements are written with the field's
// primitive selector, strings/bytes go through putstr() and sub-messages
// through putsubmsg().
static void putary(VALUE ary, const upb_fielddef* f, upb_sink sink, int depth,
                   bool emit_defaults, bool is_json) {
  upb_sink subsink;
  upb_fieldtype_t type = upb_fielddef_type(f);
  upb_selector_t sel = 0;
  VALUE type_class;
  int size;
  int i;

  // The nil check must come before `ary` is handed to
  // ruby_to_RepeatedField(); previously the conversion ran first, which made
  // this guard ineffective.
  if (ary == Qnil) return;

  type_class = ruby_to_RepeatedField(ary)->field_type_class;

  size = NUM2INT(RepeatedField_length(ary));
  if (size == 0 && !emit_defaults) return;

  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);

  // The selector is only looked up (and only used) for primitive element
  // types.
  if (upb_fielddef_isprimitive(f)) {
    sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
  }

  for (i = 0; i < size; i++) {
    void* memory = RepeatedField_index_native(ary, i);
    switch (type) {
#define T(upbtypeconst, upbtype, ctype)                     \
  case upbtypeconst:                                        \
    upb_sink_put##upbtype(subsink, sel, *((ctype*)memory)); \
    break;

      T(UPB_TYPE_FLOAT,  float,  float)
      T(UPB_TYPE_DOUBLE, double, double)
      T(UPB_TYPE_BOOL,   bool,   int8_t)
      // Enums share the int32 case that follows.
      case UPB_TYPE_ENUM:
      T(UPB_TYPE_INT32,  int32,  int32_t)
      T(UPB_TYPE_UINT32, uint32, uint32_t)
      T(UPB_TYPE_INT64,  int64,  int64_t)
      T(UPB_TYPE_UINT64, uint64, uint64_t)

      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        putstr(*((VALUE *)memory), f, subsink);
        break;
      case UPB_TYPE_MESSAGE: {
        VALUE val = native_slot_get(UPB_TYPE_MESSAGE, type_class, memory);
        putsubmsg(val, f, subsink, depth, emit_defaults, is_json);
        break;
      }

#undef T

    }
  }
  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}
1258 
// Emits a single Ruby value for field `f` to `sink`, converting it according
// to the field's type.
//
// type_class is used only for enum fields, where a Symbol value is resolved
// to its number by calling `resolve` on it. Raises RuntimeError when `depth`
// exceeds ENCODE_MAX_NESTING.
static void put_ruby_value(VALUE value, const upb_fielddef* f, VALUE type_class,
                           int depth, upb_sink sink, bool emit_defaults,
                           bool is_json) {
  upb_selector_t prim_sel = 0;

  if (depth > ENCODE_MAX_NESTING) {
    rb_raise(rb_eRuntimeError,
             "Maximum recursion depth exceeded during encoding.");
  }

  // The selector is only looked up (and only used) for primitive types.
  if (upb_fielddef_isprimitive(f)) {
    prim_sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
  }

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
      upb_sink_putbool(sink, prim_sel, value == Qtrue);
      break;
    case UPB_TYPE_FLOAT:
      upb_sink_putfloat(sink, prim_sel, NUM2DBL(value));
      break;
    case UPB_TYPE_DOUBLE:
      upb_sink_putdouble(sink, prim_sel, NUM2DBL(value));
      break;
    case UPB_TYPE_INT32:
      upb_sink_putint32(sink, prim_sel, NUM2INT(value));
      break;
    case UPB_TYPE_UINT32:
      upb_sink_putuint32(sink, prim_sel, NUM2UINT(value));
      break;
    case UPB_TYPE_INT64:
      upb_sink_putint64(sink, prim_sel, NUM2LL(value));
      break;
    case UPB_TYPE_UINT64:
      upb_sink_putuint64(sink, prim_sel, NUM2ULL(value));
      break;
    case UPB_TYPE_ENUM: {
      // A Symbol is first resolved to its numeric value.
      if (TYPE(value) == T_SYMBOL) {
        value = rb_funcall(type_class, rb_intern("resolve"), 1, value);
      }
      upb_sink_putint32(sink, prim_sel, NUM2INT(value));
      break;
    }
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      putstr(value, f, sink);
      break;
    case UPB_TYPE_MESSAGE:
      putsubmsg(value, f, sink, depth, emit_defaults, is_json);
      break;
  }
}
1310 
// Emits the map `map` for field `f` to `sink`: a sequence in which every
// key/value pair is written as a sub-message holding the key and the value.
// A nil `map` emits nothing.
static void putmap(VALUE map, const upb_fielddef* f, upb_sink sink, int depth,
                   bool emit_defaults, bool is_json) {
  Map* map_self;
  upb_sink seq_sink;
  const upb_fielddef* key_f;
  const upb_fielddef* val_f;
  Map_iter iter;

  if (map == Qnil) {
    return;
  }
  // NOTE(review): Map_length()'s result is compared with 0 directly, whereas
  // putary() converts RepeatedField_length() with NUM2INT() first -- confirm
  // that Map_length() returns a C integer rather than a Ruby VALUE.
  if (!emit_defaults && Map_length(map) == 0) {
    return;
  }

  map_self = ruby_to_Map(map);

  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &seq_sink);

  assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
  key_f = map_field_key(f);
  val_f = map_field_value(f);

  Map_begin(map, &iter);
  while (!Map_done(&iter)) {
    VALUE entry_key = Map_iter_key(&iter);
    VALUE entry_value = Map_iter_value(&iter);
    upb_status entry_status;
    upb_sink entry_sink;

    upb_sink_startsubmsg(seq_sink, getsel(f, UPB_HANDLER_STARTSUBMSG),
                         &entry_sink);
    upb_sink_startmsg(entry_sink);

    // Keys are emitted with no type class; values use the map's value type
    // class.
    put_ruby_value(entry_key, key_f, Qnil, depth + 1, entry_sink,
                   emit_defaults, is_json);
    put_ruby_value(entry_value, val_f, map_self->value_type_class, depth + 1,
                   entry_sink, emit_defaults, is_json);

    upb_sink_endmsg(entry_sink, &entry_status);
    upb_sink_endsubmsg(seq_sink, entry_sink,
                       getsel(f, UPB_HANDLER_ENDSUBMSG));

    Map_next(&iter);
  }

  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}
1351 
1352 static const upb_handlers* msgdef_json_serialize_handlers(
1353     Descriptor* desc, bool preserve_proto_fieldnames);
1354 
// Emits the JSON form of an Any message (`msg_rb`, described by `desc`) to
// `sink`.
//
// The type URL field is emitted when non-empty. It must be longer than, and
// start with, "type.googleapis.com/"; the remainder is looked up as a message
// name in the generated pool's symbol table. If the value field is non-empty
// it is decoded as that payload type and the decoded message is written with
// the payload's JSON serialization handlers.
//
// Raises RuntimeError for an invalid type URL or an unknown payload type.
static void putjsonany(VALUE msg_rb, const Descriptor* desc, upb_sink sink,
                       int depth, bool emit_defaults) {
  static const char url_prefix[] = "type.googleapis.com/";
  const size_t url_prefix_len = sizeof(url_prefix) - 1;

  const upb_fielddef* type_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_TYPE);
  const upb_fielddef* value_field =
      upb_msgdef_itof(desc->msgdef, UPB_ANY_VALUE);
  const upb_msgdef* payload_type = NULL;
  MessageHeader* msg = NULL;
  upb_status status;
  size_t type_url_offset;
  uint32_t value_offset;
  VALUE type_url_str_rb;
  VALUE value_str_rb;
  const char* type_name;
  size_t type_name_len;
  DescriptorPool* pool;

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

  upb_sink_startmsg(sink);

  /* Handle type url */
  type_url_offset = desc->layout->fields[upb_fielddef_index(type_field)].offset;
  type_url_str_rb = DEREF(Message_data(msg), type_url_offset, VALUE);
  if (RSTRING_LEN(type_url_str_rb) > 0) {
    putstr(type_url_str_rb, type_field, sink);
  }

  type_name = RSTRING_PTR(type_url_str_rb);
  type_name_len = RSTRING_LEN(type_url_str_rb);
  pool = ruby_to_DescriptorPool(generated_pool);

  if (type_name_len <= url_prefix_len ||
      strncmp(type_name, url_prefix, url_prefix_len) != 0) {
    rb_raise(rb_eRuntimeError, "Invalid type url: %s", type_name);
  }

  /* Resolve type url: skip the prefix and look up what remains. */
  type_name += url_prefix_len;
  type_name_len -= url_prefix_len;

  payload_type = upb_symtab_lookupmsg2(pool->symtab, type_name, type_name_len);
  if (payload_type == NULL) {
    rb_raise(rb_eRuntimeError, "Unknown type: %s", type_name);
  }

  /* Handle the payload */
  value_offset = desc->layout->fields[upb_fielddef_index(value_field)].offset;
  value_str_rb = DEREF(Message_data(msg), value_offset, VALUE);

  if (RSTRING_LEN(value_str_rb) > 0) {
    VALUE payload_desc_rb = get_msgdef_obj(generated_pool, payload_type);
    Descriptor* payload_desc = ruby_to_Descriptor(payload_desc_rb);
    VALUE payload_class = Descriptor_msgclass(payload_desc_rb);
    VALUE payload_msg_rb = Message_decode(payload_class, value_str_rb);
    upb_sink subsink;
    bool is_wellknown = upb_msgdef_wellknowntype(payload_desc->msgdef) !=
                        UPB_WELLKNOWN_UNSPECIFIED;

    if (is_wellknown) {
      upb_sink_startstr(sink, getsel(value_field, UPB_HANDLER_STARTSTR), 0,
                        &subsink);
    }

    // The payload is written through its own JSON handlers, using this sink's
    // closure.
    subsink.handlers = msgdef_json_serialize_handlers(payload_desc, true);
    subsink.closure = sink.closure;
    putmsg(payload_msg_rb, payload_desc, subsink, depth, emit_defaults, true,
           is_wellknown);
  }

  upb_sink_endmsg(sink, &status);
}
1436 
// Emits the JSON form of a ListValue message (`msg_rb`, described by `desc`)
// to `sink`.
//
// The repeated field with number 1 is read from the message. When it is nil
// or empty, an empty sequence (start-seq immediately followed by end-seq) is
// emitted explicitly; otherwise the elements are written by putary().
static void putjsonlistvalue(
    VALUE msg_rb, const Descriptor* desc,
    upb_sink sink, int depth, bool emit_defaults) {
  const upb_fielddef* values_f = upb_msgdef_itof(desc->msgdef, 1);
  // Offset of the field measured from the start of the MessageHeader.
  uint32_t values_offset =
      desc->layout->fields[upb_fielddef_index(values_f)].offset +
      sizeof(MessageHeader);
  MessageHeader* msg = NULL;
  upb_status status;
  upb_sink empty_seq_sink;
  VALUE values;

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

  upb_sink_startmsg(sink);

  values = DEREF(msg, values_offset, VALUE);

  if (values != Qnil && RepeatedField_size(values) != 0) {
    putary(values, values_f, sink, depth, emit_defaults, true);
  } else {
    upb_sink_startseq(sink, getsel(values_f, UPB_HANDLER_STARTSEQ),
                      &empty_seq_sink);
    upb_sink_endseq(sink, getsel(values_f, UPB_HANDLER_ENDSEQ));
  }

  upb_sink_endmsg(sink, &status);
}
1464 
putmsg(VALUE msg_rb,const Descriptor * desc,upb_sink sink,int depth,bool emit_defaults,bool is_json,bool open_msg)1465 static void putmsg(VALUE msg_rb, const Descriptor* desc,
1466                    upb_sink sink, int depth, bool emit_defaults,
1467                    bool is_json, bool open_msg) {
1468   MessageHeader* msg;
1469   upb_msg_field_iter i;
1470   upb_status status;
1471   bool json_wrapper = is_wrapper(desc->msgdef) && is_json;
1472 
1473   if (is_json &&
1474       upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_ANY) {
1475     putjsonany(msg_rb, desc, sink, depth, emit_defaults);
1476     return;
1477   }
1478 
1479   if (is_json &&
1480       upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_LISTVALUE) {
1481     putjsonlistvalue(msg_rb, desc, sink, depth, emit_defaults);
1482     return;
1483   }
1484 
1485   if (open_msg) {
1486     upb_sink_startmsg(sink);
1487   }
1488 
1489   // Protect against cycles (possible because users may freely reassign message
1490   // and repeated fields) by imposing a maximum recursion depth.
1491   if (depth > ENCODE_MAX_NESTING) {
1492     rb_raise(rb_eRuntimeError,
1493              "Maximum recursion depth exceeded during encoding.");
1494   }
1495 
1496   TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
1497 
1498   if (desc != msg->descriptor) {
1499     rb_raise(rb_eArgError,
1500              "The type of given msg is '%s', expect '%s'.",
1501              upb_msgdef_fullname(msg->descriptor->msgdef),
1502              upb_msgdef_fullname(desc->msgdef));
1503   }
1504 
1505   for (upb_msg_field_begin(&i, desc->msgdef);
1506        !upb_msg_field_done(&i);
1507        upb_msg_field_next(&i)) {
1508     upb_fielddef *f = upb_msg_iter_field(&i);
1509     const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(f);
1510     bool is_matching_oneof = false;
1511     uint32_t offset =
1512         desc->layout->fields[upb_fielddef_index(f)].offset +
1513         sizeof(MessageHeader);
1514 
1515     if (oneof) {
1516       uint32_t oneof_case =
1517           slot_read_oneof_case(desc->layout, Message_data(msg), oneof);
1518       // For a oneof, check that this field is actually present -- skip all the
1519       // below if not.
1520       if (oneof_case != upb_fielddef_number(f)) {
1521         continue;
1522       }
1523       // Otherwise, fall through to the appropriate singular-field handler
1524       // below.
1525       is_matching_oneof = true;
1526     }
1527 
1528     if (is_map_field(f)) {
1529       VALUE map = DEREF(msg, offset, VALUE);
1530       if (map != Qnil || emit_defaults) {
1531         putmap(map, f, sink, depth, emit_defaults, is_json);
1532       }
1533     } else if (upb_fielddef_isseq(f)) {
1534       VALUE ary = DEREF(msg, offset, VALUE);
1535       if (ary != Qnil) {
1536         putary(ary, f, sink, depth, emit_defaults, is_json);
1537       }
1538     } else if (upb_fielddef_isstring(f)) {
1539       VALUE str = DEREF(msg, offset, VALUE);
1540       bool is_default = false;
1541 
1542       if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO2) {
1543         is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse;
1544       } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {
1545         is_default = RSTRING_LEN(str) == 0;
1546       }
1547 
1548       if (is_matching_oneof || emit_defaults || !is_default || json_wrapper) {
1549         putstr(str, f, sink);
1550       }
1551     } else if (upb_fielddef_issubmsg(f)) {
1552       // OPT: could try to avoid the layout_get() (which will expand lazy
1553       // wrappers).
1554       VALUE val = layout_get(desc->layout, Message_data(msg), f);
1555       putsubmsg(val, f, sink, depth, emit_defaults, is_json);
1556     } else {
1557       upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
1558 
1559 #define T(upbtypeconst, upbtype, ctype, default_value)                       \
1560   case upbtypeconst: {                                                       \
1561     ctype value = DEREF(msg, offset, ctype);                                 \
1562     bool is_default = false;                                                 \
1563     if (upb_fielddef_haspresence(f)) {                                       \
1564       is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse; \
1565     } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {       \
1566       is_default = default_value == value;                                   \
1567     }                                                                        \
1568     if (is_matching_oneof || emit_defaults || !is_default || json_wrapper) { \
1569       upb_sink_put##upbtype(sink, sel, value);                               \
1570     }                                                                        \
1571   } break;
1572 
1573       switch (upb_fielddef_type(f)) {
1574         T(UPB_TYPE_FLOAT,  float,  float, 0.0)
1575         T(UPB_TYPE_DOUBLE, double, double, 0.0)
1576         T(UPB_TYPE_BOOL,   bool,   uint8_t, 0)
1577         case UPB_TYPE_ENUM:
1578         T(UPB_TYPE_INT32,  int32,  int32_t, 0)
1579         T(UPB_TYPE_UINT32, uint32, uint32_t, 0)
1580         T(UPB_TYPE_INT64,  int64,  int64_t, 0)
1581         T(UPB_TYPE_UINT64, uint64, uint64_t, 0)
1582 
1583         case UPB_TYPE_STRING:
1584         case UPB_TYPE_BYTES:
1585         case UPB_TYPE_MESSAGE: rb_raise(rb_eRuntimeError, "Internal error.");
1586       }
1587 
1588 #undef T
1589     }
1590   }
1591 
1592   {
1593     stringsink* unknown = msg->unknown_fields;
1594     if (unknown != NULL) {
1595       upb_sink_putunknown(sink, unknown->ptr, unknown->len);
1596     }
1597   }
1598 
1599   if (open_msg) {
1600     upb_sink_endmsg(sink, &status);
1601   }
1602 }
1603 
1604 /*
1605  * call-seq:
1606  *     MessageClass.encode(msg) => bytes
1607  *
1608  * Encodes the given message object to its serialized form in protocol buffers
1609  * wire format.
1610  */
Message_encode(VALUE klass,VALUE msg_rb)1611 VALUE Message_encode(VALUE klass, VALUE msg_rb) {
1612   VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
1613   Descriptor* desc = ruby_to_Descriptor(descriptor);
1614 
1615   stringsink sink;
1616   stringsink_init(&sink);
1617 
1618   {
1619     const upb_handlers* serialize_handlers =
1620         msgdef_pb_serialize_handlers(desc);
1621 
1622     stackenv se;
1623     upb_pb_encoder* encoder;
1624     VALUE ret;
1625 
1626     stackenv_init(&se, "Error occurred during encoding: %" PRIsVALUE);
1627     encoder = upb_pb_encoder_create(se.arena, serialize_handlers, sink.sink);
1628 
1629     putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0, false, false, true);
1630 
1631     ret = rb_str_new(sink.ptr, sink.len);
1632 
1633     stackenv_uninit(&se);
1634     stringsink_uninit(&sink);
1635 
1636     return ret;
1637   }
1638 }
1639 
1640 /*
1641  * call-seq:
1642  *     MessageClass.encode_json(msg, options = {}) => json_string
1643  *
1644  * Encodes the given message object into its serialized JSON representation.
1645  * @param options [Hash] options for the decoder
1646  *  preserve_proto_fieldnames: set true to use original fieldnames (default is to camelCase)
1647  *  emit_defaults: set true to emit 0/false values (default is to omit them)
1648  */
Message_encode_json(int argc,VALUE * argv,VALUE klass)1649 VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1650   VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
1651   Descriptor* desc = ruby_to_Descriptor(descriptor);
1652   VALUE msg_rb;
1653   VALUE preserve_proto_fieldnames = Qfalse;
1654   VALUE emit_defaults = Qfalse;
1655   stringsink sink;
1656 
1657   if (argc < 1 || argc > 2) {
1658     rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
1659   }
1660 
1661   msg_rb = argv[0];
1662 
1663   if (argc == 2) {
1664     VALUE hash_args = argv[1];
1665     if (TYPE(hash_args) != T_HASH) {
1666       rb_raise(rb_eArgError, "Expected hash arguments.");
1667     }
1668     preserve_proto_fieldnames = rb_hash_lookup2(
1669         hash_args, ID2SYM(rb_intern("preserve_proto_fieldnames")), Qfalse);
1670 
1671     emit_defaults = rb_hash_lookup2(
1672         hash_args, ID2SYM(rb_intern("emit_defaults")), Qfalse);
1673   }
1674 
1675   stringsink_init(&sink);
1676 
1677   {
1678     const upb_handlers* serialize_handlers =
1679         msgdef_json_serialize_handlers(desc, RTEST(preserve_proto_fieldnames));
1680     upb_json_printer* printer;
1681     stackenv se;
1682     VALUE ret;
1683 
1684     stackenv_init(&se, "Error occurred during encoding: %" PRIsVALUE);
1685     printer = upb_json_printer_create(se.arena, serialize_handlers, sink.sink);
1686 
1687     putmsg(msg_rb, desc, upb_json_printer_input(printer), 0,
1688            RTEST(emit_defaults), true, true);
1689 
1690     ret = rb_enc_str_new(sink.ptr, sink.len, rb_utf8_encoding());
1691 
1692     stackenv_uninit(&se);
1693     stringsink_uninit(&sink);
1694 
1695     return ret;
1696   }
1697 }
1698 
// Drops the unknown-field buffer attached to msg_rb, then recurses into every
// message-typed value reachable from it: singular submessages, repeated
// message fields and maps whose values are messages.  For a field inside a
// oneof, only the member that is currently set is visited.
//
// msg_rb must wrap a MessageHeader; desc is the descriptor whose layout is
// used to locate each field's slot in the message data.
static void discard_unknown(VALUE msg_rb, const Descriptor* desc) {
  MessageHeader* header;
  upb_msg_field_iter field_it;

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, header);

  // NOTE(review): only the sink's contents are released here; confirm whether
  // the stringsink struct itself is owned (and must be freed) by the message.
  if (header->unknown_fields != NULL) {
    stringsink_uninit(header->unknown_fields);
    header->unknown_fields = NULL;
  }

  for (upb_msg_field_begin(&field_it, desc->msgdef);
       !upb_msg_field_done(&field_it);
       upb_msg_field_next(&field_it)) {
    upb_fielddef* field = upb_msg_iter_field(&field_it);
    const upb_oneofdef* containing_oneof =
        upb_fielddef_realcontainingoneof(field);
    // Field slots are laid out after the MessageHeader.
    uint32_t slot =
        sizeof(MessageHeader) +
        desc->layout->fields[upb_fielddef_index(field)].offset;

    if (containing_oneof) {
      uint32_t active_case = slot_read_oneof_case(
          desc->layout, Message_data(header), containing_oneof);
      // A oneof member that is not the active case holds nothing to visit.
      if (active_case != upb_fielddef_number(field)) {
        continue;
      }
    }

    // Only message-typed fields can carry nested unknown fields.
    if (!upb_fielddef_issubmsg(field)) {
      continue;
    }

    if (is_map_field(field)) {
      // Maps are only interesting when their values are messages.
      if (upb_fielddef_issubmsg(map_field_value(field))) {
        VALUE map = DEREF(header, slot, VALUE);

        if (map != Qnil) {
          Map_iter entry;

          Map_begin(map, &entry);
          while (!Map_done(&entry)) {
            VALUE child = Map_iter_value(&entry);
            VALUE child_descriptor =
                rb_ivar_get(child, descriptor_instancevar_interned);

            discard_unknown(child, ruby_to_Descriptor(child_descriptor));
            Map_next(&entry);
          }
        }
      }
    } else if (upb_fielddef_isseq(field)) {
      VALUE ary = DEREF(header, slot, VALUE);

      if (ary != Qnil) {
        // The length is read once, before any element is visited.
        int count = NUM2INT(RepeatedField_length(ary));
        int idx = 0;

        while (idx < count) {
          VALUE* element = (VALUE*)RepeatedField_index_native(ary, idx);
          VALUE child = *element;
          VALUE child_descriptor =
              rb_ivar_get(child, descriptor_instancevar_interned);

          discard_unknown(child, ruby_to_Descriptor(child_descriptor));
          idx++;
        }
      }
    } else {
      VALUE child = DEREF(header, slot, VALUE);

      // An unset singular submessage is stored as nil.
      if (child != Qnil) {
        VALUE child_descriptor =
            rb_ivar_get(child, descriptor_instancevar_interned);

        discard_unknown(child, ruby_to_Descriptor(child_descriptor));
      }
    }
  }
}
1777 
1778 /*
1779  * call-seq:
1780  *     Google::Protobuf.discard_unknown(msg)
1781  *
1782  * Discard unknown fields in the given message object and recursively discard
1783  * unknown fields in submessages.
1784  */
Google_Protobuf_discard_unknown(VALUE self,VALUE msg_rb)1785 VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) {
1786   VALUE klass = CLASS_OF(msg_rb);
1787   VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
1788   Descriptor* desc = ruby_to_Descriptor(descriptor);
1789   if (klass == cRepeatedField || klass == cMap) {
1790     rb_raise(rb_eArgError, "Expected proto msg for discard unknown.");
1791   } else {
1792     discard_unknown(msg_rb, desc);
1793   }
1794   return Qnil;
1795 }
1796