1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2014 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 #include "protobuf.h"
32
33 #include <math.h>
34
35 #include <ruby/encoding.h>
36
37 // -----------------------------------------------------------------------------
38 // Ruby <-> native slot management.
39 // -----------------------------------------------------------------------------
40
// Helpers for reading and writing typed values at raw addresses.
//
//   CHARPTR_AT(msg, ofs)          char* pointing |ofs| bytes past |msg|.
//   DEREF_OFFSET(msg, ofs, type)  lvalue of |type| located |ofs| bytes past
//                                 |msg|.
//   DEREF(memory, type)           lvalue of |type| located at |memory|.
//
// Every argument and every expansion is parenthesized so that compound
// argument expressions (e.g. `base + 1` or `flag ? 2 : 3`) are evaluated as a
// unit instead of being re-associated by the cast or the addition.
#define CHARPTR_AT(msg, ofs) ((char*)(msg) + (ofs))
#define DEREF_OFFSET(msg, ofs, type) (*(type*)CHARPTR_AT(msg, ofs))
#define DEREF(memory, type) (*(type*)(memory))
44
// Returns the number of bytes one value of |type| occupies in native storage.
// Types not listed here yield 0.
size_t native_slot_size(upb_fieldtype_t type) {
  switch (type) {
    // Stored as a Ruby object reference.
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
    case UPB_TYPE_MESSAGE:
      return sizeof(VALUE);

    // Eight-byte scalars.
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;

    // Four-byte scalars (enums are stored as their numeric value).
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
      return 4;

    case UPB_TYPE_BOOL:
      return 1;

    default:
      return 0;
  }
}
61
// Returns true when |value| is a Ruby Float, Fixnum or Bignum.
static bool is_ruby_num(VALUE value) {
  switch (TYPE(value)) {
    case T_FLOAT:
    case T_FIXNUM:
    case T_BIGNUM:
      return true;
    default:
      return false;
  }
}
67
// Validates that the Ruby value |val| may be stored in the integer field
// |name| of type |type|.
//
// Raises cTypeError if |val| is not a Ruby number, and rb_eRangeError if it is
// a Float with a fractional part or if it is negative and |type| is unsigned.
//
// The upper-bound range checks are left to the NUM2{INT,UINT,LL,ULL} macros
// used by the caller; only the precision check (no rounding allowed) and the
// sign check for unsigned types are performed here.
void native_slot_check_int_range_precision(const char* name, upb_fieldtype_t type, VALUE val) {
  const bool is_unsigned =
      (type == UPB_TYPE_UINT32 || type == UPB_TYPE_UINT64);

  if (!is_ruby_num(val)) {
    rb_raise(cTypeError, "Expected number type for integral field '%s' (given %s).",
             name, rb_class2name(CLASS_OF(val)));
  }

  if (TYPE(val) == T_FLOAT) {
    const double as_double = NUM2DBL(val);
    if (as_double != floor(as_double)) {
      rb_raise(rb_eRangeError,
               "Non-integral floating point value assigned to integer field '%s' (given %s).",
               name, rb_class2name(CLASS_OF(val)));
    }
  }

  if (is_unsigned && NUM2DBL(val) < 0) {
    rb_raise(rb_eRangeError,
             "Assigning negative value to unsigned integer field '%s' (given %s).",
             name, rb_class2name(CLASS_OF(val)));
  }
}
93
// Returns a frozen string holding |value| encoded for a field of |type|:
// kRubyStringUtf8Encoding for UPB_TYPE_STRING, kRubyString8bitEncoding
// otherwise.
//
// Raises rb_eEncodingError when the result for a UPB_TYPE_STRING field has a
// broken code range (i.e. is not valid UTF-8).
VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) {
  const bool is_utf8_field = (type == UPB_TYPE_STRING);
  rb_encoding* target_encoding;
  VALUE encoded;

  if (is_utf8_field) {
    target_encoding = kRubyStringUtf8Encoding;
  } else {
    target_encoding = kRubyString8bitEncoding;
  }

  // Note: this will not duplicate underlying string data unless necessary.
  encoded = rb_str_encode(value, rb_enc_from_encoding(target_encoding), 0, Qnil);

  if (is_utf8_field &&
      rb_enc_str_coderange(encoded) == ENC_CODERANGE_BROKEN) {
    rb_raise(rb_eEncodingError, "String is invalid UTF-8");
  }

  // Freeze the encoded result so the stored data stays valid. The freeze is
  // applied to the string returned by the encode call above, not to the
  // string the user assigned.
  rb_obj_freeze(encoded);

  return encoded;
}
113
// Stores the Ruby |value| at |memory| as a native value of |type|.
//
// This is native_slot_set_value_and_case() without a oneof case slot: a NULL
// case pointer is passed, so no case number is written.
void native_slot_set(const char* name,
                     upb_fieldtype_t type, VALUE type_class,
                     void* memory, VALUE value) {
  uint32_t* const no_case_memory = NULL;
  const uint32_t unused_case_number = 0;

  native_slot_set_value_and_case(name, type, type_class, memory, value,
                                 no_case_memory, unused_case_number);
}
119
// Converts the Ruby |value| to the native representation for |type| and stores
// it at |memory|. When |case_memory| is non-NULL, |case_number| is written
// through it after the value has been stored.
//
// |name| is only used in error messages. |type_class| is consulted for message
// fields (the expected class of |value|) and for enum fields (the object whose
// "resolve" method maps a symbol to its number).
//
// Accepted inputs per type:
//   - float/double: any Ruby number.
//   - bool: exactly true or false.
//   - string: a String, or a Symbol (converted with to_s).
//   - bytes: a String.
//   - message: nil, an instance of |type_class|, a Time when |type_class| is
//     named "Google::Protobuf::Timestamp", or a Numeric when it is named
//     "Google::Protobuf::Duration".
//   - enum: a number, a Symbol, or a String (converted with to_sym).
//   - integers: any Ruby number accepted by
//     native_slot_check_int_range_precision().
//
// Raises cTypeError when |value| has an unacceptable Ruby type and
// rb_eRangeError for an enum symbol that does not resolve; the conversion
// helpers and NUM2* macros used below may raise as well.
void native_slot_set_value_and_case(const char* name,
                                    upb_fieldtype_t type, VALUE type_class,
                                    void* memory, VALUE value,
                                    uint32_t* case_memory,
                                    uint32_t case_number) {
  // Note that in order to atomically change the value in memory and the case
  // value (w.r.t. Ruby VM calls), we must set the value at |memory| only after
  // all Ruby VM calls are complete. The case is then set at the bottom of this
  // function.
  switch (type) {
    case UPB_TYPE_FLOAT:
      if (!is_ruby_num(value)) {
        rb_raise(cTypeError, "Expected number type for float field '%s' (given %s).",
                 name, rb_class2name(CLASS_OF(value)));
      }
      DEREF(memory, float) = NUM2DBL(value);
      break;
    case UPB_TYPE_DOUBLE:
      if (!is_ruby_num(value)) {
        rb_raise(cTypeError, "Expected number type for double field '%s' (given %s).",
                 name, rb_class2name(CLASS_OF(value)));
      }
      DEREF(memory, double) = NUM2DBL(value);
      break;
    case UPB_TYPE_BOOL: {
      int8_t val = -1;
      if (value == Qtrue) {
        val = 1;
      } else if (value == Qfalse) {
        val = 0;
      } else {
        rb_raise(cTypeError, "Invalid argument for boolean field '%s' (given %s).",
                 name, rb_class2name(CLASS_OF(value)));
      }
      DEREF(memory, int8_t) = val;
      break;
    }
    case UPB_TYPE_STRING:
      if (CLASS_OF(value) == rb_cSymbol) {
        value = rb_funcall(value, rb_intern("to_s"), 0);
      } else if (CLASS_OF(value) != rb_cString) {
        rb_raise(cTypeError, "Invalid argument for string field '%s' (given %s).",
                 name, rb_class2name(CLASS_OF(value)));
      }

      DEREF(memory, VALUE) = native_slot_encode_and_freeze_string(type, value);
      break;

    case UPB_TYPE_BYTES: {
      if (CLASS_OF(value) != rb_cString) {
        rb_raise(cTypeError, "Invalid argument for bytes field '%s' (given %s).",
                 name, rb_class2name(CLASS_OF(value)));
      }

      DEREF(memory, VALUE) = native_slot_encode_and_freeze_string(type, value);
      break;
    }
    case UPB_TYPE_MESSAGE: {
      if (CLASS_OF(value) == CLASS_OF(Qnil)) {
        value = Qnil;
      } else if (CLASS_OF(value) != type_class) {
        // Check for possible implicit conversions. Qnil (a VALUE) marks "no
        // conversion found"; a successfully constructed message instance is
        // never nil.
        VALUE converted_value = Qnil;
        const char* field_type_name = rb_class2name(type_class);

        if (strcmp(field_type_name, "Google::Protobuf::Timestamp") == 0 &&
            rb_obj_is_kind_of(value, rb_cTime)) {
          // Time -> Google::Protobuf::Timestamp
          VALUE hash = rb_hash_new();
          rb_hash_aset(hash, rb_str_new2("seconds"), rb_funcall(value, rb_intern("to_i"), 0));
          rb_hash_aset(hash, rb_str_new2("nanos"), rb_funcall(value, rb_intern("nsec"), 0));
          VALUE args[1] = { hash };
          converted_value = rb_class_new_instance(1, args, type_class);
        } else if (strcmp(field_type_name, "Google::Protobuf::Duration") == 0 &&
                   rb_obj_is_kind_of(value, rb_cNumeric)) {
          // Numeric -> Google::Protobuf::Duration. The fractional part
          // (remainder(1)) is scaled to nanoseconds and rounded.
          VALUE hash = rb_hash_new();
          rb_hash_aset(hash, rb_str_new2("seconds"), rb_funcall(value, rb_intern("to_i"), 0));
          VALUE n_value = rb_funcall(value, rb_intern("remainder"), 1, INT2NUM(1));
          n_value = rb_funcall(n_value, rb_intern("*"), 1, INT2NUM(1000000000));
          n_value = rb_funcall(n_value, rb_intern("round"), 0);
          rb_hash_aset(hash, rb_str_new2("nanos"), n_value);
          VALUE args[1] = { hash };
          converted_value = rb_class_new_instance(1, args, type_class);
        }

        // Raise if no suitable conversion could be found.
        if (converted_value == Qnil) {
          rb_raise(cTypeError,
                   "Invalid type %s to assign to submessage field '%s'.",
                   rb_class2name(CLASS_OF(value)), name);
        } else {
          value = converted_value;
        }
      }
      DEREF(memory, VALUE) = value;
      break;
    }
    case UPB_TYPE_ENUM: {
      int32_t int_val = 0;
      if (TYPE(value) == T_STRING) {
        value = rb_funcall(value, rb_intern("to_sym"), 0);
      } else if (!is_ruby_num(value) && TYPE(value) != T_SYMBOL) {
        rb_raise(cTypeError,
                 "Expected number or symbol type for enum field '%s'.", name);
      }
      if (TYPE(value) == T_SYMBOL) {
        // Ensure that the given symbol exists in the enum module.
        VALUE lookup = rb_funcall(type_class, rb_intern("resolve"), 1, value);
        if (lookup == Qnil) {
          rb_raise(rb_eRangeError, "Unknown symbol value for enum field '%s'.", name);
        } else {
          int_val = NUM2INT(lookup);
        }
      } else {
        // Numeric enum values are validated like an int32 field.
        native_slot_check_int_range_precision(name, UPB_TYPE_INT32, value);
        int_val = NUM2INT(value);
      }
      DEREF(memory, int32_t) = int_val;
      break;
    }
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      native_slot_check_int_range_precision(name, type, value);
      switch (type) {
        case UPB_TYPE_INT32:
          DEREF(memory, int32_t) = NUM2INT(value);
          break;
        case UPB_TYPE_INT64:
          DEREF(memory, int64_t) = NUM2LL(value);
          break;
        case UPB_TYPE_UINT32:
          DEREF(memory, uint32_t) = NUM2UINT(value);
          break;
        case UPB_TYPE_UINT64:
          DEREF(memory, uint64_t) = NUM2ULL(value);
          break;
        default:
          break;
      }
      break;
    default:
      break;
  }

  // The value is in place; publish the oneof case last (see note above).
  if (case_memory != NULL) {
    *case_memory = case_number;
  }
}
271
// Reads the native value of |type| stored at |memory| and returns it as a Ruby
// VALUE.
//
// String, bytes and message slots already hold a VALUE and are returned as-is.
// For enums the stored number is passed to enum_lookup() with |type_class|;
// if that yields nil the number itself is returned. Types not handled here
// return nil.
VALUE native_slot_get(upb_fieldtype_t type,
                      VALUE type_class,
                      const void* memory) {
  switch (type) {
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
    case UPB_TYPE_MESSAGE:
      return DEREF(memory, VALUE);

    case UPB_TYPE_BOOL:
      if (DEREF(memory, int8_t)) {
        return Qtrue;
      }
      return Qfalse;

    case UPB_TYPE_ENUM: {
      const int32_t number = DEREF(memory, int32_t);
      const VALUE symbol = enum_lookup(type_class, INT2NUM(number));
      return (symbol != Qnil) ? symbol : INT2NUM(number);
    }

    case UPB_TYPE_FLOAT:
      return DBL2NUM(DEREF(memory, float));
    case UPB_TYPE_DOUBLE:
      return DBL2NUM(DEREF(memory, double));

    case UPB_TYPE_INT32:
      return INT2NUM(DEREF(memory, int32_t));
    case UPB_TYPE_UINT32:
      return UINT2NUM(DEREF(memory, uint32_t));
    case UPB_TYPE_INT64:
      return LL2NUM(DEREF(memory, int64_t));
    case UPB_TYPE_UINT64:
      return ULL2NUM(DEREF(memory, uint64_t));

    default:
      return Qnil;
  }
}
307
// Writes the initial value for a slot of |type| at |memory|: zero for numeric
// and bool slots, nil for messages, and a new empty string for string/bytes
// slots (associated with kRubyString8bitEncoding for bytes and
// kRubyStringUtf8Encoding for strings). Other types are left untouched.
void native_slot_init(upb_fieldtype_t type, void* memory) {
  switch (type) {
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      rb_encoding* encoding = (type == UPB_TYPE_BYTES) ?
          kRubyString8bitEncoding : kRubyStringUtf8Encoding;
      // Store the new string in the slot first, then tag its encoding.
      DEREF(memory, VALUE) = rb_str_new2("");
      rb_enc_associate(DEREF(memory, VALUE), encoding);
      break;
    }
    case UPB_TYPE_MESSAGE:
      DEREF(memory, VALUE) = Qnil;
      break;
    case UPB_TYPE_BOOL:
      DEREF(memory, int8_t) = 0;
      break;
    case UPB_TYPE_FLOAT:
      DEREF(memory, float) = 0.0;
      break;
    case UPB_TYPE_DOUBLE:
      DEREF(memory, double) = 0.0;
      break;
    case UPB_TYPE_INT32:
    case UPB_TYPE_ENUM:
      DEREF(memory, int32_t) = 0;
      break;
    case UPB_TYPE_UINT32:
      DEREF(memory, uint32_t) = 0;
      break;
    case UPB_TYPE_INT64:
      DEREF(memory, int64_t) = 0;
      break;
    case UPB_TYPE_UINT64:
      DEREF(memory, uint64_t) = 0;
      break;
    default:
      break;
  }
}
345
// GC mark hook for one slot: string, bytes and message slots hold a Ruby VALUE
// which is passed to rb_gc_mark(); every other type needs no marking.
void native_slot_mark(upb_fieldtype_t type, void* memory) {
  const bool holds_ruby_object = (type == UPB_TYPE_STRING ||
                                  type == UPB_TYPE_BYTES ||
                                  type == UPB_TYPE_MESSAGE);
  if (holds_ruby_object) {
    rb_gc_mark(DEREF(memory, VALUE));
  }
}
357
// Shallow copy: copies the raw bytes of one slot of |type| from |from| to
// |to|. For slots holding a VALUE this copies the reference, not the object.
void native_slot_dup(upb_fieldtype_t type, void* to, void* from) {
  const size_t slot_bytes = native_slot_size(type);
  memcpy(to, from, slot_bytes);
}
361
// Deep copy of one slot of |type| from |from| to |to|.
//
// String and bytes values are copied by calling "dup" on them, messages via
// Message_deep_copy(); a nil source stays nil. All other types are copied
// byte-for-byte.
void native_slot_deep_copy(upb_fieldtype_t type, void* to, void* from) {
  if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES) {
    const VALUE source = DEREF(from, VALUE);
    VALUE copy = Qnil;
    if (source != Qnil) {
      copy = rb_funcall(source, rb_intern("dup"), 0);
    }
    DEREF(to, VALUE) = copy;
    return;
  }

  if (type == UPB_TYPE_MESSAGE) {
    const VALUE source = DEREF(from, VALUE);
    VALUE copy = Qnil;
    if (source != Qnil) {
      copy = Message_deep_copy(source);
    }
    DEREF(to, VALUE) = copy;
    return;
  }

  memcpy(to, from, native_slot_size(type));
}
381
// Compares two slots of |type|. String, bytes and message slots are compared
// by calling Ruby's "==" on the first value with the second as argument; all
// other types are compared byte-for-byte.
bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2) {
  const bool compare_as_objects = (type == UPB_TYPE_STRING ||
                                   type == UPB_TYPE_BYTES ||
                                   type == UPB_TYPE_MESSAGE);

  if (!compare_as_objects) {
    return memcmp(mem1, mem2, native_slot_size(type)) == 0;
  }

  {
    const VALUE lhs = DEREF(mem1, VALUE);
    const VALUE rhs = DEREF(mem2, VALUE);
    const VALUE equal = rb_funcall(lhs, rb_intern("=="), 1, rhs);
    return equal == Qtrue;
  }
}
396
397 // -----------------------------------------------------------------------------
398 // Map field utilities.
399 // -----------------------------------------------------------------------------
400
tryget_map_entry_msgdef(const upb_fielddef * field)401 const upb_msgdef* tryget_map_entry_msgdef(const upb_fielddef* field) {
402 const upb_msgdef* subdef;
403 if (upb_fielddef_label(field) != UPB_LABEL_REPEATED ||
404 upb_fielddef_type(field) != UPB_TYPE_MESSAGE) {
405 return NULL;
406 }
407 subdef = upb_fielddef_msgsubdef(field);
408 return upb_msgdef_mapentry(subdef) ? subdef : NULL;
409 }
410
map_entry_msgdef(const upb_fielddef * field)411 const upb_msgdef *map_entry_msgdef(const upb_fielddef* field) {
412 const upb_msgdef* subdef = tryget_map_entry_msgdef(field);
413 assert(subdef);
414 return subdef;
415 }
416
is_map_field(const upb_fielddef * field)417 bool is_map_field(const upb_fielddef *field) {
418 const upb_msgdef* subdef = tryget_map_entry_msgdef(field);
419 if (subdef == NULL) return false;
420
421 // Map fields are a proto3 feature.
422 // If we're using proto2 syntax we need to fallback to the repeated field.
423 return upb_msgdef_syntax(subdef) == UPB_SYNTAX_PROTO3;
424 }
425
// Returns the key field definition of the map entry behind the map field
// |field|.
const upb_fielddef* map_field_key(const upb_fielddef* field) {
  return map_entry_key(map_entry_msgdef(field));
}
430
// Returns the value field definition of the map entry behind the map field
// |field|.
const upb_fielddef* map_field_value(const upb_fielddef* field) {
  return map_entry_value(map_entry_msgdef(field));
}
435
map_entry_key(const upb_msgdef * msgdef)436 const upb_fielddef* map_entry_key(const upb_msgdef* msgdef) {
437 const upb_fielddef* key_field = upb_msgdef_itof(msgdef, MAP_KEY_FIELD);
438 assert(key_field != NULL);
439 return key_field;
440 }
441
map_entry_value(const upb_msgdef * msgdef)442 const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) {
443 const upb_fielddef* value_field = upb_msgdef_itof(msgdef, MAP_VALUE_FIELD);
444 assert(value_field != NULL);
445 return value_field;
446 }
447
448 // -----------------------------------------------------------------------------
449 // Memory layout management.
450 // -----------------------------------------------------------------------------
451
// Returns true when |layout| assigned a presence bit ("hasbit") to |field|,
// i.e. its hasbit entry is not the MESSAGE_FIELD_NO_HASBIT sentinel.
bool field_contains_hasbit(MessageLayout* layout,
                           const upb_fielddef* field) {
  if (layout->fields[upb_fielddef_index(field)].hasbit ==
      MESSAGE_FIELD_NO_HASBIT) {
    return false;
  }
  return true;
}
457
// Rounds |offset| up to the next multiple of |granularity|.
//
// |granularity| must be a power of two. A granularity of 0 (e.g. the size
// reported for a type with no native slot) means "no alignment requirement"
// and returns |offset| unchanged; without this guard the mask below would be
// zero and the offset would collapse to 0.
static size_t align_up_to(size_t offset, size_t granularity) {
  if (granularity == 0) {
    return offset;
  }
  return (offset + granularity - 1) & ~(granularity - 1);
}
462
create_layout(const upb_msgdef * msgdef)463 MessageLayout* create_layout(const upb_msgdef* msgdef) {
464 MessageLayout* layout = ALLOC(MessageLayout);
465 int nfields = upb_msgdef_numfields(msgdef);
466 upb_msg_field_iter it;
467 upb_msg_oneof_iter oit;
468 size_t off = 0;
469
470 layout->fields = ALLOC_N(MessageField, nfields);
471
472 size_t hasbit = 0;
473 for (upb_msg_field_begin(&it, msgdef);
474 !upb_msg_field_done(&it);
475 upb_msg_field_next(&it)) {
476 const upb_fielddef* field = upb_msg_iter_field(&it);
477 if (upb_fielddef_haspresence(field)) {
478 layout->fields[upb_fielddef_index(field)].hasbit = hasbit++;
479 } else {
480 layout->fields[upb_fielddef_index(field)].hasbit =
481 MESSAGE_FIELD_NO_HASBIT;
482 }
483 }
484
485 if (hasbit != 0) {
486 off += (hasbit + 8 - 1) / 8;
487 }
488
489 for (upb_msg_field_begin(&it, msgdef);
490 !upb_msg_field_done(&it);
491 upb_msg_field_next(&it)) {
492 const upb_fielddef* field = upb_msg_iter_field(&it);
493 size_t field_size;
494
495 if (upb_fielddef_containingoneof(field)) {
496 // Oneofs are handled separately below.
497 continue;
498 }
499
500 // Allocate |field_size| bytes for this field in the layout.
501 field_size = 0;
502 if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
503 field_size = sizeof(VALUE);
504 } else {
505 field_size = native_slot_size(upb_fielddef_type(field));
506 }
507 // Align current offset up to |size| granularity.
508 off = align_up_to(off, field_size);
509 layout->fields[upb_fielddef_index(field)].offset = off;
510 layout->fields[upb_fielddef_index(field)].case_offset =
511 MESSAGE_FIELD_NO_CASE;
512 off += field_size;
513 }
514
515 // Handle oneofs now -- we iterate over oneofs specifically and allocate only
516 // one slot per oneof.
517 //
518 // We assign all value slots first, then pack the 'case' fields at the end,
519 // since in the common case (modern 64-bit platform) these are 8 bytes and 4
520 // bytes respectively and we want to avoid alignment overhead.
521 //
522 // Note that we reserve 4 bytes (a uint32) per 'case' slot because the value
523 // space for oneof cases is conceptually as wide as field tag numbers. In
524 // practice, it's unlikely that a oneof would have more than e.g. 256 or 64K
525 // members (8 or 16 bits respectively), so conceivably we could assign
526 // consecutive case numbers and then pick a smaller oneof case slot size, but
527 // the complexity to implement this indirection is probably not worthwhile.
528 for (upb_msg_oneof_begin(&oit, msgdef);
529 !upb_msg_oneof_done(&oit);
530 upb_msg_oneof_next(&oit)) {
531 const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit);
532 upb_oneof_iter fit;
533
534 // Always allocate NATIVE_SLOT_MAX_SIZE bytes, but share the slot between
535 // all fields.
536 size_t field_size = NATIVE_SLOT_MAX_SIZE;
537 // Align the offset.
538 off = align_up_to(off, field_size);
539 // Assign all fields in the oneof this same offset.
540 for (upb_oneof_begin(&fit, oneof);
541 !upb_oneof_done(&fit);
542 upb_oneof_next(&fit)) {
543 const upb_fielddef* field = upb_oneof_iter_field(&fit);
544 layout->fields[upb_fielddef_index(field)].offset = off;
545 }
546 off += field_size;
547 }
548
549 // Now the case fields.
550 for (upb_msg_oneof_begin(&oit, msgdef);
551 !upb_msg_oneof_done(&oit);
552 upb_msg_oneof_next(&oit)) {
553 const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit);
554 upb_oneof_iter fit;
555
556 size_t field_size = sizeof(uint32_t);
557 // Align the offset.
558 off = (off + field_size - 1) & ~(field_size - 1);
559 // Assign all fields in the oneof this same offset.
560 for (upb_oneof_begin(&fit, oneof);
561 !upb_oneof_done(&fit);
562 upb_oneof_next(&fit)) {
563 const upb_fielddef* field = upb_oneof_iter_field(&fit);
564 layout->fields[upb_fielddef_index(field)].case_offset = off;
565 }
566 off += field_size;
567 }
568
569 layout->size = off;
570
571 layout->msgdef = msgdef;
572 upb_msgdef_ref(layout->msgdef, &layout->msgdef);
573
574 return layout;
575 }
576
// Releases everything create_layout() set up for |layout|: the reference on
// its msgdef, the per-field table, and the layout struct itself.
void free_layout(MessageLayout* layout) {
  upb_msgdef_unref(layout->msgdef, &layout->msgdef);
  xfree(layout->fields);
  xfree(layout);
}
582
field_type_class(const upb_fielddef * field)583 VALUE field_type_class(const upb_fielddef* field) {
584 VALUE type_class = Qnil;
585 if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
586 VALUE submsgdesc =
587 get_def_obj(upb_fielddef_subdef(field));
588 type_class = Descriptor_msgclass(submsgdesc);
589 } else if (upb_fielddef_type(field) == UPB_TYPE_ENUM) {
590 VALUE subenumdesc =
591 get_def_obj(upb_fielddef_subdef(field));
592 type_class = EnumDescriptor_enummodule(subenumdesc);
593 }
594 return type_class;
595 }
596
// Returns the address of |field|'s value slot inside |storage|, i.e.
// |storage| advanced by the field's byte offset recorded in |layout|.
static void* slot_memory(MessageLayout* layout,
                         const void* storage,
                         const upb_fielddef* field) {
  uint8_t* base = (uint8_t*)storage;
  return base + layout->fields[upb_fielddef_index(field)].offset;
}
603
// Returns the address of the uint32_t oneof-case slot for |field| inside
// |storage|, i.e. |storage| advanced by the field's case_offset recorded in
// |layout|.
static uint32_t* slot_oneof_case(MessageLayout* layout,
                                 const void* storage,
                                 const upb_fielddef* field) {
  uint8_t* base = (uint8_t*)storage;
  uint8_t* case_slot =
      base + layout->fields[upb_fielddef_index(field)].case_offset;
  return (uint32_t*)case_slot;
}
610
// Sets |field|'s presence bit in |storage|. The bit index comes from |layout|;
// bit i lives in byte i / 8 at position i % 8. The field must have a hasbit.
static void slot_set_hasbit(MessageLayout* layout,
                            const void* storage,
                            const upb_fielddef* field) {
  uint8_t* presence_bytes = (uint8_t*)storage;
  size_t hasbit = layout->fields[upb_fielddef_index(field)].hasbit;
  assert(hasbit != MESSAGE_FIELD_NO_HASBIT);

  presence_bytes[hasbit / 8] |= 1 << (hasbit % 8);
}
619
// Clears |field|'s presence bit in |storage|. The bit index comes from
// |layout|; bit i lives in byte i / 8 at position i % 8. The field must have a
// hasbit.
static void slot_clear_hasbit(MessageLayout* layout,
                              const void* storage,
                              const upb_fielddef* field) {
  uint8_t* presence_bytes = (uint8_t*)storage;
  size_t hasbit = layout->fields[upb_fielddef_index(field)].hasbit;
  assert(hasbit != MESSAGE_FIELD_NO_HASBIT);

  presence_bytes[hasbit / 8] &= ~(1 << (hasbit % 8));
}
627
// Returns whether |field|'s presence bit is set in |storage|. Fields without
// a hasbit always report false.
static bool slot_is_hasbit_set(MessageLayout* layout,
                               const void* storage,
                               const upb_fielddef* field) {
  const size_t hasbit = layout->fields[upb_fielddef_index(field)].hasbit;

  if (hasbit != MESSAGE_FIELD_NO_HASBIT) {
    const uint8_t* presence_bytes = (const uint8_t*)storage;
    const int mask = 1 << (hasbit % 8);
    return (presence_bytes[hasbit / 8] & mask) != 0;
  }

  return false;
}
639
layout_has(MessageLayout * layout,const void * storage,const upb_fielddef * field)640 VALUE layout_has(MessageLayout* layout,
641 const void* storage,
642 const upb_fielddef* field) {
643 assert(field_contains_hasbit(layout, field));
644 return slot_is_hasbit_set(layout, storage, field) ? Qtrue : Qfalse;
645 }
646
// Resets |field| inside |storage| to its cleared state:
//   - the presence bit (if the field has one) is cleared;
//   - a oneof member zeroes the shared value slot and sets the oneof case to
//     ONEOF_CASE_NONE;
//   - a map field receives a new, empty cMap instance;
//   - a repeated field receives a new, empty cRepeatedField instance;
//   - any other field is set to layout_get_default(field).
void layout_clear(MessageLayout* layout,
                 const void* storage,
                 const upb_fielddef* field) {
  void* memory = slot_memory(layout, storage, field);

  if (field_contains_hasbit(layout, field)) {
    slot_clear_hasbit(layout, storage, field);
  }

  if (upb_fielddef_containingoneof(field)) {
    // The case slot is only looked up for oneof members: create_layout() gives
    // every other field the MESSAGE_FIELD_NO_CASE sentinel as case_offset, and
    // no pointer should be formed from that sentinel.
    uint32_t* oneof_case = slot_oneof_case(layout, storage, field);

    memset(memory, 0, NATIVE_SLOT_MAX_SIZE);
    *oneof_case = ONEOF_CASE_NONE;
  } else if (is_map_field(field)) {
    const upb_fielddef* key_field = map_field_key(field);
    const upb_fielddef* value_field = map_field_value(field);
    VALUE type_class = field_type_class(value_field);

    // Map.new(key_type, value_type[, value_type_class]); the class argument is
    // only passed when the value type has one (message or enum).
    VALUE args[3];
    int argc = 0;
    args[argc++] = fieldtype_to_ruby(upb_fielddef_type(key_field));
    args[argc++] = fieldtype_to_ruby(upb_fielddef_type(value_field));
    if (type_class != Qnil) {
      args[argc++] = type_class;
    }

    DEREF(memory, VALUE) = rb_class_new_instance(argc, args, cMap);
  } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
    VALUE type_class = field_type_class(field);

    // RepeatedField.new(type[, type_class]); same rule for the class argument.
    VALUE args[2];
    int argc = 0;
    args[argc++] = fieldtype_to_ruby(upb_fielddef_type(field));
    if (type_class != Qnil) {
      args[argc++] = type_class;
    }

    DEREF(memory, VALUE) = rb_class_new_instance(argc, args, cRepeatedField);
  } else {
    native_slot_set(upb_fielddef_name(field),
                    upb_fielddef_type(field), field_type_class(field),
                    memory, layout_get_default(field));
  }
}
706
layout_get_default(const upb_fielddef * field)707 VALUE layout_get_default(const upb_fielddef *field) {
708 switch (upb_fielddef_type(field)) {
709 case UPB_TYPE_FLOAT: return DBL2NUM(upb_fielddef_defaultfloat(field));
710 case UPB_TYPE_DOUBLE: return DBL2NUM(upb_fielddef_defaultdouble(field));
711 case UPB_TYPE_BOOL:
712 return upb_fielddef_defaultbool(field) ? Qtrue : Qfalse;
713 case UPB_TYPE_MESSAGE: return Qnil;
714 case UPB_TYPE_ENUM: {
715 const upb_enumdef *enumdef = upb_fielddef_enumsubdef(field);
716 int32_t num = upb_fielddef_defaultint32(field);
717 const char *label = upb_enumdef_iton(enumdef, num);
718 if (label) {
719 return ID2SYM(rb_intern(label));
720 } else {
721 return INT2NUM(num);
722 }
723 }
724 case UPB_TYPE_INT32: return INT2NUM(upb_fielddef_defaultint32(field));
725 case UPB_TYPE_INT64: return LL2NUM(upb_fielddef_defaultint64(field));;
726 case UPB_TYPE_UINT32: return UINT2NUM(upb_fielddef_defaultuint32(field));
727 case UPB_TYPE_UINT64: return ULL2NUM(upb_fielddef_defaultuint64(field));
728 case UPB_TYPE_STRING:
729 case UPB_TYPE_BYTES: {
730 size_t size;
731 const char *str = upb_fielddef_defaultstr(field, &size);
732 VALUE str_rb = rb_str_new(str, size);
733
734 rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ?
735 kRubyString8bitEncoding : kRubyStringUtf8Encoding);
736 rb_obj_freeze(str_rb);
737 return str_rb;
738 }
739 default: return Qnil;
740 }
741 }
742
layout_get(MessageLayout * layout,const void * storage,const upb_fielddef * field)743 VALUE layout_get(MessageLayout* layout,
744 const void* storage,
745 const upb_fielddef* field) {
746 void* memory = slot_memory(layout, storage, field);
747 uint32_t* oneof_case = slot_oneof_case(layout, storage, field);
748
749 bool field_set;
750 if (field_contains_hasbit(layout, field)) {
751 field_set = slot_is_hasbit_set(layout, storage, field);
752 } else {
753 field_set = true;
754 }
755
756 if (upb_fielddef_containingoneof(field)) {
757 if (*oneof_case != upb_fielddef_number(field)) {
758 return layout_get_default(field);
759 }
760 return native_slot_get(upb_fielddef_type(field),
761 field_type_class(field),
762 memory);
763 } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
764 return *((VALUE *)memory);
765 } else if (!field_set) {
766 return layout_get_default(field);
767 } else {
768 return native_slot_get(upb_fielddef_type(field),
769 field_type_class(field),
770 memory);
771 }
772 }
773
// Verifies that |val| may be assigned to the repeated field |field|.
//
// Raises cTypeError unless |val| is typed data of RepeatedField_type whose
// element type equals the field's type and, for message and enum elements,
// whose type class equals the field's message class / enum module.
static void check_repeated_field_type(VALUE val, const upb_fielddef* field) {
  RepeatedField* repeated;
  assert(upb_fielddef_label(field) == UPB_LABEL_REPEATED);

  if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) ||
      RTYPEDDATA_TYPE(val) != &RepeatedField_type) {
    rb_raise(cTypeError, "Expected repeated field array");
  }

  repeated = ruby_to_RepeatedField(val);

  if (repeated->field_type != upb_fielddef_type(field)) {
    rb_raise(cTypeError, "Repeated field array has wrong element type");
  }

  if (repeated->field_type == UPB_TYPE_MESSAGE &&
      repeated->field_type_class !=
          Descriptor_msgclass(get_def_obj(upb_fielddef_subdef(field)))) {
    rb_raise(cTypeError,
             "Repeated field array has wrong message class");
  }

  if (repeated->field_type == UPB_TYPE_ENUM &&
      repeated->field_type_class !=
          EnumDescriptor_enummodule(get_def_obj(upb_fielddef_subdef(field)))) {
    rb_raise(cTypeError,
             "Repeated field array has wrong enum class");
  }
}
805
// Verifies that |val| may be assigned to the map field |field|.
//
// Raises cTypeError unless |val| is typed data of Map_type whose key and value
// types equal those of the field's map entry and, for message or enum values,
// whose value type class equals the class/module of the entry's value field.
static void check_map_field_type(VALUE val, const upb_fielddef* field) {
  const upb_fielddef* key_field = map_field_key(field);
  const upb_fielddef* value_field = map_field_value(field);
  Map* self;

  if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) ||
      RTYPEDDATA_TYPE(val) != &Map_type) {
    rb_raise(cTypeError, "Expected Map instance");
  }

  self = ruby_to_Map(val);
  if (self->key_type != upb_fielddef_type(key_field)) {
    rb_raise(cTypeError, "Map key type does not match field's key type");
  }
  if (self->value_type != upb_fielddef_type(value_field)) {
    rb_raise(cTypeError, "Map value type does not match field's value type");
  }
  if (upb_fielddef_type(value_field) == UPB_TYPE_MESSAGE ||
      upb_fielddef_type(value_field) == UPB_TYPE_ENUM) {
    // Compare against the message class / enum module, which is what
    // layout_clear() passes to Map.new and what check_repeated_field_type()
    // compares against -- not against the descriptor object returned by
    // get_def_obj().
    // NOTE(review): assumes Map stores the class argument it was constructed
    // with in value_type_class -- confirm against the Map implementation.
    if (self->value_type_class != field_type_class(value_field)) {
      rb_raise(cTypeError,
               "Map value type has wrong message/enum class");
    }
  }
}
832
833
// Assigns the Ruby value |val| to |field| inside |storage|.
//
//   - Oneof member: nil clears the whole oneof (case set to ONEOF_CASE_NONE,
//     value slot zeroed); any other value is converted and stored together
//     with the field's number as the new case.
//   - Map / repeated field: |val| is type-checked (raising cTypeError on a
//     mismatch) and stored by reference.
//   - Any other field: |val| is converted by native_slot_set(), which raises
//     on invalid input.
//
// If the field has a presence bit it is set after a successful assignment,
// except that assigning nil clears it, since nil leaves the field without a
// value.
void layout_set(MessageLayout* layout,
                void* storage,
                const upb_fielddef* field,
                VALUE val) {
  void* memory = slot_memory(layout, storage, field);

  if (upb_fielddef_containingoneof(field)) {
    // The case slot is only looked up for oneof members: create_layout() gives
    // every other field the MESSAGE_FIELD_NO_CASE sentinel as case_offset, and
    // no pointer should be formed from that sentinel.
    uint32_t* oneof_case = slot_oneof_case(layout, storage, field);

    if (val == Qnil) {
      // Assigning nil to a oneof field clears the oneof completely.
      *oneof_case = ONEOF_CASE_NONE;
      memset(memory, 0, NATIVE_SLOT_MAX_SIZE);
    } else {
      // The transition between field types for a single oneof (union) slot is
      // somewhat complex because we need to ensure that a GC triggered at any
      // point by a call into the Ruby VM sees a valid state for this field and
      // does not either go off into the weeds (following what it thinks is a
      // VALUE but is actually a different field type) or miss an object (seeing
      // what it thinks is a primitive field but is actually a VALUE for the new
      // field type).
      //
      // In order for the transition to be safe, the oneof case slot must be in
      // sync with the value slot whenever the Ruby VM has been called. Thus, we
      // use native_slot_set_value_and_case(), which ensures that both the value
      // and case number are altered atomically (w.r.t. the Ruby VM).
      native_slot_set_value_and_case(
          upb_fielddef_name(field),
          upb_fielddef_type(field), field_type_class(field),
          memory, val,
          oneof_case, upb_fielddef_number(field));
    }
  } else if (is_map_field(field)) {
    check_map_field_type(val, field);
    DEREF(memory, VALUE) = val;
  } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
    check_repeated_field_type(val, field);
    DEREF(memory, VALUE) = val;
  } else {
    native_slot_set(upb_fielddef_name(field),
                    upb_fielddef_type(field), field_type_class(field),
                    memory, val);
  }

  if (field_contains_hasbit(layout, field)) {
    if (val == Qnil) {
      // Only assignments that accept nil reach this point (the setters above
      // raise for every other type); nil means "no value", so the field must
      // not be reported as present.
      slot_clear_hasbit(layout, storage, field);
    } else {
      slot_set_hasbit(layout, storage, field);
    }
  }
}
882
// Initializes |storage| by running layout_clear() on every field of the
// message definition attached to |layout|.
void layout_init(MessageLayout* layout,
                 void* storage) {
  upb_msg_field_iter field_it;

  upb_msg_field_begin(&field_it, layout->msgdef);
  while (!upb_msg_field_done(&field_it)) {
    layout_clear(layout, storage, upb_msg_iter_field(&field_it));
    upb_msg_field_next(&field_it);
  }
}
893
// GC mark pass over |storage|: walks every field of |layout|'s message
// definition and marks whatever its slot references.
//
//   * repeated-label fields outside a oneof hold a VALUE, marked directly
//     with rb_gc_mark();
//   * other fields outside a oneof go through native_slot_mark();
//   * a oneof member is marked only when the stored oneof case equals its
//     field number; for any other member the slot is left untouched.
void layout_mark(MessageLayout* layout, void* storage) {
  upb_msg_field_iter field_it;

  upb_msg_field_begin(&field_it, layout->msgdef);
  while (!upb_msg_field_done(&field_it)) {
    const upb_fielddef* f = upb_msg_iter_field(&field_it);
    void* slot = slot_memory(layout, storage, f);
    uint32_t* active_case = slot_oneof_case(layout, storage, f);

    if (!upb_fielddef_containingoneof(f)) {
      if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
        rb_gc_mark(DEREF(slot, VALUE));
      } else {
        native_slot_mark(upb_fielddef_type(f), slot);
      }
    } else if (*active_case == upb_fielddef_number(f)) {
      native_slot_mark(upb_fielddef_type(f), slot);
    }

    upb_msg_field_next(&field_it);
  }
}
914
// Copies the fields of the message stored at |from| into |to|, both laid out
// according to |layout|, using the *_dup helpers (native_slot_dup(),
// Map_dup(), RepeatedField_dup()).
//
// Fields that are not populated in |from| -- a oneof member that is not the
// active case, or a hasbit-tracked field whose hasbit is clear -- are skipped,
// so the corresponding slot in |to| is left exactly as the caller provided it.
void layout_dup(MessageLayout* layout, void* to, void* from) {
  upb_msg_field_iter field_it;

  for (upb_msg_field_begin(&field_it, layout->msgdef);
       !upb_msg_field_done(&field_it);
       upb_msg_field_next(&field_it)) {
    const upb_fielddef* f = upb_msg_iter_field(&field_it);

    void* dst = slot_memory(layout, to, f);
    uint32_t* dst_case = slot_oneof_case(layout, to, f);
    void* src = slot_memory(layout, from, f);
    uint32_t* src_case = slot_oneof_case(layout, from, f);

    if (upb_fielddef_containingoneof(f)) {
      // Only the member currently selected in |from| carries data.
      if (*src_case != upb_fielddef_number(f)) {
        continue;
      }
      *dst_case = *src_case;
      native_slot_dup(upb_fielddef_type(f), dst, src);
      continue;
    }

    if (is_map_field(f)) {
      DEREF(dst, VALUE) = Map_dup(DEREF(src, VALUE));
      continue;
    }

    if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
      DEREF(dst, VALUE) = RepeatedField_dup(DEREF(src, VALUE));
      continue;
    }

    // Plain singular field: honor presence tracking when the field has it.
    if (field_contains_hasbit(layout, f)) {
      if (!slot_is_hasbit_set(layout, from, f)) {
        continue;
      }
      slot_set_hasbit(layout, to, f);
    }

    native_slot_dup(upb_fielddef_type(f), dst, src);
  }
}
946
// Copies the fields of the message stored at |from| into |to|, both laid out
// according to |layout|, using the *_deep_copy helpers
// (native_slot_deep_copy(), Map_deep_copy(), RepeatedField_deep_copy()).
//
// A oneof member is copied only when it is the active case in |from|, and a
// hasbit-tracked singular field only when its hasbit is set in |from|; in
// both skipped cases the slot in |to| is not written.
void layout_deep_copy(MessageLayout* layout, void* to, void* from) {
  upb_msg_field_iter field_it;

  for (upb_msg_field_begin(&field_it, layout->msgdef);
       !upb_msg_field_done(&field_it);
       upb_msg_field_next(&field_it)) {
    const upb_fielddef* f = upb_msg_iter_field(&field_it);

    void* dst = slot_memory(layout, to, f);
    uint32_t* dst_case = slot_oneof_case(layout, to, f);
    void* src = slot_memory(layout, from, f);
    uint32_t* src_case = slot_oneof_case(layout, from, f);

    if (upb_fielddef_containingoneof(f)) {
      if (*src_case == upb_fielddef_number(f)) {
        // Propagate the selected case along with its value.
        *dst_case = *src_case;
        native_slot_deep_copy(upb_fielddef_type(f), dst, src);
      }
    } else if (is_map_field(f)) {
      VALUE src_map = DEREF(src, VALUE);
      DEREF(dst, VALUE) = Map_deep_copy(src_map);
    } else if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
      VALUE src_list = DEREF(src, VALUE);
      DEREF(dst, VALUE) = RepeatedField_deep_copy(src_list);
    } else {
      const bool absent = field_contains_hasbit(layout, f) &&
                          !slot_is_hasbit_set(layout, from, f);
      if (absent) {
        continue;
      }
      if (field_contains_hasbit(layout, f)) {
        slot_set_hasbit(layout, to, f);
      }

      native_slot_deep_copy(upb_fielddef_type(f), dst, src);
    }
  }
}
980
layout_eq(MessageLayout * layout,void * msg1,void * msg2)981 VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) {
982 upb_msg_field_iter it;
983 for (upb_msg_field_begin(&it, layout->msgdef);
984 !upb_msg_field_done(&it);
985 upb_msg_field_next(&it)) {
986 const upb_fielddef* field = upb_msg_iter_field(&it);
987
988 void* msg1_memory = slot_memory(layout, msg1, field);
989 uint32_t* msg1_oneof_case = slot_oneof_case(layout, msg1, field);
990 void* msg2_memory = slot_memory(layout, msg2, field);
991 uint32_t* msg2_oneof_case = slot_oneof_case(layout, msg2, field);
992
993 if (upb_fielddef_containingoneof(field)) {
994 if (*msg1_oneof_case != *msg2_oneof_case ||
995 (*msg1_oneof_case == upb_fielddef_number(field) &&
996 !native_slot_eq(upb_fielddef_type(field),
997 msg1_memory,
998 msg2_memory))) {
999 return Qfalse;
1000 }
1001 } else if (is_map_field(field)) {
1002 if (!Map_eq(DEREF(msg1_memory, VALUE),
1003 DEREF(msg2_memory, VALUE))) {
1004 return Qfalse;
1005 }
1006 } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
1007 if (!RepeatedField_eq(DEREF(msg1_memory, VALUE),
1008 DEREF(msg2_memory, VALUE))) {
1009 return Qfalse;
1010 }
1011 } else {
1012 if (slot_is_hasbit_set(layout, msg1, field) !=
1013 slot_is_hasbit_set(layout, msg2, field) ||
1014 !native_slot_eq(upb_fielddef_type(field),
1015 msg1_memory, msg2_memory)) {
1016 return Qfalse;
1017 }
1018 }
1019 }
1020 return Qtrue;
1021 }
1022
layout_hash(MessageLayout * layout,void * storage)1023 VALUE layout_hash(MessageLayout* layout, void* storage) {
1024 upb_msg_field_iter it;
1025 st_index_t h = rb_hash_start(0);
1026 VALUE hash_sym = rb_intern("hash");
1027 for (upb_msg_field_begin(&it, layout->msgdef);
1028 !upb_msg_field_done(&it);
1029 upb_msg_field_next(&it)) {
1030 const upb_fielddef* field = upb_msg_iter_field(&it);
1031 VALUE field_val = layout_get(layout, storage, field);
1032 h = rb_hash_uint(h, NUM2LONG(rb_funcall(field_val, hash_sym, 0)));
1033 }
1034 h = rb_hash_end(h);
1035
1036 return INT2FIX(h);
1037 }
1038
layout_inspect(MessageLayout * layout,void * storage)1039 VALUE layout_inspect(MessageLayout* layout, void* storage) {
1040 VALUE str = rb_str_new2("");
1041
1042 upb_msg_field_iter it;
1043 bool first = true;
1044 for (upb_msg_field_begin(&it, layout->msgdef);
1045 !upb_msg_field_done(&it);
1046 upb_msg_field_next(&it)) {
1047 const upb_fielddef* field = upb_msg_iter_field(&it);
1048 VALUE field_val = layout_get(layout, storage, field);
1049
1050 if (!first) {
1051 str = rb_str_cat2(str, ", ");
1052 } else {
1053 first = false;
1054 }
1055 str = rb_str_cat2(str, upb_fielddef_name(field));
1056 str = rb_str_cat2(str, ": ");
1057
1058 str = rb_str_append(str, rb_funcall(field_val, rb_intern("inspect"), 0));
1059 }
1060
1061 return str;
1062 }
1063