• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // -----------------------------------------------------------------------------
9 // Ruby <-> upb data conversion functions.
10 //
11 // This file also contains a few other assorted algorithms on upb_MessageValue.
12 //
13 // None of the algorithms in this file require any access to the internal
14 // representation of Ruby or upb objects.
15 // -----------------------------------------------------------------------------
16 
17 #include "convert.h"
18 
19 #include "message.h"
20 #include "protobuf.h"
21 #include "shared_convert.h"
22 
Convert_StringData(VALUE str,upb_Arena * arena)23 static upb_StringView Convert_StringData(VALUE str, upb_Arena* arena) {
24   upb_StringView ret;
25   if (arena) {
26     char* ptr = upb_Arena_Malloc(arena, RSTRING_LEN(str));
27     memcpy(ptr, RSTRING_PTR(str), RSTRING_LEN(str));
28     ret.data = ptr;
29   } else {
30     // Data is only needed temporarily (within map lookup).
31     ret.data = RSTRING_PTR(str);
32   }
33   ret.size = RSTRING_LEN(str);
34   return ret;
35 }
36 
// Returns true when `value` is a Ruby Float, Fixnum or Bignum, judged by its
// TYPE() tag.
static bool is_ruby_num(VALUE value) {
  switch (TYPE(value)) {
    case T_FLOAT:
    case T_FIXNUM:
    case T_BIGNUM:
      return true;
    default:
      return false;
  }
}
41 
// Validates that `val` may be assigned to the integral field `name` whose
// upb type is `type`.  Returns normally when the value is acceptable.
//
// Raises:
//   - TypeError  if `val` is not a Ruby number (see is_ruby_num()).
//   - RangeError if `val` is a Float with a fractional part.
//   - RangeError if `type` is unsigned (UInt32/UInt64) and `val` is negative.
//
// Upper-bound range checks are left to the NUM2{INT,UINT,LL,ULL} macros used
// by the callers; only precision and sign are checked here.
static void Convert_CheckInt(const char* name, upb_CType type, VALUE val) {
  const bool field_is_unsigned =
      (type == kUpb_CType_UInt32) || (type == kUpb_CType_UInt64);

  if (!is_ruby_num(val)) {
    rb_raise(cTypeError,
             "Expected number type for integral field '%s' (given %s).", name,
             rb_class2name(CLASS_OF(val)));
  }

  // Precision check: a Float is only acceptable when it has no fractional
  // part, i.e. converting it would not round.
  if (TYPE(val) == T_FLOAT) {
    const double as_double = NUM2DBL(val);
    if (as_double != floor(as_double)) {
      rb_raise(rb_eRangeError,
               "Non-integral floating point value assigned to integer field "
               "'%s' (given %s).",
               name, rb_class2name(CLASS_OF(val)));
    }
  }

  // Sign check for unsigned destination types.
  if (field_is_unsigned && NUM2DBL(val) < 0) {
    rb_raise(
        rb_eRangeError,
        "Assigning negative value to unsigned integer field '%s' (given %s).",
        name, rb_class2name(CLASS_OF(val)));
  }
}
70 
// Converts the Ruby `value` into the int32 number of an enum value for the
// enum field `name` described by `e`.
//
// Accepted inputs:
//   - Float/Fixnum/Bignum: validated with Convert_CheckInt() as an Int32 and
//     converted with NUM2INT().
//   - String or Symbol: looked up by name in `e`.
//
// Raises:
//   - TypeError  for any other Ruby type.
//   - RangeError if a String/Symbol does not name a value of `e`.
static int32_t Convert_ToEnum(VALUE value, const char* name,
                              const upb_EnumDef* e) {
  const upb_EnumValueDef* found = NULL;

  switch (TYPE(value)) {
    case T_FLOAT:
    case T_FIXNUM:
    case T_BIGNUM:
      // Numeric input is used as the enum number directly.
      Convert_CheckInt(name, kUpb_CType_Int32, value);
      return NUM2INT(value);
    case T_STRING:
      found = upb_EnumDef_FindValueByNameWithSize(e, RSTRING_PTR(value),
                                                  RSTRING_LEN(value));
      break;
    case T_SYMBOL:
      found = upb_EnumDef_FindValueByName(e, rb_id2name(SYM2ID(value)));
      break;
    default:
      rb_raise(cTypeError,
               "Expected number or symbol type for enum field '%s'.", name);
  }

  // Only the by-name lookups reach this point.
  if (!found) {
    rb_raise(rb_eRangeError, "Unknown symbol value for enum field '%s'.",
             name);
  }
  return upb_EnumValueDef_Number(found);
}
106 
// Returns a Ruby string carrying the UTF-8 encoding for `str`.
//
// - If `str` is not tagged as UTF-8 it is transcoded with rb_str_encode(),
//   and the transcoded string is returned.  The transcoding itself may raise
//   (e.g. Encoding::UndefinedConversionError or
//   Encoding::InvalidByteSequenceError).  It does not duplicate the
//   underlying string data unless necessary.
// - If `str` is already tagged as UTF-8 it is returned unchanged.  A string
//   tagged as UTF-8 is not necessarily *valid* UTF-8, so its code range is
//   checked separately and a warning is emitted when it is broken.
VALUE Convert_CheckStringUtf8(VALUE str) {
  const VALUE utf8_enc = rb_enc_from_encoding(rb_utf8_encoding());

  if (rb_obj_encoding(str) != utf8_enc) {
    VALUE converted = rb_str_encode(str, utf8_enc, 0, Qnil);
    PBRUBY_ASSERT(rb_enc_str_coderange(converted) != ENC_CODERANGE_BROKEN);
    return converted;
  }

  if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
    // TODO: For now we only warn for this case.  The warning is to be removed
    // in the 30.x release in favour of raising
    // Encoding::InvalidByteSequenceError ("String is invalid UTF-8").
    rb_warn(
        "String is invalid UTF-8. This will be an error in a future "
        "version.");
  }

  return str;
}
141 
Convert_RubyToUpb(VALUE value,const char * name,TypeInfo type_info,upb_Arena * arena)142 upb_MessageValue Convert_RubyToUpb(VALUE value, const char* name,
143                                    TypeInfo type_info, upb_Arena* arena) {
144   upb_MessageValue ret;
145 
146   switch (type_info.type) {
147     case kUpb_CType_Float:
148       if (!is_ruby_num(value)) {
149         rb_raise(cTypeError,
150                  "Expected number type for float field '%s' (given %s).", name,
151                  rb_class2name(CLASS_OF(value)));
152       }
153       ret.float_val = NUM2DBL(value);
154       break;
155     case kUpb_CType_Double:
156       if (!is_ruby_num(value)) {
157         rb_raise(cTypeError,
158                  "Expected number type for double field '%s' (given %s).", name,
159                  rb_class2name(CLASS_OF(value)));
160       }
161       ret.double_val = NUM2DBL(value);
162       break;
163     case kUpb_CType_Bool: {
164       if (value == Qtrue) {
165         ret.bool_val = 1;
166       } else if (value == Qfalse) {
167         ret.bool_val = 0;
168       } else {
169         rb_raise(cTypeError,
170                  "Invalid argument for boolean field '%s' (given %s).", name,
171                  rb_class2name(CLASS_OF(value)));
172       }
173       break;
174     }
175     case kUpb_CType_String:
176       if (rb_obj_class(value) == rb_cSymbol) {
177         value = rb_funcall(value, rb_intern("to_s"), 0);
178       } else if (!rb_obj_is_kind_of(value, rb_cString)) {
179         rb_raise(cTypeError,
180                  "Invalid argument for string field '%s' (given %s).", name,
181                  rb_class2name(CLASS_OF(value)));
182       }
183 
184       value = Convert_CheckStringUtf8(value);
185       ret.str_val = Convert_StringData(value, arena);
186       break;
187     case kUpb_CType_Bytes: {
188       VALUE bytes = rb_enc_from_encoding(rb_ascii8bit_encoding());
189       if (rb_obj_class(value) != rb_cString) {
190         rb_raise(cTypeError,
191                  "Invalid argument for bytes field '%s' (given %s).", name,
192                  rb_class2name(CLASS_OF(value)));
193       }
194 
195       if (rb_obj_encoding(value) != bytes) {
196         // Note: this will not duplicate underlying string data unless
197         // necessary.
198         // TODO: is this really necessary to get raw bytes?
199         value = rb_str_encode(value, bytes, 0, Qnil);
200       }
201 
202       ret.str_val = Convert_StringData(value, arena);
203       break;
204     }
205     case kUpb_CType_Message:
206       ret.msg_val =
207           Message_GetUpbMessage(value, type_info.def.msgdef, name, arena);
208       break;
209     case kUpb_CType_Enum:
210       ret.int32_val = Convert_ToEnum(value, name, type_info.def.enumdef);
211       break;
212     case kUpb_CType_Int32:
213     case kUpb_CType_Int64:
214     case kUpb_CType_UInt32:
215     case kUpb_CType_UInt64:
216       Convert_CheckInt(name, type_info.type, value);
217       switch (type_info.type) {
218         case kUpb_CType_Int32:
219           ret.int32_val = NUM2INT(value);
220           break;
221         case kUpb_CType_Int64:
222           ret.int64_val = NUM2LL(value);
223           break;
224         case kUpb_CType_UInt32:
225           ret.uint32_val = NUM2UINT(value);
226           break;
227         case kUpb_CType_UInt64:
228           ret.uint64_val = NUM2ULL(value);
229           break;
230         default:
231           rb_raise(cTypeError, "Convert_RubyToUpb(): Unexpected type %d",
232                    (int)type_info.type);
233       }
234       break;
235     default:
236       rb_raise(cTypeError,
237                 "Convert_RubyToUpb(): Unexpected type %d", (int)type_info.type);
238   }
239 
240   return ret;
241 }
242 
// Converts `upb_val`, interpreted according to `type_info`, into a Ruby
// VALUE.
//
// - Numeric and bool types map to the corresponding Ruby numbers/booleans.
// - Enum values map to a Symbol when the number is known to the enum def,
//   otherwise to the raw integer.
// - String/bytes values become new frozen Ruby strings tagged as UTF-8 or
//   ASCII-8BIT respectively.
// - Message values are wrapped via Message_GetRubyWrapper(), to which `arena`
//   is forwarded.
//
// Raises RuntimeError when `type_info.type` is not one of the handled types.
VALUE Convert_UpbToRuby(upb_MessageValue upb_val, TypeInfo type_info,
                        VALUE arena) {
  switch (type_info.type) {
    case kUpb_CType_Float:
      return DBL2NUM(upb_val.float_val);
    case kUpb_CType_Double:
      return DBL2NUM(upb_val.double_val);
    case kUpb_CType_Bool:
      return upb_val.bool_val ? Qtrue : Qfalse;
    case kUpb_CType_Int32:
      return INT2NUM(upb_val.int32_val);
    case kUpb_CType_Int64:
      return LL2NUM(upb_val.int64_val);
    case kUpb_CType_UInt32:
      return UINT2NUM(upb_val.uint32_val);
    case kUpb_CType_UInt64:
      // Read the unsigned member that matches the field type (this is also
      // the member Convert_RubyToUpb() writes for UInt64 fields).
      return ULL2NUM(upb_val.uint64_val);
    case kUpb_CType_Enum: {
      const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNumber(
          type_info.def.enumdef, upb_val.int32_val);
      if (ev) {
        return ID2SYM(rb_intern(upb_EnumValueDef_Name(ev)));
      }
      // Number not known to the enum definition: expose it as an integer.
      return INT2NUM(upb_val.int32_val);
    }
    case kUpb_CType_String: {
      VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
      rb_enc_associate(str_rb, rb_utf8_encoding());
      rb_obj_freeze(str_rb);
      return str_rb;
    }
    case kUpb_CType_Bytes: {
      VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
      rb_enc_associate(str_rb, rb_ascii8bit_encoding());
      rb_obj_freeze(str_rb);
      return str_rb;
    }
    case kUpb_CType_Message:
      return Message_GetRubyWrapper((upb_Message*)upb_val.msg_val,
                                    type_info.def.msgdef, arena);
    default:
      rb_raise(rb_eRuntimeError, "Convert_UpbToRuby(): Unexpected type %d",
               (int)type_info.type);
  }
}
289 
// Returns a deep copy of `msgval`, interpreted according to `type_info`.
//
// - String/bytes: the bytes are copied into memory obtained from
//   upb_Arena_Malloc(arena, ...).
// - Message: copied via Message_deep_copy() using `arena`.
// - Every other type: the value is copied bitwise.
//
// Raises NoMemoryError if the arena allocation for a non-empty string/bytes
// value fails.
upb_MessageValue Msgval_DeepCopy(upb_MessageValue msgval, TypeInfo type_info,
                                 upb_Arena* arena) {
  upb_MessageValue new_msgval;

  switch (type_info.type) {
    case kUpb_CType_String:
    case kUpb_CType_Bytes: {
      const size_t n = msgval.str_val.size;
      char* mem = upb_Arena_Malloc(arena, n);
      // Skip the copy for empty views: neither pointer is required to be
      // valid when there is nothing to copy.
      if (n > 0) {
        if (!mem) {
          rb_raise(rb_eNoMemError,
                   "Failed to allocate arena memory for string data.");
        }
        memcpy(mem, msgval.str_val.data, n);
      }
      new_msgval.str_val.data = mem;
      new_msgval.str_val.size = n;
      break;
    }
    case kUpb_CType_Message:
      new_msgval.msg_val =
          Message_deep_copy(msgval.msg_val, type_info.def.msgdef, arena);
      break;
    default:
      // Scalar types: a bitwise copy of the whole value is sufficient.
      memcpy(&new_msgval, &msgval, sizeof(msgval));
      break;
  }

  return new_msgval;
}
315 
// Compares `val1` and `val2`, both interpreted according to `type_info`, by
// delegating to shared_Msgval_IsEqual().
//
// Returns the result of that comparison.  Raises RuntimeError carrying the
// upb status message if the shared implementation reports a failure.
bool Msgval_IsEqual(upb_MessageValue val1, upb_MessageValue val2,
                    TypeInfo type_info) {
  upb_Status status;
  upb_Status_Clear(&status);

  const bool equal = shared_Msgval_IsEqual(val1, val2, type_info.type,
                                           type_info.def.msgdef, &status);

  if (!upb_Status_IsOk(&status)) {
    rb_raise(rb_eRuntimeError, "Msgval_IsEqual(): %s",
             upb_Status_ErrorMessage(&status));
  }
  return equal;
}
329 
// Computes a hash of `val`, interpreted according to `type_info` and mixed
// with `seed`, by delegating to shared_Msgval_GetHash().
//
// Returns the resulting hash.  Raises RuntimeError carrying the upb status
// message if the shared implementation reports a failure.
uint64_t Msgval_GetHash(upb_MessageValue val, TypeInfo type_info,
                        uint64_t seed) {
  upb_Status status;
  upb_Status_Clear(&status);

  const uint64_t hash = shared_Msgval_GetHash(
      val, type_info.type, type_info.def.msgdef, seed, &status);

  if (!upb_Status_IsOk(&status)) {
    rb_raise(rb_eRuntimeError, "Msgval_GetHash(): %s",
             upb_Status_ErrorMessage(&status));
  }
  return hash;
}
343