1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // -----------------------------------------------------------------------------
9 // Ruby <-> upb data conversion functions.
10 //
// This file also contains a few other assorted algorithms on upb_MessageValue.
12 //
13 // None of the algorithms in this file require any access to the internal
14 // representation of Ruby or upb objects.
15 // -----------------------------------------------------------------------------
16
17 #include "convert.h"
18
19 #include "message.h"
20 #include "protobuf.h"
21 #include "shared_convert.h"
22
Convert_StringData(VALUE str,upb_Arena * arena)23 static upb_StringView Convert_StringData(VALUE str, upb_Arena* arena) {
24 upb_StringView ret;
25 if (arena) {
26 char* ptr = upb_Arena_Malloc(arena, RSTRING_LEN(str));
27 memcpy(ptr, RSTRING_PTR(str), RSTRING_LEN(str));
28 ret.data = ptr;
29 } else {
30 // Data is only needed temporarily (within map lookup).
31 ret.data = RSTRING_PTR(str);
32 }
33 ret.size = RSTRING_LEN(str);
34 return ret;
35 }
36
// Returns true when `value` is a Ruby Float, Fixnum or Bignum.
static bool is_ruby_num(VALUE value) {
  switch (TYPE(value)) {
    case T_FLOAT:
    case T_FIXNUM:
    case T_BIGNUM:
      return true;
    default:
      return false;
  }
}
41
// Validates that `val` may be stored in the integer field `name` of upb type
// `type`.
//
// Raises:
//   - cTypeError      if `val` is not a Ruby numeric (see is_ruby_num).
//   - rb_eRangeError  if `val` is a Float with a fractional part.
//   - rb_eRangeError  if `type` is unsigned (UInt32/UInt64) and `val` < 0.
//
// Upper-bound range checks are not done here: the NUM2{INT,UINT,LL,ULL}
// macros used by callers already perform them. Only precision (no rounding)
// and sign are checked.
static void Convert_CheckInt(const char* name, upb_CType type, VALUE val) {
  if (!is_ruby_num(val)) {
    rb_raise(cTypeError,
             "Expected number type for integral field '%s' (given %s).", name,
             rb_class2name(CLASS_OF(val)));
  }

  if (TYPE(val) == T_FLOAT) {
    const double as_double = NUM2DBL(val);
    const bool is_integral = (floor(as_double) == as_double);
    if (!is_integral) {
      rb_raise(rb_eRangeError,
               "Non-integral floating point value assigned to integer field "
               "'%s' (given %s).",
               name, rb_class2name(CLASS_OF(val)));
    }
  }

  const bool is_unsigned =
      (type == kUpb_CType_UInt32 || type == kUpb_CType_UInt64);
  if (is_unsigned && NUM2DBL(val) < 0) {
    rb_raise(
        rb_eRangeError,
        "Assigning negative value to unsigned integer field '%s' (given %s).",
        name, rb_class2name(CLASS_OF(val)));
  }
}
70
// Converts the Ruby object `value` into the int32 number of an enum value for
// the enum field `name` whose definition is `e`.
//
//   - Float/Fixnum/Bignum: checked with Convert_CheckInt() as an Int32 and
//     returned as-is; the number is not looked up in `e`.
//   - String / Symbol: looked up by name in `e`; rb_eRangeError is raised when
//     no enum value has that name.
//   - Anything else: cTypeError is raised.
static int32_t Convert_ToEnum(VALUE value, const char* name,
                              const upb_EnumDef* e) {
  const upb_EnumValueDef* found = NULL;

  switch (TYPE(value)) {
    case T_FLOAT:
    case T_FIXNUM:
    case T_BIGNUM:
      Convert_CheckInt(name, kUpb_CType_Int32, value);
      return NUM2INT(value);
    case T_STRING:
      found = upb_EnumDef_FindValueByNameWithSize(e, RSTRING_PTR(value),
                                                  RSTRING_LEN(value));
      break;
    case T_SYMBOL:
      found = upb_EnumDef_FindValueByName(e, rb_id2name(SYM2ID(value)));
      break;
    default:
      rb_raise(cTypeError,
               "Expected number or symbol type for enum field '%s'.", name);
  }

  // Only the name-based lookups (String/Symbol) reach this point.
  if (!found) {
    rb_raise(rb_eRangeError, "Unknown symbol value for enum field '%s'.", name);
  }
  return upb_EnumValueDef_Number(found);
}
106
// Returns a Ruby string carrying the contents of `str` in UTF-8 encoding.
//
// A string that is already tagged as UTF-8 is returned unchanged; if its
// bytes are not valid UTF-8 a warning is emitted. A string in any other
// encoding is transcoded with rb_str_encode() and the result is returned.
VALUE Convert_CheckStringUtf8(VALUE str) {
  VALUE utf8_enc = rb_enc_from_encoding(rb_utf8_encoding());

  if (rb_obj_encoding(str) != utf8_enc) {
    // Note: this will not duplicate underlying string data unless
    // necessary.
    //
    // This will throw an exception if the conversion cannot be performed:
    //   - Encoding::UndefinedConversionError if certain characters cannot be
    //     converted to UTF-8.
    //   - Encoding::InvalidByteSequenceError if certain characters were
    //     invalid in the source encoding.
    VALUE converted = rb_str_encode(str, utf8_enc, 0, Qnil);
    PBRUBY_ASSERT(rb_enc_str_coderange(converted) != ENC_CODERANGE_BROKEN);
    return converted;
  }

  // Note: Just because a string is marked as having UTF-8 encoding does
  // not mean that it is *valid* UTF-8. We have to check separately
  // whether it is valid.
  if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
    // TODO: For now we only warn for this case. In the 30.x release the
    // warning is to be removed and Encoding::InvalidByteSequenceError raised
    // instead, with the message "String is invalid UTF-8".
    rb_warn(
        "String is invalid UTF-8. This will be an error in a future "
        "version.");
  }

  return str;
}
141
Convert_RubyToUpb(VALUE value,const char * name,TypeInfo type_info,upb_Arena * arena)142 upb_MessageValue Convert_RubyToUpb(VALUE value, const char* name,
143 TypeInfo type_info, upb_Arena* arena) {
144 upb_MessageValue ret;
145
146 switch (type_info.type) {
147 case kUpb_CType_Float:
148 if (!is_ruby_num(value)) {
149 rb_raise(cTypeError,
150 "Expected number type for float field '%s' (given %s).", name,
151 rb_class2name(CLASS_OF(value)));
152 }
153 ret.float_val = NUM2DBL(value);
154 break;
155 case kUpb_CType_Double:
156 if (!is_ruby_num(value)) {
157 rb_raise(cTypeError,
158 "Expected number type for double field '%s' (given %s).", name,
159 rb_class2name(CLASS_OF(value)));
160 }
161 ret.double_val = NUM2DBL(value);
162 break;
163 case kUpb_CType_Bool: {
164 if (value == Qtrue) {
165 ret.bool_val = 1;
166 } else if (value == Qfalse) {
167 ret.bool_val = 0;
168 } else {
169 rb_raise(cTypeError,
170 "Invalid argument for boolean field '%s' (given %s).", name,
171 rb_class2name(CLASS_OF(value)));
172 }
173 break;
174 }
175 case kUpb_CType_String:
176 if (rb_obj_class(value) == rb_cSymbol) {
177 value = rb_funcall(value, rb_intern("to_s"), 0);
178 } else if (!rb_obj_is_kind_of(value, rb_cString)) {
179 rb_raise(cTypeError,
180 "Invalid argument for string field '%s' (given %s).", name,
181 rb_class2name(CLASS_OF(value)));
182 }
183
184 value = Convert_CheckStringUtf8(value);
185 ret.str_val = Convert_StringData(value, arena);
186 break;
187 case kUpb_CType_Bytes: {
188 VALUE bytes = rb_enc_from_encoding(rb_ascii8bit_encoding());
189 if (rb_obj_class(value) != rb_cString) {
190 rb_raise(cTypeError,
191 "Invalid argument for bytes field '%s' (given %s).", name,
192 rb_class2name(CLASS_OF(value)));
193 }
194
195 if (rb_obj_encoding(value) != bytes) {
196 // Note: this will not duplicate underlying string data unless
197 // necessary.
198 // TODO: is this really necessary to get raw bytes?
199 value = rb_str_encode(value, bytes, 0, Qnil);
200 }
201
202 ret.str_val = Convert_StringData(value, arena);
203 break;
204 }
205 case kUpb_CType_Message:
206 ret.msg_val =
207 Message_GetUpbMessage(value, type_info.def.msgdef, name, arena);
208 break;
209 case kUpb_CType_Enum:
210 ret.int32_val = Convert_ToEnum(value, name, type_info.def.enumdef);
211 break;
212 case kUpb_CType_Int32:
213 case kUpb_CType_Int64:
214 case kUpb_CType_UInt32:
215 case kUpb_CType_UInt64:
216 Convert_CheckInt(name, type_info.type, value);
217 switch (type_info.type) {
218 case kUpb_CType_Int32:
219 ret.int32_val = NUM2INT(value);
220 break;
221 case kUpb_CType_Int64:
222 ret.int64_val = NUM2LL(value);
223 break;
224 case kUpb_CType_UInt32:
225 ret.uint32_val = NUM2UINT(value);
226 break;
227 case kUpb_CType_UInt64:
228 ret.uint64_val = NUM2ULL(value);
229 break;
230 default:
231 rb_raise(cTypeError, "Convert_RubyToUpb(): Unexpected type %d",
232 (int)type_info.type);
233 }
234 break;
235 default:
236 rb_raise(cTypeError,
237 "Convert_RubyToUpb(): Unexpected type %d", (int)type_info.type);
238 }
239
240 return ret;
241 }
242
// Converts the upb value `upb_val`, interpreted according to `type_info`, into
// a Ruby object.
//
//   - Float/Double -> Float; Bool -> true/false; integer types -> Integer.
//   - Enum -> the Symbol naming the enum value when the number is found in the
//     enum definition, otherwise the raw number as an Integer.
//   - String -> a new frozen String tagged UTF-8.
//   - Bytes  -> a new frozen String tagged ASCII-8BIT.
//   - Message -> the result of Message_GetRubyWrapper(), which receives
//     `arena`.
//
// Raises rb_eRuntimeError for any other `type_info.type`.
VALUE Convert_UpbToRuby(upb_MessageValue upb_val, TypeInfo type_info,
                        VALUE arena) {
  switch (type_info.type) {
    case kUpb_CType_Float:
      return DBL2NUM(upb_val.float_val);
    case kUpb_CType_Double:
      return DBL2NUM(upb_val.double_val);
    case kUpb_CType_Bool:
      return upb_val.bool_val ? Qtrue : Qfalse;
    case kUpb_CType_Int32:
      return INT2NUM(upb_val.int32_val);
    case kUpb_CType_Int64:
      return LL2NUM(upb_val.int64_val);
    case kUpb_CType_UInt32:
      return UINT2NUM(upb_val.uint32_val);
    case kUpb_CType_UInt64:
      // Read the unsigned member, mirroring the `uint64_val` write done when
      // converting from Ruby, instead of reading the signed member and relying
      // on an implicit signed -> unsigned conversion.
      return ULL2NUM(upb_val.uint64_val);
    case kUpb_CType_Enum: {
      const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNumber(
          type_info.def.enumdef, upb_val.int32_val);
      if (ev) {
        return ID2SYM(rb_intern(upb_EnumValueDef_Name(ev)));
      } else {
        // Number not present in the enum definition: surface it numerically.
        return INT2NUM(upb_val.int32_val);
      }
    }
    case kUpb_CType_String: {
      VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
      rb_enc_associate(str_rb, rb_utf8_encoding());
      rb_obj_freeze(str_rb);
      return str_rb;
    }
    case kUpb_CType_Bytes: {
      VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
      rb_enc_associate(str_rb, rb_ascii8bit_encoding());
      rb_obj_freeze(str_rb);
      return str_rb;
    }
    case kUpb_CType_Message:
      return Message_GetRubyWrapper((upb_Message*)upb_val.msg_val,
                                    type_info.def.msgdef, arena);
    default:
      rb_raise(rb_eRuntimeError, "Convert_UpbToRuby(): Unexpected type %d",
               (int)type_info.type);
  }
}
289
// Returns a copy of `msgval`, interpreted according to `type_info`.
//
//   - String/Bytes: the bytes are duplicated into memory allocated from
//     `arena`, and the returned view points at the duplicate.
//   - Message: delegated to Message_deep_copy(), which receives `arena`.
//   - Any other type: the value is copied as-is.
upb_MessageValue Msgval_DeepCopy(upb_MessageValue msgval, TypeInfo type_info,
                                 upb_Arena* arena) {
  upb_MessageValue new_msgval;

  switch (type_info.type) {
    default:
      memcpy(&new_msgval, &msgval, sizeof(msgval));
      break;
    case kUpb_CType_String:
    case kUpb_CType_Bytes: {
      size_t n = msgval.str_val.size;
      char* mem = upb_Arena_Malloc(arena, n);
      new_msgval.str_val.data = mem;
      new_msgval.str_val.size = n;
      // memcpy() requires valid pointers even for a zero-length copy, and an
      // empty string view is not guaranteed to carry one, so skip the call
      // when there is nothing to copy.
      if (n > 0) {
        memcpy(mem, msgval.str_val.data, n);
      }
      break;
    }
    case kUpb_CType_Message:
      new_msgval.msg_val =
          Message_deep_copy(msgval.msg_val, type_info.def.msgdef, arena);
      break;
  }

  return new_msgval;
}
315
// Compares `val1` and `val2`, both interpreted according to `type_info`, by
// delegating to shared_Msgval_IsEqual().
//
// Returns the delegate's result. Raises rb_eRuntimeError carrying the upb
// status message when the delegate reports an error.
bool Msgval_IsEqual(upb_MessageValue val1, upb_MessageValue val2,
                    TypeInfo type_info) {
  upb_Status st;
  upb_Status_Clear(&st);

  const bool equal = shared_Msgval_IsEqual(val1, val2, type_info.type,
                                           type_info.def.msgdef, &st);
  if (!upb_Status_IsOk(&st)) {
    rb_raise(rb_eRuntimeError, "Msgval_IsEqual(): %s",
             upb_Status_ErrorMessage(&st));
  }
  return equal;
}
329
// Computes a hash of `val`, interpreted according to `type_info`, by
// delegating to shared_Msgval_GetHash() with the given `seed`.
//
// Returns the delegate's result. Raises rb_eRuntimeError carrying the upb
// status message when the delegate reports an error.
uint64_t Msgval_GetHash(upb_MessageValue val, TypeInfo type_info,
                        uint64_t seed) {
  upb_Status st;
  upb_Status_Clear(&st);

  const uint64_t hash = shared_Msgval_GetHash(val, type_info.type,
                                              type_info.def.msgdef, seed, &st);
  if (!upb_Status_IsOk(&st)) {
    rb_raise(rb_eRuntimeError, "Msgval_GetHash(): %s",
             upb_Status_ErrorMessage(&st));
  }
  return hash;
}
343