1
2 #include <memory>
3
4 #include "absl/container/flat_hash_map.h"
5 #include "absl/strings/ascii.h"
6 #include "absl/strings/substitute.h"
7 #include "google/protobuf/compiler/code_generator.h"
8 #include "google/protobuf/compiler/plugin.h"
9 #include "google/protobuf/descriptor.h"
10 #include "google/protobuf/descriptor.pb.h"
11 #include "google/protobuf/wire_format.h"
12 #include "upbc/common.h"
13 #include "upbc/message_layout.h"
14
15 namespace upbc {
16 namespace {
17
// Short aliases for the protobuf namespaces referenced throughout this file:
// `protoc` for the compiler/plugin API and `protobuf` for descriptors.
namespace protoc = ::google::protobuf::compiler;
namespace protobuf = ::google::protobuf;
20
HeaderFilename(std::string proto_filename)21 std::string HeaderFilename(std::string proto_filename) {
22 return StripExtension(proto_filename) + ".upb.h";
23 }
24
SourceFilename(std::string proto_filename)25 std::string SourceFilename(std::string proto_filename) {
26 return StripExtension(proto_filename) + ".upb.c";
27 }
28
AddEnums(const protobuf::Descriptor * message,std::vector<const protobuf::EnumDescriptor * > * enums)29 void AddEnums(const protobuf::Descriptor* message,
30 std::vector<const protobuf::EnumDescriptor*>* enums) {
31 for (int i = 0; i < message->enum_type_count(); i++) {
32 enums->push_back(message->enum_type(i));
33 }
34 for (int i = 0; i < message->nested_type_count(); i++) {
35 AddEnums(message->nested_type(i), enums);
36 }
37 }
38
// Sorts `defs` in place into ascending order of each element's full_name().
// T must be a pointer-like type whose pointee provides full_name().
template <class T>
void SortDefs(std::vector<T>* defs) {
  const auto by_full_name = [](T lhs, T rhs) {
    return lhs->full_name() < rhs->full_name();
  };
  std::sort(defs->begin(), defs->end(), by_full_name);
}
44
SortedEnums(const protobuf::FileDescriptor * file)45 std::vector<const protobuf::EnumDescriptor*> SortedEnums(
46 const protobuf::FileDescriptor* file) {
47 std::vector<const protobuf::EnumDescriptor*> enums;
48 for (int i = 0; i < file->enum_type_count(); i++) {
49 enums.push_back(file->enum_type(i));
50 }
51 for (int i = 0; i < file->message_type_count(); i++) {
52 AddEnums(file->message_type(i), &enums);
53 }
54 SortDefs(&enums);
55 return enums;
56 }
57
FieldNumberOrder(const protobuf::Descriptor * message)58 std::vector<const protobuf::FieldDescriptor*> FieldNumberOrder(
59 const protobuf::Descriptor* message) {
60 std::vector<const protobuf::FieldDescriptor*> fields;
61 for (int i = 0; i < message->field_count(); i++) {
62 fields.push_back(message->field(i));
63 }
64 std::sort(fields.begin(), fields.end(),
65 [](const protobuf::FieldDescriptor* a,
66 const protobuf::FieldDescriptor* b) {
67 return a->number() < b->number();
68 });
69 return fields;
70 }
71
SortedSubmessages(const protobuf::Descriptor * message)72 std::vector<const protobuf::FieldDescriptor*> SortedSubmessages(
73 const protobuf::Descriptor* message) {
74 std::vector<const protobuf::FieldDescriptor*> ret;
75 for (int i = 0; i < message->field_count(); i++) {
76 if (message->field(i)->cpp_type() ==
77 protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
78 ret.push_back(message->field(i));
79 }
80 }
81 std::sort(ret.begin(), ret.end(),
82 [](const protobuf::FieldDescriptor* a,
83 const protobuf::FieldDescriptor* b) {
84 return a->message_type()->full_name() <
85 b->message_type()->full_name();
86 });
87 return ret;
88 }
89
EnumValueSymbol(const protobuf::EnumValueDescriptor * value)90 std::string EnumValueSymbol(const protobuf::EnumValueDescriptor* value) {
91 return ToCIdent(value->full_name());
92 }
93
GetSizeInit(const MessageLayout::Size & size)94 std::string GetSizeInit(const MessageLayout::Size& size) {
95 return absl::Substitute("UPB_SIZE($0, $1)", size.size32, size.size64);
96 }
97
CTypeInternal(const protobuf::FieldDescriptor * field,bool is_const)98 std::string CTypeInternal(const protobuf::FieldDescriptor* field,
99 bool is_const) {
100 std::string maybe_const = is_const ? "const " : "";
101 switch (field->cpp_type()) {
102 case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: {
103 std::string maybe_struct =
104 field->file() != field->message_type()->file() ? "struct " : "";
105 return maybe_const + maybe_struct + MessageName(field->message_type()) +
106 "*";
107 }
108 case protobuf::FieldDescriptor::CPPTYPE_BOOL:
109 return "bool";
110 case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
111 return "float";
112 case protobuf::FieldDescriptor::CPPTYPE_INT32:
113 case protobuf::FieldDescriptor::CPPTYPE_ENUM:
114 return "int32_t";
115 case protobuf::FieldDescriptor::CPPTYPE_UINT32:
116 return "uint32_t";
117 case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
118 return "double";
119 case protobuf::FieldDescriptor::CPPTYPE_INT64:
120 return "int64_t";
121 case protobuf::FieldDescriptor::CPPTYPE_UINT64:
122 return "uint64_t";
123 case protobuf::FieldDescriptor::CPPTYPE_STRING:
124 return "upb_strview";
125 default:
126 fprintf(stderr, "Unexpected type");
127 abort();
128 }
129 }
130
SizeLg2(const protobuf::FieldDescriptor * field)131 std::string SizeLg2(const protobuf::FieldDescriptor* field) {
132 switch (field->cpp_type()) {
133 case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
134 return "UPB_SIZE(2, 3)";
135 case protobuf::FieldDescriptor::CPPTYPE_ENUM:
136 return std::to_string(2);
137 case protobuf::FieldDescriptor::CPPTYPE_BOOL:
138 return std::to_string(1);
139 case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
140 return std::to_string(2);
141 case protobuf::FieldDescriptor::CPPTYPE_INT32:
142 return std::to_string(2);
143 case protobuf::FieldDescriptor::CPPTYPE_UINT32:
144 return std::to_string(2);
145 case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
146 return std::to_string(3);
147 case protobuf::FieldDescriptor::CPPTYPE_INT64:
148 return std::to_string(3);
149 case protobuf::FieldDescriptor::CPPTYPE_UINT64:
150 return std::to_string(3);
151 case protobuf::FieldDescriptor::CPPTYPE_STRING:
152 return "UPB_SIZE(3, 4)";
153 default:
154 fprintf(stderr, "Unexpected type");
155 abort();
156 }
157 }
158
FieldDefault(const protobuf::FieldDescriptor * field)159 std::string FieldDefault(const protobuf::FieldDescriptor* field) {
160 switch (field->cpp_type()) {
161 case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
162 return "NULL";
163 case protobuf::FieldDescriptor::CPPTYPE_STRING:
164 return absl::Substitute("upb_strview_make(\"$0\", strlen(\"$0\"))",
165 absl::CEscape(field->default_value_string()));
166 case protobuf::FieldDescriptor::CPPTYPE_INT32:
167 return absl::StrCat(field->default_value_int32());
168 case protobuf::FieldDescriptor::CPPTYPE_INT64:
169 return absl::StrCat(field->default_value_int64());
170 case protobuf::FieldDescriptor::CPPTYPE_UINT32:
171 return absl::StrCat(field->default_value_uint32());
172 case protobuf::FieldDescriptor::CPPTYPE_UINT64:
173 return absl::StrCat(field->default_value_uint64());
174 case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
175 return absl::StrCat(field->default_value_float());
176 case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
177 return absl::StrCat(field->default_value_double());
178 case protobuf::FieldDescriptor::CPPTYPE_BOOL:
179 return field->default_value_bool() ? "true" : "false";
180 case protobuf::FieldDescriptor::CPPTYPE_ENUM:
181 // Use a number instead of a symbolic name so that we don't require
182 // this enum's header to be included.
183 return absl::StrCat(field->default_value_enum()->number());
184 }
185 ABSL_ASSERT(false);
186 return "XXX";
187 }
188
CType(const protobuf::FieldDescriptor * field)189 std::string CType(const protobuf::FieldDescriptor* field) {
190 return CTypeInternal(field, false);
191 }
192
CTypeConst(const protobuf::FieldDescriptor * field)193 std::string CTypeConst(const protobuf::FieldDescriptor* field) {
194 return CTypeInternal(field, true);
195 }
196
DumpEnumValues(const protobuf::EnumDescriptor * desc,Output & output)197 void DumpEnumValues(const protobuf::EnumDescriptor* desc, Output& output) {
198 std::vector<const protobuf::EnumValueDescriptor*> values;
199 for (int i = 0; i < desc->value_count(); i++) {
200 values.push_back(desc->value(i));
201 }
202 std::sort(values.begin(), values.end(),
203 [](const protobuf::EnumValueDescriptor* a,
204 const protobuf::EnumValueDescriptor* b) {
205 return a->number() < b->number();
206 });
207
208 for (size_t i = 0; i < values.size(); i++) {
209 auto value = values[i];
210 output(" $0 = $1", EnumValueSymbol(value), value->number());
211 if (i != values.size() - 1) {
212 output(",");
213 }
214 output("\n");
215 }
216 }
217
GenerateMessageInHeader(const protobuf::Descriptor * message,Output & output)218 void GenerateMessageInHeader(const protobuf::Descriptor* message, Output& output) {
219 MessageLayout layout(message);
220
221 output("/* $0 */\n\n", message->full_name());
222 std::string msgname = ToCIdent(message->full_name());
223
224 if (!message->options().map_entry()) {
225 output(
226 "UPB_INLINE $0 *$0_new(upb_arena *arena) {\n"
227 " return ($0 *)_upb_msg_new(&$1, arena);\n"
228 "}\n"
229 "UPB_INLINE $0 *$0_parse(const char *buf, size_t size,\n"
230 " upb_arena *arena) {\n"
231 " $0 *ret = $0_new(arena);\n"
232 " return (ret && upb_decode(buf, size, ret, &$1, arena)) ? ret : NULL;\n"
233 "}\n"
234 "UPB_INLINE $0 *$0_parse_ex(const char *buf, size_t size,\n"
235 " upb_arena *arena, int options) {\n"
236 " $0 *ret = $0_new(arena);\n"
237 " return (ret && _upb_decode(buf, size, ret, &$1, arena, options))\n"
238 " ? ret : NULL;\n"
239 "}\n"
240 "UPB_INLINE char *$0_serialize(const $0 *msg, upb_arena *arena, size_t "
241 "*len) {\n"
242 " return upb_encode(msg, &$1, arena, len);\n"
243 "}\n"
244 "\n",
245 MessageName(message), MessageInit(message));
246 }
247
248 for (int i = 0; i < message->real_oneof_decl_count(); i++) {
249 const protobuf::OneofDescriptor* oneof = message->oneof_decl(i);
250 std::string fullname = ToCIdent(oneof->full_name());
251 output("typedef enum {\n");
252 for (int j = 0; j < oneof->field_count(); j++) {
253 const protobuf::FieldDescriptor* field = oneof->field(j);
254 output(" $0_$1 = $2,\n", fullname, field->name(), field->number());
255 }
256 output(
257 " $0_NOT_SET = 0\n"
258 "} $0_oneofcases;\n",
259 fullname);
260 output(
261 "UPB_INLINE $0_oneofcases $1_$2_case(const $1* msg) { "
262 "return ($0_oneofcases)*UPB_PTR_AT(msg, $3, int32_t); }\n"
263 "\n",
264 fullname, msgname, oneof->name(),
265 GetSizeInit(layout.GetOneofCaseOffset(oneof)));
266 }
267
268 // Generate const methods.
269
270 for (auto field : FieldNumberOrder(message)) {
271 // Generate hazzer (if any).
272 if (layout.HasHasbit(field)) {
273 output(
274 "UPB_INLINE bool $0_has_$1(const $0 *msg) { "
275 "return _upb_hasbit(msg, $2); }\n",
276 msgname, field->name(), layout.GetHasbitIndex(field));
277 } else if (field->real_containing_oneof()) {
278 output(
279 "UPB_INLINE bool $0_has_$1(const $0 *msg) { "
280 "return _upb_getoneofcase(msg, $2) == $3; }\n",
281 msgname, field->name(),
282 GetSizeInit(
283 layout.GetOneofCaseOffset(field->real_containing_oneof())),
284 field->number());
285 } else if (field->message_type()) {
286 output(
287 "UPB_INLINE bool $0_has_$1(const $0 *msg) { "
288 "return _upb_has_submsg_nohasbit(msg, $2); }\n",
289 msgname, field->name(), GetSizeInit(layout.GetFieldOffset(field)));
290 }
291
292 // Generate getter.
293 if (field->is_map()) {
294 const protobuf::Descriptor* entry = field->message_type();
295 const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1);
296 const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2);
297 output(
298 "UPB_INLINE size_t $0_$1_size(const $0 *msg) {"
299 "return _upb_msg_map_size(msg, $2); }\n",
300 msgname, field->name(), GetSizeInit(layout.GetFieldOffset(field)));
301 output(
302 "UPB_INLINE bool $0_$1_get(const $0 *msg, $2 key, $3 *val) { "
303 "return _upb_msg_map_get(msg, $4, &key, $5, val, $6); }\n",
304 msgname, field->name(), CType(key), CType(val),
305 GetSizeInit(layout.GetFieldOffset(field)),
306 key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
307 ? "0"
308 : "sizeof(key)",
309 val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
310 ? "0"
311 : "sizeof(*val)");
312 output(
313 "UPB_INLINE $0 $1_$2_next(const $1 *msg, size_t* iter) { "
314 "return ($0)_upb_msg_map_next(msg, $3, iter); }\n",
315 CTypeConst(field), msgname, field->name(),
316 GetSizeInit(layout.GetFieldOffset(field)));
317 } else if (message->options().map_entry()) {
318 output(
319 "UPB_INLINE $0 $1_$2(const $1 *msg) {\n"
320 " $3 ret;\n"
321 " _upb_msg_map_$2(msg, &ret, $4);\n"
322 " return ret;\n"
323 "}\n",
324 CTypeConst(field), msgname, field->name(), CType(field),
325 field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
326 ? "0"
327 : "sizeof(ret)");
328 } else if (field->is_repeated()) {
329 output(
330 "UPB_INLINE $0 const* $1_$2(const $1 *msg, size_t *len) { "
331 "return ($0 const*)_upb_array_accessor(msg, $3, len); }\n",
332 CTypeConst(field), msgname, field->name(),
333 GetSizeInit(layout.GetFieldOffset(field)));
334 } else if (field->real_containing_oneof()) {
335 output(
336 "UPB_INLINE $0 $1_$2(const $1 *msg) { "
337 "return UPB_READ_ONEOF(msg, $0, $3, $4, $5, $6); }\n",
338 CTypeConst(field), msgname, field->name(),
339 GetSizeInit(layout.GetFieldOffset(field)),
340 GetSizeInit(layout.GetOneofCaseOffset(field->real_containing_oneof())),
341 field->number(), FieldDefault(field));
342 } else {
343 output(
344 "UPB_INLINE $0 $1_$2(const $1 *msg) { "
345 "return *UPB_PTR_AT(msg, $3, $0); }\n",
346 CTypeConst(field), msgname, field->name(),
347 GetSizeInit(layout.GetFieldOffset(field)));
348 }
349 }
350
351 output("\n");
352
353 // Generate mutable methods.
354
355 for (auto field : FieldNumberOrder(message)) {
356 if (field->is_map()) {
357 // TODO(haberman): add map-based mutators.
358 const protobuf::Descriptor* entry = field->message_type();
359 const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1);
360 const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2);
361 output(
362 "UPB_INLINE void $0_$1_clear($0 *msg) { _upb_msg_map_clear(msg, $2); }\n",
363 msgname, field->name(),
364 GetSizeInit(layout.GetFieldOffset(field)));
365 output(
366 "UPB_INLINE bool $0_$1_set($0 *msg, $2 key, $3 val, upb_arena *a) { "
367 "return _upb_msg_map_set(msg, $4, &key, $5, &val, $6, a); }\n",
368 msgname, field->name(), CType(key), CType(val),
369 GetSizeInit(layout.GetFieldOffset(field)),
370 key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
371 ? "0"
372 : "sizeof(key)",
373 val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
374 ? "0"
375 : "sizeof(val)");
376 output(
377 "UPB_INLINE bool $0_$1_delete($0 *msg, $2 key) { "
378 "return _upb_msg_map_delete(msg, $3, &key, $4); }\n",
379 msgname, field->name(), CType(key),
380 GetSizeInit(layout.GetFieldOffset(field)),
381 key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
382 ? "0"
383 : "sizeof(key)");
384 output(
385 "UPB_INLINE $0 $1_$2_nextmutable($1 *msg, size_t* iter) { "
386 "return ($0)_upb_msg_map_next(msg, $3, iter); }\n",
387 CType(field), msgname, field->name(),
388 GetSizeInit(layout.GetFieldOffset(field)));
389 } else if (field->is_repeated()) {
390 output(
391 "UPB_INLINE $0* $1_mutable_$2($1 *msg, size_t *len) {\n"
392 " return ($0*)_upb_array_mutable_accessor(msg, $3, len);\n"
393 "}\n",
394 CType(field), msgname, field->name(),
395 GetSizeInit(layout.GetFieldOffset(field)));
396 output(
397 "UPB_INLINE $0* $1_resize_$2($1 *msg, size_t len, "
398 "upb_arena *arena) {\n"
399 " return ($0*)_upb_array_resize_accessor2(msg, $3, len, $4, arena);\n"
400 "}\n",
401 CType(field), msgname, field->name(),
402 GetSizeInit(layout.GetFieldOffset(field)),
403 SizeLg2(field));
404 if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
405 output(
406 "UPB_INLINE struct $0* $1_add_$2($1 *msg, upb_arena *arena) {\n"
407 " struct $0* sub = (struct $0*)_upb_msg_new(&$3, arena);\n"
408 " bool ok = _upb_array_append_accessor2(\n"
409 " msg, $4, $5, &sub, arena);\n"
410 " if (!ok) return NULL;\n"
411 " return sub;\n"
412 "}\n",
413 MessageName(field->message_type()), msgname, field->name(),
414 MessageInit(field->message_type()),
415 GetSizeInit(layout.GetFieldOffset(field)),
416 SizeLg2(field));
417 } else {
418 output(
419 "UPB_INLINE bool $1_add_$2($1 *msg, $0 val, upb_arena *arena) {\n"
420 " return _upb_array_append_accessor2(msg, $3, $4, &val,\n"
421 " arena);\n"
422 "}\n",
423 CType(field), msgname, field->name(),
424 GetSizeInit(layout.GetFieldOffset(field)),
425 SizeLg2(field));
426 }
427 } else {
428 // Non-repeated field.
429 if (message->options().map_entry() && field->name() == "key") {
430 // Key cannot be mutated.
431 continue;
432 }
433
434 // The common function signature for all setters. Varying implementations
435 // follow.
436 output("UPB_INLINE void $0_set_$1($0 *msg, $2 value) {\n", msgname,
437 field->name(), CType(field));
438
439 if (message->options().map_entry()) {
440 output(
441 " _upb_msg_map_set_value(msg, &value, $0);\n"
442 "}\n",
443 field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
444 ? "0"
445 : "sizeof(" + CType(field) + ")");
446 } else if (field->real_containing_oneof()) {
447 output(
448 " UPB_WRITE_ONEOF(msg, $0, $1, value, $2, $3);\n"
449 "}\n",
450 CType(field), GetSizeInit(layout.GetFieldOffset(field)),
451 GetSizeInit(
452 layout.GetOneofCaseOffset(field->real_containing_oneof())),
453 field->number());
454 } else {
455 if (MessageLayout::HasHasbit(field)) {
456 output(" _upb_sethas(msg, $0);\n", layout.GetHasbitIndex(field));
457 }
458 output(
459 " *UPB_PTR_AT(msg, $1, $0) = value;\n"
460 "}\n",
461 CType(field), GetSizeInit(layout.GetFieldOffset(field)));
462 }
463
464 if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE &&
465 !message->options().map_entry()) {
466 output(
467 "UPB_INLINE struct $0* $1_mutable_$2($1 *msg, upb_arena *arena) {\n"
468 " struct $0* sub = (struct $0*)$1_$2(msg);\n"
469 " if (sub == NULL) {\n"
470 " sub = (struct $0*)_upb_msg_new(&$3, arena);\n"
471 " if (!sub) return NULL;\n"
472 " $1_set_$2(msg, sub);\n"
473 " }\n"
474 " return sub;\n"
475 "}\n",
476 MessageName(field->message_type()), msgname, field->name(),
477 MessageInit(field->message_type()));
478 }
479 }
480 }
481
482 output("\n");
483 }
484
WriteHeader(const protobuf::FileDescriptor * file,Output & output)485 void WriteHeader(const protobuf::FileDescriptor* file, Output& output) {
486 EmitFileWarning(file, output);
487 output(
488 "#ifndef $0_UPB_H_\n"
489 "#define $0_UPB_H_\n\n"
490 "#include \"upb/msg.h\"\n"
491 "#include \"upb/decode.h\"\n"
492 "#include \"upb/decode_fast.h\"\n"
493 "#include \"upb/encode.h\"\n\n",
494 ToPreproc(file->name()));
495
496 for (int i = 0; i < file->public_dependency_count(); i++) {
497 const auto& name = file->public_dependency(i)->name();
498 if (i == 0) {
499 output("/* Public Imports. */\n");
500 }
501 output("#include \"$0\"\n", HeaderFilename(name));
502 if (i == file->public_dependency_count() - 1) {
503 output("\n");
504 }
505 }
506
507 output(
508 "#include \"upb/port_def.inc\"\n"
509 "\n"
510 "#ifdef __cplusplus\n"
511 "extern \"C\" {\n"
512 "#endif\n"
513 "\n");
514
515 std::vector<const protobuf::Descriptor*> this_file_messages =
516 SortedMessages(file);
517
518 // Forward-declare types defined in this file.
519 for (auto message : this_file_messages) {
520 output("struct $0;\n", ToCIdent(message->full_name()));
521 }
522 for (auto message : this_file_messages) {
523 output("typedef struct $0 $0;\n", ToCIdent(message->full_name()));
524 }
525 for (auto message : this_file_messages) {
526 output("extern const upb_msglayout $0;\n", MessageInit(message));
527 }
528
529 // Forward-declare types not in this file, but used as submessages.
530 // Order by full name for consistent ordering.
531 std::map<std::string, const protobuf::Descriptor*> forward_messages;
532
533 for (auto message : SortedMessages(file)) {
534 for (int i = 0; i < message->field_count(); i++) {
535 const protobuf::FieldDescriptor* field = message->field(i);
536 if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE &&
537 field->file() != field->message_type()->file()) {
538 forward_messages[field->message_type()->full_name()] =
539 field->message_type();
540 }
541 }
542 }
543 for (const auto& pair : forward_messages) {
544 output("struct $0;\n", MessageName(pair.second));
545 }
546 for (const auto& pair : forward_messages) {
547 output("extern const upb_msglayout $0;\n", MessageInit(pair.second));
548 }
549
550 if (!this_file_messages.empty()) {
551 output("\n");
552 }
553
554 std::vector<const protobuf::EnumDescriptor*> this_file_enums =
555 SortedEnums(file);
556
557 for (auto enumdesc : this_file_enums) {
558 output("typedef enum {\n");
559 DumpEnumValues(enumdesc, output);
560 output("} $0;\n\n", ToCIdent(enumdesc->full_name()));
561 }
562
563 output("\n");
564
565 for (auto message : this_file_messages) {
566 GenerateMessageInHeader(message, output);
567 }
568
569 output(
570 "#ifdef __cplusplus\n"
571 "} /* extern \"C\" */\n"
572 "#endif\n"
573 "\n"
574 "#include \"upb/port_undef.inc\"\n"
575 "\n"
576 "#endif /* $0_UPB_H_ */\n",
577 ToPreproc(file->name()));
578 }
579
TableDescriptorType(const protobuf::FieldDescriptor * field)580 int TableDescriptorType(const protobuf::FieldDescriptor* field) {
581 if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2 &&
582 field->type() == protobuf::FieldDescriptor::TYPE_STRING) {
583 // From the perspective of the binary encoder/decoder, proto2 string fields
584 // are identical to bytes fields. Only in proto3 do we check UTF-8 for
585 // string fields at parse time.
586 //
587 // If we ever use these tables for JSON encoding/decoding (for example by
588 // embedding field names on the side) we will have to revisit this, because
589 // string vs. bytes behavior is not affected by proto2 vs proto3.
590 return protobuf::FieldDescriptor::TYPE_BYTES;
591 } else {
592 return field->type();
593 }
594 }
595
596 struct SubmsgArray {
597 public:
SubmsgArrayupbc::__anon3a1ee3ae0111::SubmsgArray598 SubmsgArray(const protobuf::Descriptor* message) : message_(message) {
599 MessageLayout layout(message);
600 std::vector<const protobuf::FieldDescriptor*> sorted_submsgs =
601 SortedSubmessages(message);
602 int i = 0;
603 for (auto submsg : sorted_submsgs) {
604 if (indexes_.find(submsg->message_type()) != indexes_.end()) {
605 continue;
606 }
607 submsgs_.push_back(submsg->message_type());
608 indexes_[submsg->message_type()] = i++;
609 }
610 }
611
submsgsupbc::__anon3a1ee3ae0111::SubmsgArray612 const std::vector<const protobuf::Descriptor*>& submsgs() const {
613 return submsgs_;
614 }
615
GetIndexupbc::__anon3a1ee3ae0111::SubmsgArray616 int GetIndex(const protobuf::FieldDescriptor* field) {
617 (void)message_;
618 assert(field->containing_type() == message_);
619 auto it = indexes_.find(field->message_type());
620 assert(it != indexes_.end());
621 return it->second;
622 }
623
624 private:
625 const protobuf::Descriptor* message_;
626 std::vector<const protobuf::Descriptor*> submsgs_;
627 absl::flat_hash_map<const protobuf::Descriptor*, int> indexes_;
628 };
629
// One fast-decode table slot: `first` is the name of the parser function,
// `second` is the 64-bit data word emitted next to it.
using TableEntry = std::pair<std::string, uint64_t>;
631
GetEncodedTag(const protobuf::FieldDescriptor * field)632 uint64_t GetEncodedTag(const protobuf::FieldDescriptor* field) {
633 protobuf::internal::WireFormatLite::WireType wire_type =
634 protobuf::internal::WireFormat::WireTypeForField(field);
635 uint32_t unencoded_tag =
636 protobuf::internal::WireFormatLite::MakeTag(field->number(), wire_type);
637 uint8_t tag_bytes[10] = {0};
638 protobuf::io::CodedOutputStream::WriteVarint32ToArray(unencoded_tag,
639 tag_bytes);
640 uint64_t encoded_tag = 0;
641 memcpy(&encoded_tag, tag_bytes, sizeof(encoded_tag));
642 // TODO: byte-swap for big endian.
643 return encoded_tag;
644 }
645
GetTableSlot(const protobuf::FieldDescriptor * field)646 int GetTableSlot(const protobuf::FieldDescriptor* field) {
647 uint64_t tag = GetEncodedTag(field);
648 if (tag > 0x7fff) {
649 // Tag must fit within a two-byte varint.
650 return -1;
651 }
652 return (tag & 0xf8) >> 3;
653 }
654
TryFillTableEntry(const protobuf::Descriptor * message,const MessageLayout & layout,const protobuf::FieldDescriptor * field,TableEntry & ent)655 bool TryFillTableEntry(const protobuf::Descriptor* message,
656 const MessageLayout& layout,
657 const protobuf::FieldDescriptor* field,
658 TableEntry& ent) {
659 std::string type = "";
660 std::string cardinality = "";
661 switch (field->type()) {
662 case protobuf::FieldDescriptor::TYPE_BOOL:
663 type = "b1";
664 break;
665 case protobuf::FieldDescriptor::TYPE_INT32:
666 case protobuf::FieldDescriptor::TYPE_ENUM:
667 case protobuf::FieldDescriptor::TYPE_UINT32:
668 type = "v4";
669 break;
670 case protobuf::FieldDescriptor::TYPE_INT64:
671 case protobuf::FieldDescriptor::TYPE_UINT64:
672 type = "v8";
673 break;
674 case protobuf::FieldDescriptor::TYPE_FIXED32:
675 case protobuf::FieldDescriptor::TYPE_SFIXED32:
676 case protobuf::FieldDescriptor::TYPE_FLOAT:
677 type = "f4";
678 break;
679 case protobuf::FieldDescriptor::TYPE_FIXED64:
680 case protobuf::FieldDescriptor::TYPE_SFIXED64:
681 case protobuf::FieldDescriptor::TYPE_DOUBLE:
682 type = "f8";
683 break;
684 case protobuf::FieldDescriptor::TYPE_SINT32:
685 type = "z4";
686 break;
687 case protobuf::FieldDescriptor::TYPE_SINT64:
688 type = "z8";
689 break;
690 case protobuf::FieldDescriptor::TYPE_STRING:
691 if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO3) {
692 // Only proto3 validates UTF-8.
693 type = "s";
694 break;
695 }
696 ABSL_FALLTHROUGH_INTENDED;
697 case protobuf::FieldDescriptor::TYPE_BYTES:
698 type = "b";
699 break;
700 case protobuf::FieldDescriptor::TYPE_MESSAGE:
701 if (field->is_map()) {
702 return false; // Not supported yet (ever?).
703 }
704 type = "m";
705 break;
706 default:
707 return false; // Not supported yet.
708 }
709
710 switch (field->label()) {
711 case protobuf::FieldDescriptor::LABEL_REPEATED:
712 if (field->is_packed()) {
713 cardinality = "p";
714 } else {
715 cardinality = "r";
716 }
717 break;
718 case protobuf::FieldDescriptor::LABEL_OPTIONAL:
719 case protobuf::FieldDescriptor::LABEL_REQUIRED:
720 if (field->real_containing_oneof()) {
721 cardinality = "o";
722 } else {
723 cardinality = "s";
724 }
725 break;
726 }
727
728 uint64_t expected_tag = GetEncodedTag(field);
729 MessageLayout::Size offset = layout.GetFieldOffset(field);
730
731 // Data is:
732 //
733 // 48 32 16 0
734 // |--------|--------|--------|--------|--------|--------|--------|--------|
735 // | offset (16) |case offset (16) |presence| submsg | exp. tag (16) |
736 // |--------|--------|--------|--------|--------|--------|--------|--------|
737 //
738 // - |presence| is either hasbit index or field number for oneofs.
739
740 uint64_t data = offset.size64 << 48 | expected_tag;
741
742 if (field->is_repeated()) {
743 // No hasbit/oneof-related fields.
744 } if (field->real_containing_oneof()) {
745 MessageLayout::Size case_offset =
746 layout.GetOneofCaseOffset(field->real_containing_oneof());
747 if (case_offset.size64 > 0xffff) return false;
748 assert(field->number() < 256);
749 data |= field->number() << 24;
750 data |= case_offset.size64 << 32;
751 } else {
752 uint64_t hasbit_index = 63; // No hasbit (set a high, unused bit).
753 if (layout.HasHasbit(field)) {
754 hasbit_index = layout.GetHasbitIndex(field);
755 if (hasbit_index > 31) return false;
756 }
757 data |= hasbit_index << 24;
758 }
759
760 if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
761 SubmsgArray submsg_array(message);
762 uint64_t idx = submsg_array.GetIndex(field);
763 if (idx > 255) return false;
764 data |= idx << 16;
765
766 std::string size_ceil = "max";
767 size_t size = SIZE_MAX;
768 if (field->message_type()->file() == field->file()) {
769 // We can only be guaranteed the size of the sub-message if it is in the
770 // same file as us. We could relax this to increase the speed of
771 // cross-file sub-message parsing if we are comfortable requiring that
772 // users compile all messages at the same time.
773 MessageLayout sub_layout(field->message_type());
774 size = sub_layout.message_size().size64 + 8;
775 }
776 std::vector<size_t> breaks = {64, 128, 192, 256};
777 for (auto brk : breaks) {
778 if (size <= brk) {
779 size_ceil = std::to_string(brk);
780 break;
781 }
782 }
783 ent.first = absl::Substitute("upb_p$0$1_$2bt_max$3b", cardinality, type,
784 expected_tag > 0xff ? "2" : "1", size_ceil);
785
786 } else {
787 ent.first = absl::Substitute("upb_p$0$1_$2bt", cardinality, type,
788 expected_tag > 0xff ? "2" : "1");
789 }
790 ent.second = data;
791 return true;
792 }
793
FastDecodeTable(const protobuf::Descriptor * message,const MessageLayout & layout)794 std::vector<TableEntry> FastDecodeTable(const protobuf::Descriptor* message,
795 const MessageLayout& layout) {
796 std::vector<TableEntry> table;
797 for (const auto field : FieldHotnessOrder(message)) {
798 TableEntry ent;
799 int slot = GetTableSlot(field);
800 // std::cerr << "table slot: " << field->number() << ": " << slot << "\n";
801 if (slot < 0) {
802 // Tag can't fit in the table.
803 continue;
804 }
805 if (!TryFillTableEntry(message, layout, field, ent)) {
806 // Unsupported field type or offset, hasbit index, etc. doesn't fit.
807 continue;
808 }
809 while ((size_t)slot >= table.size()) {
810 size_t size = std::max(static_cast<size_t>(1), table.size() * 2);
811 table.resize(size, TableEntry{"fastdecode_generic", 0});
812 }
813 if (table[slot].first != "fastdecode_generic") {
814 // A hotter field already filled this slot.
815 continue;
816 }
817 table[slot] = ent;
818 }
819 return table;
820 }
821
WriteSource(const protobuf::FileDescriptor * file,Output & output,bool fasttable_enabled)822 void WriteSource(const protobuf::FileDescriptor* file, Output& output,
823 bool fasttable_enabled) {
824 EmitFileWarning(file, output);
825
826 output(
827 "#include <stddef.h>\n"
828 "#include \"upb/msg.h\"\n"
829 "#include \"$0\"\n",
830 HeaderFilename(file->name()));
831
832 for (int i = 0; i < file->dependency_count(); i++) {
833 output("#include \"$0\"\n", HeaderFilename(file->dependency(i)->name()));
834 }
835
836 output(
837 "\n"
838 "#include \"upb/port_def.inc\"\n"
839 "\n");
840
841
842 for (auto message : SortedMessages(file)) {
843 std::string msgname = ToCIdent(message->full_name());
844 std::string fields_array_ref = "NULL";
845 std::string submsgs_array_ref = "NULL";
846 MessageLayout layout(message);
847 SubmsgArray submsg_array(message);
848
849 if (!submsg_array.submsgs().empty()) {
850 // TODO(haberman): could save a little bit of space by only generating a
851 // "submsgs" array for every strongly-connected component.
852 std::string submsgs_array_name = msgname + "_submsgs";
853 submsgs_array_ref = "&" + submsgs_array_name + "[0]";
854 output("static const upb_msglayout *const $0[$1] = {\n",
855 submsgs_array_name, submsg_array.submsgs().size());
856
857 for (auto submsg : submsg_array.submsgs()) {
858 output(" &$0,\n", MessageInit(submsg));
859 }
860
861 output("};\n\n");
862 }
863
864 std::vector<const protobuf::FieldDescriptor*> field_number_order =
865 FieldNumberOrder(message);
866 if (!field_number_order.empty()) {
867 std::string fields_array_name = msgname + "__fields";
868 fields_array_ref = "&" + fields_array_name + "[0]";
869 output("static const upb_msglayout_field $0[$1] = {\n",
870 fields_array_name, field_number_order.size());
871 for (auto field : field_number_order) {
872 int submsg_index = 0;
873 std::string presence = "0";
874
875 if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
876 submsg_index = submsg_array.GetIndex(field);
877 }
878
879 if (MessageLayout::HasHasbit(field)) {
880 int index = layout.GetHasbitIndex(field);
881 assert(index != 0);
882 presence = absl::StrCat(index);
883 } else if (field->real_containing_oneof()) {
884 MessageLayout::Size case_offset =
885 layout.GetOneofCaseOffset(field->real_containing_oneof());
886
887 // We encode as negative to distinguish from hasbits.
888 case_offset.size32 = ~case_offset.size32;
889 case_offset.size64 = ~case_offset.size64;
890 assert(case_offset.size32 < 0);
891 assert(case_offset.size64 < 0);
892 presence = GetSizeInit(case_offset);
893 }
894
895 std::string label;
896 if (field->is_map()) {
897 label = "_UPB_LABEL_MAP";
898 } else if (field->is_packed()) {
899 label = "_UPB_LABEL_PACKED";
900 } else {
901 label = absl::StrCat(field->label());
902 }
903
904 output(" {$0, $1, $2, $3, $4, $5},\n",
905 field->number(),
906 GetSizeInit(layout.GetFieldOffset(field)),
907 presence,
908 submsg_index,
909 TableDescriptorType(field),
910 label);
911 }
912 output("};\n\n");
913 }
914
915 std::vector<TableEntry> table;
916 uint8_t table_mask = -1;
917
918 if (fasttable_enabled) {
919 table = FastDecodeTable(message, layout);
920 }
921
922 if (table.size() > 1) {
923 assert((table.size() & (table.size() - 1)) == 0);
924 table_mask = (table.size() - 1) << 3;
925 }
926
927 output("const upb_msglayout $0 = {\n", MessageInit(message));
928 output(" $0,\n", submsgs_array_ref);
929 output(" $0,\n", fields_array_ref);
930 output(" $0, $1, $2, $3,\n", GetSizeInit(layout.message_size()),
931 field_number_order.size(),
932 "false", // TODO: extendable
933 table_mask
934 );
935 if (!table.empty()) {
936 output(" UPB_FASTTABLE_INIT({\n");
937 for (const auto& ent : table) {
938 output(" {0x$1, &$0},\n", ent.first,
939 absl::StrCat(absl::Hex(ent.second, absl::kZeroPad16)));
940 }
941 output(" }),\n");
942 }
943 output("};\n\n");
944 }
945
946 output("#include \"upb/port_undef.inc\"\n");
947 output("\n");
948 }
949
950 class Generator : public protoc::CodeGenerator {
~Generator()951 ~Generator() override {}
952 bool Generate(const protobuf::FileDescriptor* file,
953 const std::string& parameter, protoc::GeneratorContext* context,
954 std::string* error) const override;
GetSupportedFeatures() const955 uint64_t GetSupportedFeatures() const override {
956 return FEATURE_PROTO3_OPTIONAL;
957 }
958 };
959
Generate(const protobuf::FileDescriptor * file,const std::string & parameter,protoc::GeneratorContext * context,std::string * error) const960 bool Generator::Generate(const protobuf::FileDescriptor* file,
961 const std::string& parameter,
962 protoc::GeneratorContext* context,
963 std::string* error) const {
964 bool fasttable_enabled = false;
965 std::vector<std::pair<std::string, std::string>> params;
966 google::protobuf::compiler::ParseGeneratorParameter(parameter, ¶ms);
967
968 for (const auto& pair : params) {
969 if (pair.first == "fasttable") {
970 fasttable_enabled = true;
971 } else {
972 *error = "Unknown parameter: " + pair.first;
973 return false;
974 }
975 }
976
977 Output h_output(context->Open(HeaderFilename(file->name())));
978 WriteHeader(file, h_output);
979
980 Output c_output(context->Open(SourceFilename(file->name())));
981 WriteSource(file, c_output, fasttable_enabled);
982
983 return true;
984 }
985
986 } // namespace
987 } // namespace upbc
988
main(int argc,char ** argv)989 int main(int argc, char** argv) {
990 std::unique_ptr<google::protobuf::compiler::CodeGenerator> generator(
991 new upbc::Generator());
992 return google::protobuf::compiler::PluginMain(argc, argv, generator.get());
993 }
994