1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <google/protobuf/compiler/java/java_helpers.h>
36
37 #include <algorithm>
38 #include <cstdint>
39 #include <limits>
40 #include <unordered_set>
41 #include <vector>
42
43 #include <google/protobuf/wire_format.h>
44 #include <google/protobuf/stubs/strutil.h>
45 #include <google/protobuf/stubs/stringprintf.h>
46 #include <google/protobuf/stubs/substitute.h>
47 #include <google/protobuf/compiler/java/java_name_resolver.h>
48 #include <google/protobuf/compiler/java/java_names.h>
49 #include <google/protobuf/descriptor.pb.h>
50 #include <google/protobuf/stubs/hash.h> // for hash<T *>
51
52 namespace google {
53 namespace protobuf {
54 namespace compiler {
55 namespace java {
56
57 using internal::WireFormat;
58 using internal::WireFormatLite;
59
// Separator comment lines. Each constant is one complete "//" comment line
// terminated by a newline: kThickSeparator is a row of '=' characters and
// kThinSeparator is a row of '-' characters.
const char kThickSeparator[] =
    "// ===================================================================\n";
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
64
65 namespace {
66
// Starting value FileJavaPackage() uses when the file does not set the
// java_package option; the proto package (if any) is appended to it.
const char* kDefaultPackage = "";
68
// Names that should be avoided (in UpperCamelCase format).
// Using them will cause the compiler to generate accessors whose names
// collide with methods defined in base classes.
// IsForbidden() compares the UpperCamelCase form of a field name against
// each entry of this list.
// Keep this list in sync with specialFieldNames in
// java/core/src/main/java/com/google/protobuf/DescriptorMessageInfoFactory.java
const char* kForbiddenWordList[] = {
    // java.lang.Object:
    "Class",
    // com.google.protobuf.MessageLiteOrBuilder:
    "DefaultInstanceForType",
    // com.google.protobuf.MessageLite:
    "ParserForType",
    "SerializedSize",
    // com.google.protobuf.MessageOrBuilder:
    "AllFields",
    "DescriptorForType",
    "InitializationErrorString",
    // TODO(b/219045204): re-enable
    // "UnknownFields",
    // obsolete. kept for backwards compatibility of generated code
    "CachedSize",
};
91
// Set of names consulted by UnderscoresToCamelCaseCheckReserved(): a
// camel-cased field name found here gets a trailing "_" appended.
// NOTE(review): the entries look like the Java language keywords -- confirm
// against the Java Language Specification before editing.
// The set is allocated once at static-initialization time and never deleted.
const std::unordered_set<std::string>* kReservedNames =
    new std::unordered_set<std::string>({
        "abstract", "assert", "boolean", "break", "byte",
        "case", "catch", "char", "class", "const",
        "continue", "default", "do", "double", "else",
        "enum", "extends", "final", "finally", "float",
        "for", "goto", "if", "implements", "import",
        "instanceof", "int", "interface", "long", "native",
        "new", "package", "private", "protected", "public",
        "return", "short", "static", "strictfp", "super",
        "switch", "synchronized", "this", "throw", "throws",
        "transient", "try", "void", "volatile", "while",
    });
105
IsForbidden(const std::string & field_name)106 bool IsForbidden(const std::string& field_name) {
107 for (int i = 0; i < GOOGLE_ARRAYSIZE(kForbiddenWordList); ++i) {
108 if (UnderscoresToCamelCase(field_name, true) == kForbiddenWordList[i]) {
109 return true;
110 }
111 }
112 return false;
113 }
114
FieldName(const FieldDescriptor * field)115 std::string FieldName(const FieldDescriptor* field) {
116 std::string field_name;
117 // Groups are hacky: The name of the field is just the lower-cased name
118 // of the group type. In Java, though, we would like to retain the original
119 // capitalization of the type name.
120 if (GetType(field) == FieldDescriptor::TYPE_GROUP) {
121 field_name = field->message_type()->name();
122 } else {
123 field_name = field->name();
124 }
125 if (IsForbidden(field_name)) {
126 // Append a trailing "#" to indicate that the name should be decorated to
127 // avoid collision with other names.
128 field_name += "#";
129 }
130 return field_name;
131 }
132
133
134 } // namespace
135
PrintGeneratedAnnotation(io::Printer * printer,char delimiter,const std::string & annotation_file)136 void PrintGeneratedAnnotation(io::Printer* printer, char delimiter,
137 const std::string& annotation_file) {
138 if (annotation_file.empty()) {
139 return;
140 }
141 std::string ptemplate =
142 "@javax.annotation.Generated(value=\"protoc\", comments=\"annotations:";
143 ptemplate.push_back(delimiter);
144 ptemplate.append("annotation_file");
145 ptemplate.push_back(delimiter);
146 ptemplate.append("\")\n");
147 printer->Print(ptemplate.c_str(), "annotation_file", annotation_file);
148 }
149
PrintEnumVerifierLogic(io::Printer * printer,const FieldDescriptor * descriptor,const std::map<std::string,std::string> & variables,const char * var_name,const char * terminating_string,bool enforce_lite)150 void PrintEnumVerifierLogic(io::Printer* printer,
151 const FieldDescriptor* descriptor,
152 const std::map<std::string, std::string>& variables,
153 const char* var_name,
154 const char* terminating_string, bool enforce_lite) {
155 std::string enum_verifier_string =
156 enforce_lite ? StrCat(var_name, ".internalGetVerifier()")
157 : StrCat(
158 "new com.google.protobuf.Internal.EnumVerifier() {\n"
159 " @java.lang.Override\n"
160 " public boolean isInRange(int number) {\n"
161 " return ",
162 var_name,
163 ".forNumber(number) != null;\n"
164 " }\n"
165 " }");
166 printer->Print(
167 variables,
168 StrCat(enum_verifier_string, terminating_string).c_str());
169 }
170
UnderscoresToCamelCase(const std::string & input,bool cap_next_letter)171 std::string UnderscoresToCamelCase(const std::string& input,
172 bool cap_next_letter) {
173 GOOGLE_CHECK(!input.empty());
174 std::string result;
175 // Note: I distrust ctype.h due to locales.
176 for (int i = 0; i < input.size(); i++) {
177 if ('a' <= input[i] && input[i] <= 'z') {
178 if (cap_next_letter) {
179 result += input[i] + ('A' - 'a');
180 } else {
181 result += input[i];
182 }
183 cap_next_letter = false;
184 } else if ('A' <= input[i] && input[i] <= 'Z') {
185 if (i == 0 && !cap_next_letter) {
186 // Force first letter to lower-case unless explicitly told to
187 // capitalize it.
188 result += input[i] + ('a' - 'A');
189 } else {
190 // Capital letters after the first are left as-is.
191 result += input[i];
192 }
193 cap_next_letter = false;
194 } else if ('0' <= input[i] && input[i] <= '9') {
195 result += input[i];
196 cap_next_letter = true;
197 } else {
198 cap_next_letter = true;
199 }
200 }
201 // Add a trailing "_" if the name should be altered.
202 if (input[input.size() - 1] == '#') {
203 result += '_';
204 }
205 return result;
206 }
207
ToCamelCase(const std::string & input,bool lower_first)208 std::string ToCamelCase(const std::string& input, bool lower_first) {
209 bool capitalize_next = !lower_first;
210 std::string result;
211 result.reserve(input.size());
212
213 for (char i : input) {
214 if (i == '_') {
215 capitalize_next = true;
216 } else if (capitalize_next) {
217 result.push_back(ToUpperCh(i));
218 capitalize_next = false;
219 } else {
220 result.push_back(i);
221 }
222 }
223
224 // Lower-case the first letter.
225 if (lower_first && !result.empty()) {
226 result[0] = ToLowerCh(result[0]);
227 }
228
229 return result;
230 }
231
// Returns the upper-case form of |ch| if it is in 'a'..'z'; any other
// character is returned unchanged.
char ToUpperCh(char ch) {
  if (ch < 'a' || ch > 'z') {
    return ch;
  }
  return static_cast<char>(ch - 'a' + 'A');
}
235
// Returns the lower-case form of |ch| if it is in 'A'..'Z'; any other
// character is returned unchanged.
char ToLowerCh(char ch) {
  if (ch < 'A' || ch > 'Z') {
    return ch;
  }
  return static_cast<char>(ch - 'A' + 'a');
}
239
UnderscoresToCamelCase(const FieldDescriptor * field)240 std::string UnderscoresToCamelCase(const FieldDescriptor* field) {
241 return UnderscoresToCamelCase(FieldName(field), false);
242 }
243
UnderscoresToCapitalizedCamelCase(const FieldDescriptor * field)244 std::string UnderscoresToCapitalizedCamelCase(const FieldDescriptor* field) {
245 return UnderscoresToCamelCase(FieldName(field), true);
246 }
247
CapitalizedFieldName(const FieldDescriptor * field)248 std::string CapitalizedFieldName(const FieldDescriptor* field) {
249 return UnderscoresToCapitalizedCamelCase(field);
250 }
251
UnderscoresToCamelCase(const MethodDescriptor * method)252 std::string UnderscoresToCamelCase(const MethodDescriptor* method) {
253 return UnderscoresToCamelCase(method->name(), false);
254 }
255
UnderscoresToCamelCaseCheckReserved(const FieldDescriptor * field)256 std::string UnderscoresToCamelCaseCheckReserved(const FieldDescriptor* field) {
257 std::string name = UnderscoresToCamelCase(field);
258 if (kReservedNames->find(name) != kReservedNames->end()) {
259 return name + "_";
260 }
261 return name;
262 }
263
// Returns true if |field_name| is exactly one of the names that should be
// avoided as field names in Kotlin.
bool IsForbiddenKotlin(const std::string& field_name) {
  // Names that should be avoided as field names in Kotlin.
  // All Kotlin hard keywords are in this list.
  //
  // The set is a function-local static so that it is built only once (on the
  // first call) instead of being allocated -- and leaked -- on every call.
  // It is intentionally never deleted.
  static const std::unordered_set<std::string>* const kKotlinForbiddenNames =
      new std::unordered_set<std::string>({
          "as",    "as?",       "break",  "class", "continue", "do",
          "else",  "false",     "for",    "fun",   "if",       "in",
          "!in",   "interface", "is",     "!is",   "null",     "object",
          "package", "return",  "super",  "this",  "throw",    "true",
          "try",   "typealias", "typeof", "val",   "var",      "when",
          "while",
      });
  return kKotlinForbiddenNames->find(field_name) !=
         kKotlinForbiddenNames->end();
}
279
UniqueFileScopeIdentifier(const Descriptor * descriptor)280 std::string UniqueFileScopeIdentifier(const Descriptor* descriptor) {
281 return "static_" + StringReplace(descriptor->full_name(), ".", "_", true);
282 }
283
CamelCaseFieldName(const FieldDescriptor * field)284 std::string CamelCaseFieldName(const FieldDescriptor* field) {
285 std::string fieldName = UnderscoresToCamelCase(field);
286 if ('0' <= fieldName[0] && fieldName[0] <= '9') {
287 return '_' + fieldName;
288 }
289 return fieldName;
290 }
291
FileClassName(const FileDescriptor * file,bool immutable)292 std::string FileClassName(const FileDescriptor* file, bool immutable) {
293 ClassNameResolver name_resolver;
294 return name_resolver.GetFileClassName(file, immutable);
295 }
296
FileJavaPackage(const FileDescriptor * file,bool immutable)297 std::string FileJavaPackage(const FileDescriptor* file, bool immutable) {
298 std::string result;
299
300 if (file->options().has_java_package()) {
301 result = file->options().java_package();
302 } else {
303 result = kDefaultPackage;
304 if (!file->package().empty()) {
305 if (!result.empty()) result += '.';
306 result += file->package();
307 }
308 }
309
310 return result;
311 }
312
FileJavaPackage(const FileDescriptor * file)313 std::string FileJavaPackage(const FileDescriptor* file) {
314 return FileJavaPackage(file, true /* immutable */);
315 }
316
JavaPackageToDir(std::string package_name)317 std::string JavaPackageToDir(std::string package_name) {
318 std::string package_dir = StringReplace(package_name, ".", "/", true);
319 if (!package_dir.empty()) package_dir += "/";
320 return package_dir;
321 }
322
ClassName(const Descriptor * descriptor)323 std::string ClassName(const Descriptor* descriptor) {
324 ClassNameResolver name_resolver;
325 return name_resolver.GetClassName(descriptor, true);
326 }
327
ClassName(const EnumDescriptor * descriptor)328 std::string ClassName(const EnumDescriptor* descriptor) {
329 ClassNameResolver name_resolver;
330 return name_resolver.GetClassName(descriptor, true);
331 }
332
ClassName(const ServiceDescriptor * descriptor)333 std::string ClassName(const ServiceDescriptor* descriptor) {
334 ClassNameResolver name_resolver;
335 return name_resolver.GetClassName(descriptor, true);
336 }
337
ClassName(const FileDescriptor * descriptor)338 std::string ClassName(const FileDescriptor* descriptor) {
339 ClassNameResolver name_resolver;
340 return name_resolver.GetClassName(descriptor, true);
341 }
342
343
ExtraMessageInterfaces(const Descriptor * descriptor)344 std::string ExtraMessageInterfaces(const Descriptor* descriptor) {
345 std::string interfaces = "// @@protoc_insertion_point(message_implements:" +
346 descriptor->full_name() + ")";
347 return interfaces;
348 }
349
350
ExtraBuilderInterfaces(const Descriptor * descriptor)351 std::string ExtraBuilderInterfaces(const Descriptor* descriptor) {
352 std::string interfaces = "// @@protoc_insertion_point(builder_implements:" +
353 descriptor->full_name() + ")";
354 return interfaces;
355 }
356
ExtraMessageOrBuilderInterfaces(const Descriptor * descriptor)357 std::string ExtraMessageOrBuilderInterfaces(const Descriptor* descriptor) {
358 std::string interfaces = "// @@protoc_insertion_point(interface_extends:" +
359 descriptor->full_name() + ")";
360 return interfaces;
361 }
362
FieldConstantName(const FieldDescriptor * field)363 std::string FieldConstantName(const FieldDescriptor* field) {
364 std::string name = field->name() + "_FIELD_NUMBER";
365 ToUpper(&name);
366 return name;
367 }
368
// Returns the declared type of |field|.
FieldDescriptor::Type GetType(const FieldDescriptor* field) {
  const FieldDescriptor::Type declared_type = field->type();
  return declared_type;
}
372
GetJavaType(const FieldDescriptor * field)373 JavaType GetJavaType(const FieldDescriptor* field) {
374 switch (GetType(field)) {
375 case FieldDescriptor::TYPE_INT32:
376 case FieldDescriptor::TYPE_UINT32:
377 case FieldDescriptor::TYPE_SINT32:
378 case FieldDescriptor::TYPE_FIXED32:
379 case FieldDescriptor::TYPE_SFIXED32:
380 return JAVATYPE_INT;
381
382 case FieldDescriptor::TYPE_INT64:
383 case FieldDescriptor::TYPE_UINT64:
384 case FieldDescriptor::TYPE_SINT64:
385 case FieldDescriptor::TYPE_FIXED64:
386 case FieldDescriptor::TYPE_SFIXED64:
387 return JAVATYPE_LONG;
388
389 case FieldDescriptor::TYPE_FLOAT:
390 return JAVATYPE_FLOAT;
391
392 case FieldDescriptor::TYPE_DOUBLE:
393 return JAVATYPE_DOUBLE;
394
395 case FieldDescriptor::TYPE_BOOL:
396 return JAVATYPE_BOOLEAN;
397
398 case FieldDescriptor::TYPE_STRING:
399 return JAVATYPE_STRING;
400
401 case FieldDescriptor::TYPE_BYTES:
402 return JAVATYPE_BYTES;
403
404 case FieldDescriptor::TYPE_ENUM:
405 return JAVATYPE_ENUM;
406
407 case FieldDescriptor::TYPE_GROUP:
408 case FieldDescriptor::TYPE_MESSAGE:
409 return JAVATYPE_MESSAGE;
410
411 // No default because we want the compiler to complain if any new
412 // types are added.
413 }
414
415 GOOGLE_LOG(FATAL) << "Can't get here.";
416 return JAVATYPE_INT;
417 }
418
PrimitiveTypeName(JavaType type)419 const char* PrimitiveTypeName(JavaType type) {
420 switch (type) {
421 case JAVATYPE_INT:
422 return "int";
423 case JAVATYPE_LONG:
424 return "long";
425 case JAVATYPE_FLOAT:
426 return "float";
427 case JAVATYPE_DOUBLE:
428 return "double";
429 case JAVATYPE_BOOLEAN:
430 return "boolean";
431 case JAVATYPE_STRING:
432 return "java.lang.String";
433 case JAVATYPE_BYTES:
434 return "com.google.protobuf.ByteString";
435 case JAVATYPE_ENUM:
436 return NULL;
437 case JAVATYPE_MESSAGE:
438 return NULL;
439
440 // No default because we want the compiler to complain if any new
441 // JavaTypes are added.
442 }
443
444 GOOGLE_LOG(FATAL) << "Can't get here.";
445 return NULL;
446 }
447
PrimitiveTypeName(const FieldDescriptor * descriptor)448 const char* PrimitiveTypeName(const FieldDescriptor* descriptor) {
449 return PrimitiveTypeName(GetJavaType(descriptor));
450 }
451
BoxedPrimitiveTypeName(JavaType type)452 const char* BoxedPrimitiveTypeName(JavaType type) {
453 switch (type) {
454 case JAVATYPE_INT:
455 return "java.lang.Integer";
456 case JAVATYPE_LONG:
457 return "java.lang.Long";
458 case JAVATYPE_FLOAT:
459 return "java.lang.Float";
460 case JAVATYPE_DOUBLE:
461 return "java.lang.Double";
462 case JAVATYPE_BOOLEAN:
463 return "java.lang.Boolean";
464 case JAVATYPE_STRING:
465 return "java.lang.String";
466 case JAVATYPE_BYTES:
467 return "com.google.protobuf.ByteString";
468 case JAVATYPE_ENUM:
469 return NULL;
470 case JAVATYPE_MESSAGE:
471 return NULL;
472
473 // No default because we want the compiler to complain if any new
474 // JavaTypes are added.
475 }
476
477 GOOGLE_LOG(FATAL) << "Can't get here.";
478 return NULL;
479 }
480
BoxedPrimitiveTypeName(const FieldDescriptor * descriptor)481 const char* BoxedPrimitiveTypeName(const FieldDescriptor* descriptor) {
482 return BoxedPrimitiveTypeName(GetJavaType(descriptor));
483 }
484
KotlinTypeName(JavaType type)485 const char* KotlinTypeName(JavaType type) {
486 switch (type) {
487 case JAVATYPE_INT:
488 return "kotlin.Int";
489 case JAVATYPE_LONG:
490 return "kotlin.Long";
491 case JAVATYPE_FLOAT:
492 return "kotlin.Float";
493 case JAVATYPE_DOUBLE:
494 return "kotlin.Double";
495 case JAVATYPE_BOOLEAN:
496 return "kotlin.Boolean";
497 case JAVATYPE_STRING:
498 return "kotlin.String";
499 case JAVATYPE_BYTES:
500 return "com.google.protobuf.ByteString";
501 case JAVATYPE_ENUM:
502 return NULL;
503 case JAVATYPE_MESSAGE:
504 return NULL;
505
506 // No default because we want the compiler to complain if any new
507 // JavaTypes are added.
508 }
509
510 GOOGLE_LOG(FATAL) << "Can't get here.";
511 return NULL;
512 }
513
GetOneofStoredType(const FieldDescriptor * field)514 std::string GetOneofStoredType(const FieldDescriptor* field) {
515 const JavaType javaType = GetJavaType(field);
516 switch (javaType) {
517 case JAVATYPE_ENUM:
518 return "java.lang.Integer";
519 case JAVATYPE_MESSAGE:
520 return ClassName(field->message_type());
521 default:
522 return BoxedPrimitiveTypeName(javaType);
523 }
524 }
525
FieldTypeName(FieldDescriptor::Type field_type)526 const char* FieldTypeName(FieldDescriptor::Type field_type) {
527 switch (field_type) {
528 case FieldDescriptor::TYPE_INT32:
529 return "INT32";
530 case FieldDescriptor::TYPE_UINT32:
531 return "UINT32";
532 case FieldDescriptor::TYPE_SINT32:
533 return "SINT32";
534 case FieldDescriptor::TYPE_FIXED32:
535 return "FIXED32";
536 case FieldDescriptor::TYPE_SFIXED32:
537 return "SFIXED32";
538 case FieldDescriptor::TYPE_INT64:
539 return "INT64";
540 case FieldDescriptor::TYPE_UINT64:
541 return "UINT64";
542 case FieldDescriptor::TYPE_SINT64:
543 return "SINT64";
544 case FieldDescriptor::TYPE_FIXED64:
545 return "FIXED64";
546 case FieldDescriptor::TYPE_SFIXED64:
547 return "SFIXED64";
548 case FieldDescriptor::TYPE_FLOAT:
549 return "FLOAT";
550 case FieldDescriptor::TYPE_DOUBLE:
551 return "DOUBLE";
552 case FieldDescriptor::TYPE_BOOL:
553 return "BOOL";
554 case FieldDescriptor::TYPE_STRING:
555 return "STRING";
556 case FieldDescriptor::TYPE_BYTES:
557 return "BYTES";
558 case FieldDescriptor::TYPE_ENUM:
559 return "ENUM";
560 case FieldDescriptor::TYPE_GROUP:
561 return "GROUP";
562 case FieldDescriptor::TYPE_MESSAGE:
563 return "MESSAGE";
564
565 // No default because we want the compiler to complain if any new
566 // types are added.
567 }
568
569 GOOGLE_LOG(FATAL) << "Can't get here.";
570 return NULL;
571 }
572
// Returns true if no byte of |text| has its high bit (0x80) set. An empty
// string yields true.
bool AllAscii(const std::string& text) {
  for (const char c : text) {
    const bool high_bit_set = (c & 0x80) != 0;
    if (high_bit_set) {
      return false;
    }
  }
  return true;
}
581
DefaultValue(const FieldDescriptor * field,bool immutable,ClassNameResolver * name_resolver)582 std::string DefaultValue(const FieldDescriptor* field, bool immutable,
583 ClassNameResolver* name_resolver) {
584 // Switch on CppType since we need to know which default_value_* method
585 // of FieldDescriptor to call.
586 switch (field->cpp_type()) {
587 case FieldDescriptor::CPPTYPE_INT32:
588 return StrCat(field->default_value_int32());
589 case FieldDescriptor::CPPTYPE_UINT32:
590 // Need to print as a signed int since Java has no unsigned.
591 return StrCat(static_cast<int32_t>(field->default_value_uint32()));
592 case FieldDescriptor::CPPTYPE_INT64:
593 return StrCat(field->default_value_int64()) + "L";
594 case FieldDescriptor::CPPTYPE_UINT64:
595 return StrCat(static_cast<int64_t>(field->default_value_uint64())) +
596 "L";
597 case FieldDescriptor::CPPTYPE_DOUBLE: {
598 double value = field->default_value_double();
599 if (value == std::numeric_limits<double>::infinity()) {
600 return "Double.POSITIVE_INFINITY";
601 } else if (value == -std::numeric_limits<double>::infinity()) {
602 return "Double.NEGATIVE_INFINITY";
603 } else if (value != value) {
604 return "Double.NaN";
605 } else {
606 return SimpleDtoa(value) + "D";
607 }
608 }
609 case FieldDescriptor::CPPTYPE_FLOAT: {
610 float value = field->default_value_float();
611 if (value == std::numeric_limits<float>::infinity()) {
612 return "Float.POSITIVE_INFINITY";
613 } else if (value == -std::numeric_limits<float>::infinity()) {
614 return "Float.NEGATIVE_INFINITY";
615 } else if (value != value) {
616 return "Float.NaN";
617 } else {
618 return SimpleFtoa(value) + "F";
619 }
620 }
621 case FieldDescriptor::CPPTYPE_BOOL:
622 return field->default_value_bool() ? "true" : "false";
623 case FieldDescriptor::CPPTYPE_STRING:
624 if (GetType(field) == FieldDescriptor::TYPE_BYTES) {
625 if (field->has_default_value()) {
626 // See comments in Internal.java for gory details.
627 return strings::Substitute(
628 "com.google.protobuf.Internal.bytesDefaultValue(\"$0\")",
629 CEscape(field->default_value_string()));
630 } else {
631 return "com.google.protobuf.ByteString.EMPTY";
632 }
633 } else {
634 if (AllAscii(field->default_value_string())) {
635 // All chars are ASCII. In this case CEscape() works fine.
636 return "\"" + CEscape(field->default_value_string()) + "\"";
637 } else {
638 // See comments in Internal.java for gory details.
639 return strings::Substitute(
640 "com.google.protobuf.Internal.stringDefaultValue(\"$0\")",
641 CEscape(field->default_value_string()));
642 }
643 }
644
645 case FieldDescriptor::CPPTYPE_ENUM:
646 return name_resolver->GetClassName(field->enum_type(), immutable) + "." +
647 field->default_value_enum()->name();
648
649 case FieldDescriptor::CPPTYPE_MESSAGE:
650 return name_resolver->GetClassName(field->message_type(), immutable) +
651 ".getDefaultInstance()";
652
653 // No default because we want the compiler to complain if any new
654 // types are added.
655 }
656
657 GOOGLE_LOG(FATAL) << "Can't get here.";
658 return "";
659 }
660
IsDefaultValueJavaDefault(const FieldDescriptor * field)661 bool IsDefaultValueJavaDefault(const FieldDescriptor* field) {
662 // Switch on CppType since we need to know which default_value_* method
663 // of FieldDescriptor to call.
664 switch (field->cpp_type()) {
665 case FieldDescriptor::CPPTYPE_INT32:
666 return field->default_value_int32() == 0;
667 case FieldDescriptor::CPPTYPE_UINT32:
668 return field->default_value_uint32() == 0;
669 case FieldDescriptor::CPPTYPE_INT64:
670 return field->default_value_int64() == 0L;
671 case FieldDescriptor::CPPTYPE_UINT64:
672 return field->default_value_uint64() == 0L;
673 case FieldDescriptor::CPPTYPE_DOUBLE:
674 return field->default_value_double() == 0.0;
675 case FieldDescriptor::CPPTYPE_FLOAT:
676 return field->default_value_float() == 0.0;
677 case FieldDescriptor::CPPTYPE_BOOL:
678 return field->default_value_bool() == false;
679 case FieldDescriptor::CPPTYPE_ENUM:
680 return field->default_value_enum()->number() == 0;
681 case FieldDescriptor::CPPTYPE_STRING:
682 case FieldDescriptor::CPPTYPE_MESSAGE:
683 return false;
684
685 // No default because we want the compiler to complain if any new
686 // types are added.
687 }
688
689 GOOGLE_LOG(FATAL) << "Can't get here.";
690 return false;
691 }
692
IsByteStringWithCustomDefaultValue(const FieldDescriptor * field)693 bool IsByteStringWithCustomDefaultValue(const FieldDescriptor* field) {
694 return GetJavaType(field) == JAVATYPE_BYTES &&
695 field->default_value_string() != "";
696 }
697
// Textual hexadecimal literals for the 32 single-bit masks, indexed by bit
// position: bit_masks[i] is the text of the mask with only bit i set. The
// Generate*Bit helpers in this file index it with (bitIndex % 32).
const char* bit_masks[] = {
    "0x00000001", "0x00000002", "0x00000004", "0x00000008",
    "0x00000010", "0x00000020", "0x00000040", "0x00000080",

    "0x00000100", "0x00000200", "0x00000400", "0x00000800",
    "0x00001000", "0x00002000", "0x00004000", "0x00008000",

    "0x00010000", "0x00020000", "0x00040000", "0x00080000",
    "0x00100000", "0x00200000", "0x00400000", "0x00800000",

    "0x01000000", "0x02000000", "0x04000000", "0x08000000",
    "0x10000000", "0x20000000", "0x40000000", "0x80000000",
};
711
GetBitFieldName(int index)712 std::string GetBitFieldName(int index) {
713 std::string varName = "bitField";
714 varName += StrCat(index);
715 varName += "_";
716 return varName;
717 }
718
GetBitFieldNameForBit(int bitIndex)719 std::string GetBitFieldNameForBit(int bitIndex) {
720 return GetBitFieldName(bitIndex / 32);
721 }
722
723 namespace {
724
GenerateGetBitInternal(const std::string & prefix,int bitIndex)725 std::string GenerateGetBitInternal(const std::string& prefix, int bitIndex) {
726 std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
727 int bitInVarIndex = bitIndex % 32;
728
729 std::string mask = bit_masks[bitInVarIndex];
730 std::string result = "((" + varName + " & " + mask + ") != 0)";
731 return result;
732 }
733
GenerateSetBitInternal(const std::string & prefix,int bitIndex)734 std::string GenerateSetBitInternal(const std::string& prefix, int bitIndex) {
735 std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
736 int bitInVarIndex = bitIndex % 32;
737
738 std::string mask = bit_masks[bitInVarIndex];
739 std::string result = varName + " |= " + mask;
740 return result;
741 }
742
743 } // namespace
744
GenerateGetBit(int bitIndex)745 std::string GenerateGetBit(int bitIndex) {
746 return GenerateGetBitInternal("", bitIndex);
747 }
748
GenerateSetBit(int bitIndex)749 std::string GenerateSetBit(int bitIndex) {
750 return GenerateSetBitInternal("", bitIndex);
751 }
752
GenerateClearBit(int bitIndex)753 std::string GenerateClearBit(int bitIndex) {
754 std::string varName = GetBitFieldNameForBit(bitIndex);
755 int bitInVarIndex = bitIndex % 32;
756
757 std::string mask = bit_masks[bitInVarIndex];
758 std::string result = varName + " = (" + varName + " & ~" + mask + ")";
759 return result;
760 }
761
GenerateGetBitFromLocal(int bitIndex)762 std::string GenerateGetBitFromLocal(int bitIndex) {
763 return GenerateGetBitInternal("from_", bitIndex);
764 }
765
GenerateSetBitToLocal(int bitIndex)766 std::string GenerateSetBitToLocal(int bitIndex) {
767 return GenerateSetBitInternal("to_", bitIndex);
768 }
769
GenerateGetBitMutableLocal(int bitIndex)770 std::string GenerateGetBitMutableLocal(int bitIndex) {
771 return GenerateGetBitInternal("mutable_", bitIndex);
772 }
773
GenerateSetBitMutableLocal(int bitIndex)774 std::string GenerateSetBitMutableLocal(int bitIndex) {
775 return GenerateSetBitInternal("mutable_", bitIndex);
776 }
777
IsReferenceType(JavaType type)778 bool IsReferenceType(JavaType type) {
779 switch (type) {
780 case JAVATYPE_INT:
781 return false;
782 case JAVATYPE_LONG:
783 return false;
784 case JAVATYPE_FLOAT:
785 return false;
786 case JAVATYPE_DOUBLE:
787 return false;
788 case JAVATYPE_BOOLEAN:
789 return false;
790 case JAVATYPE_STRING:
791 return true;
792 case JAVATYPE_BYTES:
793 return true;
794 case JAVATYPE_ENUM:
795 return true;
796 case JAVATYPE_MESSAGE:
797 return true;
798
799 // No default because we want the compiler to complain if any new
800 // JavaTypes are added.
801 }
802
803 GOOGLE_LOG(FATAL) << "Can't get here.";
804 return false;
805 }
806
GetCapitalizedType(const FieldDescriptor * field,bool immutable)807 const char* GetCapitalizedType(const FieldDescriptor* field, bool immutable) {
808 switch (GetType(field)) {
809 case FieldDescriptor::TYPE_INT32:
810 return "Int32";
811 case FieldDescriptor::TYPE_UINT32:
812 return "UInt32";
813 case FieldDescriptor::TYPE_SINT32:
814 return "SInt32";
815 case FieldDescriptor::TYPE_FIXED32:
816 return "Fixed32";
817 case FieldDescriptor::TYPE_SFIXED32:
818 return "SFixed32";
819 case FieldDescriptor::TYPE_INT64:
820 return "Int64";
821 case FieldDescriptor::TYPE_UINT64:
822 return "UInt64";
823 case FieldDescriptor::TYPE_SINT64:
824 return "SInt64";
825 case FieldDescriptor::TYPE_FIXED64:
826 return "Fixed64";
827 case FieldDescriptor::TYPE_SFIXED64:
828 return "SFixed64";
829 case FieldDescriptor::TYPE_FLOAT:
830 return "Float";
831 case FieldDescriptor::TYPE_DOUBLE:
832 return "Double";
833 case FieldDescriptor::TYPE_BOOL:
834 return "Bool";
835 case FieldDescriptor::TYPE_STRING:
836 return "String";
837 case FieldDescriptor::TYPE_BYTES: {
838 return "Bytes";
839 }
840 case FieldDescriptor::TYPE_ENUM:
841 return "Enum";
842 case FieldDescriptor::TYPE_GROUP:
843 return "Group";
844 case FieldDescriptor::TYPE_MESSAGE:
845 return "Message";
846
847 // No default because we want the compiler to complain if any new
848 // types are added.
849 }
850
851 GOOGLE_LOG(FATAL) << "Can't get here.";
852 return NULL;
853 }
854
855 // For encodings with fixed sizes, returns that size in bytes. Otherwise
856 // returns -1.
FixedSize(FieldDescriptor::Type type)857 int FixedSize(FieldDescriptor::Type type) {
858 switch (type) {
859 case FieldDescriptor::TYPE_INT32:
860 return -1;
861 case FieldDescriptor::TYPE_INT64:
862 return -1;
863 case FieldDescriptor::TYPE_UINT32:
864 return -1;
865 case FieldDescriptor::TYPE_UINT64:
866 return -1;
867 case FieldDescriptor::TYPE_SINT32:
868 return -1;
869 case FieldDescriptor::TYPE_SINT64:
870 return -1;
871 case FieldDescriptor::TYPE_FIXED32:
872 return WireFormatLite::kFixed32Size;
873 case FieldDescriptor::TYPE_FIXED64:
874 return WireFormatLite::kFixed64Size;
875 case FieldDescriptor::TYPE_SFIXED32:
876 return WireFormatLite::kSFixed32Size;
877 case FieldDescriptor::TYPE_SFIXED64:
878 return WireFormatLite::kSFixed64Size;
879 case FieldDescriptor::TYPE_FLOAT:
880 return WireFormatLite::kFloatSize;
881 case FieldDescriptor::TYPE_DOUBLE:
882 return WireFormatLite::kDoubleSize;
883
884 case FieldDescriptor::TYPE_BOOL:
885 return WireFormatLite::kBoolSize;
886 case FieldDescriptor::TYPE_ENUM:
887 return -1;
888
889 case FieldDescriptor::TYPE_STRING:
890 return -1;
891 case FieldDescriptor::TYPE_BYTES:
892 return -1;
893 case FieldDescriptor::TYPE_GROUP:
894 return -1;
895 case FieldDescriptor::TYPE_MESSAGE:
896 return -1;
897
898 // No default because we want the compiler to complain if any new
899 // types are added.
900 }
901 GOOGLE_LOG(FATAL) << "Can't get here.";
902 return -1;
903 }
904
905 // Sort the fields of the given Descriptor by number into a new[]'d array
906 // and return it. The caller should delete the returned array.
SortFieldsByNumber(const Descriptor * descriptor)907 const FieldDescriptor** SortFieldsByNumber(const Descriptor* descriptor) {
908 const FieldDescriptor** fields =
909 new const FieldDescriptor*[descriptor->field_count()];
910 for (int i = 0; i < descriptor->field_count(); i++) {
911 fields[i] = descriptor->field(i);
912 }
913 std::sort(fields, fields + descriptor->field_count(),
914 FieldOrderingByNumber());
915 return fields;
916 }
917
918 // Returns true if the message type has any required fields. If it doesn't,
919 // we can optimize out calls to its isInitialized() method.
920 //
921 // already_seen is used to avoid checking the same type multiple times
922 // (and also to protect against recursion).
HasRequiredFields(const Descriptor * type,std::unordered_set<const Descriptor * > * already_seen)923 bool HasRequiredFields(const Descriptor* type,
924 std::unordered_set<const Descriptor*>* already_seen) {
925 if (already_seen->count(type) > 0) {
926 // The type is already in cache. This means that either:
927 // a. The type has no required fields.
928 // b. We are in the midst of checking if the type has required fields,
929 // somewhere up the stack. In this case, we know that if the type
930 // has any required fields, they'll be found when we return to it,
931 // and the whole call to HasRequiredFields() will return true.
932 // Therefore, we don't have to check if this type has required fields
933 // here.
934 return false;
935 }
936 already_seen->insert(type);
937
938 // If the type has extensions, an extension with message type could contain
939 // required fields, so we have to be conservative and assume such an
940 // extension exists.
941 if (type->extension_range_count() > 0) return true;
942
943 for (int i = 0; i < type->field_count(); i++) {
944 const FieldDescriptor* field = type->field(i);
945 if (field->is_required()) {
946 return true;
947 }
948 if (GetJavaType(field) == JAVATYPE_MESSAGE) {
949 if (HasRequiredFields(field->message_type(), already_seen)) {
950 return true;
951 }
952 }
953 }
954
955 return false;
956 }
957
HasRequiredFields(const Descriptor * type)958 bool HasRequiredFields(const Descriptor* type) {
959 std::unordered_set<const Descriptor*> already_seen;
960 return HasRequiredFields(type, &already_seen);
961 }
962
HasRepeatedFields(const Descriptor * descriptor)963 bool HasRepeatedFields(const Descriptor* descriptor) {
964 for (int i = 0; i < descriptor->field_count(); ++i) {
965 const FieldDescriptor* field = descriptor->field(i);
966 if (field->is_repeated()) {
967 return true;
968 }
969 }
970 return false;
971 }
972
// Appends an unsigned 32-bit value to *output as a sequence of UTF-16 code
// units.
//
// A value in [0x0000, 0xD7FF] is written as a single code unit with the same
// numeric value.
//
// A larger value is written 13 bits at a time, lowest bits first: each chunk
// is or-ed with 0xE000, giving a code unit in [0xE000, 0xFFFF], and the value
// is shifted right by 13 bits. This repeats until the remaining value falls in
// [0x0000, 0xD7FF], at which point it is written as the final code unit.
//
// Only code units in [0x0000, 0xD7FF] and [0xE000, 0xFFFF] are produced, so
// the output never contains surrogates. [0xD800, 0xDFFF] is avoided because
// those values have to come in pairs and the encoding is more space-efficient
// without them.
void WriteUInt32ToUtf16CharSequence(uint32_t number,
                                    std::vector<uint16_t>* output) {
  const uint32_t kFirstSurrogate = 0xD800;  // First value needing >1 unit.
  const uint32_t kContinuationTag = 0xE000;
  const uint32_t kPayloadMask = 0x1FFF;  // [0xE000, 0xFFFF] carries 13 bits.
  const int kPayloadBits = 13;

  // Emit continuation units while the remainder is too large for a single
  // terminating unit. For small inputs this loop does not run at all.
  uint32_t remaining = number;
  for (; remaining >= kFirstSurrogate; remaining >>= kPayloadBits) {
    output->push_back(
        static_cast<uint16_t>(kContinuationTag | (remaining & kPayloadMask)));
  }
  // The terminating unit is always in [0x0000, 0xD7FF].
  output->push_back(static_cast<uint16_t>(remaining));
}
1006
GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor * field)1007 int GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor* field) {
1008 // j/c/g/protobuf/FieldType.java lists field types in a slightly different
1009 // order from FieldDescriptor::Type so we can't do a simple cast.
1010 //
1011 // TODO(xiaofeng): Make j/c/g/protobuf/FieldType.java follow the same order.
1012 int result = field->type();
1013 if (result == FieldDescriptor::TYPE_GROUP) {
1014 return 17;
1015 } else if (result < FieldDescriptor::TYPE_GROUP) {
1016 return result - 1;
1017 } else {
1018 return result - 2;
1019 }
1020 }
1021
GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor * field)1022 int GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor* field) {
1023 if (field->type() == FieldDescriptor::TYPE_GROUP) {
1024 return 49;
1025 } else {
1026 return GetExperimentalJavaFieldTypeForSingular(field) + 18;
1027 }
1028 }
1029
GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor * field)1030 int GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor* field) {
1031 int result = field->type();
1032 if (result < FieldDescriptor::TYPE_STRING) {
1033 return result + 34;
1034 } else if (result > FieldDescriptor::TYPE_BYTES) {
1035 return result + 30;
1036 } else {
1037 GOOGLE_LOG(FATAL) << field->full_name() << " can't be packed.";
1038 return 0;
1039 }
1040 }
1041
GetExperimentalJavaFieldType(const FieldDescriptor * field)1042 int GetExperimentalJavaFieldType(const FieldDescriptor* field) {
1043 static const int kMapFieldType = 50;
1044 static const int kOneofFieldTypeOffset = 51;
1045 static const int kRequiredBit = 0x100;
1046 static const int kUtf8CheckBit = 0x200;
1047 static const int kCheckInitialized = 0x400;
1048 static const int kMapWithProto2EnumValue = 0x800;
1049 static const int kHasHasBit = 0x1000;
1050 int extra_bits = field->is_required() ? kRequiredBit : 0;
1051 if (field->type() == FieldDescriptor::TYPE_STRING && CheckUtf8(field)) {
1052 extra_bits |= kUtf8CheckBit;
1053 }
1054 if (field->is_required() || (GetJavaType(field) == JAVATYPE_MESSAGE &&
1055 HasRequiredFields(field->message_type()))) {
1056 extra_bits |= kCheckInitialized;
1057 }
1058 if (HasHasbit(field)) {
1059 extra_bits |= kHasHasBit;
1060 }
1061
1062 if (field->is_map()) {
1063 if (!SupportUnknownEnumValue(field)) {
1064 const FieldDescriptor* value = field->message_type()->map_value();
1065 if (GetJavaType(value) == JAVATYPE_ENUM) {
1066 extra_bits |= kMapWithProto2EnumValue;
1067 }
1068 }
1069 return kMapFieldType | extra_bits;
1070 } else if (field->is_packed()) {
1071 return GetExperimentalJavaFieldTypeForPacked(field);
1072 } else if (field->is_repeated()) {
1073 return GetExperimentalJavaFieldTypeForRepeated(field) | extra_bits;
1074 } else if (IsRealOneof(field)) {
1075 return (GetExperimentalJavaFieldTypeForSingular(field) +
1076 kOneofFieldTypeOffset) |
1077 extra_bits;
1078 } else {
1079 return GetExperimentalJavaFieldTypeForSingular(field) | extra_bits;
1080 }
1081 }
1082
1083 // Escape a UTF-16 character to be embedded in a Java string.
EscapeUtf16ToString(uint16_t code,std::string * output)1084 void EscapeUtf16ToString(uint16_t code, std::string* output) {
1085 if (code == '\t') {
1086 output->append("\\t");
1087 } else if (code == '\b') {
1088 output->append("\\b");
1089 } else if (code == '\n') {
1090 output->append("\\n");
1091 } else if (code == '\r') {
1092 output->append("\\r");
1093 } else if (code == '\f') {
1094 output->append("\\f");
1095 } else if (code == '\'') {
1096 output->append("\\'");
1097 } else if (code == '\"') {
1098 output->append("\\\"");
1099 } else if (code == '\\') {
1100 output->append("\\\\");
1101 } else if (code >= 0x20 && code <= 0x7f) {
1102 output->push_back(static_cast<char>(code));
1103 } else {
1104 output->append(StringPrintf("\\u%04x", code));
1105 }
1106 }
1107
1108 } // namespace java
1109 } // namespace compiler
1110 } // namespace protobuf
1111 } // namespace google
1112