1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <algorithm>
36 #include <limits>
37 #include <unordered_set>
38 #include <vector>
39
40 #include <google/protobuf/stubs/stringprintf.h>
41 #include <google/protobuf/compiler/java/java_helpers.h>
42 #include <google/protobuf/compiler/java/java_name_resolver.h>
43 #include <google/protobuf/descriptor.pb.h>
44 #include <google/protobuf/wire_format.h>
45 #include <google/protobuf/stubs/strutil.h>
46 #include <google/protobuf/stubs/substitute.h>
47
48
49
50 #include <google/protobuf/stubs/hash.h> // for hash<T *>
51
52 namespace google {
53 namespace protobuf {
54 namespace compiler {
55 namespace java {
56
57 using internal::WireFormat;
58 using internal::WireFormatLite;
59
// Thick separator: a "//" comment line made of '=' characters, terminated by
// a newline.
const char kThickSeparator[] =
    "// ===================================================================\n";
// Thin separator: a "//" comment line made of '-' characters, terminated by
// a newline.
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
64
65 namespace {
66
// Package prefix that FileJavaPackage() starts from when the file does not
// set the java_package option. It is currently the empty string.
const char* kDefaultPackage = "";

// Names that should be avoided as field names.
// Using them will cause the compiler to generate accessors whose names
// collide with methods defined in base classes. FieldName() appends "#" to
// any field name that appears in this list.
const char* kForbiddenWordList[] = {
    // message base class:
    "cached_size",
    "serialized_size",
    // java.lang.Object:
    "class",
};

// Set of names consulted by UnderscoresToCamelCaseCheckReserved(): a
// camel-cased field name found in this set gets a trailing "_". The set is
// allocated with new.
const std::unordered_set<string>* kReservedNames =
    new std::unordered_set<string>({
        "abstract",   "assert",       "boolean",   "break",      "byte",
        "case",       "catch",        "char",      "class",      "const",
        "continue",   "default",      "do",        "double",     "else",
        "enum",       "extends",      "final",     "finally",    "float",
        "for",        "goto",         "if",        "implements", "import",
        "instanceof", "int",          "interface", "long",       "native",
        "new",        "package",      "private",   "protected",  "public",
        "return",     "short",        "static",    "strictfp",   "super",
        "switch",     "synchronized", "this",      "throw",      "throws",
        "transient",  "try",          "void",      "volatile",   "while",
    });
93
IsForbidden(const std::string & field_name)94 bool IsForbidden(const std::string& field_name) {
95 for (int i = 0; i < GOOGLE_ARRAYSIZE(kForbiddenWordList); ++i) {
96 if (field_name == kForbiddenWordList[i]) {
97 return true;
98 }
99 }
100 return false;
101 }
102
FieldName(const FieldDescriptor * field)103 std::string FieldName(const FieldDescriptor* field) {
104 std::string field_name;
105 // Groups are hacky: The name of the field is just the lower-cased name
106 // of the group type. In Java, though, we would like to retain the original
107 // capitalization of the type name.
108 if (GetType(field) == FieldDescriptor::TYPE_GROUP) {
109 field_name = field->message_type()->name();
110 } else {
111 field_name = field->name();
112 }
113 if (IsForbidden(field_name)) {
114 // Append a trailing "#" to indicate that the name should be decorated to
115 // avoid collision with other names.
116 field_name += "#";
117 }
118 return field_name;
119 }
120
121
122 } // namespace
123
PrintGeneratedAnnotation(io::Printer * printer,char delimiter,const std::string & annotation_file)124 void PrintGeneratedAnnotation(io::Printer* printer, char delimiter,
125 const std::string& annotation_file) {
126 if (annotation_file.empty()) {
127 return;
128 }
129 std::string ptemplate =
130 "@javax.annotation.Generated(value=\"protoc\", comments=\"annotations:";
131 ptemplate.push_back(delimiter);
132 ptemplate.append("annotation_file");
133 ptemplate.push_back(delimiter);
134 ptemplate.append("\")\n");
135 printer->Print(ptemplate.c_str(), "annotation_file", annotation_file);
136 }
137
PrintEnumVerifierLogic(io::Printer * printer,const FieldDescriptor * descriptor,const std::map<std::string,std::string> & variables,const char * var_name,const char * terminating_string,bool enforce_lite)138 void PrintEnumVerifierLogic(io::Printer* printer,
139 const FieldDescriptor* descriptor,
140 const std::map<std::string, std::string>& variables,
141 const char* var_name,
142 const char* terminating_string, bool enforce_lite) {
143 std::string enum_verifier_string =
144 enforce_lite ? StrCat(var_name, ".internalGetVerifier()")
145 : StrCat(
146 "new com.google.protobuf.Internal.EnumVerifier() {\n"
147 " @java.lang.Override\n"
148 " public boolean isInRange(int number) {\n"
149 " return ",
150 var_name,
151 ".forNumber(number) != null;\n"
152 " }\n"
153 " }");
154 printer->Print(
155 variables,
156 StrCat(enum_verifier_string, terminating_string).c_str());
157 }
158
UnderscoresToCamelCase(const std::string & input,bool cap_next_letter)159 std::string UnderscoresToCamelCase(const std::string& input,
160 bool cap_next_letter) {
161 GOOGLE_CHECK(!input.empty());
162 std::string result;
163 // Note: I distrust ctype.h due to locales.
164 for (int i = 0; i < input.size(); i++) {
165 if ('a' <= input[i] && input[i] <= 'z') {
166 if (cap_next_letter) {
167 result += input[i] + ('A' - 'a');
168 } else {
169 result += input[i];
170 }
171 cap_next_letter = false;
172 } else if ('A' <= input[i] && input[i] <= 'Z') {
173 if (i == 0 && !cap_next_letter) {
174 // Force first letter to lower-case unless explicitly told to
175 // capitalize it.
176 result += input[i] + ('a' - 'A');
177 } else {
178 // Capital letters after the first are left as-is.
179 result += input[i];
180 }
181 cap_next_letter = false;
182 } else if ('0' <= input[i] && input[i] <= '9') {
183 result += input[i];
184 cap_next_letter = true;
185 } else {
186 cap_next_letter = true;
187 }
188 }
189 // Add a trailing "_" if the name should be altered.
190 if (input[input.size() - 1] == '#') {
191 result += '_';
192 }
193 return result;
194 }
195
UnderscoresToCamelCase(const FieldDescriptor * field)196 std::string UnderscoresToCamelCase(const FieldDescriptor* field) {
197 return UnderscoresToCamelCase(FieldName(field), false);
198 }
199
UnderscoresToCapitalizedCamelCase(const FieldDescriptor * field)200 std::string UnderscoresToCapitalizedCamelCase(const FieldDescriptor* field) {
201 return UnderscoresToCamelCase(FieldName(field), true);
202 }
203
CapitalizedFieldName(const FieldDescriptor * field)204 std::string CapitalizedFieldName(const FieldDescriptor* field) {
205 return UnderscoresToCapitalizedCamelCase(field);
206 }
207
UnderscoresToCamelCase(const MethodDescriptor * method)208 std::string UnderscoresToCamelCase(const MethodDescriptor* method) {
209 return UnderscoresToCamelCase(method->name(), false);
210 }
211
UnderscoresToCamelCaseCheckReserved(const FieldDescriptor * field)212 std::string UnderscoresToCamelCaseCheckReserved(const FieldDescriptor* field) {
213 std::string name = UnderscoresToCamelCase(field);
214 if (kReservedNames->find(name) != kReservedNames->end()) {
215 return name + "_";
216 }
217 return name;
218 }
219
UniqueFileScopeIdentifier(const Descriptor * descriptor)220 std::string UniqueFileScopeIdentifier(const Descriptor* descriptor) {
221 return "static_" + StringReplace(descriptor->full_name(), ".", "_", true);
222 }
223
CamelCaseFieldName(const FieldDescriptor * field)224 std::string CamelCaseFieldName(const FieldDescriptor* field) {
225 std::string fieldName = UnderscoresToCamelCase(field);
226 if ('0' <= fieldName[0] && fieldName[0] <= '9') {
227 return '_' + fieldName;
228 }
229 return fieldName;
230 }
231
StripProto(const std::string & filename)232 std::string StripProto(const std::string& filename) {
233 if (HasSuffixString(filename, ".protodevel")) {
234 return StripSuffixString(filename, ".protodevel");
235 } else {
236 return StripSuffixString(filename, ".proto");
237 }
238 }
239
FileClassName(const FileDescriptor * file,bool immutable)240 std::string FileClassName(const FileDescriptor* file, bool immutable) {
241 ClassNameResolver name_resolver;
242 return name_resolver.GetFileClassName(file, immutable);
243 }
244
FileJavaPackage(const FileDescriptor * file,bool immutable)245 std::string FileJavaPackage(const FileDescriptor* file, bool immutable) {
246 std::string result;
247
248 if (file->options().has_java_package()) {
249 result = file->options().java_package();
250 } else {
251 result = kDefaultPackage;
252 if (!file->package().empty()) {
253 if (!result.empty()) result += '.';
254 result += file->package();
255 }
256 }
257
258 return result;
259 }
260
FileJavaPackage(const FileDescriptor * file)261 std::string FileJavaPackage(const FileDescriptor* file) {
262 return FileJavaPackage(file, true /* immutable */);
263 }
264
JavaPackageToDir(std::string package_name)265 std::string JavaPackageToDir(std::string package_name) {
266 std::string package_dir = StringReplace(package_name, ".", "/", true);
267 if (!package_dir.empty()) package_dir += "/";
268 return package_dir;
269 }
270
ClassName(const Descriptor * descriptor)271 std::string ClassName(const Descriptor* descriptor) {
272 ClassNameResolver name_resolver;
273 return name_resolver.GetClassName(descriptor, true);
274 }
275
ClassName(const EnumDescriptor * descriptor)276 std::string ClassName(const EnumDescriptor* descriptor) {
277 ClassNameResolver name_resolver;
278 return name_resolver.GetClassName(descriptor, true);
279 }
280
ClassName(const ServiceDescriptor * descriptor)281 std::string ClassName(const ServiceDescriptor* descriptor) {
282 ClassNameResolver name_resolver;
283 return name_resolver.GetClassName(descriptor, true);
284 }
285
ClassName(const FileDescriptor * descriptor)286 std::string ClassName(const FileDescriptor* descriptor) {
287 ClassNameResolver name_resolver;
288 return name_resolver.GetClassName(descriptor, true);
289 }
290
291
ExtraMessageInterfaces(const Descriptor * descriptor)292 std::string ExtraMessageInterfaces(const Descriptor* descriptor) {
293 std::string interfaces = "// @@protoc_insertion_point(message_implements:" +
294 descriptor->full_name() + ")";
295 return interfaces;
296 }
297
298
ExtraBuilderInterfaces(const Descriptor * descriptor)299 std::string ExtraBuilderInterfaces(const Descriptor* descriptor) {
300 std::string interfaces = "// @@protoc_insertion_point(builder_implements:" +
301 descriptor->full_name() + ")";
302 return interfaces;
303 }
304
ExtraMessageOrBuilderInterfaces(const Descriptor * descriptor)305 std::string ExtraMessageOrBuilderInterfaces(const Descriptor* descriptor) {
306 std::string interfaces = "// @@protoc_insertion_point(interface_extends:" +
307 descriptor->full_name() + ")";
308 return interfaces;
309 }
310
FieldConstantName(const FieldDescriptor * field)311 std::string FieldConstantName(const FieldDescriptor* field) {
312 std::string name = field->name() + "_FIELD_NUMBER";
313 UpperString(&name);
314 return name;
315 }
316
// Returns the field's declared FieldDescriptor::Type.
FieldDescriptor::Type GetType(const FieldDescriptor* field) {
  const FieldDescriptor::Type declared_type = field->type();
  return declared_type;
}
320
GetJavaType(const FieldDescriptor * field)321 JavaType GetJavaType(const FieldDescriptor* field) {
322 switch (GetType(field)) {
323 case FieldDescriptor::TYPE_INT32:
324 case FieldDescriptor::TYPE_UINT32:
325 case FieldDescriptor::TYPE_SINT32:
326 case FieldDescriptor::TYPE_FIXED32:
327 case FieldDescriptor::TYPE_SFIXED32:
328 return JAVATYPE_INT;
329
330 case FieldDescriptor::TYPE_INT64:
331 case FieldDescriptor::TYPE_UINT64:
332 case FieldDescriptor::TYPE_SINT64:
333 case FieldDescriptor::TYPE_FIXED64:
334 case FieldDescriptor::TYPE_SFIXED64:
335 return JAVATYPE_LONG;
336
337 case FieldDescriptor::TYPE_FLOAT:
338 return JAVATYPE_FLOAT;
339
340 case FieldDescriptor::TYPE_DOUBLE:
341 return JAVATYPE_DOUBLE;
342
343 case FieldDescriptor::TYPE_BOOL:
344 return JAVATYPE_BOOLEAN;
345
346 case FieldDescriptor::TYPE_STRING:
347 return JAVATYPE_STRING;
348
349 case FieldDescriptor::TYPE_BYTES:
350 return JAVATYPE_BYTES;
351
352 case FieldDescriptor::TYPE_ENUM:
353 return JAVATYPE_ENUM;
354
355 case FieldDescriptor::TYPE_GROUP:
356 case FieldDescriptor::TYPE_MESSAGE:
357 return JAVATYPE_MESSAGE;
358
359 // No default because we want the compiler to complain if any new
360 // types are added.
361 }
362
363 GOOGLE_LOG(FATAL) << "Can't get here.";
364 return JAVATYPE_INT;
365 }
366
PrimitiveTypeName(JavaType type)367 const char* PrimitiveTypeName(JavaType type) {
368 switch (type) {
369 case JAVATYPE_INT:
370 return "int";
371 case JAVATYPE_LONG:
372 return "long";
373 case JAVATYPE_FLOAT:
374 return "float";
375 case JAVATYPE_DOUBLE:
376 return "double";
377 case JAVATYPE_BOOLEAN:
378 return "boolean";
379 case JAVATYPE_STRING:
380 return "java.lang.String";
381 case JAVATYPE_BYTES:
382 return "com.google.protobuf.ByteString";
383 case JAVATYPE_ENUM:
384 return NULL;
385 case JAVATYPE_MESSAGE:
386 return NULL;
387
388 // No default because we want the compiler to complain if any new
389 // JavaTypes are added.
390 }
391
392 GOOGLE_LOG(FATAL) << "Can't get here.";
393 return NULL;
394 }
395
PrimitiveTypeName(const FieldDescriptor * descriptor)396 const char* PrimitiveTypeName(const FieldDescriptor* descriptor) {
397 return PrimitiveTypeName(GetJavaType(descriptor));
398 }
399
BoxedPrimitiveTypeName(JavaType type)400 const char* BoxedPrimitiveTypeName(JavaType type) {
401 switch (type) {
402 case JAVATYPE_INT:
403 return "java.lang.Integer";
404 case JAVATYPE_LONG:
405 return "java.lang.Long";
406 case JAVATYPE_FLOAT:
407 return "java.lang.Float";
408 case JAVATYPE_DOUBLE:
409 return "java.lang.Double";
410 case JAVATYPE_BOOLEAN:
411 return "java.lang.Boolean";
412 case JAVATYPE_STRING:
413 return "java.lang.String";
414 case JAVATYPE_BYTES:
415 return "com.google.protobuf.ByteString";
416 case JAVATYPE_ENUM:
417 return NULL;
418 case JAVATYPE_MESSAGE:
419 return NULL;
420
421 // No default because we want the compiler to complain if any new
422 // JavaTypes are added.
423 }
424
425 GOOGLE_LOG(FATAL) << "Can't get here.";
426 return NULL;
427 }
428
BoxedPrimitiveTypeName(const FieldDescriptor * descriptor)429 const char* BoxedPrimitiveTypeName(const FieldDescriptor* descriptor) {
430 return BoxedPrimitiveTypeName(GetJavaType(descriptor));
431 }
432
GetOneofStoredType(const FieldDescriptor * field)433 std::string GetOneofStoredType(const FieldDescriptor* field) {
434 const JavaType javaType = GetJavaType(field);
435 switch (javaType) {
436 case JAVATYPE_ENUM:
437 return "java.lang.Integer";
438 case JAVATYPE_MESSAGE:
439 return ClassName(field->message_type());
440 default:
441 return BoxedPrimitiveTypeName(javaType);
442 }
443 }
444
FieldTypeName(FieldDescriptor::Type field_type)445 const char* FieldTypeName(FieldDescriptor::Type field_type) {
446 switch (field_type) {
447 case FieldDescriptor::TYPE_INT32:
448 return "INT32";
449 case FieldDescriptor::TYPE_UINT32:
450 return "UINT32";
451 case FieldDescriptor::TYPE_SINT32:
452 return "SINT32";
453 case FieldDescriptor::TYPE_FIXED32:
454 return "FIXED32";
455 case FieldDescriptor::TYPE_SFIXED32:
456 return "SFIXED32";
457 case FieldDescriptor::TYPE_INT64:
458 return "INT64";
459 case FieldDescriptor::TYPE_UINT64:
460 return "UINT64";
461 case FieldDescriptor::TYPE_SINT64:
462 return "SINT64";
463 case FieldDescriptor::TYPE_FIXED64:
464 return "FIXED64";
465 case FieldDescriptor::TYPE_SFIXED64:
466 return "SFIXED64";
467 case FieldDescriptor::TYPE_FLOAT:
468 return "FLOAT";
469 case FieldDescriptor::TYPE_DOUBLE:
470 return "DOUBLE";
471 case FieldDescriptor::TYPE_BOOL:
472 return "BOOL";
473 case FieldDescriptor::TYPE_STRING:
474 return "STRING";
475 case FieldDescriptor::TYPE_BYTES:
476 return "BYTES";
477 case FieldDescriptor::TYPE_ENUM:
478 return "ENUM";
479 case FieldDescriptor::TYPE_GROUP:
480 return "GROUP";
481 case FieldDescriptor::TYPE_MESSAGE:
482 return "MESSAGE";
483
484 // No default because we want the compiler to complain if any new
485 // types are added.
486 }
487
488 GOOGLE_LOG(FATAL) << "Can't get here.";
489 return NULL;
490 }
491
// Returns true when every byte of text has its high bit (0x80) clear, i.e.
// the string is pure 7-bit ASCII. An empty string is considered all-ASCII.
//
// Iterates by value instead of with an int index so that there is no
// signed/unsigned comparison against text.size() and no index overflow on
// very long strings.
bool AllAscii(const std::string& text) {
  for (const char c : text) {
    if ((static_cast<unsigned char>(c) & 0x80) != 0) {
      return false;
    }
  }
  return true;
}
500
DefaultValue(const FieldDescriptor * field,bool immutable,ClassNameResolver * name_resolver)501 std::string DefaultValue(const FieldDescriptor* field, bool immutable,
502 ClassNameResolver* name_resolver) {
503 // Switch on CppType since we need to know which default_value_* method
504 // of FieldDescriptor to call.
505 switch (field->cpp_type()) {
506 case FieldDescriptor::CPPTYPE_INT32:
507 return StrCat(field->default_value_int32());
508 case FieldDescriptor::CPPTYPE_UINT32:
509 // Need to print as a signed int since Java has no unsigned.
510 return StrCat(static_cast<int32>(field->default_value_uint32()));
511 case FieldDescriptor::CPPTYPE_INT64:
512 return StrCat(field->default_value_int64()) + "L";
513 case FieldDescriptor::CPPTYPE_UINT64:
514 return StrCat(static_cast<int64>(field->default_value_uint64())) +
515 "L";
516 case FieldDescriptor::CPPTYPE_DOUBLE: {
517 double value = field->default_value_double();
518 if (value == std::numeric_limits<double>::infinity()) {
519 return "Double.POSITIVE_INFINITY";
520 } else if (value == -std::numeric_limits<double>::infinity()) {
521 return "Double.NEGATIVE_INFINITY";
522 } else if (value != value) {
523 return "Double.NaN";
524 } else {
525 return SimpleDtoa(value) + "D";
526 }
527 }
528 case FieldDescriptor::CPPTYPE_FLOAT: {
529 float value = field->default_value_float();
530 if (value == std::numeric_limits<float>::infinity()) {
531 return "Float.POSITIVE_INFINITY";
532 } else if (value == -std::numeric_limits<float>::infinity()) {
533 return "Float.NEGATIVE_INFINITY";
534 } else if (value != value) {
535 return "Float.NaN";
536 } else {
537 return SimpleFtoa(value) + "F";
538 }
539 }
540 case FieldDescriptor::CPPTYPE_BOOL:
541 return field->default_value_bool() ? "true" : "false";
542 case FieldDescriptor::CPPTYPE_STRING:
543 if (GetType(field) == FieldDescriptor::TYPE_BYTES) {
544 if (field->has_default_value()) {
545 // See comments in Internal.java for gory details.
546 return strings::Substitute(
547 "com.google.protobuf.Internal.bytesDefaultValue(\"$0\")",
548 CEscape(field->default_value_string()));
549 } else {
550 return "com.google.protobuf.ByteString.EMPTY";
551 }
552 } else {
553 if (AllAscii(field->default_value_string())) {
554 // All chars are ASCII. In this case CEscape() works fine.
555 return "\"" + CEscape(field->default_value_string()) + "\"";
556 } else {
557 // See comments in Internal.java for gory details.
558 return strings::Substitute(
559 "com.google.protobuf.Internal.stringDefaultValue(\"$0\")",
560 CEscape(field->default_value_string()));
561 }
562 }
563
564 case FieldDescriptor::CPPTYPE_ENUM:
565 return name_resolver->GetClassName(field->enum_type(), immutable) + "." +
566 field->default_value_enum()->name();
567
568 case FieldDescriptor::CPPTYPE_MESSAGE:
569 return name_resolver->GetClassName(field->message_type(), immutable) +
570 ".getDefaultInstance()";
571
572 // No default because we want the compiler to complain if any new
573 // types are added.
574 }
575
576 GOOGLE_LOG(FATAL) << "Can't get here.";
577 return "";
578 }
579
IsDefaultValueJavaDefault(const FieldDescriptor * field)580 bool IsDefaultValueJavaDefault(const FieldDescriptor* field) {
581 // Switch on CppType since we need to know which default_value_* method
582 // of FieldDescriptor to call.
583 switch (field->cpp_type()) {
584 case FieldDescriptor::CPPTYPE_INT32:
585 return field->default_value_int32() == 0;
586 case FieldDescriptor::CPPTYPE_UINT32:
587 return field->default_value_uint32() == 0;
588 case FieldDescriptor::CPPTYPE_INT64:
589 return field->default_value_int64() == 0L;
590 case FieldDescriptor::CPPTYPE_UINT64:
591 return field->default_value_uint64() == 0L;
592 case FieldDescriptor::CPPTYPE_DOUBLE:
593 return field->default_value_double() == 0.0;
594 case FieldDescriptor::CPPTYPE_FLOAT:
595 return field->default_value_float() == 0.0;
596 case FieldDescriptor::CPPTYPE_BOOL:
597 return field->default_value_bool() == false;
598 case FieldDescriptor::CPPTYPE_ENUM:
599 return field->default_value_enum()->number() == 0;
600 case FieldDescriptor::CPPTYPE_STRING:
601 case FieldDescriptor::CPPTYPE_MESSAGE:
602 return false;
603
604 // No default because we want the compiler to complain if any new
605 // types are added.
606 }
607
608 GOOGLE_LOG(FATAL) << "Can't get here.";
609 return false;
610 }
611
IsByteStringWithCustomDefaultValue(const FieldDescriptor * field)612 bool IsByteStringWithCustomDefaultValue(const FieldDescriptor* field) {
613 return GetJavaType(field) == JAVATYPE_BYTES &&
614 field->default_value_string() != "";
615 }
616
// Hex literals for the 32 single-bit masks of a 32-bit word: bit_masks[i] is
// the text of the value with only bit i set. The Generate*Bit helpers index
// this table with (bitIndex % 32).
const char* bit_masks[] = {
    "0x00000001", "0x00000002", "0x00000004", "0x00000008",
    "0x00000010", "0x00000020", "0x00000040", "0x00000080",

    "0x00000100", "0x00000200", "0x00000400", "0x00000800",
    "0x00001000", "0x00002000", "0x00004000", "0x00008000",

    "0x00010000", "0x00020000", "0x00040000", "0x00080000",
    "0x00100000", "0x00200000", "0x00400000", "0x00800000",

    "0x01000000", "0x02000000", "0x04000000", "0x08000000",
    "0x10000000", "0x20000000", "0x40000000", "0x80000000",
};
630
GetBitFieldName(int index)631 std::string GetBitFieldName(int index) {
632 std::string varName = "bitField";
633 varName += StrCat(index);
634 varName += "_";
635 return varName;
636 }
637
GetBitFieldNameForBit(int bitIndex)638 std::string GetBitFieldNameForBit(int bitIndex) {
639 return GetBitFieldName(bitIndex / 32);
640 }
641
642 namespace {
643
GenerateGetBitInternal(const std::string & prefix,int bitIndex)644 std::string GenerateGetBitInternal(const std::string& prefix, int bitIndex) {
645 std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
646 int bitInVarIndex = bitIndex % 32;
647
648 std::string mask = bit_masks[bitInVarIndex];
649 std::string result = "((" + varName + " & " + mask + ") != 0)";
650 return result;
651 }
652
GenerateSetBitInternal(const std::string & prefix,int bitIndex)653 std::string GenerateSetBitInternal(const std::string& prefix, int bitIndex) {
654 std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
655 int bitInVarIndex = bitIndex % 32;
656
657 std::string mask = bit_masks[bitInVarIndex];
658 std::string result = varName + " |= " + mask;
659 return result;
660 }
661
662 } // namespace
663
GenerateGetBit(int bitIndex)664 std::string GenerateGetBit(int bitIndex) {
665 return GenerateGetBitInternal("", bitIndex);
666 }
667
GenerateSetBit(int bitIndex)668 std::string GenerateSetBit(int bitIndex) {
669 return GenerateSetBitInternal("", bitIndex);
670 }
671
GenerateClearBit(int bitIndex)672 std::string GenerateClearBit(int bitIndex) {
673 std::string varName = GetBitFieldNameForBit(bitIndex);
674 int bitInVarIndex = bitIndex % 32;
675
676 std::string mask = bit_masks[bitInVarIndex];
677 std::string result = varName + " = (" + varName + " & ~" + mask + ")";
678 return result;
679 }
680
GenerateGetBitFromLocal(int bitIndex)681 std::string GenerateGetBitFromLocal(int bitIndex) {
682 return GenerateGetBitInternal("from_", bitIndex);
683 }
684
GenerateSetBitToLocal(int bitIndex)685 std::string GenerateSetBitToLocal(int bitIndex) {
686 return GenerateSetBitInternal("to_", bitIndex);
687 }
688
GenerateGetBitMutableLocal(int bitIndex)689 std::string GenerateGetBitMutableLocal(int bitIndex) {
690 return GenerateGetBitInternal("mutable_", bitIndex);
691 }
692
GenerateSetBitMutableLocal(int bitIndex)693 std::string GenerateSetBitMutableLocal(int bitIndex) {
694 return GenerateSetBitInternal("mutable_", bitIndex);
695 }
696
IsReferenceType(JavaType type)697 bool IsReferenceType(JavaType type) {
698 switch (type) {
699 case JAVATYPE_INT:
700 return false;
701 case JAVATYPE_LONG:
702 return false;
703 case JAVATYPE_FLOAT:
704 return false;
705 case JAVATYPE_DOUBLE:
706 return false;
707 case JAVATYPE_BOOLEAN:
708 return false;
709 case JAVATYPE_STRING:
710 return true;
711 case JAVATYPE_BYTES:
712 return true;
713 case JAVATYPE_ENUM:
714 return true;
715 case JAVATYPE_MESSAGE:
716 return true;
717
718 // No default because we want the compiler to complain if any new
719 // JavaTypes are added.
720 }
721
722 GOOGLE_LOG(FATAL) << "Can't get here.";
723 return false;
724 }
725
GetCapitalizedType(const FieldDescriptor * field,bool immutable)726 const char* GetCapitalizedType(const FieldDescriptor* field, bool immutable) {
727 switch (GetType(field)) {
728 case FieldDescriptor::TYPE_INT32:
729 return "Int32";
730 case FieldDescriptor::TYPE_UINT32:
731 return "UInt32";
732 case FieldDescriptor::TYPE_SINT32:
733 return "SInt32";
734 case FieldDescriptor::TYPE_FIXED32:
735 return "Fixed32";
736 case FieldDescriptor::TYPE_SFIXED32:
737 return "SFixed32";
738 case FieldDescriptor::TYPE_INT64:
739 return "Int64";
740 case FieldDescriptor::TYPE_UINT64:
741 return "UInt64";
742 case FieldDescriptor::TYPE_SINT64:
743 return "SInt64";
744 case FieldDescriptor::TYPE_FIXED64:
745 return "Fixed64";
746 case FieldDescriptor::TYPE_SFIXED64:
747 return "SFixed64";
748 case FieldDescriptor::TYPE_FLOAT:
749 return "Float";
750 case FieldDescriptor::TYPE_DOUBLE:
751 return "Double";
752 case FieldDescriptor::TYPE_BOOL:
753 return "Bool";
754 case FieldDescriptor::TYPE_STRING:
755 return "String";
756 case FieldDescriptor::TYPE_BYTES: {
757 return "Bytes";
758 }
759 case FieldDescriptor::TYPE_ENUM:
760 return "Enum";
761 case FieldDescriptor::TYPE_GROUP:
762 return "Group";
763 case FieldDescriptor::TYPE_MESSAGE:
764 return "Message";
765
766 // No default because we want the compiler to complain if any new
767 // types are added.
768 }
769
770 GOOGLE_LOG(FATAL) << "Can't get here.";
771 return NULL;
772 }
773
774 // For encodings with fixed sizes, returns that size in bytes. Otherwise
775 // returns -1.
FixedSize(FieldDescriptor::Type type)776 int FixedSize(FieldDescriptor::Type type) {
777 switch (type) {
778 case FieldDescriptor::TYPE_INT32:
779 return -1;
780 case FieldDescriptor::TYPE_INT64:
781 return -1;
782 case FieldDescriptor::TYPE_UINT32:
783 return -1;
784 case FieldDescriptor::TYPE_UINT64:
785 return -1;
786 case FieldDescriptor::TYPE_SINT32:
787 return -1;
788 case FieldDescriptor::TYPE_SINT64:
789 return -1;
790 case FieldDescriptor::TYPE_FIXED32:
791 return WireFormatLite::kFixed32Size;
792 case FieldDescriptor::TYPE_FIXED64:
793 return WireFormatLite::kFixed64Size;
794 case FieldDescriptor::TYPE_SFIXED32:
795 return WireFormatLite::kSFixed32Size;
796 case FieldDescriptor::TYPE_SFIXED64:
797 return WireFormatLite::kSFixed64Size;
798 case FieldDescriptor::TYPE_FLOAT:
799 return WireFormatLite::kFloatSize;
800 case FieldDescriptor::TYPE_DOUBLE:
801 return WireFormatLite::kDoubleSize;
802
803 case FieldDescriptor::TYPE_BOOL:
804 return WireFormatLite::kBoolSize;
805 case FieldDescriptor::TYPE_ENUM:
806 return -1;
807
808 case FieldDescriptor::TYPE_STRING:
809 return -1;
810 case FieldDescriptor::TYPE_BYTES:
811 return -1;
812 case FieldDescriptor::TYPE_GROUP:
813 return -1;
814 case FieldDescriptor::TYPE_MESSAGE:
815 return -1;
816
817 // No default because we want the compiler to complain if any new
818 // types are added.
819 }
820 GOOGLE_LOG(FATAL) << "Can't get here.";
821 return -1;
822 }
823
824 // Sort the fields of the given Descriptor by number into a new[]'d array
825 // and return it. The caller should delete the returned array.
SortFieldsByNumber(const Descriptor * descriptor)826 const FieldDescriptor** SortFieldsByNumber(const Descriptor* descriptor) {
827 const FieldDescriptor** fields =
828 new const FieldDescriptor*[descriptor->field_count()];
829 for (int i = 0; i < descriptor->field_count(); i++) {
830 fields[i] = descriptor->field(i);
831 }
832 std::sort(fields, fields + descriptor->field_count(),
833 FieldOrderingByNumber());
834 return fields;
835 }
836
837 // Returns true if the message type has any required fields. If it doesn't,
838 // we can optimize out calls to its isInitialized() method.
839 //
840 // already_seen is used to avoid checking the same type multiple times
841 // (and also to protect against recursion).
HasRequiredFields(const Descriptor * type,std::unordered_set<const Descriptor * > * already_seen)842 bool HasRequiredFields(const Descriptor* type,
843 std::unordered_set<const Descriptor*>* already_seen) {
844 if (already_seen->count(type) > 0) {
845 // The type is already in cache. This means that either:
846 // a. The type has no required fields.
847 // b. We are in the midst of checking if the type has required fields,
848 // somewhere up the stack. In this case, we know that if the type
849 // has any required fields, they'll be found when we return to it,
850 // and the whole call to HasRequiredFields() will return true.
851 // Therefore, we don't have to check if this type has required fields
852 // here.
853 return false;
854 }
855 already_seen->insert(type);
856
857 // If the type has extensions, an extension with message type could contain
858 // required fields, so we have to be conservative and assume such an
859 // extension exists.
860 if (type->extension_range_count() > 0) return true;
861
862 for (int i = 0; i < type->field_count(); i++) {
863 const FieldDescriptor* field = type->field(i);
864 if (field->is_required()) {
865 return true;
866 }
867 if (GetJavaType(field) == JAVATYPE_MESSAGE) {
868 if (HasRequiredFields(field->message_type(), already_seen)) {
869 return true;
870 }
871 }
872 }
873
874 return false;
875 }
876
HasRequiredFields(const Descriptor * type)877 bool HasRequiredFields(const Descriptor* type) {
878 std::unordered_set<const Descriptor*> already_seen;
879 return HasRequiredFields(type, &already_seen);
880 }
881
HasRepeatedFields(const Descriptor * descriptor)882 bool HasRepeatedFields(const Descriptor* descriptor) {
883 for (int i = 0; i < descriptor->field_count(); ++i) {
884 const FieldDescriptor* field = descriptor->field(i);
885 if (field->is_repeated()) {
886 return true;
887 }
888 }
889 return false;
890 }
891
892 // Encode an unsigned 32-bit value into a sequence of UTF-16 characters.
893 //
894 // If the value is in [0x0000, 0xD7FF], we encode it with a single character
895 // with the same numeric value.
896 //
897 // If the value is larger than 0xD7FF, we encode its lowest 13 bits into a
898 // character in the range [0xE000, 0xFFFF] by combining these 13 bits with
899 // 0xE000 using logic-or. Then we shift the value to the right by 13 bits, and
900 // encode the remaining value by repeating this same process until we get to
901 // a value in [0x0000, 0xD7FF] where we will encode it using a character with
902 // the same numeric value.
903 //
904 // Note that we only use code points in [0x0000, 0xD7FF] and [0xE000, 0xFFFF].
905 // There will be no surrogate pairs in the encoded character sequence.
WriteUInt32ToUtf16CharSequence(uint32 number,std::vector<uint16> * output)906 void WriteUInt32ToUtf16CharSequence(uint32 number,
907 std::vector<uint16>* output) {
908 // For values in [0x0000, 0xD7FF], only use one char to encode it.
909 if (number < 0xD800) {
910 output->push_back(static_cast<uint16>(number));
911 return;
912 }
913 // Encode into multiple chars. All except the last char will be in the range
914 // [0xE000, 0xFFFF], and the last char will be in the range [0x0000, 0xD7FF].
915 // Note that we don't use any value in range [0xD800, 0xDFFF] because they
916 // have to come in pairs and the encoding is just more space-efficient w/o
917 // them.
918 while (number >= 0xD800) {
919 // [0xE000, 0xFFFF] can represent 13 bits of info.
920 output->push_back(static_cast<uint16>(0xE000 | (number & 0x1FFF)));
921 number >>= 13;
922 }
923 output->push_back(static_cast<uint16>(number));
924 }
925
GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor * field)926 int GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor* field) {
927 // j/c/g/protobuf/FieldType.java lists field types in a slightly different
928 // order from FieldDescriptor::Type so we can't do a simple cast.
929 //
930 // TODO(xiaofeng): Make j/c/g/protobuf/FieldType.java follow the same order.
931 int result = field->type();
932 if (result == FieldDescriptor::TYPE_GROUP) {
933 return 17;
934 } else if (result < FieldDescriptor::TYPE_GROUP) {
935 return result - 1;
936 } else {
937 return result - 2;
938 }
939 }
940
GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor * field)941 int GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor* field) {
942 if (field->type() == FieldDescriptor::TYPE_GROUP) {
943 return 49;
944 } else {
945 return GetExperimentalJavaFieldTypeForSingular(field) + 18;
946 }
947 }
948
GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor * field)949 int GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor* field) {
950 int result = field->type();
951 if (result < FieldDescriptor::TYPE_STRING) {
952 return result + 34;
953 } else if (result > FieldDescriptor::TYPE_BYTES) {
954 return result + 30;
955 } else {
956 GOOGLE_LOG(FATAL) << field->full_name() << " can't be packed.";
957 return 0;
958 }
959 }
960
GetExperimentalJavaFieldType(const FieldDescriptor * field)961 int GetExperimentalJavaFieldType(const FieldDescriptor* field) {
962 static const int kMapFieldType = 50;
963 static const int kOneofFieldTypeOffset = 51;
964 static const int kRequiredBit = 0x100;
965 static const int kUtf8CheckBit = 0x200;
966 static const int kCheckInitialized = 0x400;
967 static const int kMapWithProto2EnumValue = 0x800;
968 int extra_bits = field->is_required() ? kRequiredBit : 0;
969 if (field->type() == FieldDescriptor::TYPE_STRING && CheckUtf8(field)) {
970 extra_bits |= kUtf8CheckBit;
971 }
972 if (field->is_required() || (GetJavaType(field) == JAVATYPE_MESSAGE &&
973 HasRequiredFields(field->message_type()))) {
974 extra_bits |= kCheckInitialized;
975 }
976
977 if (field->is_map()) {
978 if (SupportFieldPresence(field->file())) {
979 const FieldDescriptor* value =
980 field->message_type()->FindFieldByName("value");
981 if (GetJavaType(value) == JAVATYPE_ENUM) {
982 extra_bits |= kMapWithProto2EnumValue;
983 }
984 }
985 return kMapFieldType | extra_bits;
986 } else if (field->is_packed()) {
987 return GetExperimentalJavaFieldTypeForPacked(field);
988 } else if (field->is_repeated()) {
989 return GetExperimentalJavaFieldTypeForRepeated(field) | extra_bits;
990 } else if (field->containing_oneof() != NULL) {
991 return (GetExperimentalJavaFieldTypeForSingular(field) +
992 kOneofFieldTypeOffset) |
993 extra_bits;
994 } else {
995 return GetExperimentalJavaFieldTypeForSingular(field) | extra_bits;
996 }
997 }
998
999 // Escape a UTF-16 character to be embedded in a Java string.
EscapeUtf16ToString(uint16 code,std::string * output)1000 void EscapeUtf16ToString(uint16 code, std::string* output) {
1001 if (code == '\t') {
1002 output->append("\\t");
1003 } else if (code == '\b') {
1004 output->append("\\b");
1005 } else if (code == '\n') {
1006 output->append("\\n");
1007 } else if (code == '\r') {
1008 output->append("\\r");
1009 } else if (code == '\f') {
1010 output->append("\\f");
1011 } else if (code == '\'') {
1012 output->append("\\'");
1013 } else if (code == '\"') {
1014 output->append("\\\"");
1015 } else if (code == '\\') {
1016 output->append("\\\\");
1017 } else if (code >= 0x20 && code <= 0x7f) {
1018 output->push_back(static_cast<char>(code));
1019 } else {
1020 output->append(StringPrintf("\\u%04x", code));
1021 }
1022 }
1023
1024 } // namespace java
1025 } // namespace compiler
1026 } // namespace protobuf
1027 } // namespace google
1028