• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Protocol Buffers - Google's data interchange format
2  // Copyright 2008 Google Inc.  All rights reserved.
3  // https://developers.google.com/protocol-buffers/
4  //
5  // Redistribution and use in source and binary forms, with or without
6  // modification, are permitted provided that the following conditions are
7  // met:
8  //
9  //     * Redistributions of source code must retain the above copyright
10  // notice, this list of conditions and the following disclaimer.
11  //     * Redistributions in binary form must reproduce the above
12  // copyright notice, this list of conditions and the following disclaimer
13  // in the documentation and/or other materials provided with the
14  // distribution.
15  //     * Neither the name of Google Inc. nor the names of its
16  // contributors may be used to endorse or promote products derived from
17  // this software without specific prior written permission.
18  //
19  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  
31  // Author: kenton@google.com (Kenton Varda)
32  //  Based on original Protocol Buffers design by
33  //  Sanjay Ghemawat, Jeff Dean, and others.
34  
35  #include <algorithm>
36  #include <limits>
37  #include <unordered_set>
38  #include <vector>
39  
40  #include <google/protobuf/stubs/stringprintf.h>
41  #include <google/protobuf/compiler/java/java_helpers.h>
42  #include <google/protobuf/compiler/java/java_name_resolver.h>
43  #include <google/protobuf/descriptor.pb.h>
44  #include <google/protobuf/wire_format.h>
45  #include <google/protobuf/stubs/strutil.h>
46  #include <google/protobuf/stubs/substitute.h>
47  
48  
49  
50  #include <google/protobuf/stubs/hash.h>  // for hash<T *>
51  
52  namespace google {
53  namespace protobuf {
54  namespace compiler {
55  namespace java {
56  
57  using internal::WireFormat;
58  using internal::WireFormatLite;
59  
// Comment-line rule made of '=' characters, terminated by a newline.
const char kThickSeparator[] =
    "// ===================================================================\n";

// Comment-line rule made of '-' characters, terminated by a newline.
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
64  
65  namespace {
66  
// Base package that FileJavaPackage() starts from when the file does not set
// the java_package option.
const char* kDefaultPackage = "";

// Field names that must not be used verbatim: the accessors generated for
// them would collide with methods defined in base classes.
const char* kForbiddenWordList[] = {
    "cached_size",      // message base class
    "serialized_size",  // message base class
    "class",            // java.lang.Object
};
79  
// Names looked up by UnderscoresToCamelCaseCheckReserved(); a camel-cased
// field name found here gets a trailing underscore.  The set is allocated
// with new and never freed.
const std::unordered_set<std::string>* kReservedNames =
    new std::unordered_set<std::string>{
        "abstract", "assert",
        "boolean", "break", "byte",
        "case", "catch", "char", "class", "const", "continue",
        "default", "do", "double",
        "else", "enum", "extends",
        "final", "finally", "float", "for",
        "goto",
        "if", "implements", "import", "instanceof", "int", "interface",
        "long",
        "native", "new",
        "package", "private", "protected", "public",
        "return",
        "short", "static", "strictfp", "super", "switch", "synchronized",
        "this", "throw", "throws", "transient", "try",
        "void", "volatile",
        "while",
    };
93  
IsForbidden(const std::string & field_name)94  bool IsForbidden(const std::string& field_name) {
95    for (int i = 0; i < GOOGLE_ARRAYSIZE(kForbiddenWordList); ++i) {
96      if (field_name == kForbiddenWordList[i]) {
97        return true;
98      }
99    }
100    return false;
101  }
102  
FieldName(const FieldDescriptor * field)103  std::string FieldName(const FieldDescriptor* field) {
104    std::string field_name;
105    // Groups are hacky:  The name of the field is just the lower-cased name
106    // of the group type.  In Java, though, we would like to retain the original
107    // capitalization of the type name.
108    if (GetType(field) == FieldDescriptor::TYPE_GROUP) {
109      field_name = field->message_type()->name();
110    } else {
111      field_name = field->name();
112    }
113    if (IsForbidden(field_name)) {
114      // Append a trailing "#" to indicate that the name should be decorated to
115      // avoid collision with other names.
116      field_name += "#";
117    }
118    return field_name;
119  }
120  
121  
122  }  // namespace
123  
PrintGeneratedAnnotation(io::Printer * printer,char delimiter,const std::string & annotation_file)124  void PrintGeneratedAnnotation(io::Printer* printer, char delimiter,
125                                const std::string& annotation_file) {
126    if (annotation_file.empty()) {
127      return;
128    }
129    std::string ptemplate =
130        "@javax.annotation.Generated(value=\"protoc\", comments=\"annotations:";
131    ptemplate.push_back(delimiter);
132    ptemplate.append("annotation_file");
133    ptemplate.push_back(delimiter);
134    ptemplate.append("\")\n");
135    printer->Print(ptemplate.c_str(), "annotation_file", annotation_file);
136  }
137  
PrintEnumVerifierLogic(io::Printer * printer,const FieldDescriptor * descriptor,const std::map<std::string,std::string> & variables,const char * var_name,const char * terminating_string,bool enforce_lite)138  void PrintEnumVerifierLogic(io::Printer* printer,
139                              const FieldDescriptor* descriptor,
140                              const std::map<std::string, std::string>& variables,
141                              const char* var_name,
142                              const char* terminating_string, bool enforce_lite) {
143    std::string enum_verifier_string =
144        enforce_lite ? StrCat(var_name, ".internalGetVerifier()")
145                     : StrCat(
146                           "new com.google.protobuf.Internal.EnumVerifier() {\n"
147                           "        @java.lang.Override\n"
148                           "        public boolean isInRange(int number) {\n"
149                           "          return ",
150                           var_name,
151                           ".forNumber(number) != null;\n"
152                           "        }\n"
153                           "      }");
154    printer->Print(
155        variables,
156        StrCat(enum_verifier_string, terminating_string).c_str());
157  }
158  
UnderscoresToCamelCase(const std::string & input,bool cap_next_letter)159  std::string UnderscoresToCamelCase(const std::string& input,
160                                     bool cap_next_letter) {
161    GOOGLE_CHECK(!input.empty());
162    std::string result;
163    // Note:  I distrust ctype.h due to locales.
164    for (int i = 0; i < input.size(); i++) {
165      if ('a' <= input[i] && input[i] <= 'z') {
166        if (cap_next_letter) {
167          result += input[i] + ('A' - 'a');
168        } else {
169          result += input[i];
170        }
171        cap_next_letter = false;
172      } else if ('A' <= input[i] && input[i] <= 'Z') {
173        if (i == 0 && !cap_next_letter) {
174          // Force first letter to lower-case unless explicitly told to
175          // capitalize it.
176          result += input[i] + ('a' - 'A');
177        } else {
178          // Capital letters after the first are left as-is.
179          result += input[i];
180        }
181        cap_next_letter = false;
182      } else if ('0' <= input[i] && input[i] <= '9') {
183        result += input[i];
184        cap_next_letter = true;
185      } else {
186        cap_next_letter = true;
187      }
188    }
189    // Add a trailing "_" if the name should be altered.
190    if (input[input.size() - 1] == '#') {
191      result += '_';
192    }
193    return result;
194  }
195  
UnderscoresToCamelCase(const FieldDescriptor * field)196  std::string UnderscoresToCamelCase(const FieldDescriptor* field) {
197    return UnderscoresToCamelCase(FieldName(field), false);
198  }
199  
UnderscoresToCapitalizedCamelCase(const FieldDescriptor * field)200  std::string UnderscoresToCapitalizedCamelCase(const FieldDescriptor* field) {
201    return UnderscoresToCamelCase(FieldName(field), true);
202  }
203  
CapitalizedFieldName(const FieldDescriptor * field)204  std::string CapitalizedFieldName(const FieldDescriptor* field) {
205    return UnderscoresToCapitalizedCamelCase(field);
206  }
207  
UnderscoresToCamelCase(const MethodDescriptor * method)208  std::string UnderscoresToCamelCase(const MethodDescriptor* method) {
209    return UnderscoresToCamelCase(method->name(), false);
210  }
211  
UnderscoresToCamelCaseCheckReserved(const FieldDescriptor * field)212  std::string UnderscoresToCamelCaseCheckReserved(const FieldDescriptor* field) {
213    std::string name = UnderscoresToCamelCase(field);
214    if (kReservedNames->find(name) != kReservedNames->end()) {
215      return name + "_";
216    }
217    return name;
218  }
219  
UniqueFileScopeIdentifier(const Descriptor * descriptor)220  std::string UniqueFileScopeIdentifier(const Descriptor* descriptor) {
221    return "static_" + StringReplace(descriptor->full_name(), ".", "_", true);
222  }
223  
CamelCaseFieldName(const FieldDescriptor * field)224  std::string CamelCaseFieldName(const FieldDescriptor* field) {
225    std::string fieldName = UnderscoresToCamelCase(field);
226    if ('0' <= fieldName[0] && fieldName[0] <= '9') {
227      return '_' + fieldName;
228    }
229    return fieldName;
230  }
231  
StripProto(const std::string & filename)232  std::string StripProto(const std::string& filename) {
233    if (HasSuffixString(filename, ".protodevel")) {
234      return StripSuffixString(filename, ".protodevel");
235    } else {
236      return StripSuffixString(filename, ".proto");
237    }
238  }
239  
FileClassName(const FileDescriptor * file,bool immutable)240  std::string FileClassName(const FileDescriptor* file, bool immutable) {
241    ClassNameResolver name_resolver;
242    return name_resolver.GetFileClassName(file, immutable);
243  }
244  
FileJavaPackage(const FileDescriptor * file,bool immutable)245  std::string FileJavaPackage(const FileDescriptor* file, bool immutable) {
246    std::string result;
247  
248    if (file->options().has_java_package()) {
249      result = file->options().java_package();
250    } else {
251      result = kDefaultPackage;
252      if (!file->package().empty()) {
253        if (!result.empty()) result += '.';
254        result += file->package();
255      }
256    }
257  
258    return result;
259  }
260  
FileJavaPackage(const FileDescriptor * file)261  std::string FileJavaPackage(const FileDescriptor* file) {
262    return FileJavaPackage(file, true /* immutable */);
263  }
264  
JavaPackageToDir(std::string package_name)265  std::string JavaPackageToDir(std::string package_name) {
266    std::string package_dir = StringReplace(package_name, ".", "/", true);
267    if (!package_dir.empty()) package_dir += "/";
268    return package_dir;
269  }
270  
ClassName(const Descriptor * descriptor)271  std::string ClassName(const Descriptor* descriptor) {
272    ClassNameResolver name_resolver;
273    return name_resolver.GetClassName(descriptor, true);
274  }
275  
ClassName(const EnumDescriptor * descriptor)276  std::string ClassName(const EnumDescriptor* descriptor) {
277    ClassNameResolver name_resolver;
278    return name_resolver.GetClassName(descriptor, true);
279  }
280  
ClassName(const ServiceDescriptor * descriptor)281  std::string ClassName(const ServiceDescriptor* descriptor) {
282    ClassNameResolver name_resolver;
283    return name_resolver.GetClassName(descriptor, true);
284  }
285  
ClassName(const FileDescriptor * descriptor)286  std::string ClassName(const FileDescriptor* descriptor) {
287    ClassNameResolver name_resolver;
288    return name_resolver.GetClassName(descriptor, true);
289  }
290  
291  
ExtraMessageInterfaces(const Descriptor * descriptor)292  std::string ExtraMessageInterfaces(const Descriptor* descriptor) {
293    std::string interfaces = "// @@protoc_insertion_point(message_implements:" +
294                             descriptor->full_name() + ")";
295    return interfaces;
296  }
297  
298  
ExtraBuilderInterfaces(const Descriptor * descriptor)299  std::string ExtraBuilderInterfaces(const Descriptor* descriptor) {
300    std::string interfaces = "// @@protoc_insertion_point(builder_implements:" +
301                             descriptor->full_name() + ")";
302    return interfaces;
303  }
304  
ExtraMessageOrBuilderInterfaces(const Descriptor * descriptor)305  std::string ExtraMessageOrBuilderInterfaces(const Descriptor* descriptor) {
306    std::string interfaces = "// @@protoc_insertion_point(interface_extends:" +
307                             descriptor->full_name() + ")";
308    return interfaces;
309  }
310  
FieldConstantName(const FieldDescriptor * field)311  std::string FieldConstantName(const FieldDescriptor* field) {
312    std::string name = field->name() + "_FIELD_NUMBER";
313    UpperString(&name);
314    return name;
315  }
316  
// Returns the field's declared type; a single indirection point for callers
// in this file.
FieldDescriptor::Type GetType(const FieldDescriptor* field) {
  const FieldDescriptor::Type declared_type = field->type();
  return declared_type;
}
320  
GetJavaType(const FieldDescriptor * field)321  JavaType GetJavaType(const FieldDescriptor* field) {
322    switch (GetType(field)) {
323      case FieldDescriptor::TYPE_INT32:
324      case FieldDescriptor::TYPE_UINT32:
325      case FieldDescriptor::TYPE_SINT32:
326      case FieldDescriptor::TYPE_FIXED32:
327      case FieldDescriptor::TYPE_SFIXED32:
328        return JAVATYPE_INT;
329  
330      case FieldDescriptor::TYPE_INT64:
331      case FieldDescriptor::TYPE_UINT64:
332      case FieldDescriptor::TYPE_SINT64:
333      case FieldDescriptor::TYPE_FIXED64:
334      case FieldDescriptor::TYPE_SFIXED64:
335        return JAVATYPE_LONG;
336  
337      case FieldDescriptor::TYPE_FLOAT:
338        return JAVATYPE_FLOAT;
339  
340      case FieldDescriptor::TYPE_DOUBLE:
341        return JAVATYPE_DOUBLE;
342  
343      case FieldDescriptor::TYPE_BOOL:
344        return JAVATYPE_BOOLEAN;
345  
346      case FieldDescriptor::TYPE_STRING:
347        return JAVATYPE_STRING;
348  
349      case FieldDescriptor::TYPE_BYTES:
350        return JAVATYPE_BYTES;
351  
352      case FieldDescriptor::TYPE_ENUM:
353        return JAVATYPE_ENUM;
354  
355      case FieldDescriptor::TYPE_GROUP:
356      case FieldDescriptor::TYPE_MESSAGE:
357        return JAVATYPE_MESSAGE;
358  
359        // No default because we want the compiler to complain if any new
360        // types are added.
361    }
362  
363    GOOGLE_LOG(FATAL) << "Can't get here.";
364    return JAVATYPE_INT;
365  }
366  
PrimitiveTypeName(JavaType type)367  const char* PrimitiveTypeName(JavaType type) {
368    switch (type) {
369      case JAVATYPE_INT:
370        return "int";
371      case JAVATYPE_LONG:
372        return "long";
373      case JAVATYPE_FLOAT:
374        return "float";
375      case JAVATYPE_DOUBLE:
376        return "double";
377      case JAVATYPE_BOOLEAN:
378        return "boolean";
379      case JAVATYPE_STRING:
380        return "java.lang.String";
381      case JAVATYPE_BYTES:
382        return "com.google.protobuf.ByteString";
383      case JAVATYPE_ENUM:
384        return NULL;
385      case JAVATYPE_MESSAGE:
386        return NULL;
387  
388        // No default because we want the compiler to complain if any new
389        // JavaTypes are added.
390    }
391  
392    GOOGLE_LOG(FATAL) << "Can't get here.";
393    return NULL;
394  }
395  
PrimitiveTypeName(const FieldDescriptor * descriptor)396  const char* PrimitiveTypeName(const FieldDescriptor* descriptor) {
397    return PrimitiveTypeName(GetJavaType(descriptor));
398  }
399  
BoxedPrimitiveTypeName(JavaType type)400  const char* BoxedPrimitiveTypeName(JavaType type) {
401    switch (type) {
402      case JAVATYPE_INT:
403        return "java.lang.Integer";
404      case JAVATYPE_LONG:
405        return "java.lang.Long";
406      case JAVATYPE_FLOAT:
407        return "java.lang.Float";
408      case JAVATYPE_DOUBLE:
409        return "java.lang.Double";
410      case JAVATYPE_BOOLEAN:
411        return "java.lang.Boolean";
412      case JAVATYPE_STRING:
413        return "java.lang.String";
414      case JAVATYPE_BYTES:
415        return "com.google.protobuf.ByteString";
416      case JAVATYPE_ENUM:
417        return NULL;
418      case JAVATYPE_MESSAGE:
419        return NULL;
420  
421        // No default because we want the compiler to complain if any new
422        // JavaTypes are added.
423    }
424  
425    GOOGLE_LOG(FATAL) << "Can't get here.";
426    return NULL;
427  }
428  
BoxedPrimitiveTypeName(const FieldDescriptor * descriptor)429  const char* BoxedPrimitiveTypeName(const FieldDescriptor* descriptor) {
430    return BoxedPrimitiveTypeName(GetJavaType(descriptor));
431  }
432  
GetOneofStoredType(const FieldDescriptor * field)433  std::string GetOneofStoredType(const FieldDescriptor* field) {
434    const JavaType javaType = GetJavaType(field);
435    switch (javaType) {
436      case JAVATYPE_ENUM:
437        return "java.lang.Integer";
438      case JAVATYPE_MESSAGE:
439        return ClassName(field->message_type());
440      default:
441        return BoxedPrimitiveTypeName(javaType);
442    }
443  }
444  
FieldTypeName(FieldDescriptor::Type field_type)445  const char* FieldTypeName(FieldDescriptor::Type field_type) {
446    switch (field_type) {
447      case FieldDescriptor::TYPE_INT32:
448        return "INT32";
449      case FieldDescriptor::TYPE_UINT32:
450        return "UINT32";
451      case FieldDescriptor::TYPE_SINT32:
452        return "SINT32";
453      case FieldDescriptor::TYPE_FIXED32:
454        return "FIXED32";
455      case FieldDescriptor::TYPE_SFIXED32:
456        return "SFIXED32";
457      case FieldDescriptor::TYPE_INT64:
458        return "INT64";
459      case FieldDescriptor::TYPE_UINT64:
460        return "UINT64";
461      case FieldDescriptor::TYPE_SINT64:
462        return "SINT64";
463      case FieldDescriptor::TYPE_FIXED64:
464        return "FIXED64";
465      case FieldDescriptor::TYPE_SFIXED64:
466        return "SFIXED64";
467      case FieldDescriptor::TYPE_FLOAT:
468        return "FLOAT";
469      case FieldDescriptor::TYPE_DOUBLE:
470        return "DOUBLE";
471      case FieldDescriptor::TYPE_BOOL:
472        return "BOOL";
473      case FieldDescriptor::TYPE_STRING:
474        return "STRING";
475      case FieldDescriptor::TYPE_BYTES:
476        return "BYTES";
477      case FieldDescriptor::TYPE_ENUM:
478        return "ENUM";
479      case FieldDescriptor::TYPE_GROUP:
480        return "GROUP";
481      case FieldDescriptor::TYPE_MESSAGE:
482        return "MESSAGE";
483  
484        // No default because we want the compiler to complain if any new
485        // types are added.
486    }
487  
488    GOOGLE_LOG(FATAL) << "Can't get here.";
489    return NULL;
490  }
491  
// Returns true when every byte of `text` has its high bit (0x80) clear, i.e.
// the string is pure 7-bit ASCII.  An empty string yields true.
bool AllAscii(const std::string& text) {
  // Iterate by element rather than with a signed int index compared against
  // the unsigned size(); the index could overflow for strings longer than
  // INT_MAX.
  for (const char c : text) {
    if ((c & 0x80) != 0) {
      return false;
    }
  }
  return true;
}
500  
DefaultValue(const FieldDescriptor * field,bool immutable,ClassNameResolver * name_resolver)501  std::string DefaultValue(const FieldDescriptor* field, bool immutable,
502                           ClassNameResolver* name_resolver) {
503    // Switch on CppType since we need to know which default_value_* method
504    // of FieldDescriptor to call.
505    switch (field->cpp_type()) {
506      case FieldDescriptor::CPPTYPE_INT32:
507        return StrCat(field->default_value_int32());
508      case FieldDescriptor::CPPTYPE_UINT32:
509        // Need to print as a signed int since Java has no unsigned.
510        return StrCat(static_cast<int32>(field->default_value_uint32()));
511      case FieldDescriptor::CPPTYPE_INT64:
512        return StrCat(field->default_value_int64()) + "L";
513      case FieldDescriptor::CPPTYPE_UINT64:
514        return StrCat(static_cast<int64>(field->default_value_uint64())) +
515               "L";
516      case FieldDescriptor::CPPTYPE_DOUBLE: {
517        double value = field->default_value_double();
518        if (value == std::numeric_limits<double>::infinity()) {
519          return "Double.POSITIVE_INFINITY";
520        } else if (value == -std::numeric_limits<double>::infinity()) {
521          return "Double.NEGATIVE_INFINITY";
522        } else if (value != value) {
523          return "Double.NaN";
524        } else {
525          return SimpleDtoa(value) + "D";
526        }
527      }
528      case FieldDescriptor::CPPTYPE_FLOAT: {
529        float value = field->default_value_float();
530        if (value == std::numeric_limits<float>::infinity()) {
531          return "Float.POSITIVE_INFINITY";
532        } else if (value == -std::numeric_limits<float>::infinity()) {
533          return "Float.NEGATIVE_INFINITY";
534        } else if (value != value) {
535          return "Float.NaN";
536        } else {
537          return SimpleFtoa(value) + "F";
538        }
539      }
540      case FieldDescriptor::CPPTYPE_BOOL:
541        return field->default_value_bool() ? "true" : "false";
542      case FieldDescriptor::CPPTYPE_STRING:
543        if (GetType(field) == FieldDescriptor::TYPE_BYTES) {
544          if (field->has_default_value()) {
545            // See comments in Internal.java for gory details.
546            return strings::Substitute(
547                "com.google.protobuf.Internal.bytesDefaultValue(\"$0\")",
548                CEscape(field->default_value_string()));
549          } else {
550            return "com.google.protobuf.ByteString.EMPTY";
551          }
552        } else {
553          if (AllAscii(field->default_value_string())) {
554            // All chars are ASCII.  In this case CEscape() works fine.
555            return "\"" + CEscape(field->default_value_string()) + "\"";
556          } else {
557            // See comments in Internal.java for gory details.
558            return strings::Substitute(
559                "com.google.protobuf.Internal.stringDefaultValue(\"$0\")",
560                CEscape(field->default_value_string()));
561          }
562        }
563  
564      case FieldDescriptor::CPPTYPE_ENUM:
565        return name_resolver->GetClassName(field->enum_type(), immutable) + "." +
566               field->default_value_enum()->name();
567  
568      case FieldDescriptor::CPPTYPE_MESSAGE:
569        return name_resolver->GetClassName(field->message_type(), immutable) +
570               ".getDefaultInstance()";
571  
572        // No default because we want the compiler to complain if any new
573        // types are added.
574    }
575  
576    GOOGLE_LOG(FATAL) << "Can't get here.";
577    return "";
578  }
579  
IsDefaultValueJavaDefault(const FieldDescriptor * field)580  bool IsDefaultValueJavaDefault(const FieldDescriptor* field) {
581    // Switch on CppType since we need to know which default_value_* method
582    // of FieldDescriptor to call.
583    switch (field->cpp_type()) {
584      case FieldDescriptor::CPPTYPE_INT32:
585        return field->default_value_int32() == 0;
586      case FieldDescriptor::CPPTYPE_UINT32:
587        return field->default_value_uint32() == 0;
588      case FieldDescriptor::CPPTYPE_INT64:
589        return field->default_value_int64() == 0L;
590      case FieldDescriptor::CPPTYPE_UINT64:
591        return field->default_value_uint64() == 0L;
592      case FieldDescriptor::CPPTYPE_DOUBLE:
593        return field->default_value_double() == 0.0;
594      case FieldDescriptor::CPPTYPE_FLOAT:
595        return field->default_value_float() == 0.0;
596      case FieldDescriptor::CPPTYPE_BOOL:
597        return field->default_value_bool() == false;
598      case FieldDescriptor::CPPTYPE_ENUM:
599        return field->default_value_enum()->number() == 0;
600      case FieldDescriptor::CPPTYPE_STRING:
601      case FieldDescriptor::CPPTYPE_MESSAGE:
602        return false;
603  
604        // No default because we want the compiler to complain if any new
605        // types are added.
606    }
607  
608    GOOGLE_LOG(FATAL) << "Can't get here.";
609    return false;
610  }
611  
IsByteStringWithCustomDefaultValue(const FieldDescriptor * field)612  bool IsByteStringWithCustomDefaultValue(const FieldDescriptor* field) {
613    return GetJavaType(field) == JAVATYPE_BYTES &&
614           field->default_value_string() != "";
615  }
616  
// Hex literals for the 32 single-bit masks; entry i is the text of (1 << i).
// Laid out four entries (one hex digit position) per row.
const char* bit_masks[] = {
    "0x00000001", "0x00000002", "0x00000004", "0x00000008",
    "0x00000010", "0x00000020", "0x00000040", "0x00000080",
    "0x00000100", "0x00000200", "0x00000400", "0x00000800",
    "0x00001000", "0x00002000", "0x00004000", "0x00008000",
    "0x00010000", "0x00020000", "0x00040000", "0x00080000",
    "0x00100000", "0x00200000", "0x00400000", "0x00800000",
    "0x01000000", "0x02000000", "0x04000000", "0x08000000",
    "0x10000000", "0x20000000", "0x40000000", "0x80000000",
};
630  
GetBitFieldName(int index)631  std::string GetBitFieldName(int index) {
632    std::string varName = "bitField";
633    varName += StrCat(index);
634    varName += "_";
635    return varName;
636  }
637  
GetBitFieldNameForBit(int bitIndex)638  std::string GetBitFieldNameForBit(int bitIndex) {
639    return GetBitFieldName(bitIndex / 32);
640  }
641  
642  namespace {
643  
GenerateGetBitInternal(const std::string & prefix,int bitIndex)644  std::string GenerateGetBitInternal(const std::string& prefix, int bitIndex) {
645    std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
646    int bitInVarIndex = bitIndex % 32;
647  
648    std::string mask = bit_masks[bitInVarIndex];
649    std::string result = "((" + varName + " & " + mask + ") != 0)";
650    return result;
651  }
652  
GenerateSetBitInternal(const std::string & prefix,int bitIndex)653  std::string GenerateSetBitInternal(const std::string& prefix, int bitIndex) {
654    std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
655    int bitInVarIndex = bitIndex % 32;
656  
657    std::string mask = bit_masks[bitInVarIndex];
658    std::string result = varName + " |= " + mask;
659    return result;
660  }
661  
662  }  // namespace
663  
GenerateGetBit(int bitIndex)664  std::string GenerateGetBit(int bitIndex) {
665    return GenerateGetBitInternal("", bitIndex);
666  }
667  
GenerateSetBit(int bitIndex)668  std::string GenerateSetBit(int bitIndex) {
669    return GenerateSetBitInternal("", bitIndex);
670  }
671  
GenerateClearBit(int bitIndex)672  std::string GenerateClearBit(int bitIndex) {
673    std::string varName = GetBitFieldNameForBit(bitIndex);
674    int bitInVarIndex = bitIndex % 32;
675  
676    std::string mask = bit_masks[bitInVarIndex];
677    std::string result = varName + " = (" + varName + " & ~" + mask + ")";
678    return result;
679  }
680  
GenerateGetBitFromLocal(int bitIndex)681  std::string GenerateGetBitFromLocal(int bitIndex) {
682    return GenerateGetBitInternal("from_", bitIndex);
683  }
684  
GenerateSetBitToLocal(int bitIndex)685  std::string GenerateSetBitToLocal(int bitIndex) {
686    return GenerateSetBitInternal("to_", bitIndex);
687  }
688  
GenerateGetBitMutableLocal(int bitIndex)689  std::string GenerateGetBitMutableLocal(int bitIndex) {
690    return GenerateGetBitInternal("mutable_", bitIndex);
691  }
692  
GenerateSetBitMutableLocal(int bitIndex)693  std::string GenerateSetBitMutableLocal(int bitIndex) {
694    return GenerateSetBitInternal("mutable_", bitIndex);
695  }
696  
IsReferenceType(JavaType type)697  bool IsReferenceType(JavaType type) {
698    switch (type) {
699      case JAVATYPE_INT:
700        return false;
701      case JAVATYPE_LONG:
702        return false;
703      case JAVATYPE_FLOAT:
704        return false;
705      case JAVATYPE_DOUBLE:
706        return false;
707      case JAVATYPE_BOOLEAN:
708        return false;
709      case JAVATYPE_STRING:
710        return true;
711      case JAVATYPE_BYTES:
712        return true;
713      case JAVATYPE_ENUM:
714        return true;
715      case JAVATYPE_MESSAGE:
716        return true;
717  
718        // No default because we want the compiler to complain if any new
719        // JavaTypes are added.
720    }
721  
722    GOOGLE_LOG(FATAL) << "Can't get here.";
723    return false;
724  }
725  
GetCapitalizedType(const FieldDescriptor * field,bool immutable)726  const char* GetCapitalizedType(const FieldDescriptor* field, bool immutable) {
727    switch (GetType(field)) {
728      case FieldDescriptor::TYPE_INT32:
729        return "Int32";
730      case FieldDescriptor::TYPE_UINT32:
731        return "UInt32";
732      case FieldDescriptor::TYPE_SINT32:
733        return "SInt32";
734      case FieldDescriptor::TYPE_FIXED32:
735        return "Fixed32";
736      case FieldDescriptor::TYPE_SFIXED32:
737        return "SFixed32";
738      case FieldDescriptor::TYPE_INT64:
739        return "Int64";
740      case FieldDescriptor::TYPE_UINT64:
741        return "UInt64";
742      case FieldDescriptor::TYPE_SINT64:
743        return "SInt64";
744      case FieldDescriptor::TYPE_FIXED64:
745        return "Fixed64";
746      case FieldDescriptor::TYPE_SFIXED64:
747        return "SFixed64";
748      case FieldDescriptor::TYPE_FLOAT:
749        return "Float";
750      case FieldDescriptor::TYPE_DOUBLE:
751        return "Double";
752      case FieldDescriptor::TYPE_BOOL:
753        return "Bool";
754      case FieldDescriptor::TYPE_STRING:
755        return "String";
756      case FieldDescriptor::TYPE_BYTES: {
757        return "Bytes";
758      }
759      case FieldDescriptor::TYPE_ENUM:
760        return "Enum";
761      case FieldDescriptor::TYPE_GROUP:
762        return "Group";
763      case FieldDescriptor::TYPE_MESSAGE:
764        return "Message";
765  
766        // No default because we want the compiler to complain if any new
767        // types are added.
768    }
769  
770    GOOGLE_LOG(FATAL) << "Can't get here.";
771    return NULL;
772  }
773  
774  // For encodings with fixed sizes, returns that size in bytes.  Otherwise
775  // returns -1.
FixedSize(FieldDescriptor::Type type)776  int FixedSize(FieldDescriptor::Type type) {
777    switch (type) {
778      case FieldDescriptor::TYPE_INT32:
779        return -1;
780      case FieldDescriptor::TYPE_INT64:
781        return -1;
782      case FieldDescriptor::TYPE_UINT32:
783        return -1;
784      case FieldDescriptor::TYPE_UINT64:
785        return -1;
786      case FieldDescriptor::TYPE_SINT32:
787        return -1;
788      case FieldDescriptor::TYPE_SINT64:
789        return -1;
790      case FieldDescriptor::TYPE_FIXED32:
791        return WireFormatLite::kFixed32Size;
792      case FieldDescriptor::TYPE_FIXED64:
793        return WireFormatLite::kFixed64Size;
794      case FieldDescriptor::TYPE_SFIXED32:
795        return WireFormatLite::kSFixed32Size;
796      case FieldDescriptor::TYPE_SFIXED64:
797        return WireFormatLite::kSFixed64Size;
798      case FieldDescriptor::TYPE_FLOAT:
799        return WireFormatLite::kFloatSize;
800      case FieldDescriptor::TYPE_DOUBLE:
801        return WireFormatLite::kDoubleSize;
802  
803      case FieldDescriptor::TYPE_BOOL:
804        return WireFormatLite::kBoolSize;
805      case FieldDescriptor::TYPE_ENUM:
806        return -1;
807  
808      case FieldDescriptor::TYPE_STRING:
809        return -1;
810      case FieldDescriptor::TYPE_BYTES:
811        return -1;
812      case FieldDescriptor::TYPE_GROUP:
813        return -1;
814      case FieldDescriptor::TYPE_MESSAGE:
815        return -1;
816  
817        // No default because we want the compiler to complain if any new
818        // types are added.
819    }
820    GOOGLE_LOG(FATAL) << "Can't get here.";
821    return -1;
822  }
823  
824  // Sort the fields of the given Descriptor by number into a new[]'d array
825  // and return it. The caller should delete the returned array.
SortFieldsByNumber(const Descriptor * descriptor)826  const FieldDescriptor** SortFieldsByNumber(const Descriptor* descriptor) {
827    const FieldDescriptor** fields =
828        new const FieldDescriptor*[descriptor->field_count()];
829    for (int i = 0; i < descriptor->field_count(); i++) {
830      fields[i] = descriptor->field(i);
831    }
832    std::sort(fields, fields + descriptor->field_count(),
833              FieldOrderingByNumber());
834    return fields;
835  }
836  
837  // Returns true if the message type has any required fields.  If it doesn't,
838  // we can optimize out calls to its isInitialized() method.
839  //
840  // already_seen is used to avoid checking the same type multiple times
841  // (and also to protect against recursion).
HasRequiredFields(const Descriptor * type,std::unordered_set<const Descriptor * > * already_seen)842  bool HasRequiredFields(const Descriptor* type,
843                         std::unordered_set<const Descriptor*>* already_seen) {
844    if (already_seen->count(type) > 0) {
845      // The type is already in cache.  This means that either:
846      // a. The type has no required fields.
847      // b. We are in the midst of checking if the type has required fields,
848      //    somewhere up the stack.  In this case, we know that if the type
849      //    has any required fields, they'll be found when we return to it,
850      //    and the whole call to HasRequiredFields() will return true.
851      //    Therefore, we don't have to check if this type has required fields
852      //    here.
853      return false;
854    }
855    already_seen->insert(type);
856  
857    // If the type has extensions, an extension with message type could contain
858    // required fields, so we have to be conservative and assume such an
859    // extension exists.
860    if (type->extension_range_count() > 0) return true;
861  
862    for (int i = 0; i < type->field_count(); i++) {
863      const FieldDescriptor* field = type->field(i);
864      if (field->is_required()) {
865        return true;
866      }
867      if (GetJavaType(field) == JAVATYPE_MESSAGE) {
868        if (HasRequiredFields(field->message_type(), already_seen)) {
869          return true;
870        }
871      }
872    }
873  
874    return false;
875  }
876  
HasRequiredFields(const Descriptor * type)877  bool HasRequiredFields(const Descriptor* type) {
878    std::unordered_set<const Descriptor*> already_seen;
879    return HasRequiredFields(type, &already_seen);
880  }
881  
HasRepeatedFields(const Descriptor * descriptor)882  bool HasRepeatedFields(const Descriptor* descriptor) {
883    for (int i = 0; i < descriptor->field_count(); ++i) {
884      const FieldDescriptor* field = descriptor->field(i);
885      if (field->is_repeated()) {
886        return true;
887      }
888    }
889    return false;
890  }
891  
// Appends to *output an encoding of an unsigned 32-bit value as a sequence
// of UTF-16 characters.
//
// A value in [0x0000, 0xD7FF] is written as one character with the same
// numeric value.
//
// A larger value is written 13 bits at a time, lowest bits first: each chunk
// is OR'ed with 0xE000, yielding a character in [0xE000, 0xFFFF], and the
// value is shifted right by 13 bits.  This repeats until what remains fits
// in [0x0000, 0xD7FF]; that remainder is written as the final character.
//
// Only code points in [0x0000, 0xD7FF] and [0xE000, 0xFFFF] are produced, so
// the output never contains surrogates ([0xD800, 0xDFFF]); those would have
// to come in pairs and the encoding is more space-efficient without them.
void WriteUInt32ToUtf16CharSequence(uint32 number,
                                    std::vector<uint16>* output) {
  const uint32 kSingleCharLimit = 0xD800;  // First value needing >1 char.
  const uint32 kChunkMask = 0x1FFF;        // Low 13 bits.
  const uint32 kChunkTag = 0xE000;         // Marks a non-final character.
  const int kChunkBits = 13;

  uint32 remaining = number;
  // Emit non-final characters while the value is too large for one char.
  // For values below the limit this loop does not run at all.
  for (; remaining >= kSingleCharLimit; remaining >>= kChunkBits) {
    output->push_back(
        static_cast<uint16>(kChunkTag | (remaining & kChunkMask)));
  }
  // The final character is always in [0x0000, 0xD7FF].
  output->push_back(static_cast<uint16>(remaining));
}
925  
GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor * field)926  int GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor* field) {
927    // j/c/g/protobuf/FieldType.java lists field types in a slightly different
928    // order from FieldDescriptor::Type so we can't do a simple cast.
929    //
930    // TODO(xiaofeng): Make j/c/g/protobuf/FieldType.java follow the same order.
931    int result = field->type();
932    if (result == FieldDescriptor::TYPE_GROUP) {
933      return 17;
934    } else if (result < FieldDescriptor::TYPE_GROUP) {
935      return result - 1;
936    } else {
937      return result - 2;
938    }
939  }
940  
GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor * field)941  int GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor* field) {
942    if (field->type() == FieldDescriptor::TYPE_GROUP) {
943      return 49;
944    } else {
945      return GetExperimentalJavaFieldTypeForSingular(field) + 18;
946    }
947  }
948  
GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor * field)949  int GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor* field) {
950    int result = field->type();
951    if (result < FieldDescriptor::TYPE_STRING) {
952      return result + 34;
953    } else if (result > FieldDescriptor::TYPE_BYTES) {
954      return result + 30;
955    } else {
956      GOOGLE_LOG(FATAL) << field->full_name() << " can't be packed.";
957      return 0;
958    }
959  }
960  
GetExperimentalJavaFieldType(const FieldDescriptor * field)961  int GetExperimentalJavaFieldType(const FieldDescriptor* field) {
962    static const int kMapFieldType = 50;
963    static const int kOneofFieldTypeOffset = 51;
964    static const int kRequiredBit = 0x100;
965    static const int kUtf8CheckBit = 0x200;
966    static const int kCheckInitialized = 0x400;
967    static const int kMapWithProto2EnumValue = 0x800;
968    int extra_bits = field->is_required() ? kRequiredBit : 0;
969    if (field->type() == FieldDescriptor::TYPE_STRING && CheckUtf8(field)) {
970      extra_bits |= kUtf8CheckBit;
971    }
972    if (field->is_required() || (GetJavaType(field) == JAVATYPE_MESSAGE &&
973                                 HasRequiredFields(field->message_type()))) {
974      extra_bits |= kCheckInitialized;
975    }
976  
977    if (field->is_map()) {
978      if (SupportFieldPresence(field->file())) {
979        const FieldDescriptor* value =
980            field->message_type()->FindFieldByName("value");
981        if (GetJavaType(value) == JAVATYPE_ENUM) {
982          extra_bits |= kMapWithProto2EnumValue;
983        }
984      }
985      return kMapFieldType | extra_bits;
986    } else if (field->is_packed()) {
987      return GetExperimentalJavaFieldTypeForPacked(field);
988    } else if (field->is_repeated()) {
989      return GetExperimentalJavaFieldTypeForRepeated(field) | extra_bits;
990    } else if (field->containing_oneof() != NULL) {
991      return (GetExperimentalJavaFieldTypeForSingular(field) +
992              kOneofFieldTypeOffset) |
993             extra_bits;
994    } else {
995      return GetExperimentalJavaFieldTypeForSingular(field) | extra_bits;
996    }
997  }
998  
999  // Escape a UTF-16 character to be embedded in a Java string.
EscapeUtf16ToString(uint16 code,std::string * output)1000  void EscapeUtf16ToString(uint16 code, std::string* output) {
1001    if (code == '\t') {
1002      output->append("\\t");
1003    } else if (code == '\b') {
1004      output->append("\\b");
1005    } else if (code == '\n') {
1006      output->append("\\n");
1007    } else if (code == '\r') {
1008      output->append("\\r");
1009    } else if (code == '\f') {
1010      output->append("\\f");
1011    } else if (code == '\'') {
1012      output->append("\\'");
1013    } else if (code == '\"') {
1014      output->append("\\\"");
1015    } else if (code == '\\') {
1016      output->append("\\\\");
1017    } else if (code >= 0x20 && code <= 0x7f) {
1018      output->push_back(static_cast<char>(code));
1019    } else {
1020      output->append(StringPrintf("\\u%04x", code));
1021    }
1022  }
1023  
1024  }  // namespace java
1025  }  // namespace compiler
1026  }  // namespace protobuf
1027  }  // namespace google
1028