1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #ifndef GOOGLE_PROTOBUF_COMPILER_JAVA_HELPERS_H__
36 #define GOOGLE_PROTOBUF_COMPILER_JAVA_HELPERS_H__
37 
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include <google/protobuf/io/printer.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/compiler/java/java_context.h>
#include <google/protobuf/descriptor.pb.h>
45 
46 namespace google {
47 namespace protobuf {
48 namespace compiler {
49 namespace java {
50 
// Commonly-used separator comments.  Thick is a line of '=', thin is a line
// of '-'.  Both arrays are only declared here (extern); they are defined
// outside this header.
extern const char kThickSeparator[];
extern const char kThinSeparator[];
55 
// Reports whether field_name is "forbidden" for Kotlin.
// NOTE(review): presumably this means the name cannot be used as-is in
// generated Kotlin code (e.g. it collides with a Kotlin keyword) -- confirm
// against the definition, which is not in this header.
bool IsForbiddenKotlin(const std::string& field_name);
57 
// If annotation_file is non-empty, prints a javax.annotation.Generated
// annotation to the given Printer.
//
//   delimiter:       should be the Printer's delimiter character; defaults
//                    to '$'.
//   annotation_file: referenced in the annotation's comments field; defaults
//                    to "".  It is included verbatim into a Java literal
//                    string, so it should not contain quotes or invalid Java
//                    escape sequences; however, these are unlikely to appear
//                    in practice, as the value should be generated from the
//                    filename of the source file being annotated (which in
//                    turn must be a Java identifier plus ".java").
void PrintGeneratedAnnotation(io::Printer* printer, char delimiter = '$',
                              const std::string& annotation_file = "");
68 
// If a GeneratedMessageLite contains non-lite enums, then its verifier
// must be instantiated inline, rather than retrieved from the enum class.
// NOTE(review): the roles of `variables`, `var_name` and `terminating_string`
// are not visible from this declaration -- presumably printer substitution
// variables, the name of the variable holding the verifier, and text emitted
// after it; confirm against the definition.
void PrintEnumVerifierLogic(io::Printer* printer,
                            const FieldDescriptor* descriptor,
                            const std::map<std::string, std::string>& variables,
                            const char* var_name,
                            const char* terminating_string, bool enforce_lite);
76 
// Converts a name to camel-case.
// NOTE(review): the flag on this function is `lower_first`, not the
// `cap_first_letter` flag taken by UnderscoresToCamelCase; presumably passing
// true lower-cases the first letter of the result -- confirm against the
// definition, which is not in this header.
std::string ToCamelCase(const std::string& input, bool lower_first);

// Single-character case helpers.
// NOTE(review): presumably these return `ch` converted to upper / lower case
// -- confirm against the definitions.
char ToUpperCh(char ch);
char ToLowerCh(char ch);
83 
// Converts a name to camel-case. If cap_first_letter is true, capitalize the
// first letter.
std::string UnderscoresToCamelCase(const std::string& name,
                                   bool cap_first_letter);
// Converts the field's name to camel-case with a lower-case first letter,
// e.g. "foo_bar_baz" becomes "fooBarBaz".
std::string UnderscoresToCamelCase(const FieldDescriptor* field);
// Converts the field's name to camel-case with a capitalized first letter,
// e.g. "foo_bar_baz" becomes "FooBarBaz".
std::string UnderscoresToCapitalizedCamelCase(const FieldDescriptor* field);

// Similar, but for method names.  (Typically, this merely has the effect
// of lower-casing the first letter of the name.)
std::string UnderscoresToCamelCase(const MethodDescriptor* method);

// Same as UnderscoresToCamelCase(const FieldDescriptor*), but checks for
// reserved keywords.
std::string UnderscoresToCamelCaseCheckReserved(const FieldDescriptor* field);

// Similar to UnderscoresToCamelCase, but guarantees that the result is a
// complete Java identifier by adding a _ if needed.
std::string CamelCaseFieldName(const FieldDescriptor* field);
103 
// Get an identifier that uniquely identifies this type within the file.
// This is used to declare static variables related to this type at the
// outermost file scope.
std::string UniqueFileScopeIdentifier(const Descriptor* descriptor);

// Gets the unqualified class name for the file.  For each .proto file, there
// will be one Java class containing all the immutable messages and another
// Java class containing all the mutable messages.  `immutable` selects which
// of the two names is returned and defaults to true.
// TODO(xiaofeng): remove the default value after updating client code.
std::string FileClassName(const FileDescriptor* file, bool immutable = true);

// Returns the file's Java package name.  Unlike FileClassName, `immutable`
// has no default here and must be passed explicitly.
std::string FileJavaPackage(const FileDescriptor* file, bool immutable);

// Returns output directory for the given package name.
std::string JavaPackageToDir(std::string package_name);
120 
// Comma-separated list of option-specified interfaces implemented by the
// Message, to follow the "implements" declaration of the Message definition.
std::string ExtraMessageInterfaces(const Descriptor* descriptor);
// Comma-separated list of option-specified interfaces implemented by the
// MutableMessage, to follow the "implements" declaration of the MutableMessage
// definition.
std::string ExtraMutableMessageInterfaces(const Descriptor* descriptor);
// Comma-separated list of option-specified interfaces implemented by the
// Builder, to follow the "implements" declaration of the Builder definition.
std::string ExtraBuilderInterfaces(const Descriptor* descriptor);
// Comma-separated list of option-specified interfaces extended by the
// MessageOrBuilder, to follow the "extends" declaration of the
// MessageOrBuilder definition.
std::string ExtraMessageOrBuilderInterfaces(const Descriptor* descriptor);
135 
136 // Get the unqualified Java class name for mutable messages. i.e. without
137 // package or outer classnames.
ShortMutableJavaClassName(const Descriptor * descriptor)138 inline std::string ShortMutableJavaClassName(const Descriptor* descriptor) {
139   return descriptor->name();
140 }
141 
142 // Whether the given descriptor is for one of the core descriptor protos. We
143 // cannot currently use the new runtime with core protos since there is a
144 // bootstrapping problem with obtaining their descriptors.
IsDescriptorProto(const Descriptor * descriptor)145 inline bool IsDescriptorProto(const Descriptor* descriptor) {
146   return descriptor->file()->name() == "net/proto2/proto/descriptor.proto" ||
147          descriptor->file()->name() == "google/protobuf/descriptor.proto";
148 }
149 
// Returns the stored type string used by the experimental runtime for oneof
// fields.
// NOTE(review): the exact format of the returned string is not visible from
// this declaration -- see the definition.
std::string GetOneofStoredType(const FieldDescriptor* field);
153 
// How a proto1 enum is represented: we use the proto1 enum itself if the enum
// is generated, and otherwise fall back to using integers.
enum class Proto1EnumRepresentation {
  kEnum,     // Use the proto1 enum.
  kInteger,  // Fall back to integers.
};
160 
161 // Returns which representation we should use.
GetProto1EnumRepresentation(const EnumDescriptor * descriptor)162 inline Proto1EnumRepresentation GetProto1EnumRepresentation(
163     const EnumDescriptor* descriptor) {
164   if (descriptor->containing_type() != nullptr) {
165     return Proto1EnumRepresentation::kEnum;
166   }
167   return Proto1EnumRepresentation::kInteger;
168 }
169 
170 // Whether we should generate multiple java files for messages.
MultipleJavaFiles(const FileDescriptor * descriptor,bool immutable)171 inline bool MultipleJavaFiles(const FileDescriptor* descriptor,
172                               bool immutable) {
173   (void)immutable;
174   return descriptor->options().java_multiple_files();
175 }
176 
177 
// Returns true if `descriptor` will be written to its own .java file.
// `immutable` should be set to true if we're generating for the immutable API.
//
// A descriptor qualifies when it has no containing type and
// MultipleJavaFiles() is true for the file that declares it.
template <typename Descriptor>
bool IsOwnFile(const Descriptor* descriptor, bool immutable) {
  // Compare against nullptr rather than the NULL macro: it is type-safe and
  // needs no macro definition to be in scope.
  return descriptor->containing_type() == nullptr &&
         MultipleJavaFiles(descriptor->file(), immutable);
}
185 
186 template <>
IsOwnFile(const ServiceDescriptor * descriptor,bool immutable)187 inline bool IsOwnFile(const ServiceDescriptor* descriptor, bool immutable) {
188   return MultipleJavaFiles(descriptor->file(), immutable);
189 }
190 
// If `descriptor` describes an object with its own .java file,
// returns the name (relative to that .java file) of the file that stores
// annotation data for that descriptor. `suffix` is usually empty, but may
// (e.g.) be "OrBuilder" for some generated interfaces.
//
// The result is the descriptor's name, then `suffix`, then ".java.pb.meta".
template <typename Descriptor>
std::string AnnotationFileName(const Descriptor* descriptor,
                               const std::string& suffix) {
  std::string file_name = descriptor->name();
  file_name += suffix;
  file_name += ".java.pb.meta";
  return file_name;
}
200 
201 template <typename Descriptor>
202 void MaybePrintGeneratedAnnotation(Context* context, io::Printer* printer,
203                                    Descriptor* descriptor, bool immutable,
204                                    const std::string& suffix = "") {
205   if (IsOwnFile(descriptor, immutable)) {
206     PrintGeneratedAnnotation(printer, '$',
207                              context->options().annotate_code
208                                  ? AnnotationFileName(descriptor, suffix)
209                                  : "");
210   }
211 }
212 
// Get the unqualified name that should be used for a field's field
// number constant.
std::string FieldConstantName(const FieldDescriptor* field);

// Returns the type of the FieldDescriptor.
// This does nothing interesting for the open source release, but is used for
// hacks that improve compatibility with version 1 protocol buffers at Google.
// Callers in this directory should presumably go through this function rather
// than reading the type off the descriptor directly -- TODO confirm.
FieldDescriptor::Type GetType(const FieldDescriptor* field);
221 
// Java-side type categories; this is the return type of GetJavaType() and
// the argument type of the *TypeName() helpers and IsReferenceType().
enum JavaType {
  JAVATYPE_INT,
  JAVATYPE_LONG,
  JAVATYPE_FLOAT,
  JAVATYPE_DOUBLE,
  JAVATYPE_BOOLEAN,
  JAVATYPE_STRING,
  JAVATYPE_BYTES,
  JAVATYPE_ENUM,
  JAVATYPE_MESSAGE
};
233 
// Returns the JavaType category for `field`.
JavaType GetJavaType(const FieldDescriptor* field);

// NOTE(review): presumably the Java primitive type name for `type` (and,
// like BoxedPrimitiveTypeName, NULL where there is none) -- confirm against
// the definition.
const char* PrimitiveTypeName(JavaType type);

// Get the fully-qualified class name for a boxed primitive type, e.g.
// "java.lang.Integer" for JAVATYPE_INT.  Returns NULL for enum and message
// types.
const char* BoxedPrimitiveTypeName(JavaType type);

// Kotlin source does not distinguish between primitives and non-primitives,
// but does use Kotlin-specific qualified types for them.
const char* KotlinTypeName(JavaType type);

// Get the name of the java enum constant representing this type. E.g.,
// "INT32" for FieldDescriptor::TYPE_INT32. The enum constant's full
// name is "com.google.protobuf.WireFormat.FieldType.INT32".
const char* FieldTypeName(const FieldDescriptor::Type field_type);
251 
// Forward declaration; this header only uses ClassNameResolver through
// pointers.
class ClassNameResolver;
// NOTE(review): presumably returns the Java source text for `field`'s default
// value, with `immutable` selecting the API flavour and `name_resolver` used
// to resolve class names -- confirm against the definition.
std::string DefaultValue(const FieldDescriptor* field, bool immutable,
                         ClassNameResolver* name_resolver);
ImmutableDefaultValue(const FieldDescriptor * field,ClassNameResolver * name_resolver)255 inline std::string ImmutableDefaultValue(const FieldDescriptor* field,
256                                          ClassNameResolver* name_resolver) {
257   return DefaultValue(field, true, name_resolver);
258 }
// NOTE(review): presumably true when the field's default value equals the
// default Java would give a variable of that type -- confirm against the
// definition.
bool IsDefaultValueJavaDefault(const FieldDescriptor* field);
// NOTE(review): presumably true for a bytes field that declares a non-empty
// default value -- confirm against the definition.
bool IsByteStringWithCustomDefaultValue(const FieldDescriptor* field);
261 
262 // Does this message class have descriptor and reflection methods?
HasDescriptorMethods(const Descriptor *,bool enforce_lite)263 inline bool HasDescriptorMethods(const Descriptor* /* descriptor */,
264                                  bool enforce_lite) {
265   return !enforce_lite;
266 }
HasDescriptorMethods(const EnumDescriptor *,bool enforce_lite)267 inline bool HasDescriptorMethods(const EnumDescriptor* /* descriptor */,
268                                  bool enforce_lite) {
269   return !enforce_lite;
270 }
HasDescriptorMethods(const FileDescriptor *,bool enforce_lite)271 inline bool HasDescriptorMethods(const FileDescriptor* /* descriptor */,
272                                  bool enforce_lite) {
273   return !enforce_lite;
274 }
275 
276 // Should we generate generic services for this file?
HasGenericServices(const FileDescriptor * file,bool enforce_lite)277 inline bool HasGenericServices(const FileDescriptor* file, bool enforce_lite) {
278   return file->service_count() > 0 &&
279          HasDescriptorMethods(file, enforce_lite) &&
280          file->options().java_generic_services();
281 }
282 
// Methods for shared bitfields.  Each Generate* helper below returns a
// snippet of Java source as a string; see the per-function examples.

// Gets the name of the shared bitfield for the given index.
std::string GetBitFieldName(int index);

// Gets the name of the shared bitfield for the given bit index.
// Effectively, GetBitFieldName(bitIndex / 32)
std::string GetBitFieldNameForBit(int bitIndex);

// Generates the java code for the expression that returns the boolean value
// of the bit of the shared bitfields for the given bit index.
// Example: "((bitField1_ & 0x04) == 0x04)"
std::string GenerateGetBit(int bitIndex);

// Generates the java code for the expression that sets the bit of the shared
// bitfields for the given bit index.
// Example: "bitField1_ = (bitField1_ | 0x04)"
std::string GenerateSetBit(int bitIndex);

// Generates the java code for the expression that clears the bit of the shared
// bitfields for the given bit index.
// Example: "bitField1_ = (bitField1_ & ~0x04)"
std::string GenerateClearBit(int bitIndex);

// Does the same as GenerateGetBit but operates on the bit field on a local
// variable. This is used by the builder to copy the value in the builder to
// the message.
// Example: "((from_bitField1_ & 0x04) == 0x04)"
std::string GenerateGetBitFromLocal(int bitIndex);

// Does the same as GenerateSetBit but operates on the bit field on a local
// variable. This is used by the builder to copy the value in the builder to
// the message.
// Example: "to_bitField1_ = (to_bitField1_ | 0x04)"
std::string GenerateSetBitToLocal(int bitIndex);

// Does the same as GenerateGetBit but operates on the bit field on a local
// variable. This is used by the parsing constructor to record if a repeated
// field is mutable.
// Example: "((mutable_bitField1_ & 0x04) == 0x04)"
std::string GenerateGetBitMutableLocal(int bitIndex);

// Does the same as GenerateSetBit but operates on the bit field on a local
// variable. This is used by the parsing constructor to record if a repeated
// field is mutable.
// Example: "mutable_bitField1_ = (mutable_bitField1_ | 0x04)"
std::string GenerateSetBitMutableLocal(int bitIndex);
330 
// Returns whether the JavaType is a reference type.
bool IsReferenceType(JavaType type);

// Returns the capitalized name for calling relative functions in
// CodedInputStream
// NOTE(review): presumably the suffix of the matching read/write method
// (e.g. the "Int32" in readInt32) -- confirm against the definition.
const char* GetCapitalizedType(const FieldDescriptor* field, bool immutable);

// For encodings with fixed sizes, returns that size in bytes.  Otherwise
// returns -1.
int FixedSize(FieldDescriptor::Type type);
341 
342 // Comparators used to sort fields in MessageGenerator
343 struct FieldOrderingByNumber {
operatorFieldOrderingByNumber344   inline bool operator()(const FieldDescriptor* a,
345                          const FieldDescriptor* b) const {
346     return a->number() < b->number();
347   }
348 };
349 
350 struct ExtensionRangeOrdering {
operatorExtensionRangeOrdering351   bool operator()(const Descriptor::ExtensionRange* a,
352                   const Descriptor::ExtensionRange* b) const {
353     return a->start < b->start;
354   }
355 };
356 
// Sort the fields of the given Descriptor by number into a new[]'d array
// and return it. The caller owns the returned array and should release it
// with delete[] (not plain delete), since it was allocated with new[].
const FieldDescriptor** SortFieldsByNumber(const Descriptor* descriptor);
360 
361 // Does this message class have any packed fields?
HasPackedFields(const Descriptor * descriptor)362 inline bool HasPackedFields(const Descriptor* descriptor) {
363   for (int i = 0; i < descriptor->field_count(); i++) {
364     if (descriptor->field(i)->is_packed()) {
365       return true;
366     }
367   }
368   return false;
369 }
370 
// Check a message type and its sub-message types recursively to see if any of
// them has a required field. Return true if a required field is found, i.e.
// anywhere in the message or in the sub-message types it reaches.
bool HasRequiredFields(const Descriptor* descriptor);
374 
IsProto2(const FileDescriptor * descriptor)375 inline bool IsProto2(const FileDescriptor* descriptor) {
376   return descriptor->syntax() == FileDescriptor::SYNTAX_PROTO2;
377 }
378 
IsRealOneof(const FieldDescriptor * descriptor)379 inline bool IsRealOneof(const FieldDescriptor* descriptor) {
380   return descriptor->containing_oneof() &&
381          !descriptor->containing_oneof()->is_synthetic();
382 }
383 
HasHazzer(const FieldDescriptor * descriptor)384 inline bool HasHazzer(const FieldDescriptor* descriptor) {
385   return !descriptor->is_repeated() &&
386          (descriptor->message_type() || descriptor->has_optional_keyword() ||
387           IsProto2(descriptor->file()) || IsRealOneof(descriptor));
388 }
389 
HasHasbit(const FieldDescriptor * descriptor)390 inline bool HasHasbit(const FieldDescriptor* descriptor) {
391   // Note that currently message fields inside oneofs have hasbits. This is
392   // surprising, as the oneof case should avoid any need for a hasbit. But if
393   // you change this method to remove hasbits for oneofs, a few tests fail.
394   // TODO(b/124347790): remove hasbits for oneofs
395   return !descriptor->is_repeated() &&
396          (descriptor->has_optional_keyword() || IsProto2(descriptor->file()));
397 }
398 
399 // Whether generate classes expose public PARSER instances.
ExposePublicParser(const FileDescriptor * descriptor)400 inline bool ExposePublicParser(const FileDescriptor* descriptor) {
401   // TODO(liujisi): Mark the PARSER private in 3.1.x releases.
402   return descriptor->syntax() == FileDescriptor::SYNTAX_PROTO2;
403 }
404 
405 // Whether unknown enum values are kept (i.e., not stored in UnknownFieldSet
406 // but in the message and can be queried using additional getters that return
407 // ints.
SupportUnknownEnumValue(const FileDescriptor * descriptor)408 inline bool SupportUnknownEnumValue(const FileDescriptor* descriptor) {
409   return descriptor->syntax() == FileDescriptor::SYNTAX_PROTO3;
410 }
411 
SupportUnknownEnumValue(const FieldDescriptor * field)412 inline bool SupportUnknownEnumValue(const FieldDescriptor* field) {
413   return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
414 }
415 
// Check whether a message has repeated fields.
// NOTE(review): whether this also looks inside sub-messages is not visible
// from this declaration -- confirm against the definition.
bool HasRepeatedFields(const Descriptor* descriptor);
418 
IsMapEntry(const Descriptor * descriptor)419 inline bool IsMapEntry(const Descriptor* descriptor) {
420   return descriptor->options().map_entry();
421 }
422 
IsMapField(const FieldDescriptor * descriptor)423 inline bool IsMapField(const FieldDescriptor* descriptor) {
424   return descriptor->is_map();
425 }
426 
IsAnyMessage(const Descriptor * descriptor)427 inline bool IsAnyMessage(const Descriptor* descriptor) {
428   return descriptor->full_name() == "google.protobuf.Any";
429 }
430 
IsWrappersProtoFile(const FileDescriptor * descriptor)431 inline bool IsWrappersProtoFile(const FileDescriptor* descriptor) {
432   return descriptor->name() == "google/protobuf/wrappers.proto";
433 }
434 
CheckUtf8(const FieldDescriptor * descriptor)435 inline bool CheckUtf8(const FieldDescriptor* descriptor) {
436   return descriptor->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 ||
437          descriptor->file()->options().java_string_check_utf8();
438 }
439 
// Returns the version suffix for generated code, currently "V3".
inline std::string GeneratedCodeVersionSuffix() {
  return std::string("V3");
}
443 
// NOTE(review): presumably appends an encoding of `number` as one or more
// UTF-16 code units to `*output` -- the encoding itself is not visible from
// this declaration; confirm against the definition.
void WriteUInt32ToUtf16CharSequence(uint32_t number,
                                    std::vector<uint16_t>* output);
446 
WriteIntToUtf16CharSequence(int value,std::vector<uint16_t> * output)447 inline void WriteIntToUtf16CharSequence(int value,
448                                         std::vector<uint16_t>* output) {
449   WriteUInt32ToUtf16CharSequence(static_cast<uint32_t>(value), output);
450 }
451 
// Escape a UTF-16 character so it can be embedded in a Java string literal.
// The result is delivered through `output`.
void EscapeUtf16ToString(uint16_t code, std::string* output);

// Only the lowest two bytes of the return value are used. The lowest byte
// is the integer value of a j/c/g/protobuf/FieldType enum. For the other
// byte:
//    bit 0: whether the field is required.
//    bit 1: whether the field requires UTF-8 validation.
//    bit 2: whether the field needs isInitialized check.
//    bit 3: whether the field is a map field with proto2 enum value.
//    bits 4-7: unused
int GetExperimentalJavaFieldType(const FieldDescriptor* field);

// Returns the total number of entries that need to be built for the
// experimental runtime, and the first field number that is not in the table
// part.
// NOTE(review): presumably the pair is (entry count, look-up start field
// number), following the order in the function name -- confirm against the
// definition.
std::pair<int, int> GetTableDrivenNumberOfEntriesAndLookUpStartFieldNumber(
    const FieldDescriptor** fields, int count);
469 }  // namespace java
470 }  // namespace compiler
471 }  // namespace protobuf
472 }  // namespace google
473 
474 #endif  // GOOGLE_PROTOBUF_COMPILER_JAVA_HELPERS_H__
475