• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #ifndef GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
36 #define GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
37 
38 #include <algorithm>
39 #include <cstdint>
40 #include <iterator>
41 #include <map>
42 #include <string>
43 
44 #include <google/protobuf/compiler/scc.h>
45 #include <google/protobuf/compiler/code_generator.h>
46 #include <google/protobuf/compiler/cpp/names.h>
47 #include <google/protobuf/compiler/cpp/options.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/printer.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/port.h>
52 #include <google/protobuf/stubs/strutil.h>
53 
54 // Must be included last.
55 #include <google/protobuf/port_def.inc>
56 
57 namespace google {
58 namespace protobuf {
59 namespace compiler {
60 namespace cpp {
61 
// Levels describing whether an arena destructor is needed. The numeric values
// increase from kNone to kRequired.
// NOTE(review): the exact meaning of each level is defined by the code that
// consumes this enum, which is not visible in this header.
enum class ArenaDtorNeeds {
  kNone = 0,
  kOnDemand = 1,
  kRequired = 2,
};
63 
// Returns the protobuf namespace token. The options argument is ignored; the
// result is always "PROTOBUF_NAMESPACE_ID".
inline std::string ProtobufNamespace(const Options& /* options */) {
  return std::string("PROTOBUF_NAMESPACE_ID");
}
67 
// Returns the prefix used when building macro names. The options argument is
// ignored; the result is always "GOOGLE_PROTOBUF".
inline std::string MacroPrefix(const Options& /* options */) {
  return std::string("GOOGLE_PROTOBUF");
}
71 
DeprecatedAttribute(const Options &,const FieldDescriptor * d)72 inline std::string DeprecatedAttribute(const Options& /* options */,
73                                        const FieldDescriptor* d) {
74   return d->options().deprecated() ? "PROTOBUF_DEPRECATED " : "";
75 }
76 
DeprecatedAttribute(const Options &,const EnumValueDescriptor * d)77 inline std::string DeprecatedAttribute(const Options& /* options */,
78                                        const EnumValueDescriptor* d) {
79   return d->options().deprecated() ? "PROTOBUF_DEPRECATED_ENUM " : "";
80 }
81 
82 // Commonly-used separator comments.  Thick is a line of '=', thin is a line
83 // of '-'.
84 extern const char kThickSeparator[];
85 extern const char kThinSeparator[];
86 
87 void SetCommonVars(const Options& options,
88                    std::map<std::string, std::string>* variables);
89 
90 // Variables to access message data from the message scope.
91 void SetCommonMessageDataVariables(
92     const Descriptor* descriptor,
93     std::map<std::string, std::string>* variables);
94 
95 void SetUnknownFieldsVariable(const Descriptor* descriptor,
96                               const Options& options,
97                               std::map<std::string, std::string>* variables);
98 
99 bool GetBootstrapBasename(const Options& options, const std::string& basename,
100                           std::string* bootstrap_basename);
101 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
102                     bool bootstrap_flag, std::string* basename);
103 bool IsBootstrapProto(const Options& options, const FileDescriptor* file);
104 
// Namespace of the proto file. The returned string is such that
// "<namespace>::some_name" is the correct fully qualified name.
// This means that if the package is empty the namespace is "", and otherwise
// the namespace is "::foo::bar::...::baz" without a trailing "::".
109 std::string Namespace(const FileDescriptor* d, const Options& options);
110 std::string Namespace(const Descriptor* d, const Options& options);
111 std::string Namespace(const FieldDescriptor* d, const Options& options);
112 std::string Namespace(const EnumDescriptor* d, const Options& options);
113 
114 // Returns true if it's safe to reset "field" to zero.
115 bool CanInitializeByZeroing(const FieldDescriptor* field);
116 
117 std::string ClassName(const Descriptor* descriptor);
118 std::string ClassName(const EnumDescriptor* enum_descriptor);
119 
120 std::string QualifiedClassName(const Descriptor* d, const Options& options);
121 std::string QualifiedClassName(const EnumDescriptor* d, const Options& options);
122 
123 std::string QualifiedClassName(const Descriptor* d);
124 std::string QualifiedClassName(const EnumDescriptor* d);
125 
126 // DEPRECATED just use ClassName or QualifiedClassName, a boolean is very
127 // unreadable at the callsite.
128 // Returns the non-nested type name for the given type.  If "qualified" is
129 // true, prefix the type with the full namespace.  For example, if you had:
130 //   package foo.bar;
131 //   message Baz { message Moo {} }
132 // Then the qualified ClassName for Moo would be:
133 //   ::foo::bar::Baz_Moo
134 // While the non-qualified version would be:
135 //   Baz_Moo
ClassName(const Descriptor * descriptor,bool qualified)136 inline std::string ClassName(const Descriptor* descriptor, bool qualified) {
137   return qualified ? QualifiedClassName(descriptor, Options())
138                    : ClassName(descriptor);
139 }
140 
ClassName(const EnumDescriptor * descriptor,bool qualified)141 inline std::string ClassName(const EnumDescriptor* descriptor, bool qualified) {
142   return qualified ? QualifiedClassName(descriptor, Options())
143                    : ClassName(descriptor);
144 }
145 
146 // Returns the extension name prefixed with the class name if nested but without
147 // the package name.
148 std::string ExtensionName(const FieldDescriptor* d);
149 
150 std::string QualifiedExtensionName(const FieldDescriptor* d,
151                                    const Options& options);
152 std::string QualifiedExtensionName(const FieldDescriptor* d);
153 
154 // Type name of default instance.
155 std::string DefaultInstanceType(const Descriptor* descriptor,
156                                 const Options& options, bool split = false);
157 
158 // Non-qualified name of the default_instance of this message.
159 std::string DefaultInstanceName(const Descriptor* descriptor,
160                                 const Options& options, bool split = false);
161 
162 // Non-qualified name of the default instance pointer. This is used only for
163 // implicit weak fields, where we need an extra indirection.
164 std::string DefaultInstancePtr(const Descriptor* descriptor,
165                                const Options& options, bool split = false);
166 
167 // Fully qualified name of the default_instance of this message.
168 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
169                                          const Options& options,
170                                          bool split = false);
171 
172 // Fully qualified name of the default instance pointer.
173 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
174                                         const Options& options,
175                                         bool split = false);
176 
177 // DescriptorTable variable name.
178 std::string DescriptorTableName(const FileDescriptor* file,
179                                 const Options& options);
180 
181 // When declaring symbol externs from another file, this macro will supply the
182 // dllexport needed for the target file, if any.
183 std::string FileDllExport(const FileDescriptor* file, const Options& options);
184 
185 // Name of the base class: google::protobuf::Message or google::protobuf::MessageLite.
186 std::string SuperClassName(const Descriptor* descriptor,
187                            const Options& options);
188 
189 // Adds an underscore if necessary to prevent conflicting with a keyword.
190 std::string ResolveKeyword(const std::string& name);
191 
192 // Get the (unqualified) name that should be used for this field in C++ code.
193 // The name is coerced to lower-case to emulate proto1 behavior.  People
194 // should be using lowercase-with-underscores style for proto field names
195 // anyway, so normally this just returns field->name().
196 std::string FieldName(const FieldDescriptor* field);
197 
198 // Returns the (unqualified) private member name for this field in C++ code.
199 std::string FieldMemberName(const FieldDescriptor* field, bool split);
200 
201 // Returns an estimate of the compiler's alignment for the field.  This
202 // can't guarantee to be correct because the generated code could be compiled on
203 // different systems with different alignment rules.  The estimates below assume
204 // 64-bit pointers.
205 int EstimateAlignmentSize(const FieldDescriptor* field);
206 
207 // Get the unqualified name that should be used for a field's field
208 // number constant.
209 std::string FieldConstantName(const FieldDescriptor* field);
210 
211 // Returns the scope where the field was defined (for extensions, this is
212 // different from the message type to which the field applies).
FieldScope(const FieldDescriptor * field)213 inline const Descriptor* FieldScope(const FieldDescriptor* field) {
214   return field->is_extension() ? field->extension_scope()
215                                : field->containing_type();
216 }
217 
218 // Returns the fully-qualified type name field->message_type().  Usually this
219 // is just ClassName(field->message_type(), true);
220 std::string FieldMessageTypeName(const FieldDescriptor* field,
221                                  const Options& options);
222 
223 // Get the C++ type name for a primitive type (e.g. "double", "::google::protobuf::int32", etc.).
224 const char* PrimitiveTypeName(FieldDescriptor::CppType type);
225 std::string PrimitiveTypeName(const Options& options,
226                               FieldDescriptor::CppType type);
227 
228 // Get the declared type name in CamelCase format, as is used e.g. for the
229 // methods of WireFormat.  For example, TYPE_INT32 becomes "Int32".
230 const char* DeclaredTypeMethodName(FieldDescriptor::Type type);
231 
232 // Return the code that evaluates to the number when compiled.
233 std::string Int32ToString(int number);
234 
235 // Get code that evaluates to the field's default value.
236 std::string DefaultValue(const Options& options, const FieldDescriptor* field);
237 
238 // Compatibility function for callers outside proto2.
239 std::string DefaultValue(const FieldDescriptor* field);
240 
241 // Convert a file name into a valid identifier.
242 std::string FilenameIdentifier(const std::string& filename);
243 
// Generates a name that is unique to each .proto file, to prevent collisions
// of symbols in the global namespace.
246 std::string UniqueName(const std::string& name, const std::string& filename,
247                        const Options& options);
UniqueName(const std::string & name,const FileDescriptor * d,const Options & options)248 inline std::string UniqueName(const std::string& name, const FileDescriptor* d,
249                               const Options& options) {
250   return UniqueName(name, d->name(), options);
251 }
UniqueName(const std::string & name,const Descriptor * d,const Options & options)252 inline std::string UniqueName(const std::string& name, const Descriptor* d,
253                               const Options& options) {
254   return UniqueName(name, d->file(), options);
255 }
UniqueName(const std::string & name,const EnumDescriptor * d,const Options & options)256 inline std::string UniqueName(const std::string& name, const EnumDescriptor* d,
257                               const Options& options) {
258   return UniqueName(name, d->file(), options);
259 }
UniqueName(const std::string & name,const ServiceDescriptor * d,const Options & options)260 inline std::string UniqueName(const std::string& name,
261                               const ServiceDescriptor* d,
262                               const Options& options) {
263   return UniqueName(name, d->file(), options);
264 }
265 
266 // Versions for call sites that only support the internal runtime (like proto1
267 // support).
InternalRuntimeOptions()268 inline Options InternalRuntimeOptions() {
269   Options options;
270   options.opensource_runtime = false;
271   return options;
272 }
UniqueName(const std::string & name,const std::string & filename)273 inline std::string UniqueName(const std::string& name,
274                               const std::string& filename) {
275   return UniqueName(name, filename, InternalRuntimeOptions());
276 }
UniqueName(const std::string & name,const FileDescriptor * d)277 inline std::string UniqueName(const std::string& name,
278                               const FileDescriptor* d) {
279   return UniqueName(name, d->name(), InternalRuntimeOptions());
280 }
UniqueName(const std::string & name,const Descriptor * d)281 inline std::string UniqueName(const std::string& name, const Descriptor* d) {
282   return UniqueName(name, d->file(), InternalRuntimeOptions());
283 }
UniqueName(const std::string & name,const EnumDescriptor * d)284 inline std::string UniqueName(const std::string& name,
285                               const EnumDescriptor* d) {
286   return UniqueName(name, d->file(), InternalRuntimeOptions());
287 }
UniqueName(const std::string & name,const ServiceDescriptor * d)288 inline std::string UniqueName(const std::string& name,
289                               const ServiceDescriptor* d) {
290   return UniqueName(name, d->file(), InternalRuntimeOptions());
291 }
292 
293 // Return the qualified C++ name for a file level symbol.
294 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
295                                      const std::string& name,
296                                      const Options& options);
297 
298 // Escape C++ trigraphs by escaping question marks to \?
299 std::string EscapeTrigraphs(const std::string& to_escape);
300 
301 // Escaped function name to eliminate naming conflict.
302 std::string SafeFunctionName(const Descriptor* descriptor,
303                              const FieldDescriptor* field,
304                              const std::string& prefix);
305 
306 // Returns true if generated messages have public unknown fields accessors
PublicUnknownFieldsAccessors(const Descriptor * message)307 inline bool PublicUnknownFieldsAccessors(const Descriptor* message) {
308   return message->file()->syntax() != FileDescriptor::SYNTAX_PROTO3;
309 }
310 
311 // Returns the optimize mode for <file>, respecting <options.enforce_lite>.
312 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
313                                         const Options& options);
314 
315 // Determines whether unknown fields will be stored in an UnknownFieldSet or
316 // a string.
UseUnknownFieldSet(const FileDescriptor * file,const Options & options)317 inline bool UseUnknownFieldSet(const FileDescriptor* file,
318                                const Options& options) {
319   return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
320 }
321 
IsWeak(const FieldDescriptor * field,const Options & options)322 inline bool IsWeak(const FieldDescriptor* field, const Options& options) {
323   if (field->options().weak()) {
324     GOOGLE_CHECK(!options.opensource_runtime);
325     return true;
326   }
327   return false;
328 }
329 
330 bool IsStringInlined(const FieldDescriptor* descriptor, const Options& options);
331 
332 // For a string field, returns the effective ctype.  If the actual ctype is
333 // not supported, returns the default of STRING.
334 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
335                                          const Options& options);
336 
IsCord(const FieldDescriptor * field,const Options & options)337 inline bool IsCord(const FieldDescriptor* field, const Options& options) {
338   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
339          EffectiveStringCType(field, options) == FieldOptions::CORD;
340 }
341 
IsString(const FieldDescriptor * field,const Options & options)342 inline bool IsString(const FieldDescriptor* field, const Options& options) {
343   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
344          EffectiveStringCType(field, options) == FieldOptions::STRING;
345 }
346 
IsStringPiece(const FieldDescriptor * field,const Options & options)347 inline bool IsStringPiece(const FieldDescriptor* field,
348                           const Options& options) {
349   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
350          EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
351 }
352 
353 class MessageSCCAnalyzer;
354 
355 // Does the given FileDescriptor use lazy fields?
356 bool HasLazyFields(const FileDescriptor* file, const Options& options,
357                    MessageSCCAnalyzer* scc_analyzer);
358 
359 // Is the given field a supported lazy field?
360 bool IsLazy(const FieldDescriptor* field, const Options& options,
361             MessageSCCAnalyzer* scc_analyzer);
362 
363 // Is this an explicit (non-profile driven) lazy field, as denoted by
364 // lazy/unverified_lazy in the descriptor?
IsExplicitLazy(const FieldDescriptor * field)365 inline bool IsExplicitLazy(const FieldDescriptor* field) {
366   return field->options().lazy() || field->options().unverified_lazy();
367 }
368 
369 bool IsEagerlyVerifiedLazy(const FieldDescriptor* field, const Options& options,
370                            MessageSCCAnalyzer* scc_analyzer);
371 
372 bool IsLazilyVerifiedLazy(const FieldDescriptor* field, const Options& options);
373 
374 // Is the given message being split (go/pdsplit)?
375 bool ShouldSplit(const Descriptor* desc, const Options& options);
376 
377 // Is the given field being split out?
378 bool ShouldSplit(const FieldDescriptor* field, const Options& options);
379 
// Reports whether "field" is used. Both arguments are ignored: every field is
// reported as used.
inline bool IsFieldUsed(const FieldDescriptor*, const Options&) {
  return true;
}
384 
385 // Returns true if "field" is stripped.
IsFieldStripped(const FieldDescriptor *,const Options &)386 inline bool IsFieldStripped(const FieldDescriptor* /*field*/,
387                             const Options& /*options*/) {
388   return false;
389 }
390 
391 // Does the file contain any definitions that need extension_set.h?
392 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file);
393 
394 // Does the file have any repeated fields, necessitating the file to include
395 // repeated_field.h? This does not include repeated extensions, since those are
396 // all stored internally in an ExtensionSet, not a separate RepeatedField*.
397 bool HasRepeatedFields(const FileDescriptor* file);
398 
399 // Does the file have any string/bytes fields with ctype=STRING_PIECE? This
400 // does not include extensions, since ctype is ignored for extensions.
401 bool HasStringPieceFields(const FileDescriptor* file, const Options& options);
402 
403 // Does the file have any string/bytes fields with ctype=CORD? This does not
404 // include extensions, since ctype is ignored for extensions.
405 bool HasCordFields(const FileDescriptor* file, const Options& options);
406 
407 // Does the file have any map fields, necessitating the file to include
408 // map_field_inl.h and map.h.
409 bool HasMapFields(const FileDescriptor* file);
410 
411 // Does this file have any enum type definitions?
412 bool HasEnumDefinitions(const FileDescriptor* file);
413 
414 // Does this file have generated parsing, serialization, and other
415 // standard methods for which reflection-based fallback implementations exist?
HasGeneratedMethods(const FileDescriptor * file,const Options & options)416 inline bool HasGeneratedMethods(const FileDescriptor* file,
417                                 const Options& options) {
418   return GetOptimizeFor(file, options) != FileOptions::CODE_SIZE;
419 }
420 
421 // Do message classes in this file have descriptor and reflection methods?
HasDescriptorMethods(const FileDescriptor * file,const Options & options)422 inline bool HasDescriptorMethods(const FileDescriptor* file,
423                                  const Options& options) {
424   return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
425 }
426 
427 // Should we generate generic services for this file?
HasGenericServices(const FileDescriptor * file,const Options & options)428 inline bool HasGenericServices(const FileDescriptor* file,
429                                const Options& options) {
430   return file->service_count() > 0 &&
431          GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME &&
432          file->options().cc_generic_services();
433 }
434 
IsProto2MessageSet(const Descriptor * descriptor,const Options & options)435 inline bool IsProto2MessageSet(const Descriptor* descriptor,
436                                const Options& options) {
437   return !options.opensource_runtime &&
438          options.enforce_mode != EnforceOptimizeMode::kLiteRuntime &&
439          !options.lite_implicit_weak_fields &&
440          descriptor->options().message_set_wire_format() &&
441          descriptor->full_name() == "google.protobuf.bridge.MessageSet";
442 }
443 
IsMapEntryMessage(const Descriptor * descriptor)444 inline bool IsMapEntryMessage(const Descriptor* descriptor) {
445   return descriptor->options().map_entry();
446 }
447 
448 // Returns true if the field's CPPTYPE is string or message.
449 bool IsStringOrMessage(const FieldDescriptor* field);
450 
451 std::string UnderscoresToCamelCase(const std::string& input,
452                                    bool cap_next_letter);
453 
IsProto3(const FileDescriptor * file)454 inline bool IsProto3(const FileDescriptor* file) {
455   return file->syntax() == FileDescriptor::SYNTAX_PROTO3;
456 }
457 
HasHasbit(const FieldDescriptor * field)458 inline bool HasHasbit(const FieldDescriptor* field) {
459   // This predicate includes proto3 message fields only if they have "optional".
460   //   Foo submsg1 = 1;           // HasHasbit() == false
461   //   optional Foo submsg2 = 2;  // HasHasbit() == true
462   // This is slightly odd, as adding "optional" to a singular proto3 field does
463   // not change the semantics or API. However whenever any field in a message
464   // has a hasbit, it forces reflection to include hasbit offsets for *all*
465   // fields, even if almost all of them are set to -1 (no hasbit). So to avoid
466   // causing a sudden size regression for ~all proto3 messages, we give proto3
467   // message fields a hasbit only if "optional" is present. If the user is
468   // explicitly writing "optional", it is likely they are writing it on
469   // primitive fields also.
470   return (field->has_optional_keyword() || field->is_required()) &&
471          !field->options().weak();
472 }
473 
474 // Returns true if 'enum' semantics are such that unknown values are preserved
475 // in the enum field itself, rather than going to the UnknownFieldSet.
HasPreservingUnknownEnumSemantics(const FieldDescriptor * field)476 inline bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) {
477   return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
478 }
479 
IsCrossFileMessage(const FieldDescriptor * field)480 inline bool IsCrossFileMessage(const FieldDescriptor* field) {
481   return field->type() == FieldDescriptor::TYPE_MESSAGE &&
482          field->message_type()->file() != field->file();
483 }
484 
MakeDefaultName(const FieldDescriptor * field)485 inline std::string MakeDefaultName(const FieldDescriptor* field) {
486   return StrCat("_i_give_permission_to_break_this_code_default_",
487                       FieldName(field), "_");
488 }
489 
490 // Semantically distinct from MakeDefaultName in that it gives the C++ code
491 // referencing a default field from the message scope, rather than just the
492 // variable name.
493 // For example, declarations of default variables should always use just
494 // MakeDefaultName to produce code like:
495 //   Type _i_give_permission_to_break_this_code_default_field_;
496 //
497 // Code that references these should use MakeDefaultFieldName, in case the field
498 // exists at some nested level like:
499 //   internal_container_._i_give_permission_to_break_this_code_default_field_;
MakeDefaultFieldName(const FieldDescriptor * field)500 inline std::string MakeDefaultFieldName(const FieldDescriptor* field) {
501   return StrCat("Impl_::", MakeDefaultName(field));
502 }
503 
MakeVarintCachedSizeName(const FieldDescriptor * field)504 inline std::string MakeVarintCachedSizeName(const FieldDescriptor* field) {
505   return StrCat("_", FieldName(field), "_cached_byte_size_");
506 }
507 
508 // Semantically distinct from MakeVarintCachedSizeName in that it gives the C++
509 // code referencing the object from the message scope, rather than just the
510 // variable name.
511 // For example, declarations of default variables should always use just
512 // MakeVarintCachedSizeName to produce code like:
513 //   Type _field_cached_byte_size_;
514 //
515 // Code that references these variables should use
516 // MakeVarintCachedSizeFieldName, in case the field exists at some nested level
517 // like:
518 //   internal_container_._field_cached_byte_size_;
MakeVarintCachedSizeFieldName(const FieldDescriptor * field,bool split)519 inline std::string MakeVarintCachedSizeFieldName(const FieldDescriptor* field,
520                                                  bool split) {
521   return StrCat("_impl_.", split ? "_split_->" : "", "_",
522                       FieldName(field), "_cached_byte_size_");
523 }
524 
525 // Note: A lot of libraries detect Any protos based on Descriptor::full_name()
526 // while the two functions below use FileDescriptor::name(). In a sane world the
527 // two approaches should be equivalent. But if you are dealing with descriptors
528 // from untrusted sources, you might need to match semantics across libraries.
529 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options);
530 bool IsAnyMessage(const Descriptor* descriptor, const Options& options);
531 
532 bool IsWellKnownMessage(const FileDescriptor* descriptor);
533 
IncludeGuard(const FileDescriptor * file,bool pb_h,const Options & options)534 inline std::string IncludeGuard(const FileDescriptor* file, bool pb_h,
535                                 const Options& options) {
536   // If we are generating a .pb.h file and the proto_h option is enabled, then
537   // the .pb.h gets an extra suffix.
538   std::string filename_identifier = FilenameIdentifier(
539       file->name() + (pb_h && options.proto_h ? ".pb.h" : ""));
540 
541   if (IsWellKnownMessage(file)) {
542     // For well-known messages we need third_party/protobuf and net/proto2 to
543     // have distinct include guards, because some source files include both and
544     // both need to be defined (the third_party copies will be in the
545     // google::protobuf_opensource namespace).
546     return MacroPrefix(options) + "_INCLUDED_" + filename_identifier;
547   } else {
548     // Ideally this case would use distinct include guards for opensource and
549     // google3 protos also.  (The behavior of "first #included wins" is not
550     // ideal).  But unfortunately some legacy code includes both and depends on
551     // the identical include guards to avoid compile errors.
552     //
553     // We should clean this up so that this case can be removed.
554     return "GOOGLE_PROTOBUF_INCLUDED_" + filename_identifier;
555   }
556 }
557 
558 // Returns the OptimizeMode for this file, furthermore it updates a status
559 // bool if has_opt_codesize_extension is non-null. If this status bool is true
560 // it means this file contains an extension that itself is defined as
561 // optimized_for = CODE_SIZE.
562 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
563                                         const Options& options,
564                                         bool* has_opt_codesize_extension);
GetOptimizeFor(const FileDescriptor * file,const Options & options)565 inline FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
566                                                const Options& options) {
567   return GetOptimizeFor(file, options, nullptr);
568 }
NeedsEagerDescriptorAssignment(const FileDescriptor * file,const Options & options)569 inline bool NeedsEagerDescriptorAssignment(const FileDescriptor* file,
570                                            const Options& options) {
571   bool has_opt_codesize_extension;
572   if (GetOptimizeFor(file, options, &has_opt_codesize_extension) ==
573           FileOptions::CODE_SIZE &&
574       has_opt_codesize_extension) {
575     // If this filedescriptor contains an extension from another file which
576     // is optimized_for = CODE_SIZE. We need to be careful in the ordering so
577     // we eagerly build the descriptors in the dependencies before building
578     // the descriptors of this file.
579     return true;
580   } else {
581     // If we have a generated code based parser we never need eager
582     // initialization of descriptors of our deps.
583     return false;
584   }
585 }
586 
587 // This orders the messages in a .pb.cc as it's outputted by file.cc
588 void FlattenMessagesInFile(const FileDescriptor* file,
589                            std::vector<const Descriptor*>* result);
FlattenMessagesInFile(const FileDescriptor * file)590 inline std::vector<const Descriptor*> FlattenMessagesInFile(
591     const FileDescriptor* file) {
592   std::vector<const Descriptor*> result;
593   FlattenMessagesInFile(file, &result);
594   return result;
595 }
596 
597 template <typename F>
ForEachMessage(const Descriptor * descriptor,F && func)598 void ForEachMessage(const Descriptor* descriptor, F&& func) {
599   for (int i = 0; i < descriptor->nested_type_count(); i++)
600     ForEachMessage(descriptor->nested_type(i), std::forward<F&&>(func));
601   func(descriptor);
602 }
603 
604 template <typename F>
ForEachMessage(const FileDescriptor * descriptor,F && func)605 void ForEachMessage(const FileDescriptor* descriptor, F&& func) {
606   for (int i = 0; i < descriptor->message_type_count(); i++)
607     ForEachMessage(descriptor->message_type(i), std::forward<F&&>(func));
608 }
609 
610 bool HasWeakFields(const Descriptor* desc, const Options& options);
611 bool HasWeakFields(const FileDescriptor* desc, const Options& options);
612 
613 // Returns true if the "required" restriction check should be ignored for the
614 // given field.
ShouldIgnoreRequiredFieldCheck(const FieldDescriptor * field,const Options & options)615 inline static bool ShouldIgnoreRequiredFieldCheck(const FieldDescriptor* field,
616                                                   const Options& options) {
617   // Do not check "required" for lazily verified lazy fields.
618   return IsLazilyVerifiedLazy(field, options);
619 }
620 
// Bundle of boolean properties produced by an SCC analysis. Every flag starts
// out false.
struct MessageAnalysis {
  bool is_recursive{false};
  bool contains_cord{false};
  bool contains_extension{false};
  bool contains_required{false};
  bool contains_weak{false};  // Implicit weak as well.
};
628 
629 // This class is used in FileGenerator, to ensure linear instead of
630 // quadratic performance, if we do this per message we would get O(V*(V+E)).
631 // Logically this is just only used in message.cc, but in the header for
632 // FileGenerator to help share it.
633 class PROTOC_EXPORT MessageSCCAnalyzer {
634  public:
MessageSCCAnalyzer(const Options & options)635   explicit MessageSCCAnalyzer(const Options& options) : options_(options) {}
636 
637   MessageAnalysis GetSCCAnalysis(const SCC* scc);
638 
HasRequiredFields(const Descriptor * descriptor)639   bool HasRequiredFields(const Descriptor* descriptor) {
640     MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor));
641     return result.contains_required || result.contains_extension;
642   }
HasWeakField(const Descriptor * descriptor)643   bool HasWeakField(const Descriptor* descriptor) {
644     MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor));
645     return result.contains_weak;
646   }
GetSCC(const Descriptor * descriptor)647   const SCC* GetSCC(const Descriptor* descriptor) {
648     return analyzer_.GetSCC(descriptor);
649   }
650 
651  private:
652   struct DepsGenerator {
operatorDepsGenerator653     std::vector<const Descriptor*> operator()(const Descriptor* desc) const {
654       std::vector<const Descriptor*> deps;
655       for (int i = 0; i < desc->field_count(); i++) {
656         if (desc->field(i)->message_type()) {
657           deps.push_back(desc->field(i)->message_type());
658         }
659       }
660       return deps;
661     }
662   };
663   SCCAnalyzer<DepsGenerator> analyzer_;
664   Options options_;
665   std::map<const SCC*, MessageAnalysis> analysis_cache_;
666 };
667 
668 void ListAllFields(const Descriptor* d,
669                    std::vector<const FieldDescriptor*>* fields);
670 void ListAllFields(const FileDescriptor* d,
671                    std::vector<const FieldDescriptor*>* fields);
672 
673 template <class T>
ForEachField(const Descriptor * d,T && func)674 void ForEachField(const Descriptor* d, T&& func) {
675   for (int i = 0; i < d->nested_type_count(); i++) {
676     ForEachField(d->nested_type(i), std::forward<T&&>(func));
677   }
678   for (int i = 0; i < d->extension_count(); i++) {
679     func(d->extension(i));
680   }
681   for (int i = 0; i < d->field_count(); i++) {
682     func(d->field(i));
683   }
684 }
685 
686 template <class T>
ForEachField(const FileDescriptor * d,T && func)687 void ForEachField(const FileDescriptor* d, T&& func) {
688   for (int i = 0; i < d->message_type_count(); i++) {
689     ForEachField(d->message_type(i), std::forward<T&&>(func));
690   }
691   for (int i = 0; i < d->extension_count(); i++) {
692     func(d->extension(i));
693   }
694 }
695 
696 void ListAllTypesForServices(const FileDescriptor* fd,
697                              std::vector<const Descriptor*>* types);
698 
699 // Indicates whether we should use implicit weak fields for this file.
700 bool UsingImplicitWeakFields(const FileDescriptor* file,
701                              const Options& options);
702 
703 // Indicates whether to treat this field as implicitly weak.
704 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
705                          MessageSCCAnalyzer* scc_analyzer);
706 
HasSimpleBaseClass(const Descriptor * desc,const Options & options)707 inline bool HasSimpleBaseClass(const Descriptor* desc, const Options& options) {
708   if (!HasDescriptorMethods(desc->file(), options)) return false;
709   if (desc->extension_range_count() != 0) return false;
710   if (desc->field_count() == 0) return true;
711   // TODO(jorg): Support additional common message types with only one
712   // or two fields
713   return false;
714 }
715 
HasSimpleBaseClasses(const FileDescriptor * file,const Options & options)716 inline bool HasSimpleBaseClasses(const FileDescriptor* file,
717                                  const Options& options) {
718   bool v = false;
719   ForEachMessage(file, [&v, &options](const Descriptor* desc) {
720     v |= HasSimpleBaseClass(desc, options);
721   });
722   return v;
723 }
724 
SimpleBaseClass(const Descriptor * desc,const Options & options)725 inline std::string SimpleBaseClass(const Descriptor* desc,
726                                    const Options& options) {
727   if (!HasDescriptorMethods(desc->file(), options)) return "";
728   if (desc->extension_range_count() != 0) return "";
729   if (desc->field_count() == 0) {
730     return "ZeroFieldsBase";
731   }
732   // TODO(jorg): Support additional common message types with only one
733   // or two fields
734   return "";
735 }
736 
737 // Returns true if this message has a _tracker_ field.
HasTracker(const Descriptor * desc,const Options & options)738 inline bool HasTracker(const Descriptor* desc, const Options& options) {
739   return options.field_listener_options.inject_field_listener_events &&
740          desc->file()->options().optimize_for() !=
741              google::protobuf::FileOptions::LITE_RUNTIME;
742 }
743 
744 // Returns true if this message needs an Impl_ struct for it's data.
HasImplData(const Descriptor * desc,const Options & options)745 inline bool HasImplData(const Descriptor* desc, const Options& options) {
746   return !HasSimpleBaseClass(desc, options);
747 }
748 
749 // Formatter is a functor class which acts as a closure around printer and
750 // the variable map. It's much like printer->Print except it supports both named
751 // variables that are substituted using a key value map and direct arguments. In
752 // the format string $1$, $2$, etc... are substituted for the first, second, ...
753 // direct argument respectively in the format call, it accepts both strings and
754 // integers. The implementation verifies all arguments are used and are "first"
755 // used in order of appearance in the argument list. For example,
756 //
757 // Format("return array[$1$];", 3) -> "return array[3];"
758 // Format("array[$2$] = $1$;", "Bla", 3) -> FATAL error (wrong order)
759 // Format("array[$1$] = $2$;", 3, "Bla") -> "array[3] = Bla;"
760 //
761 // The arguments can be used more than once like
762 //
763 // Format("array[$1$] = $2$;  // Index = $1$", 3, "Bla") ->
764 //        "array[3] = Bla;  // Index = 3"
765 //
766 // If you use more arguments use the following style to help the reader,
767 //
768 // Format("int $1$() {\n"
769 //        "  array[$2$] = $3$;\n"
770 //        "  return $4$;"
771 //        "}\n",
772 //        funname, // 1
773 //        idx,  // 2
774 //        varname,  // 3
775 //        retval);  // 4
776 //
777 // but consider using named variables. Named variables like $foo$, with some
778 // identifier foo, are looked up in the map. One additional feature is that
779 // spaces are accepted between the '$' delimiters, $ foo$ will
780 // substitute to " bar" if foo stands for "bar", but in case it's empty
781 // will substitute to "". Hence, for example,
782 //
783 // Format(vars, "$dllexport $void fun();") -> "void fun();"
784 //                                            "__declspec(export) void fun();"
785 //
786 // which is convenient to prevent double, leading or trailing spaces.
787 class PROTOC_EXPORT Formatter {
788  public:
Formatter(io::Printer * printer)789   explicit Formatter(io::Printer* printer) : printer_(printer) {}
Formatter(io::Printer * printer,const std::map<std::string,std::string> & vars)790   Formatter(io::Printer* printer,
791             const std::map<std::string, std::string>& vars)
792       : printer_(printer), vars_(vars) {}
793 
794   template <typename T>
Set(const std::string & key,const T & value)795   void Set(const std::string& key, const T& value) {
796     vars_[key] = ToString(value);
797   }
798 
AddMap(const std::map<std::string,std::string> & vars)799   void AddMap(const std::map<std::string, std::string>& vars) {
800     for (const auto& keyval : vars) vars_[keyval.first] = keyval.second;
801   }
802 
803   template <typename... Args>
operator()804   void operator()(const char* format, const Args&... args) const {
805     printer_->FormatInternal({ToString(args)...}, vars_, format);
806   }
807 
Indent()808   void Indent() const { printer_->Indent(); }
Outdent()809   void Outdent() const { printer_->Outdent(); }
printer()810   io::Printer* printer() const { return printer_; }
811 
812   class PROTOC_EXPORT ScopedIndenter {
813    public:
ScopedIndenter(Formatter * format)814     explicit ScopedIndenter(Formatter* format) : format_(format) {
815       format_->Indent();
816     }
~ScopedIndenter()817     ~ScopedIndenter() { format_->Outdent(); }
818 
819    private:
820     Formatter* format_;
821   };
822 
ScopedIndent()823   PROTOBUF_NODISCARD ScopedIndenter ScopedIndent() {
824     return ScopedIndenter(this);
825   }
826   template <typename... Args>
ScopedIndent(const char * format,const Args &&...args)827   PROTOBUF_NODISCARD ScopedIndenter ScopedIndent(const char* format,
828                                                  const Args&&... args) {
829     (*this)(format, static_cast<Args&&>(args)...);
830     return ScopedIndenter(this);
831   }
832 
833   class PROTOC_EXPORT SaveState {
834    public:
SaveState(Formatter * format)835     explicit SaveState(Formatter* format)
836         : format_(format), vars_(format->vars_) {}
~SaveState()837     ~SaveState() { format_->vars_.swap(vars_); }
838 
839    private:
840     Formatter* format_;
841     std::map<std::string, std::string> vars_;
842   };
843 
844  private:
845   io::Printer* printer_;
846   std::map<std::string, std::string> vars_;
847 
848   // Convenience overloads to accept different types as arguments.
ToString(const std::string & s)849   static std::string ToString(const std::string& s) { return s; }
850   template <typename I, typename = typename std::enable_if<
851                             std::is_integral<I>::value>::type>
ToString(I x)852   static std::string ToString(I x) {
853     return StrCat(x);
854   }
ToString(strings::Hex x)855   static std::string ToString(strings::Hex x) { return StrCat(x); }
ToString(const FieldDescriptor * d)856   static std::string ToString(const FieldDescriptor* d) { return Payload(d); }
ToString(const Descriptor * d)857   static std::string ToString(const Descriptor* d) { return Payload(d); }
ToString(const EnumDescriptor * d)858   static std::string ToString(const EnumDescriptor* d) { return Payload(d); }
ToString(const EnumValueDescriptor * d)859   static std::string ToString(const EnumValueDescriptor* d) {
860     return Payload(d);
861   }
ToString(const OneofDescriptor * d)862   static std::string ToString(const OneofDescriptor* d) { return Payload(d); }
863 
864   template <typename Descriptor>
Payload(const Descriptor * descriptor)865   static std::string Payload(const Descriptor* descriptor) {
866     std::vector<int> path;
867     descriptor->GetLocationPath(&path);
868     GeneratedCodeInfo::Annotation annotation;
869     for (int index : path) {
870       annotation.add_path(index);
871     }
872     annotation.set_source_file(descriptor->file()->name());
873     return annotation.SerializeAsString();
874   }
875 };
876 
877 template <class T>
PrintFieldComment(const Formatter & format,const T * field)878 void PrintFieldComment(const Formatter& format, const T* field) {
879   // Print the field's (or oneof's) proto-syntax definition as a comment.
880   // We don't want to print group bodies so we cut off after the first
881   // line.
882   DebugStringOptions options;
883   options.elide_group_body = true;
884   options.elide_oneof_body = true;
885   std::string def = field->DebugStringWithOptions(options);
886   format("// $1$\n", def.substr(0, def.find_first_of('\n')));
887 }
888 
889 class PROTOC_EXPORT NamespaceOpener {
890  public:
NamespaceOpener(const Formatter & format)891   explicit NamespaceOpener(const Formatter& format)
892       : printer_(format.printer()) {}
NamespaceOpener(const std::string & name,const Formatter & format)893   NamespaceOpener(const std::string& name, const Formatter& format)
894       : NamespaceOpener(format) {
895     ChangeTo(name);
896   }
~NamespaceOpener()897   ~NamespaceOpener() { ChangeTo(""); }
898 
ChangeTo(const std::string & name)899   void ChangeTo(const std::string& name) {
900     std::vector<std::string> new_stack_ =
901         Split(name, "::", true);
902     size_t len = std::min(name_stack_.size(), new_stack_.size());
903     size_t common_idx = 0;
904     while (common_idx < len) {
905       if (name_stack_[common_idx] != new_stack_[common_idx]) break;
906       common_idx++;
907     }
908     for (auto it = name_stack_.crbegin();
909          it != name_stack_.crend() - common_idx; ++it) {
910       if (*it == "PROTOBUF_NAMESPACE_ID") {
911         printer_->Print("PROTOBUF_NAMESPACE_CLOSE\n");
912       } else {
913         printer_->Print("}  // namespace $ns$\n", "ns", *it);
914       }
915     }
916     name_stack_.swap(new_stack_);
917     for (size_t i = common_idx; i < name_stack_.size(); ++i) {
918       if (name_stack_[i] == "PROTOBUF_NAMESPACE_ID") {
919         printer_->Print("PROTOBUF_NAMESPACE_OPEN\n");
920       } else {
921         printer_->Print("namespace $ns$ {\n", "ns", name_stack_[i]);
922       }
923     }
924   }
925 
926  private:
927   io::Printer* printer_;
928   std::vector<std::string> name_stack_;
929 };
930 
// Selects how string fields are checked for UTF-8 validity; the per-enumerator
// comments describe the effect on parsing.
enum class Utf8CheckMode {
  kStrict = 0,  // Parsing will fail if non UTF-8 data is in string fields.
  kVerify = 1,  // Only log an error but parsing will succeed.
  kNone = 2,    // No UTF-8 check.
};
936 
937 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
938                                const Options& options);
939 
940 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
941                                     const Options& options, bool for_parse,
942                                     const char* parameters,
943                                     const Formatter& format);
944 
945 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
946                                   const Options& options, bool for_parse,
947                                   const char* parameters,
948                                   const Formatter& format);
949 
950 template <typename T>
951 struct FieldRangeImpl {
952   struct Iterator {
953     using iterator_category = std::forward_iterator_tag;
954     using value_type = const FieldDescriptor*;
955     using difference_type = int;
956 
957     value_type operator*() { return descriptor->field(idx); }
958 
959     friend bool operator==(const Iterator& a, const Iterator& b) {
960       GOOGLE_DCHECK(a.descriptor == b.descriptor);
961       return a.idx == b.idx;
962     }
963     friend bool operator!=(const Iterator& a, const Iterator& b) {
964       return !(a == b);
965     }
966 
967     Iterator& operator++() {
968       idx++;
969       return *this;
970     }
971 
972     int idx;
973     const T* descriptor;
974   };
975 
beginFieldRangeImpl976   Iterator begin() const { return {0, descriptor}; }
endFieldRangeImpl977   Iterator end() const { return {descriptor->field_count(), descriptor}; }
978 
979   const T* descriptor;
980 };
981 
982 template <typename T>
FieldRange(const T * desc)983 FieldRangeImpl<T> FieldRange(const T* desc) {
984   return {desc};
985 }
986 
987 struct OneOfRangeImpl {
988   struct Iterator {
989     using iterator_category = std::forward_iterator_tag;
990     using value_type = const OneofDescriptor*;
991     using difference_type = int;
992 
993     value_type operator*() { return descriptor->oneof_decl(idx); }
994 
995     friend bool operator==(const Iterator& a, const Iterator& b) {
996       GOOGLE_DCHECK(a.descriptor == b.descriptor);
997       return a.idx == b.idx;
998     }
999     friend bool operator!=(const Iterator& a, const Iterator& b) {
1000       return !(a == b);
1001     }
1002 
1003     Iterator& operator++() {
1004       idx++;
1005       return *this;
1006     }
1007 
1008     int idx;
1009     const Descriptor* descriptor;
1010   };
1011 
beginOneOfRangeImpl1012   Iterator begin() const { return {0, descriptor}; }
endOneOfRangeImpl1013   Iterator end() const {
1014     return {descriptor->real_oneof_decl_count(), descriptor};
1015   }
1016 
1017   const Descriptor* descriptor;
1018 };
1019 
OneOfRange(const Descriptor * desc)1020 inline OneOfRangeImpl OneOfRange(const Descriptor* desc) { return {desc}; }
1021 
1022 PROTOC_EXPORT std::string StripProto(const std::string& filename);
1023 
1024 bool EnableMessageOwnedArena(const Descriptor* desc, const Options& options);
1025 
1026 bool EnableMessageOwnedArenaTrial(const Descriptor* desc,
1027                                   const Options& options);
1028 
1029 bool ShouldVerify(const Descriptor* descriptor, const Options& options,
1030                   MessageSCCAnalyzer* scc_analyzer);
1031 bool ShouldVerify(const FileDescriptor* file, const Options& options,
1032                   MessageSCCAnalyzer* scc_analyzer);
1033 
// Indicates whether to use predefined verify methods for a given message. If a
// message is "simple" and needs no special verification per field (e.g. message
// field, repeated packed, UTF8 string, etc.), we can use either VerifySimple or
// VerifySimpleAlwaysCheckInt32 methods as all verification can be done based on
// the wire type.
//
// Otherwise, we need "custom" verify methods tailored to a message to pass
// which field needs a special verification; i.e. InternalVerify.
//
// The enumerators carry no explicit values, so they are numbered 0..4 in
// declaration order.
enum class VerifySimpleType {
  kSimpleInt32Never,   // Use VerifySimple
  kSimpleInt32Always,  // Use VerifySimpleAlwaysCheckInt32
  kCustom,             // Use InternalVerify and check only for int32
  kCustomInt32Never,   // Use InternalVerify but never check for int32
  kCustomInt32Always,  // Use InternalVerify and always check for int32
};
1049 
1050 // Returns VerifySimpleType if messages can be verified by predefined methods.
1051 VerifySimpleType ShouldVerifySimple(const Descriptor* descriptor);
1052 
1053 bool IsUtf8String(const FieldDescriptor* field);
1054 
1055 bool HasMessageFieldOrExtension(const Descriptor* desc);
1056 
1057 }  // namespace cpp
1058 }  // namespace compiler
1059 }  // namespace protobuf
1060 }  // namespace google
1061 
1062 #include <google/protobuf/port_undef.inc>
1063 
1064 #endif  // GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
1065