• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #ifndef GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
36 #define GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
37 
38 #include <algorithm>
39 #include <cstdint>
40 #include <iterator>
41 #include <map>
42 #include <string>
43 
44 #include <google/protobuf/compiler/scc.h>
45 #include <google/protobuf/compiler/code_generator.h>
46 #include <google/protobuf/compiler/cpp/cpp_names.h>
47 #include <google/protobuf/compiler/cpp/cpp_options.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/printer.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/port.h>
52 #include <google/protobuf/stubs/strutil.h>
53 
54 // Must be included last.
55 #include <google/protobuf/port_def.inc>
56 
57 namespace google {
58 namespace protobuf {
59 namespace compiler {
60 namespace cpp {
61 
// The three ArenaDtorNeeds levels, with explicitly pinned numeric values.
enum class ArenaDtorNeeds {
  kNone = 0,
  kOnDemand = 1,
  kRequired = 2,
};
63 
// Returns the fixed token "PROTOBUF_NAMESPACE_ID".  The options argument is
// accepted but not consulted.
inline std::string ProtobufNamespace(const Options& /* options */) {
  const char* const namespace_token = "PROTOBUF_NAMESPACE_ID";
  return namespace_token;
}
67 
// Returns the fixed prefix "GOOGLE_PROTOBUF".  The options argument is
// accepted but not consulted.
inline std::string MacroPrefix(const Options& /* options */) {
  const char* const prefix = "GOOGLE_PROTOBUF";
  return prefix;
}
71 
DeprecatedAttribute(const Options &,const FieldDescriptor * d)72 inline std::string DeprecatedAttribute(const Options& /* options */,
73                                        const FieldDescriptor* d) {
74   return d->options().deprecated() ? "PROTOBUF_DEPRECATED " : "";
75 }
76 
DeprecatedAttribute(const Options &,const EnumValueDescriptor * d)77 inline std::string DeprecatedAttribute(const Options& /* options */,
78                                        const EnumValueDescriptor* d) {
79   return d->options().deprecated() ? "PROTOBUF_DEPRECATED_ENUM " : "";
80 }
81 
82 // Commonly-used separator comments.  Thick is a line of '=', thin is a line
83 // of '-'.
84 extern const char kThickSeparator[];
85 extern const char kThinSeparator[];
86 
87 void SetCommonVars(const Options& options,
88                    std::map<std::string, std::string>* variables);
89 
90 // Variables to access message data from the message scope.
91 void SetCommonMessageDataVariables(
92     std::map<std::string, std::string>* variables);
93 
94 void SetUnknownFieldsVariable(const Descriptor* descriptor,
95                               const Options& options,
96                               std::map<std::string, std::string>* variables);
97 
98 bool GetBootstrapBasename(const Options& options, const std::string& basename,
99                           std::string* bootstrap_basename);
100 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
101                     bool bootstrap_flag, std::string* basename);
102 bool IsBootstrapProto(const Options& options, const FileDescriptor* file);
103 
104 // Namespace of the proto file. This namespace is such that the string
105 // "<namespace>::some_name" is the correct fully qualified name.
106 // This means if the package is empty the namespace is "", and otherwise
107 // the namespace is "::foo::bar::...::baz" without trailing colons.
108 std::string Namespace(const FileDescriptor* d, const Options& options);
109 std::string Namespace(const Descriptor* d, const Options& options);
110 std::string Namespace(const FieldDescriptor* d, const Options& options);
111 std::string Namespace(const EnumDescriptor* d, const Options& options);
112 
113 // Returns true if it's safe to reset "field" to zero.
114 bool CanInitializeByZeroing(const FieldDescriptor* field);
115 
116 std::string ClassName(const Descriptor* descriptor);
117 std::string ClassName(const EnumDescriptor* enum_descriptor);
118 
119 std::string QualifiedClassName(const Descriptor* d, const Options& options);
120 std::string QualifiedClassName(const EnumDescriptor* d, const Options& options);
121 
122 std::string QualifiedClassName(const Descriptor* d);
123 std::string QualifiedClassName(const EnumDescriptor* d);
124 
125 // DEPRECATED just use ClassName or QualifiedClassName, a boolean is very
126 // unreadable at the callsite.
127 // Returns the non-nested type name for the given type.  If "qualified" is
128 // true, prefix the type with the full namespace.  For example, if you had:
129 //   package foo.bar;
130 //   message Baz { message Qux {} }
131 // Then the qualified ClassName for Qux would be:
132 //   ::foo::bar::Baz_Qux
133 // While the non-qualified version would be:
134 //   Baz_Qux
ClassName(const Descriptor * descriptor,bool qualified)135 inline std::string ClassName(const Descriptor* descriptor, bool qualified) {
136   return qualified ? QualifiedClassName(descriptor, Options())
137                    : ClassName(descriptor);
138 }
139 
ClassName(const EnumDescriptor * descriptor,bool qualified)140 inline std::string ClassName(const EnumDescriptor* descriptor, bool qualified) {
141   return qualified ? QualifiedClassName(descriptor, Options())
142                    : ClassName(descriptor);
143 }
144 
145 // Returns the extension name prefixed with the class name if nested but without
146 // the package name.
147 std::string ExtensionName(const FieldDescriptor* d);
148 
149 std::string QualifiedExtensionName(const FieldDescriptor* d,
150                                    const Options& options);
151 std::string QualifiedExtensionName(const FieldDescriptor* d);
152 
153 // Type name of default instance.
154 std::string DefaultInstanceType(const Descriptor* descriptor,
155                                 const Options& options);
156 
157 // Non-qualified name of the default_instance of this message.
158 std::string DefaultInstanceName(const Descriptor* descriptor,
159                                 const Options& options);
160 
161 // Non-qualified name of the default instance pointer. This is used only for
162 // implicit weak fields, where we need an extra indirection.
163 std::string DefaultInstancePtr(const Descriptor* descriptor,
164                                const Options& options);
165 
166 // Fully qualified name of the default_instance of this message.
167 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
168                                          const Options& options);
169 
170 // Fully qualified name of the default instance pointer.
171 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
172                                         const Options& options);
173 
174 // DescriptorTable variable name.
175 std::string DescriptorTableName(const FileDescriptor* file,
176                                 const Options& options);
177 
178 // When declaring symbol externs from another file, this macro will supply the
179 // dllexport needed for the target file, if any.
180 std::string FileDllExport(const FileDescriptor* file, const Options& options);
181 
182 // Name of the base class: google::protobuf::Message or google::protobuf::MessageLite.
183 std::string SuperClassName(const Descriptor* descriptor,
184                            const Options& options);
185 
186 // Adds an underscore if necessary to prevent conflicting with a keyword.
187 std::string ResolveKeyword(const std::string& name);
188 
189 // Get the (unqualified) name that should be used for this field in C++ code.
190 // The name is coerced to lower-case to emulate proto1 behavior.  People
191 // should be using lowercase-with-underscores style for proto field names
192 // anyway, so normally this just returns field->name().
193 std::string FieldName(const FieldDescriptor* field);
194 
195 // Returns the (unqualified) private member name for this field in C++ code.
196 std::string FieldMemberName(const FieldDescriptor* field);
197 
198 // Returns an estimate of the compiler's alignment for the field.  This
199 // is not guaranteed to be correct because the generated code could be compiled on
200 // different systems with different alignment rules.  The estimates below assume
201 // 64-bit pointers.
202 int EstimateAlignmentSize(const FieldDescriptor* field);
203 
204 // Get the unqualified name that should be used for a field's field
205 // number constant.
206 std::string FieldConstantName(const FieldDescriptor* field);
207 
208 // Returns the scope where the field was defined (for extensions, this is
209 // different from the message type to which the field applies).
FieldScope(const FieldDescriptor * field)210 inline const Descriptor* FieldScope(const FieldDescriptor* field) {
211   return field->is_extension() ? field->extension_scope()
212                                : field->containing_type();
213 }
214 
215 // Returns the fully-qualified type name field->message_type().  Usually this
216 // is just ClassName(field->message_type(), true);
217 std::string FieldMessageTypeName(const FieldDescriptor* field,
218                                  const Options& options);
219 
220 // Get the C++ type name for a primitive type (e.g. "double", "::google::protobuf::int32", etc.).
221 const char* PrimitiveTypeName(FieldDescriptor::CppType type);
222 std::string PrimitiveTypeName(const Options& options,
223                               FieldDescriptor::CppType type);
224 
225 // Get the declared type name in CamelCase format, as is used e.g. for the
226 // methods of WireFormat.  For example, TYPE_INT32 becomes "Int32".
227 const char* DeclaredTypeMethodName(FieldDescriptor::Type type);
228 
229 // Return the code that evaluates to the number when compiled.
230 std::string Int32ToString(int number);
231 
232 // Get code that evaluates to the field's default value.
233 std::string DefaultValue(const Options& options, const FieldDescriptor* field);
234 
235 // Compatibility function for callers outside proto2.
236 std::string DefaultValue(const FieldDescriptor* field);
237 
238 // Convert a file name into a valid identifier.
239 std::string FilenameIdentifier(const std::string& filename);
240 
241 // Generates a unique name for each .proto file, to prevent collisions of
242 // symbols in the global namespace.
243 std::string UniqueName(const std::string& name, const std::string& filename,
244                        const Options& options);
UniqueName(const std::string & name,const FileDescriptor * d,const Options & options)245 inline std::string UniqueName(const std::string& name, const FileDescriptor* d,
246                               const Options& options) {
247   return UniqueName(name, d->name(), options);
248 }
UniqueName(const std::string & name,const Descriptor * d,const Options & options)249 inline std::string UniqueName(const std::string& name, const Descriptor* d,
250                               const Options& options) {
251   return UniqueName(name, d->file(), options);
252 }
UniqueName(const std::string & name,const EnumDescriptor * d,const Options & options)253 inline std::string UniqueName(const std::string& name, const EnumDescriptor* d,
254                               const Options& options) {
255   return UniqueName(name, d->file(), options);
256 }
UniqueName(const std::string & name,const ServiceDescriptor * d,const Options & options)257 inline std::string UniqueName(const std::string& name,
258                               const ServiceDescriptor* d,
259                               const Options& options) {
260   return UniqueName(name, d->file(), options);
261 }
262 
263 // Versions for call sites that only support the internal runtime (like proto1
264 // support).
InternalRuntimeOptions()265 inline Options InternalRuntimeOptions() {
266   Options options;
267   options.opensource_runtime = false;
268   return options;
269 }
UniqueName(const std::string & name,const std::string & filename)270 inline std::string UniqueName(const std::string& name,
271                               const std::string& filename) {
272   return UniqueName(name, filename, InternalRuntimeOptions());
273 }
UniqueName(const std::string & name,const FileDescriptor * d)274 inline std::string UniqueName(const std::string& name,
275                               const FileDescriptor* d) {
276   return UniqueName(name, d->name(), InternalRuntimeOptions());
277 }
UniqueName(const std::string & name,const Descriptor * d)278 inline std::string UniqueName(const std::string& name, const Descriptor* d) {
279   return UniqueName(name, d->file(), InternalRuntimeOptions());
280 }
UniqueName(const std::string & name,const EnumDescriptor * d)281 inline std::string UniqueName(const std::string& name,
282                               const EnumDescriptor* d) {
283   return UniqueName(name, d->file(), InternalRuntimeOptions());
284 }
UniqueName(const std::string & name,const ServiceDescriptor * d)285 inline std::string UniqueName(const std::string& name,
286                               const ServiceDescriptor* d) {
287   return UniqueName(name, d->file(), InternalRuntimeOptions());
288 }
289 
290 // Return the qualified C++ name for a file level symbol.
291 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
292                                      const std::string& name,
293                                      const Options& options);
294 
295 // Escape C++ trigraphs by escaping question marks to \?
296 std::string EscapeTrigraphs(const std::string& to_escape);
297 
298 // Escaped function name to eliminate naming conflict.
299 std::string SafeFunctionName(const Descriptor* descriptor,
300                              const FieldDescriptor* field,
301                              const std::string& prefix);
302 
303 // Returns true if generated messages have public unknown fields accessors
PublicUnknownFieldsAccessors(const Descriptor * message)304 inline bool PublicUnknownFieldsAccessors(const Descriptor* message) {
305   return message->file()->syntax() != FileDescriptor::SYNTAX_PROTO3;
306 }
307 
308 // Returns the optimize mode for <file>, respecting <options.enforce_lite>.
309 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
310                                         const Options& options);
311 
312 // Determines whether unknown fields will be stored in an UnknownFieldSet or
313 // a string.
UseUnknownFieldSet(const FileDescriptor * file,const Options & options)314 inline bool UseUnknownFieldSet(const FileDescriptor* file,
315                                const Options& options) {
316   return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
317 }
318 
IsWeak(const FieldDescriptor * field,const Options & options)319 inline bool IsWeak(const FieldDescriptor* field, const Options& options) {
320   if (field->options().weak()) {
321     GOOGLE_CHECK(!options.opensource_runtime);
322     return true;
323   }
324   return false;
325 }
326 
327 bool IsStringInlined(const FieldDescriptor* descriptor, const Options& options);
328 
329 // For a string field, returns the effective ctype.  If the actual ctype is
330 // not supported, returns the default of STRING.
331 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
332                                          const Options& options);
333 
IsCord(const FieldDescriptor * field,const Options & options)334 inline bool IsCord(const FieldDescriptor* field, const Options& options) {
335   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
336          EffectiveStringCType(field, options) == FieldOptions::CORD;
337 }
338 
IsString(const FieldDescriptor * field,const Options & options)339 inline bool IsString(const FieldDescriptor* field, const Options& options) {
340   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
341          EffectiveStringCType(field, options) == FieldOptions::STRING;
342 }
343 
IsStringPiece(const FieldDescriptor * field,const Options & options)344 inline bool IsStringPiece(const FieldDescriptor* field,
345                           const Options& options) {
346   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
347          EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
348 }
349 
350 class MessageSCCAnalyzer;
351 
352 // Does the given FileDescriptor use lazy fields?
353 bool HasLazyFields(const FileDescriptor* file, const Options& options,
354                    MessageSCCAnalyzer* scc_analyzer);
355 
356 // Is the given field a supported lazy field?
357 bool IsLazy(const FieldDescriptor* field, const Options& options,
358             MessageSCCAnalyzer* scc_analyzer);
359 
360 // Is this an explicit (non-profile driven) lazy field, as denoted by
361 // lazy/unverified_lazy in the descriptor?
IsExplicitLazy(const FieldDescriptor * field)362 inline bool IsExplicitLazy(const FieldDescriptor* field) {
363   return field->options().lazy() || field->options().unverified_lazy();
364 }
365 
IsLazilyVerifiedLazy(const FieldDescriptor * field,const Options & options)366 inline bool IsLazilyVerifiedLazy(const FieldDescriptor* field,
367                                  const Options& options) {
368   // TODO(b/211906113): Make lazy() imply eagerly verified lazy.
369   return IsExplicitLazy(field) && !field->is_repeated() &&
370          field->type() == FieldDescriptor::TYPE_MESSAGE &&
371          GetOptimizeFor(field->file(), options) != FileOptions::LITE_RUNTIME &&
372          !options.opensource_runtime;
373 }
374 
IsEagerlyVerifiedLazy(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)375 inline bool IsEagerlyVerifiedLazy(const FieldDescriptor* field,
376                                   const Options& options,
377                                   MessageSCCAnalyzer* scc_analyzer) {
378   // TODO(b/211906113): Make lazy() imply eagerly verified lazy.
379   return IsLazy(field, options, scc_analyzer) && !IsExplicitLazy(field);
380 }
381 
// Always returns true; neither argument is consulted.
inline bool IsFieldUsed(const FieldDescriptor* /*field*/,
                        const Options& /*options*/) {
  const bool used = true;
  return used;
}
386 
387 // Returns true if "field" is stripped.
IsFieldStripped(const FieldDescriptor *,const Options &)388 inline bool IsFieldStripped(const FieldDescriptor* /*field*/,
389                             const Options& /*options*/) {
390   return false;
391 }
392 
393 // Does the file contain any definitions that need extension_set.h?
394 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file);
395 
396 // Does the file have any repeated fields, necessitating the file to include
397 // repeated_field.h? This does not include repeated extensions, since those are
398 // all stored internally in an ExtensionSet, not a separate RepeatedField*.
399 bool HasRepeatedFields(const FileDescriptor* file);
400 
401 // Does the file have any string/bytes fields with ctype=STRING_PIECE? This
402 // does not include extensions, since ctype is ignored for extensions.
403 bool HasStringPieceFields(const FileDescriptor* file, const Options& options);
404 
405 // Does the file have any string/bytes fields with ctype=CORD? This does not
406 // include extensions, since ctype is ignored for extensions.
407 bool HasCordFields(const FileDescriptor* file, const Options& options);
408 
409 // Does the file have any map fields, necessitating the file to include
410 // map_field_inl.h and map.h.
411 bool HasMapFields(const FileDescriptor* file);
412 
413 // Does this file have any enum type definitions?
414 bool HasEnumDefinitions(const FileDescriptor* file);
415 
416 // Does this file have generated parsing, serialization, and other
417 // standard methods for which reflection-based fallback implementations exist?
HasGeneratedMethods(const FileDescriptor * file,const Options & options)418 inline bool HasGeneratedMethods(const FileDescriptor* file,
419                                 const Options& options) {
420   return GetOptimizeFor(file, options) != FileOptions::CODE_SIZE;
421 }
422 
423 // Do message classes in this file have descriptor and reflection methods?
HasDescriptorMethods(const FileDescriptor * file,const Options & options)424 inline bool HasDescriptorMethods(const FileDescriptor* file,
425                                  const Options& options) {
426   return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
427 }
428 
429 // Should we generate generic services for this file?
HasGenericServices(const FileDescriptor * file,const Options & options)430 inline bool HasGenericServices(const FileDescriptor* file,
431                                const Options& options) {
432   return file->service_count() > 0 &&
433          GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME &&
434          file->options().cc_generic_services();
435 }
436 
IsProto2MessageSet(const Descriptor * descriptor,const Options & options)437 inline bool IsProto2MessageSet(const Descriptor* descriptor,
438                                const Options& options) {
439   return !options.opensource_runtime &&
440          options.enforce_mode != EnforceOptimizeMode::kLiteRuntime &&
441          !options.lite_implicit_weak_fields &&
442          descriptor->options().message_set_wire_format() &&
443          descriptor->full_name() == "google.protobuf.bridge.MessageSet";
444 }
445 
IsMapEntryMessage(const Descriptor * descriptor)446 inline bool IsMapEntryMessage(const Descriptor* descriptor) {
447   return descriptor->options().map_entry();
448 }
449 
450 // Returns true if the field's CPPTYPE is string or message.
451 bool IsStringOrMessage(const FieldDescriptor* field);
452 
453 std::string UnderscoresToCamelCase(const std::string& input,
454                                    bool cap_next_letter);
455 
IsProto3(const FileDescriptor * file)456 inline bool IsProto3(const FileDescriptor* file) {
457   return file->syntax() == FileDescriptor::SYNTAX_PROTO3;
458 }
459 
HasHasbit(const FieldDescriptor * field)460 inline bool HasHasbit(const FieldDescriptor* field) {
461   // This predicate includes proto3 message fields only if they have "optional".
462   //   Foo submsg1 = 1;           // HasHasbit() == false
463   //   optional Foo submsg2 = 2;  // HasHasbit() == true
464   // This is slightly odd, as adding "optional" to a singular proto3 field does
465   // not change the semantics or API. However whenever any field in a message
466   // has a hasbit, it forces reflection to include hasbit offsets for *all*
467   // fields, even if almost all of them are set to -1 (no hasbit). So to avoid
468   // causing a sudden size regression for ~all proto3 messages, we give proto3
469   // message fields a hasbit only if "optional" is present. If the user is
470   // explicitly writing "optional", it is likely they are writing it on
471   // primitive fields also.
472   return (field->has_optional_keyword() || field->is_required()) &&
473          !field->options().weak();
474 }
475 
476 // Returns true if 'enum' semantics are such that unknown values are preserved
477 // in the enum field itself, rather than going to the UnknownFieldSet.
HasPreservingUnknownEnumSemantics(const FieldDescriptor * field)478 inline bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) {
479   return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
480 }
481 
IsCrossFileMessage(const FieldDescriptor * field)482 inline bool IsCrossFileMessage(const FieldDescriptor* field) {
483   return field->type() == FieldDescriptor::TYPE_MESSAGE &&
484          field->message_type()->file() != field->file();
485 }
486 
MakeDefaultName(const FieldDescriptor * field)487 inline std::string MakeDefaultName(const FieldDescriptor* field) {
488   return "_i_give_permission_to_break_this_code_default_" + FieldName(field) +
489          "_";
490 }
491 
492 // Semantically distinct from MakeDefaultName in that it gives the C++ code
493 // referencing a default field from the message scope, rather than just the
494 // variable name.
495 // For example, declarations of default variables should always use just
496 // MakeDefaultName to produce code like:
497 //   Type _i_give_permission_to_break_this_code_default_field_;
498 //
499 // Code that references these should use MakeDefaultFieldName, in case the field
500 // exists at some nested level like:
501 //   internal_container_._i_give_permission_to_break_this_code_default_field_;
MakeDefaultFieldName(const FieldDescriptor * field)502 inline std::string MakeDefaultFieldName(const FieldDescriptor* field) {
503   return MakeDefaultName(field);
504 }
505 
MakeVarintCachedSizeName(const FieldDescriptor * field)506 inline std::string MakeVarintCachedSizeName(const FieldDescriptor* field) {
507   return StrCat("_", FieldName(field), "_cached_byte_size_");
508 }
509 
510 // Semantically distinct from MakeVarintCachedSizeName in that it gives the C++
511 // code referencing the object from the message scope, rather than just the
512 // variable name.
513 // For example, declarations of default variables should always use just
514 // MakeVarintCachedSizeName to produce code like:
515 //   Type _field_cached_byte_size_;
516 //
517 // Code that references these variables should use
518 // MakeVarintCachedSizeFieldName, in case the field exists at some nested level
519 // like:
520 //   internal_container_._field_cached_byte_size_;
MakeVarintCachedSizeFieldName(const FieldDescriptor * field)521 inline std::string MakeVarintCachedSizeFieldName(const FieldDescriptor* field) {
522   return StrCat("_", FieldName(field), "_cached_byte_size_");
523 }
524 
525 // Note: A lot of libraries detect Any protos based on Descriptor::full_name()
526 // while the two functions below use FileDescriptor::name(). In a sane world the
527 // two approaches should be equivalent. But if you are dealing with descriptors
528 // from untrusted sources, you might need to match semantics across libraries.
529 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options);
530 bool IsAnyMessage(const Descriptor* descriptor, const Options& options);
531 
532 bool IsWellKnownMessage(const FileDescriptor* descriptor);
533 
IncludeGuard(const FileDescriptor * file,bool pb_h,const Options & options)534 inline std::string IncludeGuard(const FileDescriptor* file, bool pb_h,
535                                 const Options& options) {
536   // If we are generating a .pb.h file and the proto_h option is enabled, then
537   // the .pb.h gets an extra suffix.
538   std::string filename_identifier = FilenameIdentifier(
539       file->name() + (pb_h && options.proto_h ? ".pb.h" : ""));
540 
541   if (IsWellKnownMessage(file)) {
542     // For well-known messages we need third_party/protobuf and net/proto2 to
543     // have distinct include guards, because some source files include both and
544     // both need to be defined (the third_party copies will be in the
545     // google::protobuf_opensource namespace).
546     return MacroPrefix(options) + "_INCLUDED_" + filename_identifier;
547   } else {
548     // Ideally this case would use distinct include guards for opensource and
549     // google3 protos also.  (The behavior of "first #included wins" is not
550     // ideal).  But unfortunately some legacy code includes both and depends on
551     // the identical include guards to avoid compile errors.
552     //
553     // We should clean this up so that this case can be removed.
554     return "GOOGLE_PROTOBUF_INCLUDED_" + filename_identifier;
555   }
556 }
557 
558 // Returns the OptimizeMode for this file. Furthermore, it updates a status
559 // bool if has_opt_codesize_extension is non-null. If this status bool is true,
560 // it means this file contains an extension that itself is defined as
561 // optimize_for = CODE_SIZE.
562 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
563                                         const Options& options,
564                                         bool* has_opt_codesize_extension);
GetOptimizeFor(const FileDescriptor * file,const Options & options)565 inline FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
566                                                const Options& options) {
567   return GetOptimizeFor(file, options, nullptr);
568 }
NeedsEagerDescriptorAssignment(const FileDescriptor * file,const Options & options)569 inline bool NeedsEagerDescriptorAssignment(const FileDescriptor* file,
570                                            const Options& options) {
571   bool has_opt_codesize_extension;
572   if (GetOptimizeFor(file, options, &has_opt_codesize_extension) ==
573           FileOptions::CODE_SIZE &&
574       has_opt_codesize_extension) {
575     // If this filedescriptor contains an extension from another file which
576     // is optimized_for = CODE_SIZE. We need to be careful in the ordering so
577     // we eagerly build the descriptors in the dependencies before building
578     // the descriptors of this file.
579     return true;
580   } else {
581     // If we have a generated code based parser we never need eager
582     // initialization of descriptors of our deps.
583     return false;
584   }
585 }
586 
587 // This orders the messages in a .pb.cc as they are output by file.cc
588 void FlattenMessagesInFile(const FileDescriptor* file,
589                            std::vector<const Descriptor*>* result);
FlattenMessagesInFile(const FileDescriptor * file)590 inline std::vector<const Descriptor*> FlattenMessagesInFile(
591     const FileDescriptor* file) {
592   std::vector<const Descriptor*> result;
593   FlattenMessagesInFile(file, &result);
594   return result;
595 }
596 
597 template <typename F>
ForEachMessage(const Descriptor * descriptor,F && func)598 void ForEachMessage(const Descriptor* descriptor, F&& func) {
599   for (int i = 0; i < descriptor->nested_type_count(); i++)
600     ForEachMessage(descriptor->nested_type(i), std::forward<F&&>(func));
601   func(descriptor);
602 }
603 
604 template <typename F>
ForEachMessage(const FileDescriptor * descriptor,F && func)605 void ForEachMessage(const FileDescriptor* descriptor, F&& func) {
606   for (int i = 0; i < descriptor->message_type_count(); i++)
607     ForEachMessage(descriptor->message_type(i), std::forward<F&&>(func));
608 }
609 
// Weak-field queries, overloaded for a single message (Descriptor) and for a
// whole file (FileDescriptor). Only the declarations live in this header.
// NOTE(review): presumably true when at least one field is weak under
// `options` -- confirm against the out-of-line definitions.
610 bool HasWeakFields(const Descriptor* desc, const Options& options);
611 bool HasWeakFields(const FileDescriptor* desc, const Options& options);
612 
613 // Returns true if the "required" restriction check should be ignored for the
614 // given field.
ShouldIgnoreRequiredFieldCheck(const FieldDescriptor * field,const Options & options)615 inline static bool ShouldIgnoreRequiredFieldCheck(const FieldDescriptor* field,
616                                                   const Options& options) {
617   // Do not check "required" for lazily verified lazy fields.
618   return IsLazilyVerifiedLazy(field, options);
619 }
620 
// Summary flags for one SCC, as returned by
// MessageSCCAnalyzer::GetSCCAnalysis(). Every flag defaults to false.
// `contains_required` and `contains_extension` feed
// MessageSCCAnalyzer::HasRequiredFields(); `contains_weak` feeds
// MessageSCCAnalyzer::HasWeakField().
// NOTE(review): `is_recursive` and `contains_cord` presumably mark a
// self-referencing SCC and the presence of Cord fields -- confirm against the
// code that fills this struct.
621 struct MessageAnalysis {
622   bool is_recursive = false;
623   bool contains_cord = false;
624   bool contains_extension = false;
625   bool contains_required = false;
626   bool contains_weak = false;  // Implicit weak as well.
627 };
628 
629 // This class is used in FileGenerator, to ensure linear instead of
630 // quadratic performance, if we do this per message we would get O(V*(V+E)).
631 // Logically this is just only used in message.cc, but in the header for
632 // FileGenerator to help share it.
633 class PROTOC_EXPORT MessageSCCAnalyzer {
634  public:
MessageSCCAnalyzer(const Options & options)635   explicit MessageSCCAnalyzer(const Options& options) : options_(options) {}
636 
637   MessageAnalysis GetSCCAnalysis(const SCC* scc);
638 
HasRequiredFields(const Descriptor * descriptor)639   bool HasRequiredFields(const Descriptor* descriptor) {
640     MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor));
641     return result.contains_required || result.contains_extension;
642   }
HasWeakField(const Descriptor * descriptor)643   bool HasWeakField(const Descriptor* descriptor) {
644     MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor));
645     return result.contains_weak;
646   }
GetSCC(const Descriptor * descriptor)647   const SCC* GetSCC(const Descriptor* descriptor) {
648     return analyzer_.GetSCC(descriptor);
649   }
650 
651  private:
652   struct DepsGenerator {
operatorDepsGenerator653     std::vector<const Descriptor*> operator()(const Descriptor* desc) const {
654       std::vector<const Descriptor*> deps;
655       for (int i = 0; i < desc->field_count(); i++) {
656         if (desc->field(i)->message_type()) {
657           deps.push_back(desc->field(i)->message_type());
658         }
659       }
660       return deps;
661     }
662   };
663   SCCAnalyzer<DepsGenerator> analyzer_;
664   Options options_;
665   std::map<const SCC*, MessageAnalysis> analysis_cache_;
666 };
667 
// Gathers field descriptors for a message or for a whole file into the
// caller-supplied vector `fields`. Only declared in this header.
// NOTE(review): presumably appends nested-message fields and extensions too,
// mirroring ForEachField -- confirm against the definitions.
668 void ListAllFields(const Descriptor* d,
669                    std::vector<const FieldDescriptor*>* fields);
670 void ListAllFields(const FileDescriptor* d,
671                    std::vector<const FieldDescriptor*>* fields);
672 
673 template <class T>
ForEachField(const Descriptor * d,T && func)674 void ForEachField(const Descriptor* d, T&& func) {
675   for (int i = 0; i < d->nested_type_count(); i++) {
676     ForEachField(d->nested_type(i), std::forward<T&&>(func));
677   }
678   for (int i = 0; i < d->extension_count(); i++) {
679     func(d->extension(i));
680   }
681   for (int i = 0; i < d->field_count(); i++) {
682     func(d->field(i));
683   }
684 }
685 
686 template <class T>
ForEachField(const FileDescriptor * d,T && func)687 void ForEachField(const FileDescriptor* d, T&& func) {
688   for (int i = 0; i < d->message_type_count(); i++) {
689     ForEachField(d->message_type(i), std::forward<T&&>(func));
690   }
691   for (int i = 0; i < d->extension_count(); i++) {
692     func(d->extension(i));
693   }
694 }
695 
// Fills the caller-supplied vector `types` with message descriptors related to
// the services of `fd`. Only declared in this header.
// NOTE(review): presumably the input and output types of every service
// method -- confirm against the definition.
696 void ListAllTypesForServices(const FileDescriptor* fd,
697                              std::vector<const Descriptor*>* types);
698 
699 // Indicates whether we should use implicit weak fields for this file.
700 bool UsingImplicitWeakFields(const FileDescriptor* file,
701                              const Options& options);
702 
703 // Indicates whether to treat this field as implicitly weak.
704 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
705                          MessageSCCAnalyzer* scc_analyzer);
706 
HasSimpleBaseClass(const Descriptor * desc,const Options & options)707 inline bool HasSimpleBaseClass(const Descriptor* desc, const Options& options) {
708   if (!HasDescriptorMethods(desc->file(), options)) return false;
709   if (desc->extension_range_count() != 0) return false;
710   if (desc->field_count() == 0) return true;
711   // TODO(jorg): Support additional common message types with only one
712   // or two fields
713   return false;
714 }
715 
HasSimpleBaseClasses(const FileDescriptor * file,const Options & options)716 inline bool HasSimpleBaseClasses(const FileDescriptor* file,
717                                  const Options& options) {
718   bool v = false;
719   ForEachMessage(file, [&v, &options](const Descriptor* desc) {
720     v |= HasSimpleBaseClass(desc, options);
721   });
722   return v;
723 }
724 
SimpleBaseClass(const Descriptor * desc,const Options & options)725 inline std::string SimpleBaseClass(const Descriptor* desc,
726                                    const Options& options) {
727   if (!HasDescriptorMethods(desc->file(), options)) return "";
728   if (desc->extension_range_count() != 0) return "";
729   if (desc->field_count() == 0) {
730     return "ZeroFieldsBase";
731   }
732   // TODO(jorg): Support additional common message types with only one
733   // or two fields
734   return "";
735 }
736 
737 // Formatter is a functor class which acts as a closure around printer and
738 // the variable map. It's much like printer->Print except it supports both named
739 // variables that are substituted using a key value map and direct arguments. In
740 // the format string $1$, $2$, etc... are substituted for the first, second, ...
741 // direct argument respectively in the format call, it accepts both strings and
742 // integers. The implementation verifies all arguments are used and are "first"
743 // used in order of appearance in the argument list. For example,
744 //
745 // Format("return array[$1$];", 3) -> "return array[3];"
746 // Format("array[$2$] = $1$;", "Bla", 3) -> FATAL error (wrong order)
747 // Format("array[$1$] = $2$;", 3, "Bla") -> "array[3] = Bla;"
748 //
749 // The arguments can be used more than once like
750 //
751 // Format("array[$1$] = $2$;  // Index = $1$", 3, "Bla") ->
752 //        "array[3] = Bla;  // Index = 3"
753 //
754 // If you use more arguments use the following style to help the reader,
755 //
756 // Format("int $1$() {\n"
757 //        "  array[$2$] = $3$;\n"
758 //        "  return $4$;"
759 //        "}\n",
760 //        funname, // 1
761 //        idx,  // 2
762 //        varname,  // 3
763 //        retval);  // 4
764 //
765 // but consider using named variables. Named variables like $foo$, with some
766 // identifier foo, are looked up in the map. One additional feature is that
767 // spaces are accepted between the '$' delimiters, $ foo$ will
768 // substitute to " bar" if foo stands for "bar", but in case it's empty
769 // will substitute to "". Hence, for example,
770 //
771 // Format(vars, "$dllexport $void fun();") -> "void fun();"
772 //                                            "__declspec(export) void fun();"
773 //
774 // which is convenient to prevent double, leading or trailing spaces.
775 class PROTOC_EXPORT Formatter {
776  public:
Formatter(io::Printer * printer)777   explicit Formatter(io::Printer* printer) : printer_(printer) {}
Formatter(io::Printer * printer,const std::map<std::string,std::string> & vars)778   Formatter(io::Printer* printer,
779             const std::map<std::string, std::string>& vars)
780       : printer_(printer), vars_(vars) {}
781 
782   template <typename T>
Set(const std::string & key,const T & value)783   void Set(const std::string& key, const T& value) {
784     vars_[key] = ToString(value);
785   }
786 
AddMap(const std::map<std::string,std::string> & vars)787   void AddMap(const std::map<std::string, std::string>& vars) {
788     for (const auto& keyval : vars) vars_[keyval.first] = keyval.second;
789   }
790 
791   template <typename... Args>
operator()792   void operator()(const char* format, const Args&... args) const {
793     printer_->FormatInternal({ToString(args)...}, vars_, format);
794   }
795 
Indent()796   void Indent() const { printer_->Indent(); }
Outdent()797   void Outdent() const { printer_->Outdent(); }
printer()798   io::Printer* printer() const { return printer_; }
799 
800   class PROTOC_EXPORT ScopedIndenter {
801    public:
ScopedIndenter(Formatter * format)802     explicit ScopedIndenter(Formatter* format) : format_(format) {
803       format_->Indent();
804     }
~ScopedIndenter()805     ~ScopedIndenter() { format_->Outdent(); }
806 
807    private:
808     Formatter* format_;
809   };
810 
ScopedIndent()811   PROTOBUF_NODISCARD ScopedIndenter ScopedIndent() {
812     return ScopedIndenter(this);
813   }
814   template <typename... Args>
ScopedIndent(const char * format,const Args &&...args)815   PROTOBUF_NODISCARD ScopedIndenter ScopedIndent(const char* format,
816                                                  const Args&&... args) {
817     (*this)(format, static_cast<Args&&>(args)...);
818     return ScopedIndenter(this);
819   }
820 
821   class PROTOC_EXPORT SaveState {
822    public:
SaveState(Formatter * format)823     explicit SaveState(Formatter* format)
824         : format_(format), vars_(format->vars_) {}
~SaveState()825     ~SaveState() { format_->vars_.swap(vars_); }
826 
827    private:
828     Formatter* format_;
829     std::map<std::string, std::string> vars_;
830   };
831 
832  private:
833   io::Printer* printer_;
834   std::map<std::string, std::string> vars_;
835 
836   // Convenience overloads to accept different types as arguments.
ToString(const std::string & s)837   static std::string ToString(const std::string& s) { return s; }
838   template <typename I, typename = typename std::enable_if<
839                             std::is_integral<I>::value>::type>
ToString(I x)840   static std::string ToString(I x) {
841     return StrCat(x);
842   }
ToString(strings::Hex x)843   static std::string ToString(strings::Hex x) { return StrCat(x); }
ToString(const FieldDescriptor * d)844   static std::string ToString(const FieldDescriptor* d) { return Payload(d); }
ToString(const Descriptor * d)845   static std::string ToString(const Descriptor* d) { return Payload(d); }
ToString(const EnumDescriptor * d)846   static std::string ToString(const EnumDescriptor* d) { return Payload(d); }
ToString(const EnumValueDescriptor * d)847   static std::string ToString(const EnumValueDescriptor* d) {
848     return Payload(d);
849   }
ToString(const OneofDescriptor * d)850   static std::string ToString(const OneofDescriptor* d) { return Payload(d); }
851 
852   template <typename Descriptor>
Payload(const Descriptor * descriptor)853   static std::string Payload(const Descriptor* descriptor) {
854     std::vector<int> path;
855     descriptor->GetLocationPath(&path);
856     GeneratedCodeInfo::Annotation annotation;
857     for (int index : path) {
858       annotation.add_path(index);
859     }
860     annotation.set_source_file(descriptor->file()->name());
861     return annotation.SerializeAsString();
862   }
863 };
864 
865 template <class T>
PrintFieldComment(const Formatter & format,const T * field)866 void PrintFieldComment(const Formatter& format, const T* field) {
867   // Print the field's (or oneof's) proto-syntax definition as a comment.
868   // We don't want to print group bodies so we cut off after the first
869   // line.
870   DebugStringOptions options;
871   options.elide_group_body = true;
872   options.elide_oneof_body = true;
873   std::string def = field->DebugStringWithOptions(options);
874   format("// $1$\n", def.substr(0, def.find_first_of('\n')));
875 }
876 
877 class PROTOC_EXPORT NamespaceOpener {
878  public:
NamespaceOpener(const Formatter & format)879   explicit NamespaceOpener(const Formatter& format)
880       : printer_(format.printer()) {}
NamespaceOpener(const std::string & name,const Formatter & format)881   NamespaceOpener(const std::string& name, const Formatter& format)
882       : NamespaceOpener(format) {
883     ChangeTo(name);
884   }
~NamespaceOpener()885   ~NamespaceOpener() { ChangeTo(""); }
886 
ChangeTo(const std::string & name)887   void ChangeTo(const std::string& name) {
888     std::vector<std::string> new_stack_ =
889         Split(name, "::", true);
890     size_t len = std::min(name_stack_.size(), new_stack_.size());
891     size_t common_idx = 0;
892     while (common_idx < len) {
893       if (name_stack_[common_idx] != new_stack_[common_idx]) break;
894       common_idx++;
895     }
896     for (auto it = name_stack_.crbegin();
897          it != name_stack_.crend() - common_idx; ++it) {
898       if (*it == "PROTOBUF_NAMESPACE_ID") {
899         printer_->Print("PROTOBUF_NAMESPACE_CLOSE\n");
900       } else {
901         printer_->Print("}  // namespace $ns$\n", "ns", *it);
902       }
903     }
904     name_stack_.swap(new_stack_);
905     for (size_t i = common_idx; i < name_stack_.size(); ++i) {
906       if (name_stack_[i] == "PROTOBUF_NAMESPACE_ID") {
907         printer_->Print("PROTOBUF_NAMESPACE_OPEN\n");
908       } else {
909         printer_->Print("namespace $ns$ {\n", "ns", name_stack_[i]);
910       }
911     }
912   }
913 
914  private:
915   io::Printer* printer_;
916   std::vector<std::string> name_stack_;
917 };
918 
// Level of UTF-8 validation applied to a string field. GetUtf8CheckMode()
// yields one of these values for a given field.
919 enum class Utf8CheckMode {
920   kStrict = 0,  // Parsing will fail if non UTF-8 data is in string fields.
921   kVerify = 1,  // Only log an error but parsing will succeed.
922   kNone = 2,    // No UTF-8 check.
923 };
924 
// Returns the Utf8CheckMode to use for `field` under `options`. Only declared
// in this header; the selection rules live in the definition.
925 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
926                                const Options& options);
927 
// Writes, through `format`, the UTF-8 checking code for a string field. Only
// declared in this header.
// NOTE(review): `for_parse` presumably selects the parse-time variant of the
// check and `parameters` is presumably spliced into the emitted call --
// confirm against the definition.
928 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
929                                     const Options& options, bool for_parse,
930                                     const char* parameters,
931                                     const Formatter& format);
932 
// Cord counterpart of GenerateUtf8CheckCodeForString, with the same parameter
// list. Only declared in this header.
// NOTE(review): presumably emits the equivalent check for Cord-backed string
// fields -- confirm against the definition.
933 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
934                                   const Options& options, bool for_parse,
935                                   const char* parameters,
936                                   const Formatter& format);
937 
938 template <typename T>
939 struct FieldRangeImpl {
940   struct Iterator {
941     using iterator_category = std::forward_iterator_tag;
942     using value_type = const FieldDescriptor*;
943     using difference_type = int;
944 
945     value_type operator*() { return descriptor->field(idx); }
946 
947     friend bool operator==(const Iterator& a, const Iterator& b) {
948       GOOGLE_DCHECK(a.descriptor == b.descriptor);
949       return a.idx == b.idx;
950     }
951     friend bool operator!=(const Iterator& a, const Iterator& b) {
952       return !(a == b);
953     }
954 
955     Iterator& operator++() {
956       idx++;
957       return *this;
958     }
959 
960     int idx;
961     const T* descriptor;
962   };
963 
beginFieldRangeImpl964   Iterator begin() const { return {0, descriptor}; }
endFieldRangeImpl965   Iterator end() const { return {descriptor->field_count(), descriptor}; }
966 
967   const T* descriptor;
968 };
969 
970 template <typename T>
FieldRange(const T * desc)971 FieldRangeImpl<T> FieldRange(const T* desc) {
972   return {desc};
973 }
974 
975 struct OneOfRangeImpl {
976   struct Iterator {
977     using iterator_category = std::forward_iterator_tag;
978     using value_type = const OneofDescriptor*;
979     using difference_type = int;
980 
981     value_type operator*() { return descriptor->oneof_decl(idx); }
982 
983     friend bool operator==(const Iterator& a, const Iterator& b) {
984       GOOGLE_DCHECK(a.descriptor == b.descriptor);
985       return a.idx == b.idx;
986     }
987     friend bool operator!=(const Iterator& a, const Iterator& b) {
988       return !(a == b);
989     }
990 
991     Iterator& operator++() {
992       idx++;
993       return *this;
994     }
995 
996     int idx;
997     const Descriptor* descriptor;
998   };
999 
beginOneOfRangeImpl1000   Iterator begin() const { return {0, descriptor}; }
endOneOfRangeImpl1001   Iterator end() const {
1002     return {descriptor->real_oneof_decl_count(), descriptor};
1003   }
1004 
1005   const Descriptor* descriptor;
1006 };
1007 
OneOfRange(const Descriptor * desc)1008 inline OneOfRangeImpl OneOfRange(const Descriptor* desc) { return {desc}; }
1009 
// Derives a string from `filename`; only declared (and exported) here.
// NOTE(review): presumably strips the ".proto" / ".protodevel" suffix --
// confirm against the definition.
1010 PROTOC_EXPORT std::string StripProto(const std::string& filename);
1011 
// Per-message switch for the message-owned-arena feature under `options`.
// Only declared in this header; the criteria live in the definition.
1012 bool EnableMessageOwnedArena(const Descriptor* desc, const Options& options);
1013 
// Verification predicates, overloaded for a single message and for a whole
// file. Both take the shared MessageSCCAnalyzer. Only declared in this header.
// NOTE(review): presumably decide whether verification code is generated --
// confirm against the definitions.
1014 bool ShouldVerify(const Descriptor* descriptor, const Options& options,
1015                   MessageSCCAnalyzer* scc_analyzer);
1016 bool ShouldVerify(const FileDescriptor* file, const Options& options,
1017                   MessageSCCAnalyzer* scc_analyzer);
1018 }  // namespace cpp
1019 }  // namespace compiler
1020 }  // namespace protobuf
1021 }  // namespace google
1022 
1023 #include <google/protobuf/port_undef.inc>
1024 
1025 #endif  // GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
1026