1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #ifndef GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
36 #define GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
37
38 #include <algorithm>
39 #include <cstdint>
40 #include <iterator>
41 #include <map>
42 #include <string>
43
44 #include <google/protobuf/compiler/scc.h>
45 #include <google/protobuf/compiler/code_generator.h>
46 #include <google/protobuf/compiler/cpp/names.h>
47 #include <google/protobuf/compiler/cpp/options.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/printer.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/port.h>
52 #include <google/protobuf/stubs/strutil.h>
53
54 // Must be included last.
55 #include <google/protobuf/port_def.inc>
56
57 namespace google {
58 namespace protobuf {
59 namespace compiler {
60 namespace cpp {
61
// Three levels of arena-destructor need, with explicitly fixed numeric values.
// NOTE(review): the precise meaning of each level is determined by the code
// that consumes this enum, which is not visible in this header.
enum class ArenaDtorNeeds {
  kNone = 0,
  kOnDemand = 1,
  kRequired = 2,
};
63
// Returns the fixed identifier "PROTOBUF_NAMESPACE_ID". The Options argument
// is accepted but not used.
inline std::string ProtobufNamespace(const Options& /* options */) {
  return std::string("PROTOBUF_NAMESPACE_ID");
}
67
// Returns the fixed prefix "GOOGLE_PROTOBUF". The Options argument is accepted
// but not used.
inline std::string MacroPrefix(const Options& /* options */) {
  return std::string("GOOGLE_PROTOBUF");
}
71
DeprecatedAttribute(const Options &,const FieldDescriptor * d)72 inline std::string DeprecatedAttribute(const Options& /* options */,
73 const FieldDescriptor* d) {
74 return d->options().deprecated() ? "PROTOBUF_DEPRECATED " : "";
75 }
76
DeprecatedAttribute(const Options &,const EnumValueDescriptor * d)77 inline std::string DeprecatedAttribute(const Options& /* options */,
78 const EnumValueDescriptor* d) {
79 return d->options().deprecated() ? "PROTOBUF_DEPRECATED_ENUM " : "";
80 }
81
82 // Commonly-used separator comments. Thick is a line of '=', thin is a line
83 // of '-'.
84 extern const char kThickSeparator[];
85 extern const char kThinSeparator[];
86
// Takes the generator options and a pointer to a string-to-string variable
// map. Defined out of line.
// NOTE(review): presumably fills `variables` with substitution variables that
// are shared by all generators — confirm against the definition.
void SetCommonVars(const Options& options,
                   std::map<std::string, std::string>* variables);
89
90 // Variables to access message data from the message scope.
91 void SetCommonMessageDataVariables(
92 const Descriptor* descriptor,
93 std::map<std::string, std::string>* variables);
94
// NOTE(review): presumably adds to `variables` the entries used to access the
// unknown fields of `descriptor` under `options`; defined out of line —
// confirm against the definition.
void SetUnknownFieldsVariable(const Descriptor* descriptor,
                              const Options& options,
                              std::map<std::string, std::string>* variables);

// Bootstrap-related helpers, all defined out of line.
// NOTE(review): judging only by the signatures, GetBootstrapBasename reports
// through its bool result whether `basename` has a bootstrap counterpart and
// writes it to `*bootstrap_basename`, and MaybeBootstrap may rewrite
// `*basename`. Confirm against the definitions before relying on this.
bool GetBootstrapBasename(const Options& options, const std::string& basename,
                          std::string* bootstrap_basename);
bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
                    bool bootstrap_flag, std::string* basename);
bool IsBootstrapProto(const Options& options, const FileDescriptor* file);
104
// Namespace of the proto file. This namespace is such that the string
// "<namespace>::some_name" is the correct fully qualified name.
// This means if the package is empty the namespace is "", and otherwise
// the namespace is "::foo::bar::...::baz" without a trailing "::".
109 std::string Namespace(const FileDescriptor* d, const Options& options);
110 std::string Namespace(const Descriptor* d, const Options& options);
111 std::string Namespace(const FieldDescriptor* d, const Options& options);
112 std::string Namespace(const EnumDescriptor* d, const Options& options);
113
114 // Returns true if it's safe to reset "field" to zero.
115 bool CanInitializeByZeroing(const FieldDescriptor* field);
116
// Non-qualified C++ class name for a message or enum type. For
//   package foo.bar;
//   message Baz { message Moo {} }
// the name for Moo is "Baz_Moo".
std::string ClassName(const Descriptor* descriptor);
std::string ClassName(const EnumDescriptor* enum_descriptor);

// Class name prefixed with the full namespace, e.g. "::foo::bar::Baz_Moo" for
// the example above.
std::string QualifiedClassName(const Descriptor* d, const Options& options);
std::string QualifiedClassName(const EnumDescriptor* d, const Options& options);

// Overloads that take no Options.
// NOTE(review): which options they assume is not visible in this header —
// confirm against the definitions.
std::string QualifiedClassName(const Descriptor* d);
std::string QualifiedClassName(const EnumDescriptor* d);
125
126 // DEPRECATED just use ClassName or QualifiedClassName, a boolean is very
127 // unreadable at the callsite.
128 // Returns the non-nested type name for the given type. If "qualified" is
129 // true, prefix the type with the full namespace. For example, if you had:
130 // package foo.bar;
131 // message Baz { message Moo {} }
132 // Then the qualified ClassName for Moo would be:
133 // ::foo::bar::Baz_Moo
134 // While the non-qualified version would be:
135 // Baz_Moo
ClassName(const Descriptor * descriptor,bool qualified)136 inline std::string ClassName(const Descriptor* descriptor, bool qualified) {
137 return qualified ? QualifiedClassName(descriptor, Options())
138 : ClassName(descriptor);
139 }
140
ClassName(const EnumDescriptor * descriptor,bool qualified)141 inline std::string ClassName(const EnumDescriptor* descriptor, bool qualified) {
142 return qualified ? QualifiedClassName(descriptor, Options())
143 : ClassName(descriptor);
144 }
145
146 // Returns the extension name prefixed with the class name if nested but without
147 // the package name.
148 std::string ExtensionName(const FieldDescriptor* d);
149
150 std::string QualifiedExtensionName(const FieldDescriptor* d,
151 const Options& options);
152 std::string QualifiedExtensionName(const FieldDescriptor* d);
153
154 // Type name of default instance.
155 std::string DefaultInstanceType(const Descriptor* descriptor,
156 const Options& options, bool split = false);
157
158 // Non-qualified name of the default_instance of this message.
159 std::string DefaultInstanceName(const Descriptor* descriptor,
160 const Options& options, bool split = false);
161
162 // Non-qualified name of the default instance pointer. This is used only for
163 // implicit weak fields, where we need an extra indirection.
164 std::string DefaultInstancePtr(const Descriptor* descriptor,
165 const Options& options, bool split = false);
166
167 // Fully qualified name of the default_instance of this message.
168 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
169 const Options& options,
170 bool split = false);
171
172 // Fully qualified name of the default instance pointer.
173 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
174 const Options& options,
175 bool split = false);
176
177 // DescriptorTable variable name.
178 std::string DescriptorTableName(const FileDescriptor* file,
179 const Options& options);
180
181 // When declaring symbol externs from another file, this macro will supply the
182 // dllexport needed for the target file, if any.
183 std::string FileDllExport(const FileDescriptor* file, const Options& options);
184
185 // Name of the base class: google::protobuf::Message or google::protobuf::MessageLite.
186 std::string SuperClassName(const Descriptor* descriptor,
187 const Options& options);
188
189 // Adds an underscore if necessary to prevent conflicting with a keyword.
190 std::string ResolveKeyword(const std::string& name);
191
192 // Get the (unqualified) name that should be used for this field in C++ code.
193 // The name is coerced to lower-case to emulate proto1 behavior. People
194 // should be using lowercase-with-underscores style for proto field names
195 // anyway, so normally this just returns field->name().
196 std::string FieldName(const FieldDescriptor* field);
197
198 // Returns the (unqualified) private member name for this field in C++ code.
199 std::string FieldMemberName(const FieldDescriptor* field, bool split);
200
201 // Returns an estimate of the compiler's alignment for the field. This
202 // can't guarantee to be correct because the generated code could be compiled on
203 // different systems with different alignment rules. The estimates below assume
204 // 64-bit pointers.
205 int EstimateAlignmentSize(const FieldDescriptor* field);
206
207 // Get the unqualified name that should be used for a field's field
208 // number constant.
209 std::string FieldConstantName(const FieldDescriptor* field);
210
211 // Returns the scope where the field was defined (for extensions, this is
212 // different from the message type to which the field applies).
FieldScope(const FieldDescriptor * field)213 inline const Descriptor* FieldScope(const FieldDescriptor* field) {
214 return field->is_extension() ? field->extension_scope()
215 : field->containing_type();
216 }
217
218 // Returns the fully-qualified type name field->message_type(). Usually this
219 // is just ClassName(field->message_type(), true);
220 std::string FieldMessageTypeName(const FieldDescriptor* field,
221 const Options& options);
222
223 // Get the C++ type name for a primitive type (e.g. "double", "::google::protobuf::int32", etc.).
224 const char* PrimitiveTypeName(FieldDescriptor::CppType type);
225 std::string PrimitiveTypeName(const Options& options,
226 FieldDescriptor::CppType type);
227
228 // Get the declared type name in CamelCase format, as is used e.g. for the
229 // methods of WireFormat. For example, TYPE_INT32 becomes "Int32".
230 const char* DeclaredTypeMethodName(FieldDescriptor::Type type);
231
232 // Return the code that evaluates to the number when compiled.
233 std::string Int32ToString(int number);
234
235 // Get code that evaluates to the field's default value.
236 std::string DefaultValue(const Options& options, const FieldDescriptor* field);
237
238 // Compatibility function for callers outside proto2.
239 std::string DefaultValue(const FieldDescriptor* field);
240
241 // Convert a file name into a valid identifier.
242 std::string FilenameIdentifier(const std::string& filename);
243
// Generates a name that is unique for each .proto file, to prevent collisions
// of symbols in the global namespace.
246 std::string UniqueName(const std::string& name, const std::string& filename,
247 const Options& options);
UniqueName(const std::string & name,const FileDescriptor * d,const Options & options)248 inline std::string UniqueName(const std::string& name, const FileDescriptor* d,
249 const Options& options) {
250 return UniqueName(name, d->name(), options);
251 }
UniqueName(const std::string & name,const Descriptor * d,const Options & options)252 inline std::string UniqueName(const std::string& name, const Descriptor* d,
253 const Options& options) {
254 return UniqueName(name, d->file(), options);
255 }
UniqueName(const std::string & name,const EnumDescriptor * d,const Options & options)256 inline std::string UniqueName(const std::string& name, const EnumDescriptor* d,
257 const Options& options) {
258 return UniqueName(name, d->file(), options);
259 }
UniqueName(const std::string & name,const ServiceDescriptor * d,const Options & options)260 inline std::string UniqueName(const std::string& name,
261 const ServiceDescriptor* d,
262 const Options& options) {
263 return UniqueName(name, d->file(), options);
264 }
265
266 // Versions for call sites that only support the internal runtime (like proto1
267 // support).
InternalRuntimeOptions()268 inline Options InternalRuntimeOptions() {
269 Options options;
270 options.opensource_runtime = false;
271 return options;
272 }
UniqueName(const std::string & name,const std::string & filename)273 inline std::string UniqueName(const std::string& name,
274 const std::string& filename) {
275 return UniqueName(name, filename, InternalRuntimeOptions());
276 }
UniqueName(const std::string & name,const FileDescriptor * d)277 inline std::string UniqueName(const std::string& name,
278 const FileDescriptor* d) {
279 return UniqueName(name, d->name(), InternalRuntimeOptions());
280 }
UniqueName(const std::string & name,const Descriptor * d)281 inline std::string UniqueName(const std::string& name, const Descriptor* d) {
282 return UniqueName(name, d->file(), InternalRuntimeOptions());
283 }
UniqueName(const std::string & name,const EnumDescriptor * d)284 inline std::string UniqueName(const std::string& name,
285 const EnumDescriptor* d) {
286 return UniqueName(name, d->file(), InternalRuntimeOptions());
287 }
UniqueName(const std::string & name,const ServiceDescriptor * d)288 inline std::string UniqueName(const std::string& name,
289 const ServiceDescriptor* d) {
290 return UniqueName(name, d->file(), InternalRuntimeOptions());
291 }
292
293 // Return the qualified C++ name for a file level symbol.
294 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
295 const std::string& name,
296 const Options& options);
297
298 // Escape C++ trigraphs by escaping question marks to \?
299 std::string EscapeTrigraphs(const std::string& to_escape);
300
301 // Escaped function name to eliminate naming conflict.
302 std::string SafeFunctionName(const Descriptor* descriptor,
303 const FieldDescriptor* field,
304 const std::string& prefix);
305
306 // Returns true if generated messages have public unknown fields accessors
PublicUnknownFieldsAccessors(const Descriptor * message)307 inline bool PublicUnknownFieldsAccessors(const Descriptor* message) {
308 return message->file()->syntax() != FileDescriptor::SYNTAX_PROTO3;
309 }
310
// Returns the optimize mode for <file>, respecting <options.enforce_mode>.
312 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
313 const Options& options);
314
315 // Determines whether unknown fields will be stored in an UnknownFieldSet or
316 // a string.
UseUnknownFieldSet(const FileDescriptor * file,const Options & options)317 inline bool UseUnknownFieldSet(const FileDescriptor* file,
318 const Options& options) {
319 return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
320 }
321
IsWeak(const FieldDescriptor * field,const Options & options)322 inline bool IsWeak(const FieldDescriptor* field, const Options& options) {
323 if (field->options().weak()) {
324 GOOGLE_CHECK(!options.opensource_runtime);
325 return true;
326 }
327 return false;
328 }
329
// NOTE(review): presumably reports whether the string field `descriptor` is
// stored inline in its message under `options`; defined out of line — confirm
// against the definition.
bool IsStringInlined(const FieldDescriptor* descriptor, const Options& options);
331
332 // For a string field, returns the effective ctype. If the actual ctype is
333 // not supported, returns the default of STRING.
334 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
335 const Options& options);
336
IsCord(const FieldDescriptor * field,const Options & options)337 inline bool IsCord(const FieldDescriptor* field, const Options& options) {
338 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
339 EffectiveStringCType(field, options) == FieldOptions::CORD;
340 }
341
IsString(const FieldDescriptor * field,const Options & options)342 inline bool IsString(const FieldDescriptor* field, const Options& options) {
343 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
344 EffectiveStringCType(field, options) == FieldOptions::STRING;
345 }
346
IsStringPiece(const FieldDescriptor * field,const Options & options)347 inline bool IsStringPiece(const FieldDescriptor* field,
348 const Options& options) {
349 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
350 EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
351 }
352
353 class MessageSCCAnalyzer;
354
355 // Does the given FileDescriptor use lazy fields?
356 bool HasLazyFields(const FileDescriptor* file, const Options& options,
357 MessageSCCAnalyzer* scc_analyzer);
358
359 // Is the given field a supported lazy field?
360 bool IsLazy(const FieldDescriptor* field, const Options& options,
361 MessageSCCAnalyzer* scc_analyzer);
362
363 // Is this an explicit (non-profile driven) lazy field, as denoted by
364 // lazy/unverified_lazy in the descriptor?
IsExplicitLazy(const FieldDescriptor * field)365 inline bool IsExplicitLazy(const FieldDescriptor* field) {
366 return field->options().lazy() || field->options().unverified_lazy();
367 }
368
// Lazy-field classification helpers, defined out of line.
// NOTE(review): the names suggest they distinguish lazy fields that are
// verified eagerly from those verified lazily — confirm the exact criteria
// against the definitions.
bool IsEagerlyVerifiedLazy(const FieldDescriptor* field, const Options& options,
                           MessageSCCAnalyzer* scc_analyzer);

bool IsLazilyVerifiedLazy(const FieldDescriptor* field, const Options& options);
373
374 // Is the given message being split (go/pdsplit)?
375 bool ShouldSplit(const Descriptor* desc, const Options& options);
376
377 // Is the given field being split out?
378 bool ShouldSplit(const FieldDescriptor* field, const Options& options);
379
// Always returns true; both arguments are ignored.
inline bool IsFieldUsed(const FieldDescriptor* /* field */,
                        const Options& /* options */) {
  constexpr bool kAlwaysUsed = true;
  return kAlwaysUsed;
}
384
385 // Returns true if "field" is stripped.
IsFieldStripped(const FieldDescriptor *,const Options &)386 inline bool IsFieldStripped(const FieldDescriptor* /*field*/,
387 const Options& /*options*/) {
388 return false;
389 }
390
391 // Does the file contain any definitions that need extension_set.h?
392 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file);
393
394 // Does the file have any repeated fields, necessitating the file to include
395 // repeated_field.h? This does not include repeated extensions, since those are
396 // all stored internally in an ExtensionSet, not a separate RepeatedField*.
397 bool HasRepeatedFields(const FileDescriptor* file);
398
399 // Does the file have any string/bytes fields with ctype=STRING_PIECE? This
400 // does not include extensions, since ctype is ignored for extensions.
401 bool HasStringPieceFields(const FileDescriptor* file, const Options& options);
402
403 // Does the file have any string/bytes fields with ctype=CORD? This does not
404 // include extensions, since ctype is ignored for extensions.
405 bool HasCordFields(const FileDescriptor* file, const Options& options);
406
407 // Does the file have any map fields, necessitating the file to include
408 // map_field_inl.h and map.h.
409 bool HasMapFields(const FileDescriptor* file);
410
411 // Does this file have any enum type definitions?
412 bool HasEnumDefinitions(const FileDescriptor* file);
413
414 // Does this file have generated parsing, serialization, and other
415 // standard methods for which reflection-based fallback implementations exist?
HasGeneratedMethods(const FileDescriptor * file,const Options & options)416 inline bool HasGeneratedMethods(const FileDescriptor* file,
417 const Options& options) {
418 return GetOptimizeFor(file, options) != FileOptions::CODE_SIZE;
419 }
420
421 // Do message classes in this file have descriptor and reflection methods?
HasDescriptorMethods(const FileDescriptor * file,const Options & options)422 inline bool HasDescriptorMethods(const FileDescriptor* file,
423 const Options& options) {
424 return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
425 }
426
427 // Should we generate generic services for this file?
HasGenericServices(const FileDescriptor * file,const Options & options)428 inline bool HasGenericServices(const FileDescriptor* file,
429 const Options& options) {
430 return file->service_count() > 0 &&
431 GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME &&
432 file->options().cc_generic_services();
433 }
434
IsProto2MessageSet(const Descriptor * descriptor,const Options & options)435 inline bool IsProto2MessageSet(const Descriptor* descriptor,
436 const Options& options) {
437 return !options.opensource_runtime &&
438 options.enforce_mode != EnforceOptimizeMode::kLiteRuntime &&
439 !options.lite_implicit_weak_fields &&
440 descriptor->options().message_set_wire_format() &&
441 descriptor->full_name() == "google.protobuf.bridge.MessageSet";
442 }
443
IsMapEntryMessage(const Descriptor * descriptor)444 inline bool IsMapEntryMessage(const Descriptor* descriptor) {
445 return descriptor->options().map_entry();
446 }
447
448 // Returns true if the field's CPPTYPE is string or message.
449 bool IsStringOrMessage(const FieldDescriptor* field);
450
// NOTE(review): presumably converts an underscore_separated identifier to
// CamelCase, with `cap_next_letter` controlling whether the first letter is
// capitalized; defined out of line — confirm against the definition.
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter);
453
IsProto3(const FileDescriptor * file)454 inline bool IsProto3(const FileDescriptor* file) {
455 return file->syntax() == FileDescriptor::SYNTAX_PROTO3;
456 }
457
HasHasbit(const FieldDescriptor * field)458 inline bool HasHasbit(const FieldDescriptor* field) {
459 // This predicate includes proto3 message fields only if they have "optional".
460 // Foo submsg1 = 1; // HasHasbit() == false
461 // optional Foo submsg2 = 2; // HasHasbit() == true
462 // This is slightly odd, as adding "optional" to a singular proto3 field does
463 // not change the semantics or API. However whenever any field in a message
464 // has a hasbit, it forces reflection to include hasbit offsets for *all*
465 // fields, even if almost all of them are set to -1 (no hasbit). So to avoid
466 // causing a sudden size regression for ~all proto3 messages, we give proto3
467 // message fields a hasbit only if "optional" is present. If the user is
468 // explicitly writing "optional", it is likely they are writing it on
469 // primitive fields also.
470 return (field->has_optional_keyword() || field->is_required()) &&
471 !field->options().weak();
472 }
473
474 // Returns true if 'enum' semantics are such that unknown values are preserved
475 // in the enum field itself, rather than going to the UnknownFieldSet.
HasPreservingUnknownEnumSemantics(const FieldDescriptor * field)476 inline bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) {
477 return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
478 }
479
IsCrossFileMessage(const FieldDescriptor * field)480 inline bool IsCrossFileMessage(const FieldDescriptor* field) {
481 return field->type() == FieldDescriptor::TYPE_MESSAGE &&
482 field->message_type()->file() != field->file();
483 }
484
MakeDefaultName(const FieldDescriptor * field)485 inline std::string MakeDefaultName(const FieldDescriptor* field) {
486 return StrCat("_i_give_permission_to_break_this_code_default_",
487 FieldName(field), "_");
488 }
489
490 // Semantically distinct from MakeDefaultName in that it gives the C++ code
491 // referencing a default field from the message scope, rather than just the
492 // variable name.
493 // For example, declarations of default variables should always use just
494 // MakeDefaultName to produce code like:
495 // Type _i_give_permission_to_break_this_code_default_field_;
496 //
497 // Code that references these should use MakeDefaultFieldName, in case the field
498 // exists at some nested level like:
499 // internal_container_._i_give_permission_to_break_this_code_default_field_;
MakeDefaultFieldName(const FieldDescriptor * field)500 inline std::string MakeDefaultFieldName(const FieldDescriptor* field) {
501 return StrCat("Impl_::", MakeDefaultName(field));
502 }
503
MakeVarintCachedSizeName(const FieldDescriptor * field)504 inline std::string MakeVarintCachedSizeName(const FieldDescriptor* field) {
505 return StrCat("_", FieldName(field), "_cached_byte_size_");
506 }
507
508 // Semantically distinct from MakeVarintCachedSizeName in that it gives the C++
509 // code referencing the object from the message scope, rather than just the
510 // variable name.
511 // For example, declarations of default variables should always use just
512 // MakeVarintCachedSizeName to produce code like:
513 // Type _field_cached_byte_size_;
514 //
515 // Code that references these variables should use
516 // MakeVarintCachedSizeFieldName, in case the field exists at some nested level
517 // like:
518 // internal_container_._field_cached_byte_size_;
MakeVarintCachedSizeFieldName(const FieldDescriptor * field,bool split)519 inline std::string MakeVarintCachedSizeFieldName(const FieldDescriptor* field,
520 bool split) {
521 return StrCat("_impl_.", split ? "_split_->" : "", "_",
522 FieldName(field), "_cached_byte_size_");
523 }
524
525 // Note: A lot of libraries detect Any protos based on Descriptor::full_name()
526 // while the two functions below use FileDescriptor::name(). In a sane world the
527 // two approaches should be equivalent. But if you are dealing with descriptors
528 // from untrusted sources, you might need to match semantics across libraries.
529 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options);
530 bool IsAnyMessage(const Descriptor* descriptor, const Options& options);
531
// NOTE(review): presumably true when `descriptor` is one of the well-known
// .proto files; defined out of line — confirm against the definition.
// IncludeGuard uses the result to choose the include-guard prefix.
bool IsWellKnownMessage(const FileDescriptor* descriptor);
533
IncludeGuard(const FileDescriptor * file,bool pb_h,const Options & options)534 inline std::string IncludeGuard(const FileDescriptor* file, bool pb_h,
535 const Options& options) {
536 // If we are generating a .pb.h file and the proto_h option is enabled, then
537 // the .pb.h gets an extra suffix.
538 std::string filename_identifier = FilenameIdentifier(
539 file->name() + (pb_h && options.proto_h ? ".pb.h" : ""));
540
541 if (IsWellKnownMessage(file)) {
542 // For well-known messages we need third_party/protobuf and net/proto2 to
543 // have distinct include guards, because some source files include both and
544 // both need to be defined (the third_party copies will be in the
545 // google::protobuf_opensource namespace).
546 return MacroPrefix(options) + "_INCLUDED_" + filename_identifier;
547 } else {
548 // Ideally this case would use distinct include guards for opensource and
549 // google3 protos also. (The behavior of "first #included wins" is not
550 // ideal). But unfortunately some legacy code includes both and depends on
551 // the identical include guards to avoid compile errors.
552 //
553 // We should clean this up so that this case can be removed.
554 return "GOOGLE_PROTOBUF_INCLUDED_" + filename_identifier;
555 }
556 }
557
// Returns the OptimizeMode for this file. Furthermore, it updates a status
// bool if has_opt_codesize_extension is non-null. If this status bool is true
// it means this file contains an extension that itself is defined as
// optimize_for = CODE_SIZE.
562 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
563 const Options& options,
564 bool* has_opt_codesize_extension);
GetOptimizeFor(const FileDescriptor * file,const Options & options)565 inline FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
566 const Options& options) {
567 return GetOptimizeFor(file, options, nullptr);
568 }
NeedsEagerDescriptorAssignment(const FileDescriptor * file,const Options & options)569 inline bool NeedsEagerDescriptorAssignment(const FileDescriptor* file,
570 const Options& options) {
571 bool has_opt_codesize_extension;
572 if (GetOptimizeFor(file, options, &has_opt_codesize_extension) ==
573 FileOptions::CODE_SIZE &&
574 has_opt_codesize_extension) {
575 // If this filedescriptor contains an extension from another file which
576 // is optimized_for = CODE_SIZE. We need to be careful in the ordering so
577 // we eagerly build the descriptors in the dependencies before building
578 // the descriptors of this file.
579 return true;
580 } else {
581 // If we have a generated code based parser we never need eager
582 // initialization of descriptors of our deps.
583 return false;
584 }
585 }
586
// This orders the messages in a .pb.cc the way they are output by file.cc.
588 void FlattenMessagesInFile(const FileDescriptor* file,
589 std::vector<const Descriptor*>* result);
FlattenMessagesInFile(const FileDescriptor * file)590 inline std::vector<const Descriptor*> FlattenMessagesInFile(
591 const FileDescriptor* file) {
592 std::vector<const Descriptor*> result;
593 FlattenMessagesInFile(file, &result);
594 return result;
595 }
596
597 template <typename F>
ForEachMessage(const Descriptor * descriptor,F && func)598 void ForEachMessage(const Descriptor* descriptor, F&& func) {
599 for (int i = 0; i < descriptor->nested_type_count(); i++)
600 ForEachMessage(descriptor->nested_type(i), std::forward<F&&>(func));
601 func(descriptor);
602 }
603
604 template <typename F>
ForEachMessage(const FileDescriptor * descriptor,F && func)605 void ForEachMessage(const FileDescriptor* descriptor, F&& func) {
606 for (int i = 0; i < descriptor->message_type_count(); i++)
607 ForEachMessage(descriptor->message_type(i), std::forward<F&&>(func));
608 }
609
// NOTE(review): presumably report whether the given message / file contains
// any weak fields under `options`; defined out of line — confirm against the
// definitions.
bool HasWeakFields(const Descriptor* desc, const Options& options);
bool HasWeakFields(const FileDescriptor* desc, const Options& options);
612
613 // Returns true if the "required" restriction check should be ignored for the
614 // given field.
ShouldIgnoreRequiredFieldCheck(const FieldDescriptor * field,const Options & options)615 inline static bool ShouldIgnoreRequiredFieldCheck(const FieldDescriptor* field,
616 const Options& options) {
617 // Do not check "required" for lazily verified lazy fields.
618 return IsLazilyVerifiedLazy(field, options);
619 }
620
// Result flags of a message analysis. Every flag defaults to false.
struct MessageAnalysis {
  bool is_recursive{false};
  bool contains_cord{false};
  bool contains_extension{false};
  bool contains_required{false};
  bool contains_weak{false};  // Implicit weak as well.
};
628
629 // This class is used in FileGenerator, to ensure linear instead of
630 // quadratic performance, if we do this per message we would get O(V*(V+E)).
631 // Logically this is just only used in message.cc, but in the header for
632 // FileGenerator to help share it.
633 class PROTOC_EXPORT MessageSCCAnalyzer {
634 public:
MessageSCCAnalyzer(const Options & options)635 explicit MessageSCCAnalyzer(const Options& options) : options_(options) {}
636
637 MessageAnalysis GetSCCAnalysis(const SCC* scc);
638
HasRequiredFields(const Descriptor * descriptor)639 bool HasRequiredFields(const Descriptor* descriptor) {
640 MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor));
641 return result.contains_required || result.contains_extension;
642 }
HasWeakField(const Descriptor * descriptor)643 bool HasWeakField(const Descriptor* descriptor) {
644 MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor));
645 return result.contains_weak;
646 }
GetSCC(const Descriptor * descriptor)647 const SCC* GetSCC(const Descriptor* descriptor) {
648 return analyzer_.GetSCC(descriptor);
649 }
650
651 private:
652 struct DepsGenerator {
operatorDepsGenerator653 std::vector<const Descriptor*> operator()(const Descriptor* desc) const {
654 std::vector<const Descriptor*> deps;
655 for (int i = 0; i < desc->field_count(); i++) {
656 if (desc->field(i)->message_type()) {
657 deps.push_back(desc->field(i)->message_type());
658 }
659 }
660 return deps;
661 }
662 };
663 SCCAnalyzer<DepsGenerator> analyzer_;
664 Options options_;
665 std::map<const SCC*, MessageAnalysis> analysis_cache_;
666 };
667
// NOTE(review): presumably collect the fields reachable from the given
// message / file into `*fields`; defined out of line, so whether the vector
// is cleared first and in what order fields are listed must be confirmed
// against the definitions.
void ListAllFields(const Descriptor* d,
                   std::vector<const FieldDescriptor*>* fields);
void ListAllFields(const FileDescriptor* d,
                   std::vector<const FieldDescriptor*>* fields);
672
673 template <class T>
ForEachField(const Descriptor * d,T && func)674 void ForEachField(const Descriptor* d, T&& func) {
675 for (int i = 0; i < d->nested_type_count(); i++) {
676 ForEachField(d->nested_type(i), std::forward<T&&>(func));
677 }
678 for (int i = 0; i < d->extension_count(); i++) {
679 func(d->extension(i));
680 }
681 for (int i = 0; i < d->field_count(); i++) {
682 func(d->field(i));
683 }
684 }
685
686 template <class T>
ForEachField(const FileDescriptor * d,T && func)687 void ForEachField(const FileDescriptor* d, T&& func) {
688 for (int i = 0; i < d->message_type_count(); i++) {
689 ForEachField(d->message_type(i), std::forward<T&&>(func));
690 }
691 for (int i = 0; i < d->extension_count(); i++) {
692 func(d->extension(i));
693 }
694 }
695
// Presumably gathers into `*types` the message types used by the services
// declared in `fd`.
// NOTE(review): the definition is not in this header; confirm exactly which
// types are collected (e.g. method input/output types).
void ListAllTypesForServices(const FileDescriptor* fd,
                             std::vector<const Descriptor*>* types);
698
699 // Indicates whether we should use implicit weak fields for this file.
700 bool UsingImplicitWeakFields(const FileDescriptor* file,
701 const Options& options);
702
703 // Indicates whether to treat this field as implicitly weak.
704 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
705 MessageSCCAnalyzer* scc_analyzer);
706
HasSimpleBaseClass(const Descriptor * desc,const Options & options)707 inline bool HasSimpleBaseClass(const Descriptor* desc, const Options& options) {
708 if (!HasDescriptorMethods(desc->file(), options)) return false;
709 if (desc->extension_range_count() != 0) return false;
710 if (desc->field_count() == 0) return true;
711 // TODO(jorg): Support additional common message types with only one
712 // or two fields
713 return false;
714 }
715
HasSimpleBaseClasses(const FileDescriptor * file,const Options & options)716 inline bool HasSimpleBaseClasses(const FileDescriptor* file,
717 const Options& options) {
718 bool v = false;
719 ForEachMessage(file, [&v, &options](const Descriptor* desc) {
720 v |= HasSimpleBaseClass(desc, options);
721 });
722 return v;
723 }
724
SimpleBaseClass(const Descriptor * desc,const Options & options)725 inline std::string SimpleBaseClass(const Descriptor* desc,
726 const Options& options) {
727 if (!HasDescriptorMethods(desc->file(), options)) return "";
728 if (desc->extension_range_count() != 0) return "";
729 if (desc->field_count() == 0) {
730 return "ZeroFieldsBase";
731 }
732 // TODO(jorg): Support additional common message types with only one
733 // or two fields
734 return "";
735 }
736
737 // Returns true if this message has a _tracker_ field.
HasTracker(const Descriptor * desc,const Options & options)738 inline bool HasTracker(const Descriptor* desc, const Options& options) {
739 return options.field_listener_options.inject_field_listener_events &&
740 desc->file()->options().optimize_for() !=
741 google::protobuf::FileOptions::LITE_RUNTIME;
742 }
743
744 // Returns true if this message needs an Impl_ struct for it's data.
HasImplData(const Descriptor * desc,const Options & options)745 inline bool HasImplData(const Descriptor* desc, const Options& options) {
746 return !HasSimpleBaseClass(desc, options);
747 }
748
749 // Formatter is a functor class which acts as a closure around printer and
750 // the variable map. It's much like printer->Print except it supports both named
751 // variables that are substituted using a key value map and direct arguments. In
752 // the format string $1$, $2$, etc... are substituted for the first, second, ...
753 // direct argument respectively in the format call, it accepts both strings and
754 // integers. The implementation verifies all arguments are used and are "first"
755 // used in order of appearance in the argument list. For example,
756 //
757 // Format("return array[$1$];", 3) -> "return array[3];"
758 // Format("array[$2$] = $1$;", "Bla", 3) -> FATAL error (wrong order)
759 // Format("array[$1$] = $2$;", 3, "Bla") -> "array[3] = Bla;"
760 //
761 // The arguments can be used more than once like
762 //
763 // Format("array[$1$] = $2$; // Index = $1$", 3, "Bla") ->
764 // "array[3] = Bla; // Index = 3"
765 //
766 // If you use more arguments use the following style to help the reader,
767 //
768 // Format("int $1$() {\n"
769 // " array[$2$] = $3$;\n"
770 // " return $4$;"
771 // "}\n",
772 // funname, // 1
773 // idx, // 2
774 // varname, // 3
775 // retval); // 4
776 //
777 // but consider using named variables. Named variables like $foo$, with some
778 // identifier foo, are looked up in the map. One additional feature is that
779 // spaces are accepted between the '$' delimiters, $ foo$ will
780 // substitute to " bar" if foo stands for "bar", but in case it's empty
781 // will substitute to "". Hence, for example,
782 //
783 // Format(vars, "$dllexport $void fun();") -> "void fun();"
784 // "__declspec(export) void fun();"
785 //
786 // which is convenient to prevent double, leading or trailing spaces.
787 class PROTOC_EXPORT Formatter {
788 public:
Formatter(io::Printer * printer)789 explicit Formatter(io::Printer* printer) : printer_(printer) {}
Formatter(io::Printer * printer,const std::map<std::string,std::string> & vars)790 Formatter(io::Printer* printer,
791 const std::map<std::string, std::string>& vars)
792 : printer_(printer), vars_(vars) {}
793
794 template <typename T>
Set(const std::string & key,const T & value)795 void Set(const std::string& key, const T& value) {
796 vars_[key] = ToString(value);
797 }
798
AddMap(const std::map<std::string,std::string> & vars)799 void AddMap(const std::map<std::string, std::string>& vars) {
800 for (const auto& keyval : vars) vars_[keyval.first] = keyval.second;
801 }
802
803 template <typename... Args>
operator()804 void operator()(const char* format, const Args&... args) const {
805 printer_->FormatInternal({ToString(args)...}, vars_, format);
806 }
807
Indent()808 void Indent() const { printer_->Indent(); }
Outdent()809 void Outdent() const { printer_->Outdent(); }
printer()810 io::Printer* printer() const { return printer_; }
811
812 class PROTOC_EXPORT ScopedIndenter {
813 public:
ScopedIndenter(Formatter * format)814 explicit ScopedIndenter(Formatter* format) : format_(format) {
815 format_->Indent();
816 }
~ScopedIndenter()817 ~ScopedIndenter() { format_->Outdent(); }
818
819 private:
820 Formatter* format_;
821 };
822
ScopedIndent()823 PROTOBUF_NODISCARD ScopedIndenter ScopedIndent() {
824 return ScopedIndenter(this);
825 }
826 template <typename... Args>
ScopedIndent(const char * format,const Args &&...args)827 PROTOBUF_NODISCARD ScopedIndenter ScopedIndent(const char* format,
828 const Args&&... args) {
829 (*this)(format, static_cast<Args&&>(args)...);
830 return ScopedIndenter(this);
831 }
832
833 class PROTOC_EXPORT SaveState {
834 public:
SaveState(Formatter * format)835 explicit SaveState(Formatter* format)
836 : format_(format), vars_(format->vars_) {}
~SaveState()837 ~SaveState() { format_->vars_.swap(vars_); }
838
839 private:
840 Formatter* format_;
841 std::map<std::string, std::string> vars_;
842 };
843
844 private:
845 io::Printer* printer_;
846 std::map<std::string, std::string> vars_;
847
848 // Convenience overloads to accept different types as arguments.
ToString(const std::string & s)849 static std::string ToString(const std::string& s) { return s; }
850 template <typename I, typename = typename std::enable_if<
851 std::is_integral<I>::value>::type>
ToString(I x)852 static std::string ToString(I x) {
853 return StrCat(x);
854 }
ToString(strings::Hex x)855 static std::string ToString(strings::Hex x) { return StrCat(x); }
ToString(const FieldDescriptor * d)856 static std::string ToString(const FieldDescriptor* d) { return Payload(d); }
ToString(const Descriptor * d)857 static std::string ToString(const Descriptor* d) { return Payload(d); }
ToString(const EnumDescriptor * d)858 static std::string ToString(const EnumDescriptor* d) { return Payload(d); }
ToString(const EnumValueDescriptor * d)859 static std::string ToString(const EnumValueDescriptor* d) {
860 return Payload(d);
861 }
ToString(const OneofDescriptor * d)862 static std::string ToString(const OneofDescriptor* d) { return Payload(d); }
863
864 template <typename Descriptor>
Payload(const Descriptor * descriptor)865 static std::string Payload(const Descriptor* descriptor) {
866 std::vector<int> path;
867 descriptor->GetLocationPath(&path);
868 GeneratedCodeInfo::Annotation annotation;
869 for (int index : path) {
870 annotation.add_path(index);
871 }
872 annotation.set_source_file(descriptor->file()->name());
873 return annotation.SerializeAsString();
874 }
875 };
876
877 template <class T>
PrintFieldComment(const Formatter & format,const T * field)878 void PrintFieldComment(const Formatter& format, const T* field) {
879 // Print the field's (or oneof's) proto-syntax definition as a comment.
880 // We don't want to print group bodies so we cut off after the first
881 // line.
882 DebugStringOptions options;
883 options.elide_group_body = true;
884 options.elide_oneof_body = true;
885 std::string def = field->DebugStringWithOptions(options);
886 format("// $1$\n", def.substr(0, def.find_first_of('\n')));
887 }
888
889 class PROTOC_EXPORT NamespaceOpener {
890 public:
NamespaceOpener(const Formatter & format)891 explicit NamespaceOpener(const Formatter& format)
892 : printer_(format.printer()) {}
NamespaceOpener(const std::string & name,const Formatter & format)893 NamespaceOpener(const std::string& name, const Formatter& format)
894 : NamespaceOpener(format) {
895 ChangeTo(name);
896 }
~NamespaceOpener()897 ~NamespaceOpener() { ChangeTo(""); }
898
ChangeTo(const std::string & name)899 void ChangeTo(const std::string& name) {
900 std::vector<std::string> new_stack_ =
901 Split(name, "::", true);
902 size_t len = std::min(name_stack_.size(), new_stack_.size());
903 size_t common_idx = 0;
904 while (common_idx < len) {
905 if (name_stack_[common_idx] != new_stack_[common_idx]) break;
906 common_idx++;
907 }
908 for (auto it = name_stack_.crbegin();
909 it != name_stack_.crend() - common_idx; ++it) {
910 if (*it == "PROTOBUF_NAMESPACE_ID") {
911 printer_->Print("PROTOBUF_NAMESPACE_CLOSE\n");
912 } else {
913 printer_->Print("} // namespace $ns$\n", "ns", *it);
914 }
915 }
916 name_stack_.swap(new_stack_);
917 for (size_t i = common_idx; i < name_stack_.size(); ++i) {
918 if (name_stack_[i] == "PROTOBUF_NAMESPACE_ID") {
919 printer_->Print("PROTOBUF_NAMESPACE_OPEN\n");
920 } else {
921 printer_->Print("namespace $ns$ {\n", "ns", name_stack_[i]);
922 }
923 }
924 }
925
926 private:
927 io::Printer* printer_;
928 std::vector<std::string> name_stack_;
929 };
930
// The kind of UTF-8 validation applied to a string field; see
// GetUtf8CheckMode().
enum class Utf8CheckMode {
  kStrict = 0,  // Parsing will fail if non UTF-8 data is in string fields.
  kVerify = 1,  // Only log an error but parsing will succeed.
  kNone = 2,    // No UTF-8 check.
};
936
// Returns the Utf8CheckMode that applies to `field` under `options`.
Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
                               const Options& options);

// Presumably emits, through `format`, the UTF-8 check code for a string
// field.
// NOTE(review): the definition is not in this header; confirm the meaning of
// `for_parse` and what `parameters` is expected to contain.
void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
                                    const Options& options, bool for_parse,
                                    const char* parameters,
                                    const Formatter& format);

// Counterpart of GenerateUtf8CheckCodeForString for Cord fields; the same
// caveats apply.
void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
                                  const Options& options, bool for_parse,
                                  const char* parameters,
                                  const Formatter& format);
949
950 template <typename T>
951 struct FieldRangeImpl {
952 struct Iterator {
953 using iterator_category = std::forward_iterator_tag;
954 using value_type = const FieldDescriptor*;
955 using difference_type = int;
956
957 value_type operator*() { return descriptor->field(idx); }
958
959 friend bool operator==(const Iterator& a, const Iterator& b) {
960 GOOGLE_DCHECK(a.descriptor == b.descriptor);
961 return a.idx == b.idx;
962 }
963 friend bool operator!=(const Iterator& a, const Iterator& b) {
964 return !(a == b);
965 }
966
967 Iterator& operator++() {
968 idx++;
969 return *this;
970 }
971
972 int idx;
973 const T* descriptor;
974 };
975
beginFieldRangeImpl976 Iterator begin() const { return {0, descriptor}; }
endFieldRangeImpl977 Iterator end() const { return {descriptor->field_count(), descriptor}; }
978
979 const T* descriptor;
980 };
981
982 template <typename T>
FieldRange(const T * desc)983 FieldRangeImpl<T> FieldRange(const T* desc) {
984 return {desc};
985 }
986
987 struct OneOfRangeImpl {
988 struct Iterator {
989 using iterator_category = std::forward_iterator_tag;
990 using value_type = const OneofDescriptor*;
991 using difference_type = int;
992
993 value_type operator*() { return descriptor->oneof_decl(idx); }
994
995 friend bool operator==(const Iterator& a, const Iterator& b) {
996 GOOGLE_DCHECK(a.descriptor == b.descriptor);
997 return a.idx == b.idx;
998 }
999 friend bool operator!=(const Iterator& a, const Iterator& b) {
1000 return !(a == b);
1001 }
1002
1003 Iterator& operator++() {
1004 idx++;
1005 return *this;
1006 }
1007
1008 int idx;
1009 const Descriptor* descriptor;
1010 };
1011
beginOneOfRangeImpl1012 Iterator begin() const { return {0, descriptor}; }
endOneOfRangeImpl1013 Iterator end() const {
1014 return {descriptor->real_oneof_decl_count(), descriptor};
1015 }
1016
1017 const Descriptor* descriptor;
1018 };
1019
OneOfRange(const Descriptor * desc)1020 inline OneOfRangeImpl OneOfRange(const Descriptor* desc) { return {desc}; }
1021
// NOTE(review): presumably returns `filename` with its proto file extension
// removed; the definition is not in this header -- confirm.
PROTOC_EXPORT std::string StripProto(const std::string& filename);

// NOTE(review): judging by the name, reports whether a message-owned arena is
// enabled for `desc` under `options`; defined elsewhere -- confirm.
bool EnableMessageOwnedArena(const Descriptor* desc, const Options& options);

// NOTE(review): "trial" variant of the check above; the exact difference is
// defined elsewhere -- confirm.
bool EnableMessageOwnedArenaTrial(const Descriptor* desc,
                                  const Options& options);

// NOTE(review): judging by the name, reports whether verification applies to
// the given message (first overload) or file (second overload); the criteria
// are defined elsewhere -- confirm.
bool ShouldVerify(const Descriptor* descriptor, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer);
bool ShouldVerify(const FileDescriptor* file, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer);
1033
// Indicates whether to use predefined verify methods for a given message. If a
// message is "simple" and needs no special verification per field (e.g. message
// field, repeated packed, UTF8 string, etc.), we can use either VerifySimple or
// VerifySimpleAlwaysCheckInt32 methods, as all verification can be done based
// on the wire type.
//
// Otherwise, we need "custom" verify methods tailored to a message to indicate
// which fields need special verification; i.e. InternalVerify.
enum class VerifySimpleType {
  kSimpleInt32Never,   // Use VerifySimple
  kSimpleInt32Always,  // Use VerifySimpleAlwaysCheckInt32
  kCustom,             // Use InternalVerify and check only for int32
  kCustomInt32Never,   // Use InternalVerify but never check for int32
  kCustomInt32Always,  // Use InternalVerify and always check for int32
};
1049
1050 // Returns VerifySimpleType if messages can be verified by predefined methods.
1051 VerifySimpleType ShouldVerifySimple(const Descriptor* descriptor);
1052
// NOTE(review): judging by the name, reports whether `field` is a string
// field subject to UTF-8 checking; defined elsewhere -- confirm.
bool IsUtf8String(const FieldDescriptor* field);

// NOTE(review): judging by the name, reports whether `desc` has a message
// typed field or an extension; defined elsewhere -- confirm.
bool HasMessageFieldOrExtension(const Descriptor* desc);
1056
1057 } // namespace cpp
1058 } // namespace compiler
1059 } // namespace protobuf
1060 } // namespace google
1061
1062 #include <google/protobuf/port_undef.inc>
1063
1064 #endif // GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
1065