1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #ifndef GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
36 #define GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
37
38 #include <algorithm>
39 #include <cstdint>
40 #include <iterator>
41 #include <map>
42 #include <string>
43
44 #include <google/protobuf/compiler/scc.h>
45 #include <google/protobuf/compiler/code_generator.h>
46 #include <google/protobuf/compiler/cpp/cpp_names.h>
47 #include <google/protobuf/compiler/cpp/cpp_options.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/printer.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/port.h>
52 #include <google/protobuf/stubs/strutil.h>
53
54 // Must be included last.
55 #include <google/protobuf/port_def.inc>
56
57 namespace google {
58 namespace protobuf {
59 namespace compiler {
60 namespace cpp {
61
// Three-level setting describing how strongly an arena destructor is needed.
// The numeric values are pinned explicitly.
// NOTE(review): the precise meaning of each level is defined by its users,
// which are not visible here.
enum class ArenaDtorNeeds {
  kNone = 0,
  kOnDemand = 1,
  kRequired = 2,
};
63
// Returns the fixed string "PROTOBUF_NAMESPACE_ID". The options argument is
// accepted but not consulted.
inline std::string ProtobufNamespace(const Options& /* options */) {
  return std::string("PROTOBUF_NAMESPACE_ID");
}
67
// Returns the fixed string "GOOGLE_PROTOBUF". The options argument is
// accepted but not consulted.
inline std::string MacroPrefix(const Options& /* options */) {
  return std::string("GOOGLE_PROTOBUF");
}
71
DeprecatedAttribute(const Options &,const FieldDescriptor * d)72 inline std::string DeprecatedAttribute(const Options& /* options */,
73 const FieldDescriptor* d) {
74 return d->options().deprecated() ? "PROTOBUF_DEPRECATED " : "";
75 }
76
DeprecatedAttribute(const Options &,const EnumValueDescriptor * d)77 inline std::string DeprecatedAttribute(const Options& /* options */,
78 const EnumValueDescriptor* d) {
79 return d->options().deprecated() ? "PROTOBUF_DEPRECATED_ENUM " : "";
80 }
81
82 // Commonly-used separator comments. Thick is a line of '=', thin is a line
83 // of '-'.
84 extern const char kThickSeparator[];
85 extern const char kThinSeparator[];
86
87 void SetCommonVars(const Options& options,
88 std::map<std::string, std::string>* variables);
89
90 // Variables to access message data from the message scope.
91 void SetCommonMessageDataVariables(
92 std::map<std::string, std::string>* variables);
93
94 void SetUnknownFieldsVariable(const Descriptor* descriptor,
95 const Options& options,
96 std::map<std::string, std::string>* variables);
97
98 bool GetBootstrapBasename(const Options& options, const std::string& basename,
99 std::string* bootstrap_basename);
100 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
101 bool bootstrap_flag, std::string* basename);
102 bool IsBootstrapProto(const Options& options, const FileDescriptor* file);
103
// Namespace of the proto file. The returned string is such that
// "<namespace>::some_name" is the correct fully qualified name.
// This means that if the package is empty the namespace is "", and otherwise
// the namespace is "::foo::bar::...::baz" without trailing colons.
108 std::string Namespace(const FileDescriptor* d, const Options& options);
109 std::string Namespace(const Descriptor* d, const Options& options);
110 std::string Namespace(const FieldDescriptor* d, const Options& options);
111 std::string Namespace(const EnumDescriptor* d, const Options& options);
112
113 // Returns true if it's safe to reset "field" to zero.
114 bool CanInitializeByZeroing(const FieldDescriptor* field);
115
116 std::string ClassName(const Descriptor* descriptor);
117 std::string ClassName(const EnumDescriptor* enum_descriptor);
118
119 std::string QualifiedClassName(const Descriptor* d, const Options& options);
120 std::string QualifiedClassName(const EnumDescriptor* d, const Options& options);
121
122 std::string QualifiedClassName(const Descriptor* d);
123 std::string QualifiedClassName(const EnumDescriptor* d);
124
125 // DEPRECATED just use ClassName or QualifiedClassName, a boolean is very
126 // unreadable at the callsite.
127 // Returns the non-nested type name for the given type. If "qualified" is
128 // true, prefix the type with the full namespace. For example, if you had:
129 // package foo.bar;
130 // message Baz { message Qux {} }
131 // Then the qualified ClassName for Qux would be:
132 // ::foo::bar::Baz_Qux
133 // While the non-qualified version would be:
134 // Baz_Qux
ClassName(const Descriptor * descriptor,bool qualified)135 inline std::string ClassName(const Descriptor* descriptor, bool qualified) {
136 return qualified ? QualifiedClassName(descriptor, Options())
137 : ClassName(descriptor);
138 }
139
ClassName(const EnumDescriptor * descriptor,bool qualified)140 inline std::string ClassName(const EnumDescriptor* descriptor, bool qualified) {
141 return qualified ? QualifiedClassName(descriptor, Options())
142 : ClassName(descriptor);
143 }
144
145 // Returns the extension name prefixed with the class name if nested but without
146 // the package name.
147 std::string ExtensionName(const FieldDescriptor* d);
148
149 std::string QualifiedExtensionName(const FieldDescriptor* d,
150 const Options& options);
151 std::string QualifiedExtensionName(const FieldDescriptor* d);
152
153 // Type name of default instance.
154 std::string DefaultInstanceType(const Descriptor* descriptor,
155 const Options& options);
156
157 // Non-qualified name of the default_instance of this message.
158 std::string DefaultInstanceName(const Descriptor* descriptor,
159 const Options& options);
160
161 // Non-qualified name of the default instance pointer. This is used only for
162 // implicit weak fields, where we need an extra indirection.
163 std::string DefaultInstancePtr(const Descriptor* descriptor,
164 const Options& options);
165
166 // Fully qualified name of the default_instance of this message.
167 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
168 const Options& options);
169
170 // Fully qualified name of the default instance pointer.
171 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
172 const Options& options);
173
174 // DescriptorTable variable name.
175 std::string DescriptorTableName(const FileDescriptor* file,
176 const Options& options);
177
178 // When declaring symbol externs from another file, this macro will supply the
179 // dllexport needed for the target file, if any.
180 std::string FileDllExport(const FileDescriptor* file, const Options& options);
181
182 // Name of the base class: google::protobuf::Message or google::protobuf::MessageLite.
183 std::string SuperClassName(const Descriptor* descriptor,
184 const Options& options);
185
186 // Adds an underscore if necessary to prevent conflicting with a keyword.
187 std::string ResolveKeyword(const std::string& name);
188
189 // Get the (unqualified) name that should be used for this field in C++ code.
190 // The name is coerced to lower-case to emulate proto1 behavior. People
191 // should be using lowercase-with-underscores style for proto field names
192 // anyway, so normally this just returns field->name().
193 std::string FieldName(const FieldDescriptor* field);
194
195 // Returns the (unqualified) private member name for this field in C++ code.
196 std::string FieldMemberName(const FieldDescriptor* field);
197
198 // Returns an estimate of the compiler's alignment for the field. This
199 // can't guarantee to be correct because the generated code could be compiled on
200 // different systems with different alignment rules. The estimates below assume
201 // 64-bit pointers.
202 int EstimateAlignmentSize(const FieldDescriptor* field);
203
204 // Get the unqualified name that should be used for a field's field
205 // number constant.
206 std::string FieldConstantName(const FieldDescriptor* field);
207
208 // Returns the scope where the field was defined (for extensions, this is
209 // different from the message type to which the field applies).
FieldScope(const FieldDescriptor * field)210 inline const Descriptor* FieldScope(const FieldDescriptor* field) {
211 return field->is_extension() ? field->extension_scope()
212 : field->containing_type();
213 }
214
215 // Returns the fully-qualified type name field->message_type(). Usually this
216 // is just ClassName(field->message_type(), true);
217 std::string FieldMessageTypeName(const FieldDescriptor* field,
218 const Options& options);
219
220 // Get the C++ type name for a primitive type (e.g. "double", "::google::protobuf::int32", etc.).
221 const char* PrimitiveTypeName(FieldDescriptor::CppType type);
222 std::string PrimitiveTypeName(const Options& options,
223 FieldDescriptor::CppType type);
224
225 // Get the declared type name in CamelCase format, as is used e.g. for the
226 // methods of WireFormat. For example, TYPE_INT32 becomes "Int32".
227 const char* DeclaredTypeMethodName(FieldDescriptor::Type type);
228
229 // Return the code that evaluates to the number when compiled.
230 std::string Int32ToString(int number);
231
232 // Get code that evaluates to the field's default value.
233 std::string DefaultValue(const Options& options, const FieldDescriptor* field);
234
235 // Compatibility function for callers outside proto2.
236 std::string DefaultValue(const FieldDescriptor* field);
237
238 // Convert a file name into a valid identifier.
239 std::string FilenameIdentifier(const std::string& filename);
240
// Generates a name that is unique to the given .proto file, to prevent
// collisions between symbols in the global namespace.
243 std::string UniqueName(const std::string& name, const std::string& filename,
244 const Options& options);
UniqueName(const std::string & name,const FileDescriptor * d,const Options & options)245 inline std::string UniqueName(const std::string& name, const FileDescriptor* d,
246 const Options& options) {
247 return UniqueName(name, d->name(), options);
248 }
UniqueName(const std::string & name,const Descriptor * d,const Options & options)249 inline std::string UniqueName(const std::string& name, const Descriptor* d,
250 const Options& options) {
251 return UniqueName(name, d->file(), options);
252 }
UniqueName(const std::string & name,const EnumDescriptor * d,const Options & options)253 inline std::string UniqueName(const std::string& name, const EnumDescriptor* d,
254 const Options& options) {
255 return UniqueName(name, d->file(), options);
256 }
UniqueName(const std::string & name,const ServiceDescriptor * d,const Options & options)257 inline std::string UniqueName(const std::string& name,
258 const ServiceDescriptor* d,
259 const Options& options) {
260 return UniqueName(name, d->file(), options);
261 }
262
263 // Versions for call sites that only support the internal runtime (like proto1
264 // support).
InternalRuntimeOptions()265 inline Options InternalRuntimeOptions() {
266 Options options;
267 options.opensource_runtime = false;
268 return options;
269 }
UniqueName(const std::string & name,const std::string & filename)270 inline std::string UniqueName(const std::string& name,
271 const std::string& filename) {
272 return UniqueName(name, filename, InternalRuntimeOptions());
273 }
UniqueName(const std::string & name,const FileDescriptor * d)274 inline std::string UniqueName(const std::string& name,
275 const FileDescriptor* d) {
276 return UniqueName(name, d->name(), InternalRuntimeOptions());
277 }
UniqueName(const std::string & name,const Descriptor * d)278 inline std::string UniqueName(const std::string& name, const Descriptor* d) {
279 return UniqueName(name, d->file(), InternalRuntimeOptions());
280 }
UniqueName(const std::string & name,const EnumDescriptor * d)281 inline std::string UniqueName(const std::string& name,
282 const EnumDescriptor* d) {
283 return UniqueName(name, d->file(), InternalRuntimeOptions());
284 }
UniqueName(const std::string & name,const ServiceDescriptor * d)285 inline std::string UniqueName(const std::string& name,
286 const ServiceDescriptor* d) {
287 return UniqueName(name, d->file(), InternalRuntimeOptions());
288 }
289
290 // Return the qualified C++ name for a file level symbol.
291 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
292 const std::string& name,
293 const Options& options);
294
295 // Escape C++ trigraphs by escaping question marks to \?
296 std::string EscapeTrigraphs(const std::string& to_escape);
297
298 // Escaped function name to eliminate naming conflict.
299 std::string SafeFunctionName(const Descriptor* descriptor,
300 const FieldDescriptor* field,
301 const std::string& prefix);
302
303 // Returns true if generated messages have public unknown fields accessors
PublicUnknownFieldsAccessors(const Descriptor * message)304 inline bool PublicUnknownFieldsAccessors(const Descriptor* message) {
305 return message->file()->syntax() != FileDescriptor::SYNTAX_PROTO3;
306 }
307
308 // Returns the optimize mode for <file>, respecting <options.enforce_lite>.
309 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
310 const Options& options);
311
312 // Determines whether unknown fields will be stored in an UnknownFieldSet or
313 // a string.
UseUnknownFieldSet(const FileDescriptor * file,const Options & options)314 inline bool UseUnknownFieldSet(const FileDescriptor* file,
315 const Options& options) {
316 return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
317 }
318
IsWeak(const FieldDescriptor * field,const Options & options)319 inline bool IsWeak(const FieldDescriptor* field, const Options& options) {
320 if (field->options().weak()) {
321 GOOGLE_CHECK(!options.opensource_runtime);
322 return true;
323 }
324 return false;
325 }
326
327 bool IsStringInlined(const FieldDescriptor* descriptor, const Options& options);
328
329 // For a string field, returns the effective ctype. If the actual ctype is
330 // not supported, returns the default of STRING.
331 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
332 const Options& options);
333
IsCord(const FieldDescriptor * field,const Options & options)334 inline bool IsCord(const FieldDescriptor* field, const Options& options) {
335 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
336 EffectiveStringCType(field, options) == FieldOptions::CORD;
337 }
338
IsString(const FieldDescriptor * field,const Options & options)339 inline bool IsString(const FieldDescriptor* field, const Options& options) {
340 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
341 EffectiveStringCType(field, options) == FieldOptions::STRING;
342 }
343
IsStringPiece(const FieldDescriptor * field,const Options & options)344 inline bool IsStringPiece(const FieldDescriptor* field,
345 const Options& options) {
346 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
347 EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
348 }
349
350 class MessageSCCAnalyzer;
351
352 // Does the given FileDescriptor use lazy fields?
353 bool HasLazyFields(const FileDescriptor* file, const Options& options,
354 MessageSCCAnalyzer* scc_analyzer);
355
356 // Is the given field a supported lazy field?
357 bool IsLazy(const FieldDescriptor* field, const Options& options,
358 MessageSCCAnalyzer* scc_analyzer);
359
360 // Is this an explicit (non-profile driven) lazy field, as denoted by
361 // lazy/unverified_lazy in the descriptor?
IsExplicitLazy(const FieldDescriptor * field)362 inline bool IsExplicitLazy(const FieldDescriptor* field) {
363 return field->options().lazy() || field->options().unverified_lazy();
364 }
365
IsLazilyVerifiedLazy(const FieldDescriptor * field,const Options & options)366 inline bool IsLazilyVerifiedLazy(const FieldDescriptor* field,
367 const Options& options) {
368 // TODO(b/211906113): Make lazy() imply eagerly verified lazy.
369 return IsExplicitLazy(field) && !field->is_repeated() &&
370 field->type() == FieldDescriptor::TYPE_MESSAGE &&
371 GetOptimizeFor(field->file(), options) != FileOptions::LITE_RUNTIME &&
372 !options.opensource_runtime;
373 }
374
IsEagerlyVerifiedLazy(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)375 inline bool IsEagerlyVerifiedLazy(const FieldDescriptor* field,
376 const Options& options,
377 MessageSCCAnalyzer* scc_analyzer) {
378 // TODO(b/211906113): Make lazy() imply eagerly verified lazy.
379 return IsLazy(field, options, scc_analyzer) && !IsExplicitLazy(field);
380 }
381
// Always returns true; neither argument is consulted.
inline bool IsFieldUsed(const FieldDescriptor* /* field */,
                        const Options& /* options */) {
  constexpr bool kUsed = true;
  return kUsed;
}
386
387 // Returns true if "field" is stripped.
IsFieldStripped(const FieldDescriptor *,const Options &)388 inline bool IsFieldStripped(const FieldDescriptor* /*field*/,
389 const Options& /*options*/) {
390 return false;
391 }
392
393 // Does the file contain any definitions that need extension_set.h?
394 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file);
395
396 // Does the file have any repeated fields, necessitating the file to include
397 // repeated_field.h? This does not include repeated extensions, since those are
398 // all stored internally in an ExtensionSet, not a separate RepeatedField*.
399 bool HasRepeatedFields(const FileDescriptor* file);
400
401 // Does the file have any string/bytes fields with ctype=STRING_PIECE? This
402 // does not include extensions, since ctype is ignored for extensions.
403 bool HasStringPieceFields(const FileDescriptor* file, const Options& options);
404
405 // Does the file have any string/bytes fields with ctype=CORD? This does not
406 // include extensions, since ctype is ignored for extensions.
407 bool HasCordFields(const FileDescriptor* file, const Options& options);
408
409 // Does the file have any map fields, necessitating the file to include
410 // map_field_inl.h and map.h.
411 bool HasMapFields(const FileDescriptor* file);
412
413 // Does this file have any enum type definitions?
414 bool HasEnumDefinitions(const FileDescriptor* file);
415
416 // Does this file have generated parsing, serialization, and other
417 // standard methods for which reflection-based fallback implementations exist?
HasGeneratedMethods(const FileDescriptor * file,const Options & options)418 inline bool HasGeneratedMethods(const FileDescriptor* file,
419 const Options& options) {
420 return GetOptimizeFor(file, options) != FileOptions::CODE_SIZE;
421 }
422
423 // Do message classes in this file have descriptor and reflection methods?
HasDescriptorMethods(const FileDescriptor * file,const Options & options)424 inline bool HasDescriptorMethods(const FileDescriptor* file,
425 const Options& options) {
426 return GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME;
427 }
428
429 // Should we generate generic services for this file?
HasGenericServices(const FileDescriptor * file,const Options & options)430 inline bool HasGenericServices(const FileDescriptor* file,
431 const Options& options) {
432 return file->service_count() > 0 &&
433 GetOptimizeFor(file, options) != FileOptions::LITE_RUNTIME &&
434 file->options().cc_generic_services();
435 }
436
IsProto2MessageSet(const Descriptor * descriptor,const Options & options)437 inline bool IsProto2MessageSet(const Descriptor* descriptor,
438 const Options& options) {
439 return !options.opensource_runtime &&
440 options.enforce_mode != EnforceOptimizeMode::kLiteRuntime &&
441 !options.lite_implicit_weak_fields &&
442 descriptor->options().message_set_wire_format() &&
443 descriptor->full_name() == "google.protobuf.bridge.MessageSet";
444 }
445
IsMapEntryMessage(const Descriptor * descriptor)446 inline bool IsMapEntryMessage(const Descriptor* descriptor) {
447 return descriptor->options().map_entry();
448 }
449
450 // Returns true if the field's CPPTYPE is string or message.
451 bool IsStringOrMessage(const FieldDescriptor* field);
452
453 std::string UnderscoresToCamelCase(const std::string& input,
454 bool cap_next_letter);
455
IsProto3(const FileDescriptor * file)456 inline bool IsProto3(const FileDescriptor* file) {
457 return file->syntax() == FileDescriptor::SYNTAX_PROTO3;
458 }
459
HasHasbit(const FieldDescriptor * field)460 inline bool HasHasbit(const FieldDescriptor* field) {
461 // This predicate includes proto3 message fields only if they have "optional".
462 // Foo submsg1 = 1; // HasHasbit() == false
463 // optional Foo submsg2 = 2; // HasHasbit() == true
464 // This is slightly odd, as adding "optional" to a singular proto3 field does
465 // not change the semantics or API. However whenever any field in a message
466 // has a hasbit, it forces reflection to include hasbit offsets for *all*
467 // fields, even if almost all of them are set to -1 (no hasbit). So to avoid
468 // causing a sudden size regression for ~all proto3 messages, we give proto3
469 // message fields a hasbit only if "optional" is present. If the user is
470 // explicitly writing "optional", it is likely they are writing it on
471 // primitive fields also.
472 return (field->has_optional_keyword() || field->is_required()) &&
473 !field->options().weak();
474 }
475
476 // Returns true if 'enum' semantics are such that unknown values are preserved
477 // in the enum field itself, rather than going to the UnknownFieldSet.
HasPreservingUnknownEnumSemantics(const FieldDescriptor * field)478 inline bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) {
479 return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
480 }
481
IsCrossFileMessage(const FieldDescriptor * field)482 inline bool IsCrossFileMessage(const FieldDescriptor* field) {
483 return field->type() == FieldDescriptor::TYPE_MESSAGE &&
484 field->message_type()->file() != field->file();
485 }
486
MakeDefaultName(const FieldDescriptor * field)487 inline std::string MakeDefaultName(const FieldDescriptor* field) {
488 return "_i_give_permission_to_break_this_code_default_" + FieldName(field) +
489 "_";
490 }
491
492 // Semantically distinct from MakeDefaultName in that it gives the C++ code
493 // referencing a default field from the message scope, rather than just the
494 // variable name.
495 // For example, declarations of default variables should always use just
496 // MakeDefaultName to produce code like:
497 // Type _i_give_permission_to_break_this_code_default_field_;
498 //
499 // Code that references these should use MakeDefaultFieldName, in case the field
500 // exists at some nested level like:
501 // internal_container_._i_give_permission_to_break_this_code_default_field_;
MakeDefaultFieldName(const FieldDescriptor * field)502 inline std::string MakeDefaultFieldName(const FieldDescriptor* field) {
503 return MakeDefaultName(field);
504 }
505
MakeVarintCachedSizeName(const FieldDescriptor * field)506 inline std::string MakeVarintCachedSizeName(const FieldDescriptor* field) {
507 return StrCat("_", FieldName(field), "_cached_byte_size_");
508 }
509
510 // Semantically distinct from MakeVarintCachedSizeName in that it gives the C++
511 // code referencing the object from the message scope, rather than just the
512 // variable name.
513 // For example, declarations of default variables should always use just
514 // MakeVarintCachedSizeName to produce code like:
515 // Type _field_cached_byte_size_;
516 //
517 // Code that references these variables should use
518 // MakeVarintCachedSizeFieldName, in case the field exists at some nested level
519 // like:
520 // internal_container_._field_cached_byte_size_;
MakeVarintCachedSizeFieldName(const FieldDescriptor * field)521 inline std::string MakeVarintCachedSizeFieldName(const FieldDescriptor* field) {
522 return StrCat("_", FieldName(field), "_cached_byte_size_");
523 }
524
525 // Note: A lot of libraries detect Any protos based on Descriptor::full_name()
526 // while the two functions below use FileDescriptor::name(). In a sane world the
527 // two approaches should be equivalent. But if you are dealing with descriptors
528 // from untrusted sources, you might need to match semantics across libraries.
529 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options);
530 bool IsAnyMessage(const Descriptor* descriptor, const Options& options);
531
532 bool IsWellKnownMessage(const FileDescriptor* descriptor);
533
IncludeGuard(const FileDescriptor * file,bool pb_h,const Options & options)534 inline std::string IncludeGuard(const FileDescriptor* file, bool pb_h,
535 const Options& options) {
536 // If we are generating a .pb.h file and the proto_h option is enabled, then
537 // the .pb.h gets an extra suffix.
538 std::string filename_identifier = FilenameIdentifier(
539 file->name() + (pb_h && options.proto_h ? ".pb.h" : ""));
540
541 if (IsWellKnownMessage(file)) {
542 // For well-known messages we need third_party/protobuf and net/proto2 to
543 // have distinct include guards, because some source files include both and
544 // both need to be defined (the third_party copies will be in the
545 // google::protobuf_opensource namespace).
546 return MacroPrefix(options) + "_INCLUDED_" + filename_identifier;
547 } else {
548 // Ideally this case would use distinct include guards for opensource and
549 // google3 protos also. (The behavior of "first #included wins" is not
550 // ideal). But unfortunately some legacy code includes both and depends on
551 // the identical include guards to avoid compile errors.
552 //
553 // We should clean this up so that this case can be removed.
554 return "GOOGLE_PROTOBUF_INCLUDED_" + filename_identifier;
555 }
556 }
557
558 // Returns the OptimizeMode for this file, furthermore it updates a status
559 // bool if has_opt_codesize_extension is non-null. If this status bool is true
560 // it means this file contains an extension that itself is defined as
561 // optimized_for = CODE_SIZE.
562 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
563 const Options& options,
564 bool* has_opt_codesize_extension);
GetOptimizeFor(const FileDescriptor * file,const Options & options)565 inline FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
566 const Options& options) {
567 return GetOptimizeFor(file, options, nullptr);
568 }
NeedsEagerDescriptorAssignment(const FileDescriptor * file,const Options & options)569 inline bool NeedsEagerDescriptorAssignment(const FileDescriptor* file,
570 const Options& options) {
571 bool has_opt_codesize_extension;
572 if (GetOptimizeFor(file, options, &has_opt_codesize_extension) ==
573 FileOptions::CODE_SIZE &&
574 has_opt_codesize_extension) {
575 // If this filedescriptor contains an extension from another file which
576 // is optimized_for = CODE_SIZE. We need to be careful in the ordering so
577 // we eagerly build the descriptors in the dependencies before building
578 // the descriptors of this file.
579 return true;
580 } else {
581 // If we have a generated code based parser we never need eager
582 // initialization of descriptors of our deps.
583 return false;
584 }
585 }
586
587 // This orders the messages in a .pb.cc as it's outputted by file.cc
588 void FlattenMessagesInFile(const FileDescriptor* file,
589 std::vector<const Descriptor*>* result);
FlattenMessagesInFile(const FileDescriptor * file)590 inline std::vector<const Descriptor*> FlattenMessagesInFile(
591 const FileDescriptor* file) {
592 std::vector<const Descriptor*> result;
593 FlattenMessagesInFile(file, &result);
594 return result;
595 }
596
597 template <typename F>
ForEachMessage(const Descriptor * descriptor,F && func)598 void ForEachMessage(const Descriptor* descriptor, F&& func) {
599 for (int i = 0; i < descriptor->nested_type_count(); i++)
600 ForEachMessage(descriptor->nested_type(i), std::forward<F&&>(func));
601 func(descriptor);
602 }
603
604 template <typename F>
ForEachMessage(const FileDescriptor * descriptor,F && func)605 void ForEachMessage(const FileDescriptor* descriptor, F&& func) {
606 for (int i = 0; i < descriptor->message_type_count(); i++)
607 ForEachMessage(descriptor->message_type(i), std::forward<F&&>(func));
608 }
609
610 bool HasWeakFields(const Descriptor* desc, const Options& options);
611 bool HasWeakFields(const FileDescriptor* desc, const Options& options);
612
613 // Returns true if the "required" restriction check should be ignored for the
614 // given field.
ShouldIgnoreRequiredFieldCheck(const FieldDescriptor * field,const Options & options)615 inline static bool ShouldIgnoreRequiredFieldCheck(const FieldDescriptor* field,
616 const Options& options) {
617 // Do not check "required" for lazily verified lazy fields.
618 return IsLazilyVerifiedLazy(field, options);
619 }
620
// Result flags of analyzing a group of messages. Every flag defaults to
// false.
struct MessageAnalysis {
  bool is_recursive{false};
  bool contains_cord{false};
  bool contains_extension{false};
  bool contains_required{false};
  bool contains_weak{false};  // Implicit weak as well.
};
628
629 // This class is used in FileGenerator, to ensure linear instead of
630 // quadratic performance, if we do this per message we would get O(V*(V+E)).
631 // Logically this is just only used in message.cc, but in the header for
632 // FileGenerator to help share it.
633 class PROTOC_EXPORT MessageSCCAnalyzer {
634 public:
MessageSCCAnalyzer(const Options & options)635 explicit MessageSCCAnalyzer(const Options& options) : options_(options) {}
636
637 MessageAnalysis GetSCCAnalysis(const SCC* scc);
638
HasRequiredFields(const Descriptor * descriptor)639 bool HasRequiredFields(const Descriptor* descriptor) {
640 MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor));
641 return result.contains_required || result.contains_extension;
642 }
HasWeakField(const Descriptor * descriptor)643 bool HasWeakField(const Descriptor* descriptor) {
644 MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor));
645 return result.contains_weak;
646 }
GetSCC(const Descriptor * descriptor)647 const SCC* GetSCC(const Descriptor* descriptor) {
648 return analyzer_.GetSCC(descriptor);
649 }
650
651 private:
652 struct DepsGenerator {
operatorDepsGenerator653 std::vector<const Descriptor*> operator()(const Descriptor* desc) const {
654 std::vector<const Descriptor*> deps;
655 for (int i = 0; i < desc->field_count(); i++) {
656 if (desc->field(i)->message_type()) {
657 deps.push_back(desc->field(i)->message_type());
658 }
659 }
660 return deps;
661 }
662 };
663 SCCAnalyzer<DepsGenerator> analyzer_;
664 Options options_;
665 std::map<const SCC*, MessageAnalysis> analysis_cache_;
666 };
667
// Declarations only; both overloads are defined out of line.
// NOTE(review): presumably these collect into `*fields` every field reachable
// from the message (resp. file) `d` — confirm against the definitions.
void ListAllFields(const Descriptor* d,
                   std::vector<const FieldDescriptor*>* fields);
void ListAllFields(const FileDescriptor* d,
                   std::vector<const FieldDescriptor*>* fields);
672
673 template <class T>
ForEachField(const Descriptor * d,T && func)674 void ForEachField(const Descriptor* d, T&& func) {
675 for (int i = 0; i < d->nested_type_count(); i++) {
676 ForEachField(d->nested_type(i), std::forward<T&&>(func));
677 }
678 for (int i = 0; i < d->extension_count(); i++) {
679 func(d->extension(i));
680 }
681 for (int i = 0; i < d->field_count(); i++) {
682 func(d->field(i));
683 }
684 }
685
686 template <class T>
ForEachField(const FileDescriptor * d,T && func)687 void ForEachField(const FileDescriptor* d, T&& func) {
688 for (int i = 0; i < d->message_type_count(); i++) {
689 ForEachField(d->message_type(i), std::forward<T&&>(func));
690 }
691 for (int i = 0; i < d->extension_count(); i++) {
692 func(d->extension(i));
693 }
694 }
695
// Declaration only; defined out of line.
// NOTE(review): presumably collects into `*types` the message types used by
// the services declared in `fd` — confirm against the definition.
void ListAllTypesForServices(const FileDescriptor* fd,
                             std::vector<const Descriptor*>* types);
698
699 // Indicates whether we should use implicit weak fields for this file.
700 bool UsingImplicitWeakFields(const FileDescriptor* file,
701 const Options& options);
702
703 // Indicates whether to treat this field as implicitly weak.
704 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
705 MessageSCCAnalyzer* scc_analyzer);
706
HasSimpleBaseClass(const Descriptor * desc,const Options & options)707 inline bool HasSimpleBaseClass(const Descriptor* desc, const Options& options) {
708 if (!HasDescriptorMethods(desc->file(), options)) return false;
709 if (desc->extension_range_count() != 0) return false;
710 if (desc->field_count() == 0) return true;
711 // TODO(jorg): Support additional common message types with only one
712 // or two fields
713 return false;
714 }
715
HasSimpleBaseClasses(const FileDescriptor * file,const Options & options)716 inline bool HasSimpleBaseClasses(const FileDescriptor* file,
717 const Options& options) {
718 bool v = false;
719 ForEachMessage(file, [&v, &options](const Descriptor* desc) {
720 v |= HasSimpleBaseClass(desc, options);
721 });
722 return v;
723 }
724
SimpleBaseClass(const Descriptor * desc,const Options & options)725 inline std::string SimpleBaseClass(const Descriptor* desc,
726 const Options& options) {
727 if (!HasDescriptorMethods(desc->file(), options)) return "";
728 if (desc->extension_range_count() != 0) return "";
729 if (desc->field_count() == 0) {
730 return "ZeroFieldsBase";
731 }
732 // TODO(jorg): Support additional common message types with only one
733 // or two fields
734 return "";
735 }
736
737 // Formatter is a functor class which acts as a closure around printer and
738 // the variable map. It's much like printer->Print except it supports both named
739 // variables that are substituted using a key value map and direct arguments. In
740 // the format string $1$, $2$, etc... are substituted for the first, second, ...
741 // direct argument respectively in the format call, it accepts both strings and
742 // integers. The implementation verifies all arguments are used and are "first"
743 // used in order of appearance in the argument list. For example,
744 //
745 // Format("return array[$1$];", 3) -> "return array[3];"
746 // Format("array[$2$] = $1$;", "Bla", 3) -> FATAL error (wrong order)
747 // Format("array[$1$] = $2$;", 3, "Bla") -> "array[3] = Bla;"
748 //
749 // The arguments can be used more than once like
750 //
751 // Format("array[$1$] = $2$; // Index = $1$", 3, "Bla") ->
752 // "array[3] = Bla; // Index = 3"
753 //
754 // If you use more arguments use the following style to help the reader,
755 //
756 // Format("int $1$() {\n"
757 // " array[$2$] = $3$;\n"
758 // " return $4$;"
759 // "}\n",
760 // funname, // 1
761 // idx, // 2
762 // varname, // 3
763 // retval); // 4
764 //
765 // but consider using named variables. Named variables like $foo$, with some
766 // identifier foo, are looked up in the map. One additional feature is that
767 // spaces are accepted between the '$' delimiters, $ foo$ will
768 // substiture to " bar" if foo stands for "bar", but in case it's empty
769 // will substitute to "". Hence, for example,
770 //
771 // Format(vars, "$dllexport $void fun();") -> "void fun();"
772 // "__declspec(export) void fun();"
773 //
774 // which is convenient to prevent double, leading or trailing spaces.
775 class PROTOC_EXPORT Formatter {
776 public:
Formatter(io::Printer * printer)777 explicit Formatter(io::Printer* printer) : printer_(printer) {}
Formatter(io::Printer * printer,const std::map<std::string,std::string> & vars)778 Formatter(io::Printer* printer,
779 const std::map<std::string, std::string>& vars)
780 : printer_(printer), vars_(vars) {}
781
782 template <typename T>
Set(const std::string & key,const T & value)783 void Set(const std::string& key, const T& value) {
784 vars_[key] = ToString(value);
785 }
786
AddMap(const std::map<std::string,std::string> & vars)787 void AddMap(const std::map<std::string, std::string>& vars) {
788 for (const auto& keyval : vars) vars_[keyval.first] = keyval.second;
789 }
790
791 template <typename... Args>
operator()792 void operator()(const char* format, const Args&... args) const {
793 printer_->FormatInternal({ToString(args)...}, vars_, format);
794 }
795
Indent()796 void Indent() const { printer_->Indent(); }
Outdent()797 void Outdent() const { printer_->Outdent(); }
printer()798 io::Printer* printer() const { return printer_; }
799
800 class PROTOC_EXPORT ScopedIndenter {
801 public:
ScopedIndenter(Formatter * format)802 explicit ScopedIndenter(Formatter* format) : format_(format) {
803 format_->Indent();
804 }
~ScopedIndenter()805 ~ScopedIndenter() { format_->Outdent(); }
806
807 private:
808 Formatter* format_;
809 };
810
ScopedIndent()811 PROTOBUF_NODISCARD ScopedIndenter ScopedIndent() {
812 return ScopedIndenter(this);
813 }
814 template <typename... Args>
ScopedIndent(const char * format,const Args &&...args)815 PROTOBUF_NODISCARD ScopedIndenter ScopedIndent(const char* format,
816 const Args&&... args) {
817 (*this)(format, static_cast<Args&&>(args)...);
818 return ScopedIndenter(this);
819 }
820
821 class PROTOC_EXPORT SaveState {
822 public:
SaveState(Formatter * format)823 explicit SaveState(Formatter* format)
824 : format_(format), vars_(format->vars_) {}
~SaveState()825 ~SaveState() { format_->vars_.swap(vars_); }
826
827 private:
828 Formatter* format_;
829 std::map<std::string, std::string> vars_;
830 };
831
832 private:
833 io::Printer* printer_;
834 std::map<std::string, std::string> vars_;
835
836 // Convenience overloads to accept different types as arguments.
ToString(const std::string & s)837 static std::string ToString(const std::string& s) { return s; }
838 template <typename I, typename = typename std::enable_if<
839 std::is_integral<I>::value>::type>
ToString(I x)840 static std::string ToString(I x) {
841 return StrCat(x);
842 }
ToString(strings::Hex x)843 static std::string ToString(strings::Hex x) { return StrCat(x); }
ToString(const FieldDescriptor * d)844 static std::string ToString(const FieldDescriptor* d) { return Payload(d); }
ToString(const Descriptor * d)845 static std::string ToString(const Descriptor* d) { return Payload(d); }
ToString(const EnumDescriptor * d)846 static std::string ToString(const EnumDescriptor* d) { return Payload(d); }
ToString(const EnumValueDescriptor * d)847 static std::string ToString(const EnumValueDescriptor* d) {
848 return Payload(d);
849 }
ToString(const OneofDescriptor * d)850 static std::string ToString(const OneofDescriptor* d) { return Payload(d); }
851
852 template <typename Descriptor>
Payload(const Descriptor * descriptor)853 static std::string Payload(const Descriptor* descriptor) {
854 std::vector<int> path;
855 descriptor->GetLocationPath(&path);
856 GeneratedCodeInfo::Annotation annotation;
857 for (int index : path) {
858 annotation.add_path(index);
859 }
860 annotation.set_source_file(descriptor->file()->name());
861 return annotation.SerializeAsString();
862 }
863 };
864
865 template <class T>
PrintFieldComment(const Formatter & format,const T * field)866 void PrintFieldComment(const Formatter& format, const T* field) {
867 // Print the field's (or oneof's) proto-syntax definition as a comment.
868 // We don't want to print group bodies so we cut off after the first
869 // line.
870 DebugStringOptions options;
871 options.elide_group_body = true;
872 options.elide_oneof_body = true;
873 std::string def = field->DebugStringWithOptions(options);
874 format("// $1$\n", def.substr(0, def.find_first_of('\n')));
875 }
876
877 class PROTOC_EXPORT NamespaceOpener {
878 public:
NamespaceOpener(const Formatter & format)879 explicit NamespaceOpener(const Formatter& format)
880 : printer_(format.printer()) {}
NamespaceOpener(const std::string & name,const Formatter & format)881 NamespaceOpener(const std::string& name, const Formatter& format)
882 : NamespaceOpener(format) {
883 ChangeTo(name);
884 }
~NamespaceOpener()885 ~NamespaceOpener() { ChangeTo(""); }
886
ChangeTo(const std::string & name)887 void ChangeTo(const std::string& name) {
888 std::vector<std::string> new_stack_ =
889 Split(name, "::", true);
890 size_t len = std::min(name_stack_.size(), new_stack_.size());
891 size_t common_idx = 0;
892 while (common_idx < len) {
893 if (name_stack_[common_idx] != new_stack_[common_idx]) break;
894 common_idx++;
895 }
896 for (auto it = name_stack_.crbegin();
897 it != name_stack_.crend() - common_idx; ++it) {
898 if (*it == "PROTOBUF_NAMESPACE_ID") {
899 printer_->Print("PROTOBUF_NAMESPACE_CLOSE\n");
900 } else {
901 printer_->Print("} // namespace $ns$\n", "ns", *it);
902 }
903 }
904 name_stack_.swap(new_stack_);
905 for (size_t i = common_idx; i < name_stack_.size(); ++i) {
906 if (name_stack_[i] == "PROTOBUF_NAMESPACE_ID") {
907 printer_->Print("PROTOBUF_NAMESPACE_OPEN\n");
908 } else {
909 printer_->Print("namespace $ns$ {\n", "ns", name_stack_[i]);
910 }
911 }
912 }
913
914 private:
915 io::Printer* printer_;
916 std::vector<std::string> name_stack_;
917 };
918
// Level of UTF-8 checking applied to string fields; see GetUtf8CheckMode().
enum class Utf8CheckMode {
  kStrict = 0,  // Parsing will fail if non UTF-8 data is in string fields.
  kVerify = 1,  // Only log an error but parsing will succeed.
  kNone = 2,  // No UTF-8 check.
};
924
// Returns a Utf8CheckMode for `field` given `options`. Declaration only;
// defined out of line.
// NOTE(review): the selection criteria are not visible here — see the
// definition.
Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
                               const Options& options);
927
// Declarations only; both functions are defined out of line.
// NOTE(review): presumably these emit, through `format`, the UTF-8 check code
// for a string (resp. Cord) field; the exact meaning of `for_parse` and
// `parameters` is not visible here — confirm against the definitions.
void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
                                    const Options& options, bool for_parse,
                                    const char* parameters,
                                    const Formatter& format);

void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
                                  const Options& options, bool for_parse,
                                  const char* parameters,
                                  const Formatter& format);
937
938 template <typename T>
939 struct FieldRangeImpl {
940 struct Iterator {
941 using iterator_category = std::forward_iterator_tag;
942 using value_type = const FieldDescriptor*;
943 using difference_type = int;
944
945 value_type operator*() { return descriptor->field(idx); }
946
947 friend bool operator==(const Iterator& a, const Iterator& b) {
948 GOOGLE_DCHECK(a.descriptor == b.descriptor);
949 return a.idx == b.idx;
950 }
951 friend bool operator!=(const Iterator& a, const Iterator& b) {
952 return !(a == b);
953 }
954
955 Iterator& operator++() {
956 idx++;
957 return *this;
958 }
959
960 int idx;
961 const T* descriptor;
962 };
963
beginFieldRangeImpl964 Iterator begin() const { return {0, descriptor}; }
endFieldRangeImpl965 Iterator end() const { return {descriptor->field_count(), descriptor}; }
966
967 const T* descriptor;
968 };
969
970 template <typename T>
FieldRange(const T * desc)971 FieldRangeImpl<T> FieldRange(const T* desc) {
972 return {desc};
973 }
974
975 struct OneOfRangeImpl {
976 struct Iterator {
977 using iterator_category = std::forward_iterator_tag;
978 using value_type = const OneofDescriptor*;
979 using difference_type = int;
980
981 value_type operator*() { return descriptor->oneof_decl(idx); }
982
983 friend bool operator==(const Iterator& a, const Iterator& b) {
984 GOOGLE_DCHECK(a.descriptor == b.descriptor);
985 return a.idx == b.idx;
986 }
987 friend bool operator!=(const Iterator& a, const Iterator& b) {
988 return !(a == b);
989 }
990
991 Iterator& operator++() {
992 idx++;
993 return *this;
994 }
995
996 int idx;
997 const Descriptor* descriptor;
998 };
999
beginOneOfRangeImpl1000 Iterator begin() const { return {0, descriptor}; }
endOneOfRangeImpl1001 Iterator end() const {
1002 return {descriptor->real_oneof_decl_count(), descriptor};
1003 }
1004
1005 const Descriptor* descriptor;
1006 };
1007
OneOfRange(const Descriptor * desc)1008 inline OneOfRangeImpl OneOfRange(const Descriptor* desc) { return {desc}; }
1009
// Declaration only; defined out of line.
// NOTE(review): presumably returns `filename` without its proto file
// extension — confirm against the definition.
PROTOC_EXPORT std::string StripProto(const std::string& filename);
1011
// Declaration only; defined out of line.
// NOTE(review): presumably reports whether message-owned arenas are enabled
// for `desc` under `options` — confirm against the definition.
bool EnableMessageOwnedArena(const Descriptor* desc, const Options& options);
1013
// Declarations only; both overloads are defined out of line.
// NOTE(review): the criteria deciding whether a message (resp. file) should be
// verified are not visible here — see the definitions.
bool ShouldVerify(const Descriptor* descriptor, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer);
bool ShouldVerify(const FileDescriptor* file, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer);
1018 } // namespace cpp
1019 } // namespace compiler
1020 } // namespace protobuf
1021 } // namespace google
1022
1023 #include <google/protobuf/port_undef.inc>
1024
1025 #endif // GOOGLE_PROTOBUF_COMPILER_CPP_HELPERS_H__
1026