• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2024 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: kenton@google.com (Kenton Varda)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 //
12 // Utility class for writing text to a ZeroCopyOutputStream.
13 
14 #ifndef GOOGLE_PROTOBUF_IO_PRINTER_H__
15 #define GOOGLE_PROTOBUF_IO_PRINTER_H__
16 
17 #include <cstddef>
18 #include <functional>
19 #include <initializer_list>
20 #include <string>
21 #include <type_traits>
22 #include <utility>
23 #include <vector>
24 
25 #include "absl/cleanup/cleanup.h"
26 #include "absl/container/flat_hash_map.h"
27 #include "absl/functional/any_invocable.h"
28 #include "absl/functional/function_ref.h"
29 #include "absl/log/absl_check.h"
30 #include "absl/meta/type_traits.h"
31 #include "absl/strings/str_cat.h"
32 #include "absl/strings/str_format.h"
33 #include "absl/strings/string_view.h"
34 #include "absl/types/optional.h"
35 #include "absl/types/variant.h"
36 #include "google/protobuf/io/zero_copy_sink.h"
37 
38 
39 // Must be included last.
40 #include "google/protobuf/port_def.inc"
41 
42 namespace google {
43 namespace protobuf {
44 namespace io {
45 
46 // Records annotations about a Printer's output.
47 class PROTOBUF_EXPORT AnnotationCollector {
48  public:
49   // Annotation is a offset range and a payload pair. This payload's layout is
50   // specific to derived types of AnnotationCollector.
51   using Annotation = std::pair<std::pair<size_t, size_t>, std::string>;
52 
53   // The semantic meaning of an annotation. This enum mirrors
54   // google.protobuf.GeneratedCodeInfo.Annotation.Semantic, and the enumerator values
55   // should match it.
56   enum Semantic {
57     kNone = 0,
58     kSet = 1,
59     kAlias = 2,
60   };
61 
62   virtual ~AnnotationCollector() = default;
63 
64   // Records that the bytes in file_path beginning with begin_offset and ending
65   // before end_offset are associated with the SourceCodeInfo-style path.
66   virtual void AddAnnotation(size_t begin_offset, size_t end_offset,
67                              const std::string& file_path,
68                              const std::vector<int>& path) = 0;
69 
AddAnnotation(size_t begin_offset,size_t end_offset,const std::string & file_path,const std::vector<int> & path,absl::optional<Semantic> semantic)70   virtual void AddAnnotation(size_t begin_offset, size_t end_offset,
71                              const std::string& file_path,
72                              const std::vector<int>& path,
73                              absl::optional<Semantic> semantic) {
74     AddAnnotation(begin_offset, end_offset, file_path, path);
75   }
76 
77   // TODO I don't see why we need virtuals here. Just a vector of
78   // range, payload pairs stored in a context should suffice.
AddAnnotationNew(Annotation &)79   virtual void AddAnnotationNew(Annotation&) {}
80 };
81 
82 // Records annotations about a Printer's output to a Protobuf message,
83 // assuming that it has a repeated submessage field named `annotation` with
84 // fields matching
85 //
86 // message ??? {
87 //   repeated int32 path = 1;
88 //   optional string source_file = 2;
89 //   optional int32 begin = 3;
90 //   optional int32 end = 4;
91 //   optional int32 semantic = 5;
92 // }
93 template <typename AnnotationProto>
94 class AnnotationProtoCollector : public AnnotationCollector {
95  private:
96   // Some users of this type use it with a proto that does not have a
97   // "semantic" field. Therefore, we need to detect it with SFINAE.
98 
99   // go/ranked-overloads
100   struct Rank0 {};
101   struct Rank1 : Rank0 {};
102 
103   template <typename Proto>
104   static auto SetSemantic(Proto* p, int semantic, Rank1)
105       -> decltype(p->set_semantic(
106           static_cast<typename Proto::Semantic>(semantic))) {
107     return p->set_semantic(static_cast<typename Proto::Semantic>(semantic));
108   }
109 
110   template <typename Proto>
SetSemantic(Proto *,int,Rank0)111   static void SetSemantic(Proto*, int, Rank0) {}
112 
113  public:
AnnotationProtoCollector(AnnotationProto * annotation_proto)114   explicit AnnotationProtoCollector(AnnotationProto* annotation_proto)
115       : annotation_proto_(annotation_proto) {}
116 
AddAnnotation(size_t begin_offset,size_t end_offset,const std::string & file_path,const std::vector<int> & path)117   void AddAnnotation(size_t begin_offset, size_t end_offset,
118                      const std::string& file_path,
119                      const std::vector<int>& path) override {
120     AddAnnotation(begin_offset, end_offset, file_path, path, absl::nullopt);
121   }
122 
AddAnnotation(size_t begin_offset,size_t end_offset,const std::string & file_path,const std::vector<int> & path,absl::optional<Semantic> semantic)123   void AddAnnotation(size_t begin_offset, size_t end_offset,
124                      const std::string& file_path, const std::vector<int>& path,
125                      absl::optional<Semantic> semantic) override {
126     auto* annotation = annotation_proto_->add_annotation();
127     for (int i = 0; i < path.size(); ++i) {
128       annotation->add_path(path[i]);
129     }
130     annotation->set_source_file(file_path);
131     annotation->set_begin(begin_offset);
132     annotation->set_end(end_offset);
133 
134     if (semantic.has_value()) {
135       SetSemantic(annotation, *semantic, Rank1{});
136     }
137   }
138 
AddAnnotationNew(Annotation & a)139   void AddAnnotationNew(Annotation& a) override {
140     auto* annotation = annotation_proto_->add_annotation();
141     annotation->ParseFromString(a.second);
142     annotation->set_begin(a.first.first);
143     annotation->set_end(a.first.second);
144   }
145 
146  private:
147   AnnotationProto* annotation_proto_;
148 };
149 
150 // A source code printer for assisting in code generation.
151 //
152 // This type implements a simple templating language for substituting variables
153 // into static, user-provided strings, and also tracks indentation
154 // automatically.
155 //
// The main entry-point for this type is the Emit function, which can be used
// as follows:
158 //
159 //   Printer p(output);
160 //   p.Emit({{"class", my_class_name}}, R"cc(
161 //     class $class$ {
162 //      public:
163 //       $class$(int x) : x_(x) {}
164 //      private:
165 //       int x_;
166 //     };
167 //   )cc");
168 //
169 // Substitutions are of the form $var$, which is looked up in the map passed in
170 // as the first argument. The variable delimiter character, $, can be chosen to
171 // be something convenient for the target language. For example, in PHP, which
172 // makes heavy use of $, it can be made into something like # instead.
173 //
174 // A literal $ can be emitted by writing $$.
175 //
176 // Substitutions may contain spaces around the name of the variable, which will
177 // be ignored for the purposes of looking up the variable to substitute in, but
178 // which will be reproduced in the output:
179 //
180 //   p.Emit({{"foo", "bar"}}, "$ foo $");
181 //
182 // emits the string " bar ". If the substituted-in variable is the empty string,
183 // then the surrounding spaces are *not* printed:
184 //
185 //   p.Emit({{"xyz", xyz}}, "$xyz $Thing");
186 //
187 // If xyz is "Foo", this will become "Foo Thing", but if it is "", this becomes
188 // "Thing", rather than " Thing". This helps minimize awkward whitespace in the
189 // output.
190 //
191 // The value may be any type that can be stringified with `absl::StrCat`:
192 //
193 //   p.Emit({{"num", 5}}, "x = $num$;");
194 //
195 // If a variable that is referenced in the format string is missing, the program
196 // will crash. Callers must statically know that every variable reference is
197 // valid, and MUST NOT pass user-provided strings directly into Emit().
198 //
199 // In practice, this means the first member of io::Printer::Sub here:
200 //
201 //   p.Emit({{"num", 5}}, "x = $num$;");
202 //            ^
203 // must always be a string literal.
204 //
205 // Substitutions can be configured to "chomp" a single character after them, to
206 // help make indentation work out. This can be configured by passing a
207 // io::Printer::Sub().WithSuffix() into Emit's substitution map:
208 //   p.Emit({io::Printer::Sub("var", var_decl).WithSuffix(";")}, R"cc(
209 //     class $class$ {
210 //      public:
211 //       $var$;
212 //     };
213 //   )cc");
214 //
215 // This will delete the ; after $var$, regardless of whether it was an empty
216 // declaration or not. It will also intelligently attempt to clean up
217 // empty lines that follow, if it was on an empty line; this promotes cleaner
218 // formatting of the output.
219 //
220 // You can configure a large set of skippable characters, but when chomping,
221 // only one character will actually be skipped at a time. For example, callback
222 // substitutions (see below) use ";," by default as their "chomping set".
223 //
224 //   p.Emit({io::Printer::Sub("var", 123).WithSuffix(";,")}, R"cc(
225 //       $var$,;
226 //   )cc");
227 //
228 // will produce "123,".
229 //
230 // # Callback Substitution
231 //
232 // Instead of passing a string into Emit(), it is possible to pass in a callback
233 // as a variable mapping. This will take indentation into account, which allows
234 // factoring out parts of a formatting string while ensuring braces are
235 // balanced:
236 //
237 //   p.Emit(
238 //     {{"methods", [&] {
239 //       p.Emit(R"cc(
240 //         int Bar() {
241 //            return 42;
242 //         }
243 //       )cc");
244 //     }}},
245 //     R"cc(
246 //       class Foo {
247 //        public:
248 //         $methods$;
249 //       };
250 //     )cc"
251 //   );
252 //
253 // This emits
254 //
255 //   class Foo {
256 //    public:
257 //     int Bar() {
258 //       return 42;
259 //     }
260 //   };
261 //
262 // # Comments
263 //
264 // It may be desirable to place comments in a raw string that are stripped out
265 // before printing. The prefix for Printer-ignored comments can be configured
266 // in Options. By default, this is `//~`.
267 //
268 //   p.Emit(R"cc(
269 //     // Will be printed in the output.
270 //     //~ Won't be.
271 //   )cc");
272 //
273 // # Lookup Frames
274 //
275 // If many calls to Emit() use the same set of variables, they can be stored
276 // in a *variable lookup frame*, like so:
277 //
278 //   auto vars = p.WithVars({{"class_name", my_class_name}});
279 //   p.Emit(R"cc(
280 //     class $class_name$ {
281 //      public:
282 //       $class_name$(int x);
283 //       // Etc.
284 //     };
285 //   )cc");
286 //
287 // WithVars() returns an RAII object that will "pop" the lookup frame on scope
288 // exit, ensuring that the variables remain local. There are a few different
289 // overloads of WithVars(); it accepts a map type, like absl::flat_hash_map,
290 // either by-value (which will cause the Printer to store a copy), or by
291 // pointer (which will cause the Printer to store a pointer, potentially
292 // avoiding a copy.)
293 //
294 // p.Emit(vars, "..."); is effectively syntax sugar for
295 //
296 //  { auto v = p.WithVars(vars); p.Emit("..."); }
297 //
298 // NOTE: callbacks are *not* allowed with WithVars; callbacks should be local
299 // to a specific Emit() call.
300 //
301 // # Annotations
302 //
303 // If Printer is given an AnnotationCollector, it will use it to record which
304 // spans of generated code correspond to user-indicated descriptors. There are
305 // a few different ways of indicating when to emit annotations.
306 //
307 // The WithAnnotations() function is like WithVars(), but accepts maps with
308 // string keys and descriptor values. It adds an annotation variable frame and
309 // returns an RAII object that pops the frame.
310 //
311 // There are two different ways to annotate code. In the first, when
312 // substituting a variable, if there is an annotation with the same name, then
313 // the resulting expanded value's span will be annotated with that annotation.
314 // For example:
315 //
316 //   auto v = p.WithVars({{"class_name", my_class_name}});
317 //   auto a = p.WithAnnotations({{"class_name", message_descriptor}});
318 //   p.Emit(R"cc(
319 //     class $class_name$ {
320 //      public:
321 //       $class_name$(int x);
322 //       // Etc.
323 //     };
324 //   )cc");
325 //
326 // The span corresponding to whatever $class_name$ expands to will be annotated
327 // as having come from message_descriptor.
328 //
329 // For convenience, this can be done with a single WithVars(), using the special
330 // three-argument form:
331 //
332 //   auto v = p.WithVars({{"class_name", my_class_name, message_descriptor}});
333 //   p.Emit(R"cc(
334 //     class $class_name$ {
335 //      public:
336 //       $class_name$(int x);
337 //       // Etc.
338 //     };
339 //   )cc");
340 //
341 //
342 // Alternatively, a range may be given explicitly:
343 //
344 //   auto a = p.WithAnnotations({{"my_desc", message_descriptor}});
345 //   p.Emit(R"cc(
346 //     $_start$my_desc$
347 //     class Foo {
348 //       // Etc.
349 //     };
350 //     $_end$my_desc$
351 //   )cc");
352 //
353 // The special $_start$ and $_end$ variables indicate the start and end of an
354 // annotated span, which is annotated with the variable that follows. This
355 // form can produce somewhat unreadable format strings and is not recommended.
356 //
357 // Note that whitespace after a $_start$ and before an $_end$ is not printed.
358 //
359 // # Indentation
360 //
361 // Printer tracks an indentation amount to add to each new line, independent
362 // from indentation in an Emit() call's literal. The amount of indentation to
363 // add is controlled by the WithIndent() function:
364 //
365 //   p.Emit("class $class_name$ {");
366 //   {
367 //     auto indent = p.WithIndent();
368 //     p.Emit(R"cc(
369 //       public:
370 //        $class_name$(int x);
371 //     )cc");
372 //   }
373 //   p.Emit("};");
374 //
375 // This will automatically add one level of indentation to all code in scope of
376 // `indent`, which is an RAII object much like the return value of `WithVars()`.
377 //
378 // # Old API
379 // TODO: Delete this documentation.
380 //
381 // Printer supports an older-style API that is in the process of being
382 // re-written. The old documentation is reproduced here until all use-cases are
383 // handled.
384 //
385 // This simple utility class assists in code generation.  It basically
386 // allows the caller to define a set of variables and then output some
387 // text with variable substitutions.  Example usage:
388 //
389 //   Printer printer(output, '$');
390 //   map<string, string> vars;
391 //   vars["name"] = "Bob";
392 //   printer.Print(vars, "My name is $name$.");
393 //
394 // The above writes "My name is Bob." to the output stream.
395 //
396 // Printer aggressively enforces correct usage, crashing (with assert failures)
397 // in the case of undefined variables in debug builds. This helps greatly in
398 // debugging code which uses it.
399 //
400 // If a Printer is constructed with an AnnotationCollector, it will provide it
401 // with annotations that connect the Printer's output to paths that can identify
402 // various descriptors.  In the above example, if person_ is a descriptor that
403 // identifies Bob, we can associate the output string "My name is Bob." with
404 // a source path pointing to that descriptor with:
405 //
406 //   printer.Annotate("name", person_);
407 //
408 // The AnnotationCollector will be sent an annotation linking the output range
409 // covering "Bob" to the logical path provided by person_.  Tools may use
410 // this association to (for example) link "Bob" in the output back to the
411 // source file that defined the person_ descriptor identifying Bob.
412 //
413 // Annotate can only examine variables substituted during the last call to
414 // Print.  It is invalid to refer to a variable that was used multiple times
415 // in a single Print call.
416 //
417 // In full generality, one may specify a range of output text using a beginning
418 // substitution variable and an ending variable.  The resulting annotation will
419 // span from the first character of the substituted value for the beginning
420 // variable to the last character of the substituted value for the ending
421 // variable.  For example, the Annotate call above is equivalent to this one:
422 //
423 //   printer.Annotate("name", "name", person_);
424 //
425 // This is useful if multiple variables combine to form a single span of output
426 // that should be annotated with the same source path.  For example:
427 //
428 //   Printer printer(output, '$');
429 //   map<string, string> vars;
430 //   vars["first"] = "Alice";
431 //   vars["last"] = "Smith";
432 //   printer.Print(vars, "My name is $first$ $last$.");
433 //   printer.Annotate("first", "last", person_);
434 //
435 // This code would associate the span covering "Alice Smith" in the output with
436 // the person_ descriptor.
437 //
438 // Note that the beginning variable must come before (or overlap with, in the
439 // case of zero-sized substitution values) the ending variable.
440 //
441 // It is also sometimes useful to use variables with zero-sized values as
442 // markers.  This avoids issues with multiple references to the same variable
443 // and also allows annotation ranges to span literal text from the Print
444 // templates:
445 //
446 //   Printer printer(output, '$');
447 //   map<string, string> vars;
448 //   vars["foo"] = "bar";
449 //   vars["function"] = "call";
450 //   vars["mark"] = "";
451 //   printer.Print(vars, "$function$($foo$,$foo$)$mark$");
452 //   printer.Annotate("function", "mark", call_);
453 //
454 // This code associates the span covering "call(bar,bar)" in the output with the
455 // call_ descriptor.
456 class PROTOBUF_EXPORT Printer {
457  private:
458   struct AnnotationRecord;
459 
460  public:
461   // This type exists to work around an absl type that has not yet been
462   // released.
463   struct SourceLocation {
currentSourceLocation464     static SourceLocation current() { return {}; }
file_nameSourceLocation465     absl::string_view file_name() const { return "<unknown>"; }
lineSourceLocation466     int line() const { return 0; }
467   };
468 
469   static constexpr char kDefaultVariableDelimiter = '$';
470   static constexpr absl::string_view kProtocCodegenTrace =
471       "PROTOC_CODEGEN_TRACE";
472 
473   // Sink type for constructing substitutions to pass to WithVars() and Emit().
474   class Sub;
475 
476   // Options for controlling how the output of a Printer is formatted.
477   struct Options {
478     Options() = default;
479     Options(const Options&) = default;
480     Options(Options&&) = default;
OptionsOptions481     Options(char variable_delimiter, AnnotationCollector* annotation_collector)
482         : variable_delimiter(variable_delimiter),
483           annotation_collector(annotation_collector) {}
484 
485     // The delimiter for variable substitutions, e.g. $foo$.
486     char variable_delimiter = kDefaultVariableDelimiter;
487     // An optional listener the Printer calls whenever it emits a source
488     // annotation; may be null.
489     AnnotationCollector* annotation_collector = nullptr;
490     // The "comment start" token for the language being generated. This is used
491     // to allow the Printer to emit debugging annotations in the source code
492     // output.
493     absl::string_view comment_start = "//";
494     // The token for beginning comments that are discarded by Printer's internal
495     // formatter.
496     absl::string_view ignored_comment_start = "//~";
497     // The number of spaces that a single level of indentation adds by default;
498     // this is the amount that WithIndent() increases indentation by.
499     size_t spaces_per_indent = 2;
500     // Whether to emit a "codegen trace" for calls to Emit(). If true, each call
501     // to Emit() will print a comment indicating where in the source of the
502     // compiler the Emit() call occurred.
503     //
504     // If disengaged, defaults to whether or not the environment variable
505     // `PROTOC_CODEGEN_TRACE` is set.
506     absl::optional<bool> enable_codegen_trace = absl::nullopt;
507   };
508 
509   // Constructs a new Printer with the default options to output to
510   // `output`.
511   explicit Printer(ZeroCopyOutputStream* output);
512 
513   // Constructs a new printer with the given set of options to output to
514   // `output`.
515   Printer(ZeroCopyOutputStream* output, Options options);
516 
517   // Old-style constructor. Avoid in preference to the two constructors above.
518   //
519   // Will eventually be marked as deprecated.
520   Printer(ZeroCopyOutputStream* output, char variable_delimiter,
521           AnnotationCollector* annotation_collector = nullptr);
522 
523   Printer(const Printer&) = delete;
524   Printer& operator=(const Printer&) = delete;
525 
526   // Pushes a new variable lookup frame that stores `vars` by reference.
527   //
528   // Returns an RAII object that pops the lookup frame.
529   template <typename Map>
530   auto WithVars(const Map* vars);
531 
532   // Pushes a new variable lookup frame that stores `vars` by value.
533   //
534   // Returns an RAII object that pops the lookup frame.
535   template <
536       typename Map = absl::flat_hash_map<absl::string_view, absl::string_view>,
537       typename = std::enable_if_t<!std::is_pointer<Map>::value>,
538       // Prefer the more specific span impl if this could be turned into
539       // a span.
540       typename = std::enable_if_t<
541           !std::is_convertible<Map, absl::Span<const Sub>>::value>>
542   auto WithVars(Map&& vars);
543 
544   // Pushes a new variable lookup frame that stores `vars` by value.
545   //
546   // Returns an RAII object that pops the lookup frame.
547   auto WithVars(absl::Span<const Sub> vars);
548 
549   // Looks up a variable set with WithVars().
550   //
551   // Will crash if:
552   // - `var` is not present in the lookup frame table.
553   // - `var` is a callback, rather than a string.
554   absl::string_view LookupVar(absl::string_view var);
555 
556   // Pushes a new annotation lookup frame that stores `vars` by reference.
557   //
558   // Returns an RAII object that pops the lookup frame.
559   template <typename Map>
560   auto WithAnnotations(const Map* vars);
561 
562   // Pushes a new variable lookup frame that stores `vars` by value.
563   //
564   // When writing `WithAnnotations({...})`, this is the overload that will be
565   // called, and it will synthesize an `absl::flat_hash_map`.
566   //
567   // Returns an RAII object that pops the lookup frame.
568   template <typename Map = absl::flat_hash_map<std::string, AnnotationRecord>>
569   auto WithAnnotations(Map&& vars);
570 
571   // Increases the indentation by `indent` spaces; when nullopt, increments
572   // indentation by the configured default spaces_per_indent.
573   //
574   // Returns an RAII object that removes this indentation.
575   auto WithIndent(absl::optional<size_t> indent = absl::nullopt) {
576     size_t delta = indent.value_or(options_.spaces_per_indent);
577     indent_ += delta;
578     return absl::MakeCleanup([this, delta] { indent_ -= delta; });
579   }
580 
581   // Emits formatted source code to the underlying output. See the class
582   // documentation for more details.
583   //
584   // `format` MUST be a string constant.
585   void Emit(absl::string_view format,
586             SourceLocation loc = SourceLocation::current());
587 
588   // Emits formatted source code to the underlying output, injecting
589   // additional variables as a lookup frame for just this call. See the class
590   // documentation for more details.
591   //
592   // `format` MUST be a string constant.
593   void Emit(absl::Span<const Sub> vars, absl::string_view format,
594             SourceLocation loc = SourceLocation::current());
595 
596   // Write a string directly to the underlying output, performing no formatting
597   // of any sort.
PrintRaw(absl::string_view data)598   void PrintRaw(absl::string_view data) { WriteRaw(data.data(), data.size()); }
599 
600   // Write a string directly to the underlying output, performing no formatting
601   // of any sort.
602   void WriteRaw(const char* data, size_t size);
603 
604   // True if any write to the underlying stream failed.  (We don't just
605   // crash in this case because this is an I/O failure, not a programming
606   // error.)
failed()607   bool failed() const { return failed_; }
608 
609   // -- Old-style API below; to be deprecated and removed. --
610   // TODO: Deprecate these APIs.
611 
612   template <
613       typename Map = absl::flat_hash_map<absl::string_view, absl::string_view>>
614   void Print(const Map& vars, absl::string_view text);
615 
616   template <typename... Args>
617   void Print(absl::string_view text, const Args&... args);
618 
619   // Link a substitution variable emitted by the last call to Print to the
620   // object described by descriptor.
621   template <typename SomeDescriptor>
622   void Annotate(
623       absl::string_view varname, const SomeDescriptor* descriptor,
624       absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt) {
625     Annotate(varname, varname, descriptor, semantic);
626   }
627 
628   // Link the output range defined by the substitution variables as emitted by
629   // the last call to Print to the object described by descriptor. The range
630   // begins at begin_varname's value and ends after the last character of the
631   // value substituted for end_varname.
632   template <typename Desc>
633   void Annotate(
634       absl::string_view begin_varname, absl::string_view end_varname,
635       const Desc* descriptor,
636       absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt);
637 
638   // Link a substitution variable emitted by the last call to Print to the file
639   // with path file_name.
640   void Annotate(
641       absl::string_view varname, absl::string_view file_name,
642       absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt) {
643     Annotate(varname, varname, file_name, semantic);
644   }
645 
646   // Link the output range defined by the substitution variables as emitted by
647   // the last call to Print to the file with path file_name. The range begins
648   // at begin_varname's value and ends after the last character of the value
649   // substituted for end_varname.
650   void Annotate(
651       absl::string_view begin_varname, absl::string_view end_varname,
652       absl::string_view file_name,
653       absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt) {
654     if (options_.annotation_collector == nullptr) {
655       return;
656     }
657 
658     Annotate(begin_varname, end_varname, file_name, {}, semantic);
659   }
660 
661   // Indent text by `options.spaces_per_indent`; undone by Outdent().
Indent()662   void Indent() { indent_ += options_.spaces_per_indent; }
663 
664   // Undoes a call to Indent().
665   void Outdent();
666 
667   // FormatInternal is a helper function not meant to use directly, use
668   // compiler::cpp::Formatter instead.
669   template <typename Map = absl::flat_hash_map<std::string, std::string>>
670   void FormatInternal(absl::Span<const std::string> args, const Map& vars,
671                       absl::string_view format);
672 
673   // Injects a substitution listener for the lifetime of the RAII object
674   // returned.
675   // While the listener is active it will receive a callback on each
676   // substitution label found.
677   // This can be used to add basic verification on top of emit routines.
WithSubstitutionListener(absl::AnyInvocable<void (absl::string_view,SourceLocation)> listener)678   auto WithSubstitutionListener(
679       absl::AnyInvocable<void(absl::string_view, SourceLocation)> listener) {
680     ABSL_CHECK(substitution_listener_ == nullptr);
681     substitution_listener_ = std::move(listener);
682     return absl::MakeCleanup([this] { substitution_listener_ = nullptr; });
683   }
684 
685  private:
686   struct PrintOptions;
687   struct Format;
688 
689   // Helper type for wrapping a variable substitution expansion result.
690   template <bool owned>
691   struct ValueImpl;
692 
693   using ValueView = ValueImpl</*owned=*/false>;
694   using Value = ValueImpl</*owned=*/true>;
695 
696   // Provide a helper to use heterogeneous lookup when it's available.
697   template <typename...>
698   using Void = void;
699 
700   template <typename Map, typename = void>
701   struct HasHeteroLookup : std::false_type {};
702   template <typename Map>
703   struct HasHeteroLookup<Map, Void<decltype(std::declval<Map>().find(
704                                   std::declval<absl::string_view>()))>>
705       : std::true_type {};
706 
707   template <typename Map,
708             typename = std::enable_if_t<HasHeteroLookup<Map>::value>>
709   static absl::string_view ToStringKey(absl::string_view x) {
710     return x;
711   }
712 
713   template <typename Map,
714             typename = std::enable_if_t<!HasHeteroLookup<Map>::value>>
715   static std::string ToStringKey(absl::string_view x) {
716     return std::string(x);
717   }
718 
719   Format TokenizeFormat(absl::string_view format_string,
720                         const PrintOptions& options);
721 
722   // Emit an annotation for the range defined by the given substitution
723   // variables, as set by the most recent call to PrintImpl() that set
724   // `use_substitution_map` to true.
725   //
726   // The range begins at the start of `begin_varname`'s value and ends after the
727   // last byte of `end_varname`'s value.
728   //
729   // `begin_varname` and `end_varname may` refer to the same variable.
730   void Annotate(absl::string_view begin_varname, absl::string_view end_varname,
731                 absl::string_view file_path, const std::vector<int>& path,
732                 absl::optional<AnnotationCollector::Semantic> semantic);
733 
  // The core printing implementation. There are three public entry points,
  // which enable different slices of functionality that are controlled by the
  // `opts` argument.
  //
  // `format` is the text to expand and `args` supplies the values for the
  // positional `$n$` forms (see `PrintOptions::allow_digit_substitutions`).
  void PrintImpl(absl::string_view format, absl::Span<const std::string> args,
                 PrintOptions opts);

  // This is a private function only so that it can see PrintOptions.
  //
  // Checks `cond`. Per PrintOptions, a failed check does not crash the program
  // when `opts.checks_are_debug_only` is set. The first overload takes a
  // callable that produces the message rather than the message itself.
  // NOTE(review): how `message` is reported and what the returned bool means
  // are not visible from these declarations -- presumably the message is
  // logged and the result mirrors `cond`; confirm in the .cc file.
  static bool Validate(bool cond, PrintOptions opts,
                       absl::FunctionRef<std::string()> message);
  static bool Validate(bool cond, PrintOptions opts, absl::string_view message);

  // Performs calls to `Validate()` to check that `index < current_arg_index`
  // and `index < args_len`, producing appropriate log lines if the checks fail,
  // and crashing if necessary.
  bool ValidateIndexLookupInBounds(size_t index, size_t current_arg_index,
                                   size_t args_len, PrintOptions opts);

  // Prints indentation if `at_start_of_line_` is true.
  void IndentIfAtStart();

  // Prints a codegen trace, for the given location in the compiler's source.
  void PrintCodegenTrace(absl::optional<SourceLocation> loc);

  // The core implementation for "fully-elaborated" variable definitions.
  //
  // Pushes a variable lookup frame built from `vars` (plus an annotation
  // lookup frame when at least one `Sub` carries an annotation) and returns a
  // cleanup object that pops whatever was pushed. When `allow_callbacks` is
  // false, a callback-valued `Sub` fails an ABSL_CHECK; a repeated key always
  // does.
  auto WithDefs(absl::Span<const Sub> vars, bool allow_callbacks);

  // Returns the start and end of the value that was substituted in place of
  // the variable `varname` in the last call to PrintImpl() (with
  // `use_substitution_map` set), if such a variable was substituted exactly
  // once.
  absl::optional<std::pair<size_t, size_t>> GetSubstitutionRange(
      absl::string_view varname, PrintOptions opts);
766 
  // NOTE(review): presumably the byte sink all output is written to, wrapping
  // the ZeroCopyOutputStream handed to the constructor -- confirm against the
  // constructor definition.
  google::protobuf::io::zc_sink_internal::ZeroCopyStreamByteSink sink_;
  // Printer-wide configuration. The descriptor-based Annotate() overload
  // returns early when `options_.annotation_collector` is null.
  Options options_;
  // Current indentation; starts at zero.
  // NOTE(review): the unit is not visible from this declaration -- confirm.
  size_t indent_ = 0;
  // IndentIfAtStart() prints indentation only while this is true.
  bool at_start_of_line_ = true;
  // NOTE(review): presumably latched once writing to `sink_` fails -- confirm
  // in the .cc file.
  bool failed_ = false;

  // NOTE(review): these two look like bookkeeping for parenthesis handling
  // during substitution (cf. `consume_parens_if_empty`), but their exact
  // meaning is not visible from this part of the header -- confirm.
  size_t paren_depth_ = 0;
  std::vector<size_t> paren_depth_to_omit_;

  // Stack of variable lookup frames. WithVars() and WithDefs() push a frame
  // and return a cleanup object that pops it again. A frame maps a variable
  // name to its value, or to nullopt when the frame does not define it.
  std::vector<std::function<absl::optional<ValueView>(absl::string_view)>>
      var_lookups_;

  // Stack of annotation lookup frames, managed the same way by
  // WithAnnotations() and WithDefs().
  std::vector<
      std::function<absl::optional<AnnotationRecord>(absl::string_view)>>
      annotation_lookups_;

  // If set, we invoke this when we do a label substitution. This can be used to
  // verify consistency of the generated code while we generate it.
  absl::AnyInvocable<void(absl::string_view, SourceLocation)>
      substitution_listener_;

  // A map from variable name to [start, end) offsets in the output buffer.
  //
  // This stores the data looked up by GetSubstitutionRange().
  absl::flat_hash_map<std::string, std::pair<size_t, size_t>> substitutions_;
  // Keeps track of the keys in `substitutions_` that need to be updated when
  // indents are inserted. These are keys that refer to the beginning of the
  // current line.
  std::vector<std::string> line_start_variables_;
796 };
797 
798 // Options for PrintImpl().
799 struct Printer::PrintOptions {
800   // The callsite of the public entry-point. Only Emit() sets this.
801   absl::optional<SourceLocation> loc;
802   // If set, Validate() calls will not crash the program.
803   bool checks_are_debug_only = false;
804   // If set, the `substitutions_` map will be populated as variables are
805   // substituted.
806   bool use_substitution_map = false;
807   // If set, the ${1$ and $}$ forms will be substituted. These are used for
808   // a slightly janky annotation-insertion mechanism in FormatInternal, that
809   // requires that passed-in substitution variables be serialized protos.
810   bool use_curly_brace_substitutions = false;
811   // If set, the $n$ forms will be substituted, pulling from the `args`
812   // argument to PrintImpl().
813   bool allow_digit_substitutions = true;
814   // If set, when a variable substitution with spaces in it, such as $ var$,
815   // is encountered, the spaces are stripped, so that it is as if it was
816   // $var$. If $var$ substitutes to a non-empty string, the removed spaces are
817   // printed around the substituted value.
818   //
819   // See the class documentation for more information on this behavior.
820   bool strip_spaces_around_vars = true;
821   // If set, leading whitespace will be stripped from the format string to
822   // determine the "extraneous indentation" that is produced when the format
823   // string is a C++ raw string. This is used to remove leading spaces from
824   // a raw string that would otherwise result in erratic indentation in the
825   // output.
826   bool strip_raw_string_indentation = false;
827   // If set, the annotation lookup frames are searched, per the annotation
828   // semantics of Emit() described in the class documentation.
829   bool use_annotation_frames = true;
830 };
831 
// Helper type for wrapping a variable substitution expansion result.
//
// A value is either a string or a callback. `owned` selects how the string
// alternative is stored: as a `std::string` when true, as an
// `absl::string_view` when false.
template <bool owned>
struct Printer::ValueImpl {
 private:
  // Trait that is true only for instantiations of ValueImpl itself. It keeps
  // the converting constructor below from accepting another ValueImpl.
  template <typename T>
  struct IsSubImpl : std::false_type {};
  template <bool a>
  struct IsSubImpl<ValueImpl<a>> : std::true_type {};

 public:
  using StringType = std::conditional_t<owned, std::string, absl::string_view>;
  // These callbacks return false if this is a recursive call.
  using Callback = std::function<bool()>;
  using StringOrCallback = absl::variant<StringType, Callback>;

  ValueImpl() = default;

  // This is a template to avoid colliding with the copy constructor below.
  //
  // Converts `value` into a string or a callback via the ranked
  // ToStringOrCallback() overloads. When the result is a callback,
  // `consume_after` starts out as ";,".
  template <typename Value,
            typename = std::enable_if_t<
                !IsSubImpl<absl::remove_cvref_t<Value>>::value>>
  ValueImpl(Value&& value)  // NOLINT
      : value(ToStringOrCallback(std::forward<Value>(value), Rank2{})) {
    if (absl::holds_alternative<Callback>(this->value)) {
      consume_after = ";,";
    }
  }

  // Copy ctor/assign allow interconversion of the two template parameters.
  template <bool that_owned>
  ValueImpl(const ValueImpl<that_owned>& that) {  // NOLINT
    *this = that;
  }

  template <bool that_owned>
  ValueImpl& operator=(const ValueImpl<that_owned>& that);

  // Returns the held string, or nullptr when a callback is held instead.
  const StringType* AsString() const {
    return absl::get_if<StringType>(&value);
  }

  // Returns the held callback, or nullptr when a string is held instead.
  const Callback* AsCallback() const { return absl::get_if<Callback>(&value); }

  StringOrCallback value;
  // Set by Sub::WithSuffix(); defaults to ";," for callback values.
  // NOTE(review): presumably text swallowed right after the substitution --
  // see the class documentation to confirm.
  std::string consume_after;
  // Set by Sub::ConditionalFunctionCall().
  bool consume_parens_if_empty = false;

 private:
  // go/ranked-overloads
  //
  // Each rank derives from the one below it, so a call passing `Rank2{}`
  // prefers the highest-ranked overload among those that are viable.
  struct Rank0 {};
  struct Rank1 : Rank0 {};
  struct Rank2 : Rank1 {};

  // Dummy template for delayed instantiation, which is required for the
  // static assert below to kick in only when this function is called when it
  // shouldn't.
  //
  // This is done to produce a better error message than the "candidate does
  // not match" SFINAE errors.
  //
  // The defaulted template parameter makes this overload viable only when
  // `cb` can be invoked with no arguments.
  // NOTE(review): the out-of-line definition in this header contains no
  // static_assert, so the mention of one above looks stale -- confirm.
  template <typename Cb, typename = decltype(std::declval<Cb&&>()())>
  StringOrCallback ToStringOrCallback(Cb&& cb, Rank2);

  // Separate from the AlphaNum overload to avoid copies when taking strings
  // by value when in `owned` mode.
  StringOrCallback ToStringOrCallback(StringType s, Rank1) { return s; }

  // Fallback for anything convertible to absl::AlphaNum; stores its string
  // form.
  StringOrCallback ToStringOrCallback(const absl::AlphaNum& s, Rank0) {
    return StringType(s.Piece());
  }
};
902 
903 template <bool owned>
904 template <bool that_owned>
905 Printer::ValueImpl<owned>& Printer::ValueImpl<owned>::operator=(
906     const ValueImpl<that_owned>& that) {
907   // Cast to void* is required, since this and that may potentially be of
908   // different types (due to the `that_owned` parameter).
909   if (static_cast<const void*>(this) == static_cast<const void*>(&that)) {
910     return *this;
911   }
912 
913   using ThatStringType = typename ValueImpl<that_owned>::StringType;
914 
915   if (auto* str = absl::get_if<ThatStringType>(&that.value)) {
916     value = StringType(*str);
917   } else {
918     value = absl::get<Callback>(that.value);
919   }
920 
921   consume_after = that.consume_after;
922   consume_parens_if_empty = that.consume_parens_if_empty;
923   return *this;
924 }
925 
926 template <bool owned>
927 template <typename Cb, typename /*Sfinae*/>
928 auto Printer::ValueImpl<owned>::ToStringOrCallback(Cb&& cb, Rank2)
929     -> StringOrCallback {
930   return Callback(
931       [cb = std::forward<Cb>(cb), is_called = false]() mutable -> bool {
932         if (is_called) {
933           // Catch whether or not this function is being called recursively.
934           return false;
935         }
936         is_called = true;
937         cb();
938         is_called = false;
939         return true;
940       });
941 }
942 
// Annotation data that can be associated with a variable (see
// Sub::AnnotatedAs() and WithAnnotations()): a file path, a location path
// within that file, and an optional semantic.
struct Printer::AnnotationRecord {
  // Location path. Filled in by the descriptor constructor; left empty by the
  // file-path constructor.
  std::vector<int> path;
  // Path of the file the annotation refers to.
  std::string file_path;
  // Optional semantic for the annotation; absent unless the caller passes one.
  absl::optional<AnnotationCollector::Semantic> semantic;

  // AnnotationRecord's constructors are *not* marked as explicit,
  // specifically so that it is possible to construct a
  // map<string, AnnotationRecord> by writing
  //
  // {{"foo", my_cool_descriptor}, {"bar", "file.proto"}}

  // Builds a record from anything convertible to std::string, which is used
  // as the file path. `path` stays empty.
  template <
      typename String,
      std::enable_if_t<std::is_convertible<const String&, std::string>::value,
                       int> = 0>
  AnnotationRecord(  // NOLINT(google-explicit-constructor)
      const String& file_path,
      absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt)
      : file_path(file_path), semantic(semantic) {}

  // Builds a record from a pointer to a descriptor-like object: the file path
  // comes from `desc->file()->name()` and the location path from
  // `desc->GetLocationPath()`. `desc` is dereferenced unconditionally, so it
  // must not be null.
  template <typename Desc,
            // This SFINAE clause excludes char* from matching this
            // constructor.
            std::enable_if_t<std::is_class<Desc>::value, int> = 0>
  AnnotationRecord(  // NOLINT(google-explicit-constructor)
      const Desc* desc,
      absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt)
      : file_path(desc->file()->name()), semantic(semantic) {
    desc->GetLocationPath(&path);
  }
};
974 
975 class Printer::Sub {
976  public:
977   template <typename Value>
978   Sub(std::string key, Value&& value)
979       : key_(std::move(key)),
980         value_(std::forward<Value>(value)),
981         annotation_(absl::nullopt) {}
982 
983   Sub AnnotatedAs(AnnotationRecord annotation) && {
984     annotation_ = std::move(annotation);
985     return std::move(*this);
986   }
987 
988   Sub WithSuffix(std::string sub_suffix) && {
989     value_.consume_after = std::move(sub_suffix);
990     return std::move(*this);
991   }
992 
993   Sub ConditionalFunctionCall() && {
994     value_.consume_parens_if_empty = true;
995     return std::move(*this);
996   }
997 
998   absl::string_view key() const { return key_; }
999 
1000   absl::string_view value() const {
1001     const auto* str = value_.AsString();
1002     ABSL_CHECK(str != nullptr)
1003         << "could not find " << key() << "; found callback instead";
1004     return *str;
1005   }
1006 
1007  private:
1008   friend class Printer;
1009 
1010   std::string key_;
1011   Value value_;
1012   absl::optional<AnnotationRecord> annotation_;
1013 };
1014 
1015 template <typename Map>
1016 auto Printer::WithVars(const Map* vars) {
1017   var_lookups_.emplace_back(
1018       [vars](absl::string_view var) -> absl::optional<ValueView> {
1019         auto it = vars->find(ToStringKey<Map>(var));
1020         if (it == vars->end()) {
1021           return absl::nullopt;
1022         }
1023         return ValueView(it->second);
1024       });
1025   return absl::MakeCleanup([this] { var_lookups_.pop_back(); });
1026 }
1027 
1028 template <typename Map, typename, typename /*Sfinae*/>
1029 auto Printer::WithVars(Map&& vars) {
1030   var_lookups_.emplace_back(
1031       [vars = std::forward<Map>(vars)](
1032           absl::string_view var) -> absl::optional<ValueView> {
1033         auto it = vars.find(ToStringKey<Map>(var));
1034         if (it == vars.end()) {
1035           return absl::nullopt;
1036         }
1037         return ValueView(it->second);
1038       });
1039   return absl::MakeCleanup([this] { var_lookups_.pop_back(); });
1040 }
1041 
1042 template <typename Map>
1043 auto Printer::WithAnnotations(const Map* vars) {
1044   annotation_lookups_.emplace_back(
1045       [vars](absl::string_view var) -> absl::optional<AnnotationRecord> {
1046         auto it = vars->find(ToStringKey<Map>(var));
1047         if (it == vars->end()) {
1048           return absl::nullopt;
1049         }
1050         return AnnotationRecord(it->second);
1051       });
1052   return absl::MakeCleanup([this] { annotation_lookups_.pop_back(); });
1053 }
1054 
1055 template <typename Map>
1056 auto Printer::WithAnnotations(Map&& vars) {
1057   annotation_lookups_.emplace_back(
1058       [vars = std::forward<Map>(vars)](
1059           absl::string_view var) -> absl::optional<AnnotationRecord> {
1060         auto it = vars.find(ToStringKey<Map>(var));
1061         if (it == vars.end()) {
1062           return absl::nullopt;
1063         }
1064         return AnnotationRecord(it->second);
1065       });
1066   return absl::MakeCleanup([this] { annotation_lookups_.pop_back(); });
1067 }
1068 
// Convenience overload for formats that need no call-local variables: it
// forwards to the Emit() overload that takes variables first, passing an
// empty braced list.
inline void Printer::Emit(absl::string_view format, SourceLocation loc) {
  Emit({}, format, loc);
}
1072 
1073 template <typename Map>
1074 void Printer::Print(const Map& vars, absl::string_view text) {
1075   PrintOptions opts;
1076   opts.checks_are_debug_only = true;
1077   opts.use_substitution_map = true;
1078   opts.allow_digit_substitutions = false;
1079 
1080   auto pop = WithVars(&vars);
1081   PrintImpl(text, {}, opts);
1082 }
1083 
1084 template <typename... Args>
1085 void Printer::Print(absl::string_view text, const Args&... args) {
1086   static_assert(sizeof...(args) % 2 == 0, "");
1087 
1088   // Include an extra arg, since a zero-length array is ill-formed, and
1089   // MSVC complains.
1090   absl::string_view vars[] = {args..., ""};
1091   absl::flat_hash_map<absl::string_view, absl::string_view> map;
1092   map.reserve(sizeof...(args) / 2);
1093   for (size_t i = 0; i < sizeof...(args); i += 2) {
1094     map.emplace(vars[i], vars[i + 1]);
1095   }
1096 
1097   Print(map, text);
1098 }
1099 
1100 template <typename Desc>
1101 void Printer::Annotate(absl::string_view begin_varname,
1102                        absl::string_view end_varname, const Desc* descriptor,
1103                        absl::optional<AnnotationCollector::Semantic> semantic) {
1104   if (options_.annotation_collector == nullptr) {
1105     return;
1106   }
1107 
1108   std::vector<int> path;
1109   descriptor->GetLocationPath(&path);
1110   Annotate(begin_varname, end_varname, descriptor->file()->name(), path,
1111            semantic);
1112 }
1113 
1114 template <typename Map>
1115 void Printer::FormatInternal(absl::Span<const std::string> args,
1116                              const Map& vars, absl::string_view format) {
1117   PrintOptions opts;
1118   opts.use_curly_brace_substitutions = true;
1119   opts.strip_spaces_around_vars = true;
1120 
1121   auto pop = WithVars(&vars);
1122   PrintImpl(format, args, opts);
1123 }
1124 
1125 inline auto Printer::WithDefs(absl::Span<const Sub> vars,
1126                               bool allow_callbacks) {
1127   absl::flat_hash_map<std::string, Value> var_map;
1128   var_map.reserve(vars.size());
1129 
1130   absl::flat_hash_map<std::string, AnnotationRecord> annotation_map;
1131 
1132   for (const auto& var : vars) {
1133     ABSL_CHECK(allow_callbacks || var.value_.AsCallback() == nullptr)
1134         << "callback arguments are not permitted in this position";
1135     auto result = var_map.insert({var.key_, var.value_});
1136     ABSL_CHECK(result.second)
1137         << "repeated variable in Emit() or WithVars() call: \"" << var.key_
1138         << "\"";
1139     if (var.annotation_.has_value()) {
1140       annotation_map.insert({var.key_, *var.annotation_});
1141     }
1142   }
1143 
1144   var_lookups_.emplace_back([map = std::move(var_map)](absl::string_view var)
1145                                 -> absl::optional<ValueView> {
1146     auto it = map.find(var);
1147     if (it == map.end()) {
1148       return absl::nullopt;
1149     }
1150     return ValueView(it->second);
1151   });
1152 
1153   bool has_annotations = !annotation_map.empty();
1154   if (has_annotations) {
1155     annotation_lookups_.emplace_back(
1156         [map = std::move(annotation_map)](
1157             absl::string_view var) -> absl::optional<AnnotationRecord> {
1158           auto it = map.find(var);
1159           if (it == map.end()) {
1160             return absl::nullopt;
1161           }
1162           return it->second;
1163         });
1164   }
1165 
1166   return absl::MakeCleanup([this, has_annotations] {
1167     var_lookups_.pop_back();
1168     if (has_annotations) {
1169       annotation_lookups_.pop_back();
1170     }
1171   });
1172 }
1173 
1174 inline auto Printer::WithVars(absl::Span<const Sub> vars) {
1175   return WithDefs(vars, /*allow_callbacks=*/false);
1176 }
1177 }  // namespace io
1178 }  // namespace protobuf
1179 }  // namespace google
1180 
1181 #include "google/protobuf/port_undef.inc"
1182 
1183 #endif  // GOOGLE_PROTOBUF_IO_PRINTER_H__
1184