1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2024 Google LLC. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 // Author: kenton@google.com (Kenton Varda) 9 // Based on original Protocol Buffers design by 10 // Sanjay Ghemawat, Jeff Dean, and others. 11 // 12 // Utility class for writing text to a ZeroCopyOutputStream. 13 14 #ifndef GOOGLE_PROTOBUF_IO_PRINTER_H__ 15 #define GOOGLE_PROTOBUF_IO_PRINTER_H__ 16 17 #include <cstddef> 18 #include <functional> 19 #include <initializer_list> 20 #include <string> 21 #include <type_traits> 22 #include <utility> 23 #include <vector> 24 25 #include "absl/cleanup/cleanup.h" 26 #include "absl/container/flat_hash_map.h" 27 #include "absl/functional/any_invocable.h" 28 #include "absl/functional/function_ref.h" 29 #include "absl/log/absl_check.h" 30 #include "absl/meta/type_traits.h" 31 #include "absl/strings/str_cat.h" 32 #include "absl/strings/str_format.h" 33 #include "absl/strings/string_view.h" 34 #include "absl/types/optional.h" 35 #include "absl/types/variant.h" 36 #include "google/protobuf/io/zero_copy_sink.h" 37 38 39 // Must be included last. 40 #include "google/protobuf/port_def.inc" 41 42 namespace google { 43 namespace protobuf { 44 namespace io { 45 46 // Records annotations about a Printer's output. 47 class PROTOBUF_EXPORT AnnotationCollector { 48 public: 49 // Annotation is a offset range and a payload pair. This payload's layout is 50 // specific to derived types of AnnotationCollector. 51 using Annotation = std::pair<std::pair<size_t, size_t>, std::string>; 52 53 // The semantic meaning of an annotation. This enum mirrors 54 // google.protobuf.GeneratedCodeInfo.Annotation.Semantic, and the enumerator values 55 // should match it. 
56 enum Semantic { 57 kNone = 0, 58 kSet = 1, 59 kAlias = 2, 60 }; 61 62 virtual ~AnnotationCollector() = default; 63 64 // Records that the bytes in file_path beginning with begin_offset and ending 65 // before end_offset are associated with the SourceCodeInfo-style path. 66 virtual void AddAnnotation(size_t begin_offset, size_t end_offset, 67 const std::string& file_path, 68 const std::vector<int>& path) = 0; 69 AddAnnotation(size_t begin_offset,size_t end_offset,const std::string & file_path,const std::vector<int> & path,absl::optional<Semantic> semantic)70 virtual void AddAnnotation(size_t begin_offset, size_t end_offset, 71 const std::string& file_path, 72 const std::vector<int>& path, 73 absl::optional<Semantic> semantic) { 74 AddAnnotation(begin_offset, end_offset, file_path, path); 75 } 76 77 // TODO I don't see why we need virtuals here. Just a vector of 78 // range, payload pairs stored in a context should suffice. AddAnnotationNew(Annotation &)79 virtual void AddAnnotationNew(Annotation&) {} 80 }; 81 82 // Records annotations about a Printer's output to a Protobuf message, 83 // assuming that it has a repeated submessage field named `annotation` with 84 // fields matching 85 // 86 // message ??? { 87 // repeated int32 path = 1; 88 // optional string source_file = 2; 89 // optional int32 begin = 3; 90 // optional int32 end = 4; 91 // optional int32 semantic = 5; 92 // } 93 template <typename AnnotationProto> 94 class AnnotationProtoCollector : public AnnotationCollector { 95 private: 96 // Some users of this type use it with a proto that does not have a 97 // "semantic" field. Therefore, we need to detect it with SFINAE. 
98 99 // go/ranked-overloads 100 struct Rank0 {}; 101 struct Rank1 : Rank0 {}; 102 103 template <typename Proto> 104 static auto SetSemantic(Proto* p, int semantic, Rank1) 105 -> decltype(p->set_semantic( 106 static_cast<typename Proto::Semantic>(semantic))) { 107 return p->set_semantic(static_cast<typename Proto::Semantic>(semantic)); 108 } 109 110 template <typename Proto> SetSemantic(Proto *,int,Rank0)111 static void SetSemantic(Proto*, int, Rank0) {} 112 113 public: AnnotationProtoCollector(AnnotationProto * annotation_proto)114 explicit AnnotationProtoCollector(AnnotationProto* annotation_proto) 115 : annotation_proto_(annotation_proto) {} 116 AddAnnotation(size_t begin_offset,size_t end_offset,const std::string & file_path,const std::vector<int> & path)117 void AddAnnotation(size_t begin_offset, size_t end_offset, 118 const std::string& file_path, 119 const std::vector<int>& path) override { 120 AddAnnotation(begin_offset, end_offset, file_path, path, absl::nullopt); 121 } 122 AddAnnotation(size_t begin_offset,size_t end_offset,const std::string & file_path,const std::vector<int> & path,absl::optional<Semantic> semantic)123 void AddAnnotation(size_t begin_offset, size_t end_offset, 124 const std::string& file_path, const std::vector<int>& path, 125 absl::optional<Semantic> semantic) override { 126 auto* annotation = annotation_proto_->add_annotation(); 127 for (int i = 0; i < path.size(); ++i) { 128 annotation->add_path(path[i]); 129 } 130 annotation->set_source_file(file_path); 131 annotation->set_begin(begin_offset); 132 annotation->set_end(end_offset); 133 134 if (semantic.has_value()) { 135 SetSemantic(annotation, *semantic, Rank1{}); 136 } 137 } 138 AddAnnotationNew(Annotation & a)139 void AddAnnotationNew(Annotation& a) override { 140 auto* annotation = annotation_proto_->add_annotation(); 141 annotation->ParseFromString(a.second); 142 annotation->set_begin(a.first.first); 143 annotation->set_end(a.first.second); 144 } 145 146 private: 147 
AnnotationProto* annotation_proto_; 148 }; 149 150 // A source code printer for assisting in code generation. 151 // 152 // This type implements a simple templating language for substituting variables 153 // into static, user-provided strings, and also tracks indentation 154 // automatically. 155 // 156 // The main entry-point for this type is the Emit function, which can be used 157 // as thus: 158 // 159 // Printer p(output); 160 // p.Emit({{"class", my_class_name}}, R"cc( 161 // class $class$ { 162 // public: 163 // $class$(int x) : x_(x) {} 164 // private: 165 // int x_; 166 // }; 167 // )cc"); 168 // 169 // Substitutions are of the form $var$, which is looked up in the map passed in 170 // as the first argument. The variable delimiter character, $, can be chosen to 171 // be something convenient for the target language. For example, in PHP, which 172 // makes heavy use of $, it can be made into something like # instead. 173 // 174 // A literal $ can be emitted by writing $$. 175 // 176 // Substitutions may contain spaces around the name of the variable, which will 177 // be ignored for the purposes of looking up the variable to substitute in, but 178 // which will be reproduced in the output: 179 // 180 // p.Emit({{"foo", "bar"}}, "$ foo $"); 181 // 182 // emits the string " bar ". If the substituted-in variable is the empty string, 183 // then the surrounding spaces are *not* printed: 184 // 185 // p.Emit({{"xyz", xyz}}, "$xyz $Thing"); 186 // 187 // If xyz is "Foo", this will become "Foo Thing", but if it is "", this becomes 188 // "Thing", rather than " Thing". This helps minimize awkward whitespace in the 189 // output. 190 // 191 // The value may be any type that can be stringified with `absl::StrCat`: 192 // 193 // p.Emit({{"num", 5}}, "x = $num$;"); 194 // 195 // If a variable that is referenced in the format string is missing, the program 196 // will crash. 
// Callers must statically know that every variable reference is
// valid, and MUST NOT pass user-provided strings directly into Emit().
//
// In practice, this means the first member of io::Printer::Sub here:
//
//   p.Emit({{"num", 5}}, "x = $num$;");
//            ^
// must always be a string literal.
//
// Substitutions can be configured to "chomp" a single character after them, to
// help make indentation work out. This can be configured by passing an
// io::Printer::Sub().WithSuffix() into Emit's substitution map:
//
//   p.Emit({io::Printer::Sub("var", var_decl).WithSuffix(";")}, R"cc(
//     class $class$ {
//      public:
//       $var$;
//     };
//   )cc");
//
// This will delete the ; after $var$, regardless of whether it was an empty
// declaration or not. It will also intelligently attempt to clean up
// empty lines that follow, if it was on an empty line; this promotes cleaner
// formatting of the output.
//
// You can configure a large set of skippable characters, but when chomping,
// only one character will actually be skipped at a time. For example, callback
// substitutions (see below) use ";," by default as their "chomping set".
//
//   p.Emit({io::Printer::Sub("var", 123).WithSuffix(";,")}, R"cc(
//     $var$,;
//   )cc");
//
// will produce "123,".
//
// # Callback Substitution
//
// Instead of passing a string into Emit(), it is possible to pass in a callback
// as a variable mapping.
// This will take indentation into account, which allows
// factoring out parts of a formatting string while ensuring braces are
// balanced:
//
//   p.Emit(
//     {{"methods", [&] {
//       p.Emit(R"cc(
//         int Bar() {
//            return 42;
//         }
//       )cc");
//     }}},
//     R"cc(
//       class Foo {
//        public:
//         $methods$;
//       };
//     )cc"
//   );
//
// This emits
//
//   class Foo {
//    public:
//     int Bar() {
//       return 42;
//     }
//   };
//
// # Comments
//
// It may be desirable to place comments in a raw string that are stripped out
// before printing. The prefix for Printer-ignored comments can be configured
// in Options. By default, this is `//~`.
//
//   p.Emit(R"cc(
//     // Will be printed in the output.
//     //~ Won't be.
//   )cc");
//
// # Lookup Frames
//
// If many calls to Emit() use the same set of variables, they can be stored
// in a *variable lookup frame*, like so:
//
//   auto vars = p.WithVars({{"class_name", my_class_name}});
//   p.Emit(R"cc(
//     class $class_name$ {
//      public:
//       $class_name$(int x);
//       // Etc.
//     };
//   )cc");
//
// WithVars() returns an RAII object that will "pop" the lookup frame on scope
// exit, ensuring that the variables remain local. There are a few different
// overloads of WithVars(); it accepts a map type, like absl::flat_hash_map,
// either by-value (which will cause the Printer to store a copy), or by
// pointer (which will cause the Printer to store a pointer, potentially
// avoiding a copy.)
//
// p.Emit(vars, "..."); is effectively syntactic sugar for
//
//   { auto v = p.WithVars(vars); p.Emit("..."); }
//
// NOTE: callbacks are *not* allowed with WithVars; callbacks should be local
// to a specific Emit() call.
//
// # Annotations
//
// If Printer is given an AnnotationCollector, it will use it to record which
// spans of generated code correspond to user-indicated descriptors. There are
// a few different ways of indicating when to emit annotations.
//
// The WithAnnotations() function is like WithVars(), but accepts maps with
// string keys and descriptor values. It adds an annotation variable frame and
// returns an RAII object that pops the frame.
//
// There are two different ways to annotate code. In the first, when
// substituting a variable, if there is an annotation with the same name, then
// the resulting expanded value's span will be annotated with that annotation.
// For example:
//
//   auto v = p.WithVars({{"class_name", my_class_name}});
//   auto a = p.WithAnnotations({{"class_name", message_descriptor}});
//   p.Emit(R"cc(
//     class $class_name$ {
//      public:
//       $class_name$(int x);
//       // Etc.
//     };
//   )cc");
//
// The span corresponding to whatever $class_name$ expands to will be annotated
// as having come from message_descriptor.
//
// For convenience, this can be done with a single WithVars(), using the special
// three-argument form:
//
//   auto v = p.WithVars({{"class_name", my_class_name, message_descriptor}});
//   p.Emit(R"cc(
//     class $class_name$ {
//      public:
//       $class_name$(int x);
//       // Etc.
//     };
//   )cc");
//
//
// Alternatively, a range may be given explicitly:
//
//   auto a = p.WithAnnotations({{"my_desc", message_descriptor}});
//   p.Emit(R"cc(
//     $_start$my_desc$
//     class Foo {
//       // Etc.
//     };
//     $_end$my_desc$
//   )cc");
//
// The special $_start$ and $_end$ variables indicate the start and end of an
// annotated span, which is annotated with the variable that follows.
// This form can produce somewhat unreadable format strings and is not
// recommended.
//
// Note that whitespace after a $_start$ and before an $_end$ is not printed.
//
// # Indentation
//
// Printer tracks an indentation amount to add to each new line, independent
// from indentation in an Emit() call's literal. The amount of indentation to
// add is controlled by the WithIndent() function:
//
//   p.Emit("class $class_name$ {");
//   {
//     auto indent = p.WithIndent();
//     p.Emit(R"cc(
//       public:
//         $class_name$(int x);
//     )cc");
//   }
//   p.Emit("};");
//
// This will automatically add one level of indentation to all code in scope of
// `indent`, which is an RAII object much like the return value of `WithVars()`.
//
// # Old API
// TODO: Delete this documentation.
//
// Printer supports an older-style API that is in the process of being
// re-written. The old documentation is reproduced here until all use-cases are
// handled.
//
// This simple utility class assists in code generation. It basically
// allows the caller to define a set of variables and then output some
// text with variable substitutions. Example usage:
//
//   Printer printer(output, '$');
//   map<string, string> vars;
//   vars["name"] = "Bob";
//   printer.Print(vars, "My name is $name$.");
//
// The above writes "My name is Bob." to the output stream.
//
// Printer aggressively enforces correct usage, crashing (with assert failures)
// in the case of undefined variables in debug builds. This helps greatly in
// debugging code which uses it.
//
// If a Printer is constructed with an AnnotationCollector, it will provide it
// with annotations that connect the Printer's output to paths that can identify
// various descriptors.
// In the above example, if person_ is a descriptor that
// identifies Bob, we can associate the output string "My name is Bob." with
// a source path pointing to that descriptor with:
//
//   printer.Annotate("name", person_);
//
// The AnnotationCollector will be sent an annotation linking the output range
// covering "Bob" to the logical path provided by person_. Tools may use
// this association to (for example) link "Bob" in the output back to the
// source file that defined the person_ descriptor identifying Bob.
//
// Annotate can only examine variables substituted during the last call to
// Print. It is invalid to refer to a variable that was used multiple times
// in a single Print call.
//
// In full generality, one may specify a range of output text using a beginning
// substitution variable and an ending variable. The resulting annotation will
// span from the first character of the substituted value for the beginning
// variable to the last character of the substituted value for the ending
// variable. For example, the Annotate call above is equivalent to this one:
//
//   printer.Annotate("name", "name", person_);
//
// This is useful if multiple variables combine to form a single span of output
// that should be annotated with the same source path. For example:
//
//   Printer printer(output, '$');
//   map<string, string> vars;
//   vars["first"] = "Alice";
//   vars["last"] = "Smith";
//   printer.Print(vars, "My name is $first$ $last$.");
//   printer.Annotate("first", "last", person_);
//
// This code would associate the span covering "Alice Smith" in the output with
// the person_ descriptor.
//
// Note that the beginning variable must come before (or overlap with, in the
// case of zero-sized substitution values) the ending variable.
440 // 441 // It is also sometimes useful to use variables with zero-sized values as 442 // markers. This avoids issues with multiple references to the same variable 443 // and also allows annotation ranges to span literal text from the Print 444 // templates: 445 // 446 // Printer printer(output, '$'); 447 // map<string, string> vars; 448 // vars["foo"] = "bar"; 449 // vars["function"] = "call"; 450 // vars["mark"] = ""; 451 // printer.Print(vars, "$function$($foo$,$foo$)$mark$"); 452 // printer.Annotate("function", "mark", call_); 453 // 454 // This code associates the span covering "call(bar,bar)" in the output with the 455 // call_ descriptor. 456 class PROTOBUF_EXPORT Printer { 457 private: 458 struct AnnotationRecord; 459 460 public: 461 // This type exists to work around an absl type that has not yet been 462 // released. 463 struct SourceLocation { currentSourceLocation464 static SourceLocation current() { return {}; } file_nameSourceLocation465 absl::string_view file_name() const { return "<unknown>"; } lineSourceLocation466 int line() const { return 0; } 467 }; 468 469 static constexpr char kDefaultVariableDelimiter = '$'; 470 static constexpr absl::string_view kProtocCodegenTrace = 471 "PROTOC_CODEGEN_TRACE"; 472 473 // Sink type for constructing substitutions to pass to WithVars() and Emit(). 474 class Sub; 475 476 // Options for controlling how the output of a Printer is formatted. 477 struct Options { 478 Options() = default; 479 Options(const Options&) = default; 480 Options(Options&&) = default; OptionsOptions481 Options(char variable_delimiter, AnnotationCollector* annotation_collector) 482 : variable_delimiter(variable_delimiter), 483 annotation_collector(annotation_collector) {} 484 485 // The delimiter for variable substitutions, e.g. $foo$. 486 char variable_delimiter = kDefaultVariableDelimiter; 487 // An optional listener the Printer calls whenever it emits a source 488 // annotation; may be null. 
489 AnnotationCollector* annotation_collector = nullptr; 490 // The "comment start" token for the language being generated. This is used 491 // to allow the Printer to emit debugging annotations in the source code 492 // output. 493 absl::string_view comment_start = "//"; 494 // The token for beginning comments that are discarded by Printer's internal 495 // formatter. 496 absl::string_view ignored_comment_start = "//~"; 497 // The number of spaces that a single level of indentation adds by default; 498 // this is the amount that WithIndent() increases indentation by. 499 size_t spaces_per_indent = 2; 500 // Whether to emit a "codegen trace" for calls to Emit(). If true, each call 501 // to Emit() will print a comment indicating where in the source of the 502 // compiler the Emit() call occurred. 503 // 504 // If disengaged, defaults to whether or not the environment variable 505 // `PROTOC_CODEGEN_TRACE` is set. 506 absl::optional<bool> enable_codegen_trace = absl::nullopt; 507 }; 508 509 // Constructs a new Printer with the default options to output to 510 // `output`. 511 explicit Printer(ZeroCopyOutputStream* output); 512 513 // Constructs a new printer with the given set of options to output to 514 // `output`. 515 Printer(ZeroCopyOutputStream* output, Options options); 516 517 // Old-style constructor. Avoid in preference to the two constructors above. 518 // 519 // Will eventually be marked as deprecated. 520 Printer(ZeroCopyOutputStream* output, char variable_delimiter, 521 AnnotationCollector* annotation_collector = nullptr); 522 523 Printer(const Printer&) = delete; 524 Printer& operator=(const Printer&) = delete; 525 526 // Pushes a new variable lookup frame that stores `vars` by reference. 527 // 528 // Returns an RAII object that pops the lookup frame. 529 template <typename Map> 530 auto WithVars(const Map* vars); 531 532 // Pushes a new variable lookup frame that stores `vars` by value. 533 // 534 // Returns an RAII object that pops the lookup frame. 
535 template < 536 typename Map = absl::flat_hash_map<absl::string_view, absl::string_view>, 537 typename = std::enable_if_t<!std::is_pointer<Map>::value>, 538 // Prefer the more specific span impl if this could be turned into 539 // a span. 540 typename = std::enable_if_t< 541 !std::is_convertible<Map, absl::Span<const Sub>>::value>> 542 auto WithVars(Map&& vars); 543 544 // Pushes a new variable lookup frame that stores `vars` by value. 545 // 546 // Returns an RAII object that pops the lookup frame. 547 auto WithVars(absl::Span<const Sub> vars); 548 549 // Looks up a variable set with WithVars(). 550 // 551 // Will crash if: 552 // - `var` is not present in the lookup frame table. 553 // - `var` is a callback, rather than a string. 554 absl::string_view LookupVar(absl::string_view var); 555 556 // Pushes a new annotation lookup frame that stores `vars` by reference. 557 // 558 // Returns an RAII object that pops the lookup frame. 559 template <typename Map> 560 auto WithAnnotations(const Map* vars); 561 562 // Pushes a new variable lookup frame that stores `vars` by value. 563 // 564 // When writing `WithAnnotations({...})`, this is the overload that will be 565 // called, and it will synthesize an `absl::flat_hash_map`. 566 // 567 // Returns an RAII object that pops the lookup frame. 568 template <typename Map = absl::flat_hash_map<std::string, AnnotationRecord>> 569 auto WithAnnotations(Map&& vars); 570 571 // Increases the indentation by `indent` spaces; when nullopt, increments 572 // indentation by the configured default spaces_per_indent. 573 // 574 // Returns an RAII object that removes this indentation. 575 auto WithIndent(absl::optional<size_t> indent = absl::nullopt) { 576 size_t delta = indent.value_or(options_.spaces_per_indent); 577 indent_ += delta; 578 return absl::MakeCleanup([this, delta] { indent_ -= delta; }); 579 } 580 581 // Emits formatted source code to the underlying output. See the class 582 // documentation for more details. 
583 // 584 // `format` MUST be a string constant. 585 void Emit(absl::string_view format, 586 SourceLocation loc = SourceLocation::current()); 587 588 // Emits formatted source code to the underlying output, injecting 589 // additional variables as a lookup frame for just this call. See the class 590 // documentation for more details. 591 // 592 // `format` MUST be a string constant. 593 void Emit(absl::Span<const Sub> vars, absl::string_view format, 594 SourceLocation loc = SourceLocation::current()); 595 596 // Write a string directly to the underlying output, performing no formatting 597 // of any sort. PrintRaw(absl::string_view data)598 void PrintRaw(absl::string_view data) { WriteRaw(data.data(), data.size()); } 599 600 // Write a string directly to the underlying output, performing no formatting 601 // of any sort. 602 void WriteRaw(const char* data, size_t size); 603 604 // True if any write to the underlying stream failed. (We don't just 605 // crash in this case because this is an I/O failure, not a programming 606 // error.) failed()607 bool failed() const { return failed_; } 608 609 // -- Old-style API below; to be deprecated and removed. -- 610 // TODO: Deprecate these APIs. 611 612 template < 613 typename Map = absl::flat_hash_map<absl::string_view, absl::string_view>> 614 void Print(const Map& vars, absl::string_view text); 615 616 template <typename... Args> 617 void Print(absl::string_view text, const Args&... args); 618 619 // Link a substitution variable emitted by the last call to Print to the 620 // object described by descriptor. 621 template <typename SomeDescriptor> 622 void Annotate( 623 absl::string_view varname, const SomeDescriptor* descriptor, 624 absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt) { 625 Annotate(varname, varname, descriptor, semantic); 626 } 627 628 // Link the output range defined by the substitution variables as emitted by 629 // the last call to Print to the object described by descriptor. 
The range 630 // begins at begin_varname's value and ends after the last character of the 631 // value substituted for end_varname. 632 template <typename Desc> 633 void Annotate( 634 absl::string_view begin_varname, absl::string_view end_varname, 635 const Desc* descriptor, 636 absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt); 637 638 // Link a substitution variable emitted by the last call to Print to the file 639 // with path file_name. 640 void Annotate( 641 absl::string_view varname, absl::string_view file_name, 642 absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt) { 643 Annotate(varname, varname, file_name, semantic); 644 } 645 646 // Link the output range defined by the substitution variables as emitted by 647 // the last call to Print to the file with path file_name. The range begins 648 // at begin_varname's value and ends after the last character of the value 649 // substituted for end_varname. 650 void Annotate( 651 absl::string_view begin_varname, absl::string_view end_varname, 652 absl::string_view file_name, 653 absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt) { 654 if (options_.annotation_collector == nullptr) { 655 return; 656 } 657 658 Annotate(begin_varname, end_varname, file_name, {}, semantic); 659 } 660 661 // Indent text by `options.spaces_per_indent`; undone by Outdent(). Indent()662 void Indent() { indent_ += options_.spaces_per_indent; } 663 664 // Undoes a call to Indent(). 665 void Outdent(); 666 667 // FormatInternal is a helper function not meant to use directly, use 668 // compiler::cpp::Formatter instead. 669 template <typename Map = absl::flat_hash_map<std::string, std::string>> 670 void FormatInternal(absl::Span<const std::string> args, const Map& vars, 671 absl::string_view format); 672 673 // Injects a substitution listener for the lifetime of the RAII object 674 // returned. 
675 // While the listener is active it will receive a callback on each 676 // substitution label found. 677 // This can be used to add basic verification on top of emit routines. WithSubstitutionListener(absl::AnyInvocable<void (absl::string_view,SourceLocation)> listener)678 auto WithSubstitutionListener( 679 absl::AnyInvocable<void(absl::string_view, SourceLocation)> listener) { 680 ABSL_CHECK(substitution_listener_ == nullptr); 681 substitution_listener_ = std::move(listener); 682 return absl::MakeCleanup([this] { substitution_listener_ = nullptr; }); 683 } 684 685 private: 686 struct PrintOptions; 687 struct Format; 688 689 // Helper type for wrapping a variable substitution expansion result. 690 template <bool owned> 691 struct ValueImpl; 692 693 using ValueView = ValueImpl</*owned=*/false>; 694 using Value = ValueImpl</*owned=*/true>; 695 696 // Provide a helper to use heterogeneous lookup when it's available. 697 template <typename...> 698 using Void = void; 699 700 template <typename Map, typename = void> 701 struct HasHeteroLookup : std::false_type {}; 702 template <typename Map> 703 struct HasHeteroLookup<Map, Void<decltype(std::declval<Map>().find( 704 std::declval<absl::string_view>()))>> 705 : std::true_type {}; 706 707 template <typename Map, 708 typename = std::enable_if_t<HasHeteroLookup<Map>::value>> 709 static absl::string_view ToStringKey(absl::string_view x) { 710 return x; 711 } 712 713 template <typename Map, 714 typename = std::enable_if_t<!HasHeteroLookup<Map>::value>> 715 static std::string ToStringKey(absl::string_view x) { 716 return std::string(x); 717 } 718 719 Format TokenizeFormat(absl::string_view format_string, 720 const PrintOptions& options); 721 722 // Emit an annotation for the range defined by the given substitution 723 // variables, as set by the most recent call to PrintImpl() that set 724 // `use_substitution_map` to true. 
725 // 726 // The range begins at the start of `begin_varname`'s value and ends after the 727 // last byte of `end_varname`'s value. 728 // 729 // `begin_varname` and `end_varname may` refer to the same variable. 730 void Annotate(absl::string_view begin_varname, absl::string_view end_varname, 731 absl::string_view file_path, const std::vector<int>& path, 732 absl::optional<AnnotationCollector::Semantic> semantic); 733 734 // The core printing implementation. There are three public entry points, 735 // which enable different slices of functionality that are controlled by the 736 // `opts` argument. 737 void PrintImpl(absl::string_view format, absl::Span<const std::string> args, 738 PrintOptions opts); 739 740 // This is a private function only so that it can see PrintOptions. 741 static bool Validate(bool cond, PrintOptions opts, 742 absl::FunctionRef<std::string()> message); 743 static bool Validate(bool cond, PrintOptions opts, absl::string_view message); 744 745 // Performs calls to `Validate()` to check that `index < current_arg_index` 746 // and `index < args_len`, producing appropriate log lines if the checks fail, 747 // and crashing if necessary. 748 bool ValidateIndexLookupInBounds(size_t index, size_t current_arg_index, 749 size_t args_len, PrintOptions opts); 750 751 // Prints indentation if `at_start_of_line_` is true. 752 void IndentIfAtStart(); 753 754 // Prints a codegen trace, for the given location in the compiler's source. 755 void PrintCodegenTrace(absl::optional<SourceLocation> loc); 756 757 // The core implementation for "fully-elaborated" variable definitions. 758 auto WithDefs(absl::Span<const Sub> vars, bool allow_callbacks); 759 760 // Returns the start and end of the value that was substituted in place of 761 // the variable `varname` in the last call to PrintImpl() (with 762 // `use_substitution_map` set), if such a variable was substituted exactly 763 // once. 
absl::optional<std::pair<size_t, size_t>> GetSubstitutionRange(
    absl::string_view varname, PrintOptions opts);

google::protobuf::io::zc_sink_internal::ZeroCopyStreamByteSink sink_;
Options options_;
size_t indent_ = 0;
// Consulted by IndentIfAtStart(), which prints indentation only when this is
// true.
bool at_start_of_line_ = true;
bool failed_ = false;

size_t paren_depth_ = 0;
std::vector<size_t> paren_depth_to_omit_;

// Stack of variable lookup frames. WithVars() and WithDefs() push a frame
// and hand back a cleanup object that pops it. Each frame maps a variable
// name to its value, or to nullopt when the frame does not define it.
std::vector<std::function<absl::optional<ValueView>(absl::string_view)>>
    var_lookups_;

// Stack of annotation lookup frames, managed the same way by
// WithAnnotations() and WithDefs().
std::vector<
    std::function<absl::optional<AnnotationRecord>(absl::string_view)>>
    annotation_lookups_;

// If set, we invoke this when we do a label substitution. This can be used to
// verify consistency of the generated code while we generate it.
absl::AnyInvocable<void(absl::string_view, SourceLocation)>
    substitution_listener_;

// A map from variable name to [start, end) offsets in the output buffer.
//
// This stores the data looked up by GetSubstitutionRange().
absl::flat_hash_map<std::string, std::pair<size_t, size_t>> substitutions_;
// Keeps track of the keys in `substitutions_` that need to be updated when
// indents are inserted. These are keys that refer to the beginning of the
// current line.
std::vector<std::string> line_start_variables_;
};

// Options for PrintImpl().
//
// Each public entry point builds one of these, overrides the fields it cares
// about, and passes it by value to PrintImpl().
struct Printer::PrintOptions {
  // The callsite of the public entry-point. Only Emit() sets this.
  absl::optional<SourceLocation> loc;
  // If set, Validate() calls will not crash the program.
  bool checks_are_debug_only = false;
  // If set, the `substitutions_` map will be populated as variables are
  // substituted.
  bool use_substitution_map = false;
  // If set, the ${1$ and $}$ forms will be substituted. These are used for
  // a slightly janky annotation-insertion mechanism in FormatInternal, that
  // requires that passed-in substitution variables be serialized protos.
  bool use_curly_brace_substitutions = false;
  // If set, the $n$ forms will be substituted, pulling from the `args`
  // argument to PrintImpl().
  bool allow_digit_substitutions = true;
  // If set, when a variable substitution with spaces in it, such as $ var$,
  // is encountered, the spaces are stripped, so that it is as if it was
  // $var$. If $var$ substitutes to a non-empty string, the removed spaces are
  // printed around the substituted value.
  //
  // See the class documentation for more information on this behavior.
  bool strip_spaces_around_vars = true;
  // If set, leading whitespace will be stripped from the format string to
  // determine the "extraneous indentation" that is produced when the format
  // string is a C++ raw string. This is used to remove leading spaces from
  // a raw string that would otherwise result in erratic indentation in the
  // output.
  bool strip_raw_string_indentation = false;
  // If set, the annotation lookup frames are searched, per the annotation
  // semantics of Emit() described in the class documentation.
  bool use_annotation_frames = true;
};

// Helper type for wrapping a variable substitution expansion result.
//
// A value is either a string or a callback. The `owned` parameter selects
// the string representation: std::string when true, absl::string_view when
// false (see StringType below).
template <bool owned>
struct Printer::ValueImpl {
 private:
  // Trait that is true only for instantiations of ValueImpl itself; used to
  // keep the converting constructor from hijacking copies.
  template <typename T>
  struct IsSubImpl : std::false_type {};
  template <bool a>
  struct IsSubImpl<ValueImpl<a>> : std::true_type {};

 public:
  using StringType = std::conditional_t<owned, std::string, absl::string_view>;
  // These callbacks return false if this is a recursive call.
  using Callback = std::function<bool()>;
  using StringOrCallback = absl::variant<StringType, Callback>;

  ValueImpl() = default;

  // Converting constructor from anything ToStringOrCallback() accepts: a
  // nullary callable, a StringType, or an absl::AlphaNum-compatible value.
  // Callback values default `consume_after` to ";,".
  //
  // This is a template to avoid colliding with the copy constructor below.
  template <typename Value,
            typename = std::enable_if_t<
                !IsSubImpl<absl::remove_cvref_t<Value>>::value>>
  ValueImpl(Value&& value)  // NOLINT
      : value(ToStringOrCallback(std::forward<Value>(value), Rank2{})) {
    if (absl::holds_alternative<Callback>(this->value)) {
      consume_after = ";,";
    }
  }

  // Copy ctor/assign allow interconversion of the two template parameters.
  template <bool that_owned>
  ValueImpl(const ValueImpl<that_owned>& that) {  // NOLINT
    *this = that;
  }

  template <bool that_owned>
  ValueImpl& operator=(const ValueImpl<that_owned>& that);

  // Returns a pointer to the held string, or nullptr if this holds a
  // callback.
  const StringType* AsString() const {
    return absl::get_if<StringType>(&value);
  }

  // Returns a pointer to the held callback, or nullptr if this holds a
  // string.
  const Callback* AsCallback() const { return absl::get_if<Callback>(&value); }

  StringOrCallback value;
  // Defaults to ";," for callback values; Sub::WithSuffix() overwrites it.
  // NOTE(review): presumably the characters the printer swallows right after
  // the substitution -- confirm against PrintImpl().
  std::string consume_after;
  // Set by Sub::ConditionalFunctionCall().
  // NOTE(review): presumably makes the printer drop a following "(...)" when
  // the value is empty -- confirm against PrintImpl().
  bool consume_parens_if_empty = false;

 private:
  // go/ranked-overloads
  //
  // The constructor passes Rank2{}; because Rank2 derives from Rank1 and
  // Rank1 from Rank0, overload resolution prefers the Rank2 overload when it
  // is viable, then Rank1, then Rank0.
  struct Rank0 {};
  struct Rank1 : Rank0 {};
  struct Rank2 : Rank1 {};

  // Dummy template for delayed instantiation, which is required for the
  // static assert below to kick in only when this function is called when it
  // shouldn't.
  //
  // This is done to produce a better error message than the "candidate does
  // not match" SFINAE errors.
  //
  // NOTE(review): no static_assert is visible in this overload or its
  // out-of-line definition; the paragraph above may be stale.
  //
  // Participates in overload resolution only when `cb()` is well-formed.
  template <typename Cb, typename = decltype(std::declval<Cb&&>()())>
  StringOrCallback ToStringOrCallback(Cb&& cb, Rank2);

  // Separate from the AlphaNum overload to avoid copies when taking strings
  // by value when in `owned` mode.
  StringOrCallback ToStringOrCallback(StringType s, Rank1) { return s; }

  // Fallback for anything absl::AlphaNum accepts; stores the AlphaNum's
  // Piece() as a StringType.
  StringOrCallback ToStringOrCallback(const absl::AlphaNum& s, Rank0) {
    return StringType(s.Piece());
  }
};

// Converting assignment between ValueImpl<true> and ValueImpl<false>.
// Strings are converted to this instantiation's StringType; callbacks are
// copied as-is. The two consume_* settings are copied along with the value.
template <bool owned>
template <bool that_owned>
Printer::ValueImpl<owned>& Printer::ValueImpl<owned>::operator=(
    const ValueImpl<that_owned>& that) {
  // Cast to void* is required, since this and that may potentially be of
  // different types (due to the `that_owned` parameter).
  if (static_cast<const void*>(this) == static_cast<const void*>(&that)) {
    return *this;
  }

  using ThatStringType = typename ValueImpl<that_owned>::StringType;

  if (auto* str = absl::get_if<ThatStringType>(&that.value)) {
    value = StringType(*str);
  } else {
    value = absl::get<Callback>(that.value);
  }

  consume_after = that.consume_after;
  consume_parens_if_empty = that.consume_parens_if_empty;
  return *this;
}

// Wraps a nullary callable in a Callback that guards against re-entrancy:
// the wrapper returns false without invoking `cb` if it is already running,
// and returns true after a completed invocation.
template <bool owned>
template <typename Cb, typename /*Sfinae*/>
auto Printer::ValueImpl<owned>::ToStringOrCallback(Cb&& cb, Rank2)
    -> StringOrCallback {
  return Callback(
      [cb = std::forward<Cb>(cb), is_called = false]() mutable -> bool {
        if (is_called) {
          // Catch whether or not this function is being called recursively.
          return false;
        }
        // The flag is held only for the duration of the call, so the
        // callback can be invoked again once it has returned.
        is_called = true;
        cb();
        is_called = false;
        return true;
      });
}

// Describes what a substituted variable should be annotated with: a file
// path, a location path within that file, and an optional semantic.
struct Printer::AnnotationRecord {
  std::vector<int> path;
  std::string file_path;
  absl::optional<AnnotationCollector::Semantic> semantic;

  // AnnotationRecord's constructors are *not* marked as explicit,
  // specifically so that it is possible to construct a
  // map<string, AnnotationRecord> by writing
  //
  // {{"foo", my_cool_descriptor}, {"bar", "file.proto"}}

  // Builds a record from anything convertible to std::string, used as the
  // file path. `path` is left empty.
  template <
      typename String,
      std::enable_if_t<std::is_convertible<const String&, std::string>::value,
                       int> = 0>
  AnnotationRecord(  // NOLINT(google-explicit-constructor)
      const String& file_path,
      absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt)
      : file_path(file_path), semantic(semantic) {}

  // Builds a record from a descriptor-like object: the file path comes from
  // `desc->file()->name()` and `path` is filled by
  // `desc->GetLocationPath()`. `desc` is dereferenced unconditionally, so it
  // must not be null.
  template <typename Desc,
            // This SFINAE clause excludes char* from matching this
            // constructor.
            std::enable_if_t<std::is_class<Desc>::value, int> = 0>
  AnnotationRecord(  // NOLINT(google-explicit-constructor)
      const Desc* desc,
      absl::optional<AnnotationCollector::Semantic> semantic = absl::nullopt)
      : file_path(desc->file()->name()), semantic(semantic) {
    desc->GetLocationPath(&path);
  }
};

// A single named substitution: a key, its value (string or callback), and an
// optional annotation. The modifier methods are rvalue-qualified so they can
// be chained on a temporary.
class Printer::Sub {
 public:
  // `value` may be anything Printer::Value can be constructed from.
  template <typename Value>
  Sub(std::string key, Value&& value)
      : key_(std::move(key)),
        value_(std::forward<Value>(value)),
        annotation_(absl::nullopt) {}

  // Attaches `annotation` to this substitution.
  Sub AnnotatedAs(AnnotationRecord annotation) && {
    annotation_ = std::move(annotation);
    return std::move(*this);
  }

  // Replaces the value's `consume_after` string with `sub_suffix`.
  Sub WithSuffix(std::string sub_suffix) && {
    value_.consume_after = std::move(sub_suffix);
    return std::move(*this);
  }

  // Turns on the value's `consume_parens_if_empty` flag.
  Sub ConditionalFunctionCall() && {
    value_.consume_parens_if_empty = true;
    return std::move(*this);
  }

  absl::string_view key() const { return key_; }

  // Returns the string value. Fails an ABSL_CHECK if this Sub holds a
  // callback instead of a string.
  absl::string_view value() const {
    const auto* str = value_.AsString();
    ABSL_CHECK(str != nullptr)
        << "could not find " << key() << "; found callback instead";
    return *str;
  }

 private:
  friend class Printer;

  std::string key_;
  Value value_;
  absl::optional<AnnotationRecord> annotation_;
};

// Pushes a variable lookup frame backed by `*vars` and returns a cleanup
// object that pops it. Only the pointer is captured, so the map must stay
// alive (and should not be invalidated) while the frame is on the stack.
// NOTE(review): ToStringKey<Map> is declared outside this section; presumably
// it converts the string_view to the map's key type -- confirm.
template <typename Map>
auto Printer::WithVars(const Map* vars) {
  var_lookups_.emplace_back(
      [vars](absl::string_view var) -> absl::optional<ValueView> {
        auto it = vars->find(ToStringKey<Map>(var));
        if (it == vars->end()) {
          return absl::nullopt;
        }
        return ValueView(it->second);
      });
  return absl::MakeCleanup([this] { var_lookups_.pop_back(); });
}

// Same as above, but the map is forwarded into the frame, so the frame owns
// its own copy (or the moved-from contents) of `vars`.
template <typename Map, typename, typename /*Sfinae*/>
auto Printer::WithVars(Map&& vars) {
  var_lookups_.emplace_back(
      [vars = std::forward<Map>(vars)](
          absl::string_view var) -> absl::optional<ValueView> {
        auto it = vars.find(ToStringKey<Map>(var));
        if (it == vars.end()) {
          return absl::nullopt;
        }
        return ValueView(it->second);
      });
  return absl::MakeCleanup([this] { var_lookups_.pop_back(); });
}

// Pushes an annotation lookup frame backed by `*vars` and returns a cleanup
// object that pops it. Only the pointer is captured, so the map must outlive
// the frame. Mapped values are converted to AnnotationRecord on each lookup.
template <typename Map>
auto Printer::WithAnnotations(const Map* vars) {
  annotation_lookups_.emplace_back(
      [vars](absl::string_view var) -> absl::optional<AnnotationRecord> {
        auto it = vars->find(ToStringKey<Map>(var));
        if (it == vars->end()) {
          return absl::nullopt;
        }
        return AnnotationRecord(it->second);
      });
  return absl::MakeCleanup([this] { annotation_lookups_.pop_back(); });
}

// Same as above, but the map is forwarded into the frame, which then owns it.
template <typename Map>
auto Printer::WithAnnotations(Map&& vars) {
  annotation_lookups_.emplace_back(
      [vars = std::forward<Map>(vars)](
          absl::string_view var) -> absl::optional<AnnotationRecord> {
        auto it = vars.find(ToStringKey<Map>(var));
        if (it == vars.end()) {
          return absl::nullopt;
        }
        return AnnotationRecord(it->second);
      });
  return absl::MakeCleanup([this] { annotation_lookups_.pop_back(); });
}

// Convenience overload: Emit() with no local substitutions.
inline void Printer::Emit(absl::string_view format, SourceLocation loc) {
  Emit({}, format, loc);
}

// Prints `text`, resolving variables from `vars`. Validation failures are
// debug-only, the substitution map is populated, and $n$ digit substitutions
// are disabled.
template <typename Map>
void Printer::Print(const Map& vars, absl::string_view text) {
  PrintOptions opts;
  opts.checks_are_debug_only = true;
  opts.use_substitution_map = true;
  opts.allow_digit_substitutions = false;

  // The frame is popped when `pop` is destroyed at the end of this function.
  auto pop = WithVars(&vars);
  PrintImpl(text, {}, opts);
}

// Prints `text` with variables given inline as alternating key, value
// arguments; an odd number of arguments is rejected at compile time.
template <typename... Args>
void Printer::Print(absl::string_view text, const Args&... args) {
  static_assert(sizeof...(args) % 2 == 0, "");

  // Include an extra arg, since a zero-length array is ill-formed, and
  // MSVC complains.
  absl::string_view vars[] = {args..., ""};
  absl::flat_hash_map<absl::string_view, absl::string_view> map;
  map.reserve(sizeof...(args) / 2);
  // Even positions are keys, odd positions are the matching values.
  for (size_t i = 0; i < sizeof...(args); i += 2) {
    map.emplace(vars[i], vars[i + 1]);
  }

  Print(map, text);
}

// Descriptor-based convenience wrapper around the path-based Annotate(): the
// location path and file name are taken from `descriptor`. Does nothing when
// no annotation collector is configured, which also skips computing the
// path.
template <typename Desc>
void Printer::Annotate(absl::string_view begin_varname,
                       absl::string_view end_varname, const Desc* descriptor,
                       absl::optional<AnnotationCollector::Semantic> semantic) {
  if (options_.annotation_collector == nullptr) {
    return;
  }

  std::vector<int> path;
  descriptor->GetLocationPath(&path);
  Annotate(begin_varname, end_varname, descriptor->file()->name(), path,
           semantic);
}

// Prints `format` with the curly-brace substitution forms enabled, resolving
// named variables from `vars` and positional ones from `args`.
template <typename Map>
void Printer::FormatInternal(absl::Span<const std::string> args,
                             const Map& vars, absl::string_view format) {
  PrintOptions opts;
  opts.use_curly_brace_substitutions = true;
  // Same as the PrintOptions default; spelled out explicitly here.
  opts.strip_spaces_around_vars = true;

  auto pop = WithVars(&vars);
  PrintImpl(format, args, opts);
}

// Builds lookup frames from a list of Subs and returns a cleanup object that
// pops them. A variable frame is always pushed; an annotation frame is
// pushed only if at least one Sub carries an annotation. Fails an ABSL_CHECK
// on a duplicate key, or on a callback value when `allow_callbacks` is
// false.
inline auto Printer::WithDefs(absl::Span<const Sub> vars,
                              bool allow_callbacks) {
  absl::flat_hash_map<std::string, Value> var_map;
  var_map.reserve(vars.size());

  absl::flat_hash_map<std::string, AnnotationRecord> annotation_map;

  for (const auto& var : vars) {
    ABSL_CHECK(allow_callbacks || var.value_.AsCallback() == nullptr)
        << "callback arguments are not permitted in this position";
    auto result = var_map.insert({var.key_, var.value_});
    ABSL_CHECK(result.second)
        << "repeated variable in Emit() or WithVars() call: \"" << var.key_
        << "\"";
    if (var.annotation_.has_value()) {
      annotation_map.insert({var.key_, *var.annotation_});
    }
  }

  // The maps are moved into the frames, so the frames own the values.
  var_lookups_.emplace_back([map = std::move(var_map)](absl::string_view var)
                                -> absl::optional<ValueView> {
    auto it = map.find(var);
    if (it == map.end()) {
      return absl::nullopt;
    }
    return ValueView(it->second);
  });

  // Recorded before the move below so the cleanup knows whether an
  // annotation frame was pushed and must be popped.
  bool has_annotations = !annotation_map.empty();
  if (has_annotations) {
    annotation_lookups_.emplace_back(
        [map = std::move(annotation_map)](
            absl::string_view var) -> absl::optional<AnnotationRecord> {
          auto it = map.find(var);
          if (it == map.end()) {
            return absl::nullopt;
          }
          return it->second;
        });
  }

  return absl::MakeCleanup([this, has_annotations] {
    var_lookups_.pop_back();
    if (has_annotations) {
      annotation_lookups_.pop_back();
    }
  });
}

// Span-of-Sub overload of WithVars(); callback values are rejected here.
inline auto Printer::WithVars(absl::Span<const Sub> vars) {
  return WithDefs(vars, /*allow_callbacks=*/false);
}
}  // namespace io
}  // namespace protobuf
}  // namespace google

#include "google/protobuf/port_undef.inc"

#endif  // GOOGLE_PROTOBUF_IO_PRINTER_H__