/*
 * Copyright (C) 2025 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#include "src/protozero/text_to_proto/text_to_proto.h"

#include <algorithm>
#include <cctype>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <stack>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "perfetto/base/logging.h"
#include "perfetto/base/status.h"
#include "perfetto/ext/base/status_or.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/ext/base/string_view.h"
#include "perfetto/ext/base/utils.h"
#include "perfetto/protozero/message.h"
#include "perfetto/protozero/message_handle.h"
#include "perfetto/protozero/scattered_heap_buffer.h"

#include "protos/perfetto/common/descriptor.gen.h"
47 
48 namespace protozero {
49 
50 using perfetto::protos::gen::DescriptorProto;
51 using perfetto::protos::gen::EnumDescriptorProto;
52 using perfetto::protos::gen::EnumValueDescriptorProto;
53 using perfetto::protos::gen::FieldDescriptorProto;
54 using perfetto::protos::gen::FileDescriptorSet;
55 
56 namespace {
57 
// Returns true if |c| is an ASCII octal digit ('0'..'7').
constexpr bool IsOct(char c) {
  return (c >= '0' && c <= '7');
}
61 
// Returns true if |c| is an ASCII decimal digit ('0'..'9').
constexpr bool IsDigit(char c) {
  return (c >= '0' && c <= '9');
}
65 
// Returns true if |c| may begin an identifier: an ASCII letter or '_'.
constexpr bool IsIdentifierStart(char c) {
  return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_';
}
69 
IsIdentifierBody(char c)70 constexpr bool IsIdentifierBody(char c) {
71   return IsIdentifierStart(c) || IsDigit(c);
72 }
73 
FieldToTypeName(const FieldDescriptorProto * field)74 const char* FieldToTypeName(const FieldDescriptorProto* field) {
75   switch (field->type()) {
76     case FieldDescriptorProto::TYPE_UINT64:
77       return "uint64";
78     case FieldDescriptorProto::TYPE_UINT32:
79       return "uint32";
80     case FieldDescriptorProto::TYPE_INT64:
81       return "int64";
82     case FieldDescriptorProto::TYPE_SINT64:
83       return "sint64";
84     case FieldDescriptorProto::TYPE_INT32:
85       return "int32";
86     case FieldDescriptorProto::TYPE_SINT32:
87       return "sint32";
88     case FieldDescriptorProto::TYPE_FIXED64:
89       return "fixed64";
90     case FieldDescriptorProto::TYPE_SFIXED64:
91       return "sfixed64";
92     case FieldDescriptorProto::TYPE_FIXED32:
93       return "fixed32";
94     case FieldDescriptorProto::TYPE_SFIXED32:
95       return "sfixed32";
96     case FieldDescriptorProto::TYPE_DOUBLE:
97       return "double";
98     case FieldDescriptorProto::TYPE_FLOAT:
99       return "float";
100     case FieldDescriptorProto::TYPE_BOOL:
101       return "bool";
102     case FieldDescriptorProto::TYPE_STRING:
103       return "string";
104     case FieldDescriptorProto::TYPE_BYTES:
105       return "bytes";
106     case FieldDescriptorProto::TYPE_GROUP:
107       return "group";
108     case FieldDescriptorProto::TYPE_MESSAGE:
109       return "message";
110     case FieldDescriptorProto::TYPE_ENUM:
111       return "enum";
112   }
113   // For gcc
114   PERFETTO_FATAL("Non complete switch");
115 }
116 
Format(const char * fmt,const std::map<std::string,std::string> & args)117 std::string Format(const char* fmt,
118                    const std::map<std::string, std::string>& args) {
119   std::string result(fmt);
120   for (const auto& key_value : args) {
121     size_t start = result.find(key_value.first);
122     PERFETTO_CHECK(start != std::string::npos);
123     result.replace(start, key_value.first.size(), key_value.second);
124     PERFETTO_CHECK(result.find(key_value.first) == std::string::npos);
125   }
126   return result;
127 }
128 
// States of the character-driven lexer implemented by Parse().
enum ParseState {
  // Between fields: expecting a key, '}', ';', a comment or whitespace.
  kWaitingForKey,
  // Inside the identifier that names a field.
  kReadingKey,
  // A key has been read: expecting ':', '{' or the first character of a value.
  kWaitingForValue,
  // Inside a double-quoted string, after the opening quote.
  kReadingStringValue,
  // Directly after a backslash inside a string.
  kReadingStringEscape,
  // Inside a token that started with '-', '.' or a digit.
  kReadingNumericValue,
  // Inside a bare identifier value (boolean or enum name).
  kReadingIdentifierValue,
};
138 
139 struct Token {
140   size_t offset;
141   size_t column;
142   size_t row;
143   perfetto::base::StringView txt;
144 
sizeprotozero::__anon9a85fc900111::Token145   size_t size() const { return txt.size(); }
ToStdStringprotozero::__anon9a85fc900111::Token146   std::string ToStdString() const { return txt.ToStdString(); }
147 };
148 
149 struct ParserDelegateContext {
150   const DescriptorProto* descriptor;
151   protozero::Message* message;
152   std::set<std::string> seen_fields;
153 };
154 
// Accumulates human readable, compiler-style diagnostics for a text proto.
//
// Every error is rendered as three lines:
//   <file>:<row>:<column> error: <message>
//   <the offending input line>
//   <a '^~~~' marker under the offending span>
//
// |config| is only viewed, never copied: the caller must keep the underlying
// buffer alive for as long as AddError() may be called. The view does not need
// to be NUL-terminated.
class ErrorReporter {
 public:
  ErrorReporter(std::string file_name, std::string_view config)
      : file_name_(std::move(file_name)), config_(config) {}

  // Records an error covering |length| characters starting at |row|:|column|.
  // After the first call success() returns false.
  void AddError(size_t row,
                size_t column,
                size_t length,
                const std::string& message) {
    // Protobuf uses 1-indexed for row and column. Although in some rare cases
    // they can be 0 if it can't locate the error.
    row = row > 0 ? row - 1 : 0;
    column = column > 0 ? column - 1 : 0;
    parsed_successfully_ = false;
    const std::string_view line = ExtractLine(row);

    // Spaces up to the error column, then '^' followed by '~' for the span.
    std::string guide(column + length, ' ');
    for (size_t i = column; i < column + length; i++) {
      guide[i] = i == column ? '^' : '~';
    }
    error_ += file_name_ + ":" + std::to_string(row + 1) + ":" +
              std::to_string(column + 1) + " error: " + message + "\n";
    error_.append(line);
    error_ += "\n";
    error_ += guide + "\n";
  }

  bool success() const { return parsed_successfully_; }
  const std::string& error() const { return error_; }

 private:
  // Returns the 0-indexed |line| of the input without its trailing '\n', or an
  // empty view if the input has fewer lines. All scanning is bounded by the
  // size of the view; nothing past its end is ever read.
  std::string_view ExtractLine(size_t line) const {
    size_t start = 0;
    for (size_t i = 0; i < line; i++) {
      const size_t newline = config_.find('\n', start);
      if (newline == std::string_view::npos)
        return std::string_view();
      start = newline + 1;
    }
    const size_t end = config_.find('\n', start);
    const size_t count =
        end == std::string_view::npos ? std::string_view::npos : end - start;
    return config_.substr(start, count);
  }

  bool parsed_successfully_ = true;
  std::string file_name_;
  std::string error_;
  std::string_view config_;
};
206 
207 class ParserDelegate {
208  public:
ParserDelegate(const DescriptorProto * descriptor,protozero::Message * message,ErrorReporter * reporter,std::map<std::string,const DescriptorProto * > name_to_descriptor,std::map<std::string,const EnumDescriptorProto * > name_to_enum)209   ParserDelegate(
210       const DescriptorProto* descriptor,
211       protozero::Message* message,
212       ErrorReporter* reporter,
213       std::map<std::string, const DescriptorProto*> name_to_descriptor,
214       std::map<std::string, const EnumDescriptorProto*> name_to_enum)
215       : reporter_(reporter),
216         name_to_descriptor_(std::move(name_to_descriptor)),
217         name_to_enum_(std::move(name_to_enum)) {
218     ctx_.push(ParserDelegateContext{descriptor, message, {}});
219   }
220 
NumericField(const Token & key,const Token & value)221   void NumericField(const Token& key, const Token& value) {
222     const FieldDescriptorProto* field =
223         FindFieldByName(key, value,
224                         {
225                             FieldDescriptorProto::TYPE_UINT64,
226                             FieldDescriptorProto::TYPE_UINT32,
227                             FieldDescriptorProto::TYPE_INT64,
228                             FieldDescriptorProto::TYPE_SINT64,
229                             FieldDescriptorProto::TYPE_INT32,
230                             FieldDescriptorProto::TYPE_SINT32,
231                             FieldDescriptorProto::TYPE_FIXED64,
232                             FieldDescriptorProto::TYPE_SFIXED64,
233                             FieldDescriptorProto::TYPE_FIXED32,
234                             FieldDescriptorProto::TYPE_SFIXED32,
235                             FieldDescriptorProto::TYPE_DOUBLE,
236                             FieldDescriptorProto::TYPE_FLOAT,
237                         });
238     if (!field)
239       return;
240     const auto& field_type = field->type();
241     switch (field_type) {
242       case FieldDescriptorProto::TYPE_UINT64:
243         return VarIntField<uint64_t>(field, value);
244       case FieldDescriptorProto::TYPE_UINT32:
245         return VarIntField<uint32_t>(field, value);
246       case FieldDescriptorProto::TYPE_INT64:
247       case FieldDescriptorProto::TYPE_SINT64:
248         return VarIntField<int64_t>(field, value);
249       case FieldDescriptorProto::TYPE_INT32:
250       case FieldDescriptorProto::TYPE_SINT32:
251         return VarIntField<int32_t>(field, value);
252 
253       case FieldDescriptorProto::TYPE_FIXED64:
254       case FieldDescriptorProto::TYPE_SFIXED64:
255         return FixedField<int64_t>(field, value);
256 
257       case FieldDescriptorProto::TYPE_FIXED32:
258       case FieldDescriptorProto::TYPE_SFIXED32:
259         return FixedField<int32_t>(field, value);
260 
261       case FieldDescriptorProto::TYPE_DOUBLE:
262         return FixedFloatField<double>(field, value);
263       case FieldDescriptorProto::TYPE_FLOAT:
264         return FixedFloatField<float>(field, value);
265 
266       case FieldDescriptorProto::TYPE_BOOL:
267       case FieldDescriptorProto::TYPE_STRING:
268       case FieldDescriptorProto::TYPE_BYTES:
269       case FieldDescriptorProto::TYPE_GROUP:
270       case FieldDescriptorProto::TYPE_MESSAGE:
271       case FieldDescriptorProto::TYPE_ENUM:
272         PERFETTO_FATAL("Invalid type");
273     }
274   }
275 
StringField(const Token & key,const Token & value)276   void StringField(const Token& key, const Token& value) {
277     const FieldDescriptorProto* field =
278         FindFieldByName(key, value,
279                         {
280                             FieldDescriptorProto::TYPE_STRING,
281                             FieldDescriptorProto::TYPE_BYTES,
282                         });
283     if (!field)
284       return;
285     auto field_id = static_cast<uint32_t>(field->number());
286     const auto& field_type = field->type();
287     PERFETTO_CHECK(field_type == FieldDescriptorProto::TYPE_STRING ||
288                    field_type == FieldDescriptorProto::TYPE_BYTES);
289 
290     std::unique_ptr<char, perfetto::base::FreeDeleter> s(
291         static_cast<char*>(malloc(value.size())));
292     size_t j = 0;
293     const char* const txt = value.txt.data();
294     for (size_t i = 0; i < value.size(); i++) {
295       char c = txt[i];
296       if (c == '\\') {
297         if (i + 1 >= value.size()) {
298           // This should be caught by the lexer.
299           PERFETTO_FATAL("Escape at end of string.");
300           return;
301         }
302         char next = txt[++i];
303         switch (next) {
304           case '\\':
305           case '\'':
306           case '"':
307           case '?':
308             s.get()[j++] = next;
309             break;
310           case 'a':
311             s.get()[j++] = '\a';
312             break;
313           case 'b':
314             s.get()[j++] = '\b';
315             break;
316           case 'f':
317             s.get()[j++] = '\f';
318             break;
319           case 'n':
320             s.get()[j++] = '\n';
321             break;
322           case 'r':
323             s.get()[j++] = '\r';
324             break;
325           case 't':
326             s.get()[j++] = '\t';
327             break;
328           case 'v':
329             s.get()[j++] = '\v';
330             break;
331           case '0':
332           case '1':
333           case '2':
334           case '3':
335           case '4':
336           case '5':
337           case '6':
338           case '7':
339           case '8':
340           case '9': {
341             // Cases 8 and 9 are not really required and are only added for the
342             // sake of error reporting.
343             bool oct_err = false;
344             if (i + 2 >= value.size() || !IsOct(txt[i + 1]) ||
345                 !IsOct(txt[i + 2])) {
346               oct_err = true;
347             } else {
348               char buf[4]{next, txt[++i], txt[++i], '\0'};
349               auto octval = perfetto::base::CStringToUInt32(buf, 8);
350               if (!octval.has_value() || *octval > 0xff) {
351                 oct_err = true;
352               } else {
353                 s.get()[j++] = static_cast<char>(static_cast<uint8_t>(*octval));
354               }
355             }
356             if (oct_err) {
357               AddError(value,
358                        "Malformed string escape in $k in proto $n on '$v'. "
359                        "\\NNN escapes must be exactly three octal digits <= "
360                        "\\377 (0xff).",
361                        std::map<std::string, std::string>{
362                            {"$k", key.ToStdString()},
363                            {"$n", descriptor_name()},
364                            {"$v", value.ToStdString()},
365                        });
366             }
367             break;
368           }
369           default:
370             AddError(value,
371                      "Unknown string escape in $k in "
372                      "proto $n: '$v'",
373                      std::map<std::string, std::string>{
374                          {"$k", key.ToStdString()},
375                          {"$n", descriptor_name()},
376                          {"$v", value.ToStdString()},
377                      });
378             return;
379         }
380       } else {
381         s.get()[j++] = c;
382       }
383     }
384     msg()->AppendBytes(field_id, s.get(), j);
385   }
386 
IdentifierField(const Token & key,const Token & value)387   void IdentifierField(const Token& key, const Token& value) {
388     const FieldDescriptorProto* field =
389         FindFieldByName(key, value,
390                         {
391                             FieldDescriptorProto::TYPE_BOOL,
392                             FieldDescriptorProto::TYPE_ENUM,
393                         });
394     if (!field)
395       return;
396     uint32_t field_id = static_cast<uint32_t>(field->number());
397     const auto& field_type = field->type();
398     if (field_type == FieldDescriptorProto::TYPE_BOOL) {
399       if (value.txt != "true" && value.txt != "false") {
400         AddError(value,
401                  "Expected 'true' or 'false' for boolean field $k in "
402                  "proto $n instead saw '$v'",
403                  std::map<std::string, std::string>{
404                      {"$k", key.ToStdString()},
405                      {"$n", descriptor_name()},
406                      {"$v", value.ToStdString()},
407                  });
408         return;
409       }
410       msg()->AppendTinyVarInt(field_id, value.txt == "true" ? 1 : 0);
411     } else if (field_type == FieldDescriptorProto::TYPE_ENUM) {
412       const std::string& type_name = field->type_name();
413       const EnumDescriptorProto* enum_descriptor = name_to_enum_[type_name];
414       PERFETTO_CHECK(enum_descriptor);
415       bool found_value = false;
416       int32_t enum_value_number = 0;
417       for (const EnumValueDescriptorProto& enum_value :
418            enum_descriptor->value()) {
419         if (value.ToStdString() != enum_value.name())
420           continue;
421         found_value = true;
422         enum_value_number = enum_value.number();
423         break;
424       }
425       if (!found_value) {
426         AddError(value,
427                  "Unexpected value '$v' for enum field $k in "
428                  "proto $n",
429                  std::map<std::string, std::string>{
430                      {"$v", value.ToStdString()},
431                      {"$k", key.ToStdString()},
432                      {"$n", descriptor_name()},
433                  });
434         return;
435       }
436       msg()->AppendVarInt<int32_t>(field_id, enum_value_number);
437     }
438   }
439 
BeginNestedMessage(const Token & key,const Token & value)440   bool BeginNestedMessage(const Token& key, const Token& value) {
441     const FieldDescriptorProto* field =
442         FindFieldByName(key, value,
443                         {
444                             FieldDescriptorProto::TYPE_MESSAGE,
445                         });
446     if (!field) {
447       // FindFieldByName adds an error.
448       return false;
449     }
450     uint32_t field_id = static_cast<uint32_t>(field->number());
451     const std::string& type_name = field->type_name();
452     const DescriptorProto* nested_descriptor = name_to_descriptor_[type_name];
453     PERFETTO_CHECK(nested_descriptor);
454     auto* nested_msg = msg()->BeginNestedMessage<protozero::Message>(field_id);
455     ctx_.push(ParserDelegateContext{nested_descriptor, nested_msg, {}});
456     return true;
457   }
458 
EndNestedMessage()459   void EndNestedMessage() {
460     msg()->Finalize();
461     ctx_.pop();
462   }
463 
Eof()464   void Eof() {}
465 
AddError(size_t row,size_t column,const char * fmt,const std::map<std::string,std::string> & args)466   void AddError(size_t row,
467                 size_t column,
468                 const char* fmt,
469                 const std::map<std::string, std::string>& args) {
470     reporter_->AddError(row, column, 0, Format(fmt, args));
471   }
472 
AddError(const Token & token,const char * fmt,const std::map<std::string,std::string> & args)473   void AddError(const Token& token,
474                 const char* fmt,
475                 const std::map<std::string, std::string>& args) {
476     reporter_->AddError(token.row, token.column, token.size(),
477                         Format(fmt, args));
478   }
479 
480  private:
481   template <typename T>
VarIntField(const FieldDescriptorProto * field,Token t)482   void VarIntField(const FieldDescriptorProto* field, Token t) {
483     auto field_id = static_cast<uint32_t>(field->number());
484     uint64_t n = 0;
485     PERFETTO_CHECK(ParseInteger(t.txt, &n));
486     if (field->type() == FieldDescriptorProto::TYPE_SINT64 ||
487         field->type() == FieldDescriptorProto::TYPE_SINT32) {
488       msg()->AppendSignedVarInt<T>(field_id, static_cast<T>(n));
489     } else {
490       msg()->AppendVarInt<T>(field_id, static_cast<T>(n));
491     }
492   }
493 
494   template <typename T>
FixedField(const FieldDescriptorProto * field,const Token & t)495   void FixedField(const FieldDescriptorProto* field, const Token& t) {
496     uint32_t field_id = static_cast<uint32_t>(field->number());
497     uint64_t n = 0;
498     PERFETTO_CHECK(ParseInteger(t.txt, &n));
499     msg()->AppendFixed<T>(field_id, static_cast<T>(n));
500   }
501 
502   template <typename T>
FixedFloatField(const FieldDescriptorProto * field,const Token & t)503   void FixedFloatField(const FieldDescriptorProto* field, const Token& t) {
504     uint32_t field_id = static_cast<uint32_t>(field->number());
505     std::optional<double> opt_n =
506         perfetto::base::StringToDouble(t.ToStdString());
507     msg()->AppendFixed<T>(field_id, static_cast<T>(opt_n.value_or(0l)));
508   }
509 
510   template <typename T>
ParseInteger(perfetto::base::StringView s,T * number_ptr)511   bool ParseInteger(perfetto::base::StringView s, T* number_ptr) {
512     uint64_t n = 0;
513     PERFETTO_CHECK(sscanf(s.ToStdString().c_str(), "%" PRIu64, &n) == 1);
514     PERFETTO_CHECK(n <= std::numeric_limits<T>::max());
515     *number_ptr = static_cast<T>(n);
516     return true;
517   }
518 
FindFieldByName(const Token & key,const Token & value,const std::set<FieldDescriptorProto::Type> & valid_field_types)519   const FieldDescriptorProto* FindFieldByName(
520       const Token& key,
521       const Token& value,
522       const std::set<FieldDescriptorProto::Type>& valid_field_types) {
523     const std::string field_name = key.ToStdString();
524     const FieldDescriptorProto* field_descriptor = nullptr;
525     for (const auto& f : descriptor()->field()) {
526       if (f.name() == field_name) {
527         field_descriptor = &f;
528         break;
529       }
530     }
531 
532     if (!field_descriptor) {
533       AddError(key, "No field named \"$n\" in proto $p",
534                {
535                    {"$n", field_name},
536                    {"$p", descriptor_name()},
537                });
538       return nullptr;
539     }
540 
541     bool is_repeated =
542         field_descriptor->label() == FieldDescriptorProto::LABEL_REPEATED;
543     auto it_and_inserted = ctx_.top().seen_fields.emplace(field_name);
544     if (!it_and_inserted.second && !is_repeated) {
545       AddError(key, "Saw non-repeating field '$f' more than once",
546                {
547                    {"$f", field_name},
548                });
549     }
550 
551     if (!valid_field_types.count(field_descriptor->type())) {
552       AddError(value,
553                "Expected value of type $t for field $k in proto $n "
554                "instead saw '$v'",
555                {
556                    {"$t", FieldToTypeName(field_descriptor)},
557                    {"$k", field_name},
558                    {"$n", descriptor_name()},
559                    {"$v", value.ToStdString()},
560                });
561       return nullptr;
562     }
563 
564     return field_descriptor;
565   }
566 
descriptor()567   const DescriptorProto* descriptor() {
568     PERFETTO_CHECK(!ctx_.empty());
569     return ctx_.top().descriptor;
570   }
571 
descriptor_name()572   const std::string& descriptor_name() { return descriptor()->name(); }
573 
msg()574   protozero::Message* msg() {
575     PERFETTO_CHECK(!ctx_.empty());
576     return ctx_.top().message;
577   }
578 
579   std::stack<ParserDelegateContext> ctx_;
580   ErrorReporter* reporter_;
581   std::map<std::string, const DescriptorProto*> name_to_descriptor_;
582   std::map<std::string, const EnumDescriptorProto*> name_to_enum_;
583 };
584 
Parse(std::string_view input,ParserDelegate * delegate)585 void Parse(std::string_view input, ParserDelegate* delegate) {
586   ParseState state = kWaitingForKey;
587   size_t column = 0;
588   size_t row = 1;
589   size_t depth = 0;
590   bool saw_colon_for_this_key = false;
591   bool saw_semicolon_for_this_value = true;
592   bool comment_till_eol = false;
593   Token key{};
594   Token value{};
595 
596   for (size_t i = 0; i < input.size(); i++, column++) {
597     bool last_character = i + 1 == input.size();
598     char c = input.at(i);
599     if (c == '\n') {
600       column = 0;
601       row++;
602       if (comment_till_eol) {
603         comment_till_eol = false;
604         continue;
605       }
606     }
607     if (comment_till_eol)
608       continue;
609 
610     switch (state) {
611       case kWaitingForKey:
612         if (isspace(c))
613           continue;
614         if (c == '#') {
615           comment_till_eol = true;
616           continue;
617         }
618         if (c == '}') {
619           if (depth == 0) {
620             delegate->AddError(row, column, "Unmatched closing brace", {});
621             return;
622           }
623           saw_semicolon_for_this_value = false;
624           depth--;
625           delegate->EndNestedMessage();
626           continue;
627         }
628         if (!saw_semicolon_for_this_value && c == ';') {
629           saw_semicolon_for_this_value = true;
630           continue;
631         }
632         if (IsIdentifierStart(c)) {
633           saw_colon_for_this_key = false;
634           state = kReadingKey;
635           key.offset = i;
636           key.row = row;
637           key.column = column;
638           continue;
639         }
640         break;
641 
642       case kReadingKey:
643         if (IsIdentifierBody(c))
644           continue;
645         key.txt = perfetto::base::StringView(input.data() + key.offset,
646                                              i - key.offset);
647         state = kWaitingForValue;
648         if (c == '#')
649           comment_till_eol = true;
650         continue;
651 
652       case kWaitingForValue:
653         if (isspace(c))
654           continue;
655         if (c == '#') {
656           comment_till_eol = true;
657           continue;
658         }
659         value.offset = i;
660         value.row = row;
661         value.column = column;
662 
663         if (c == ':' && !saw_colon_for_this_key) {
664           saw_colon_for_this_key = true;
665           continue;
666         }
667         if (c == '"') {
668           state = kReadingStringValue;
669           continue;
670         }
671         if (c == '-' || IsDigit(c) || c == '.') {
672           state = kReadingNumericValue;
673           continue;
674         }
675         if (IsIdentifierStart(c)) {
676           state = kReadingIdentifierValue;
677           continue;
678         }
679         if (c == '{') {
680           state = kWaitingForKey;
681           depth++;
682           value.txt =
683               perfetto::base::StringView(input.data() + value.offset, 1);
684           if (!delegate->BeginNestedMessage(key, value)) {
685             return;
686           }
687           continue;
688         }
689         break;
690 
691       case kReadingNumericValue:
692         if (isspace(c) || c == ';' || last_character) {
693           bool keep_last = last_character && !isspace(c) && c != ';';
694           size_t size = i - value.offset + (keep_last ? 1 : 0);
695           value.txt =
696               perfetto::base::StringView(input.data() + value.offset, size);
697           saw_semicolon_for_this_value = c == ';';
698           state = kWaitingForKey;
699           delegate->NumericField(key, value);
700           continue;
701         }
702         if (IsDigit(c) || c == '.')
703           continue;
704         break;
705 
706       case kReadingStringValue:
707         if (c == '\\') {
708           state = kReadingStringEscape;
709         } else if (c == '"') {
710           size_t size = i - value.offset - 1;
711           value.column++;
712           value.txt =
713               perfetto::base::StringView(input.data() + value.offset + 1, size);
714           saw_semicolon_for_this_value = false;
715           state = kWaitingForKey;
716           delegate->StringField(key, value);
717         }
718         continue;
719 
720       case kReadingStringEscape:
721         state = kReadingStringValue;
722         continue;
723 
724       case kReadingIdentifierValue:
725         if (isspace(c) || c == ';' || c == '#' || last_character) {
726           bool keep_last =
727               last_character && !isspace(c) && c != ';' && c != '#';
728           size_t size = i - value.offset + (keep_last ? 1 : 0);
729           value.txt =
730               perfetto::base::StringView(input.data() + value.offset, size);
731           comment_till_eol = c == '#';
732           saw_semicolon_for_this_value = c == ';';
733           state = kWaitingForKey;
734           delegate->IdentifierField(key, value);
735           continue;
736         }
737         if (IsIdentifierBody(c)) {
738           continue;
739         }
740         break;
741     }
742     delegate->AddError(row, column, "Unexpected character '$c'",
743                        std::map<std::string, std::string>{
744                            {"$c", std::string(1, c)},
745                        });
746     return;
747   }  // for
748   if (depth > 0)
749     delegate->AddError(row, column, "Nested message not closed", {});
750   if (state != kWaitingForKey)
751     delegate->AddError(row, column, "Unexpected end of input", {});
752   delegate->Eof();
753 }
754 
AddNestedDescriptors(const std::string & prefix,const DescriptorProto * descriptor,std::map<std::string,const DescriptorProto * > * name_to_descriptor,std::map<std::string,const EnumDescriptorProto * > * name_to_enum)755 void AddNestedDescriptors(
756     const std::string& prefix,
757     const DescriptorProto* descriptor,
758     std::map<std::string, const DescriptorProto*>* name_to_descriptor,
759     std::map<std::string, const EnumDescriptorProto*>* name_to_enum) {
760   for (const EnumDescriptorProto& enum_descriptor : descriptor->enum_type()) {
761     const std::string name = prefix + "." + enum_descriptor.name();
762     (*name_to_enum)[name] = &enum_descriptor;
763   }
764   for (const DescriptorProto& nested_descriptor : descriptor->nested_type()) {
765     const std::string name = prefix + "." + nested_descriptor.name();
766     (*name_to_descriptor)[name] = &nested_descriptor;
767     AddNestedDescriptors(name, &nested_descriptor, name_to_descriptor,
768                          name_to_enum);
769   }
770 }
771 
772 }  // namespace
773 
TextToProto(const uint8_t * descriptor_set_ptr,size_t descriptor_set_size,const std::string & root_type,const std::string & file_name,std::string_view input)774 perfetto::base::StatusOr<std::vector<uint8_t>> TextToProto(
775     const uint8_t* descriptor_set_ptr,
776     size_t descriptor_set_size,
777     const std::string& root_type,
778     const std::string& file_name,
779     std::string_view input) {
780   std::map<std::string, const DescriptorProto*> name_to_descriptor;
781   std::map<std::string, const EnumDescriptorProto*> name_to_enum;
782   FileDescriptorSet file_descriptor_set;
783 
784   {
785     file_descriptor_set.ParseFromArray(descriptor_set_ptr, descriptor_set_size);
786     for (const auto& file_descriptor : file_descriptor_set.file()) {
787       for (const auto& enum_descriptor : file_descriptor.enum_type()) {
788         const std::string name =
789             "." + file_descriptor.package() + "." + enum_descriptor.name();
790         name_to_enum[name] = &enum_descriptor;
791       }
792       for (const auto& descriptor : file_descriptor.message_type()) {
793         const std::string name =
794             "." + file_descriptor.package() + "." + descriptor.name();
795         name_to_descriptor[name] = &descriptor;
796         AddNestedDescriptors(name, &descriptor, &name_to_descriptor,
797                              &name_to_enum);
798       }
799     }
800   }
801 
802   const DescriptorProto* descriptor = name_to_descriptor[root_type];
803   PERFETTO_CHECK(descriptor);
804 
805   protozero::HeapBuffered<protozero::Message> message;
806   ErrorReporter reporter(file_name, input);
807   ParserDelegate delegate(descriptor, message.get(), &reporter,
808                           std::move(name_to_descriptor),
809                           std::move(name_to_enum));
810   Parse(input, &delegate);
811   if (!reporter.success())
812     return perfetto::base::ErrStatus("%s", reporter.error().c_str());
813   return message.SerializeAsArray();
814 }
815 
816 }  // namespace protozero
817