1 /*
2 * Copyright (C) 2025 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/protozero/text_to_proto/text_to_proto.h"
18
19 #include <cctype>
20 #include <cinttypes>
21 #include <cstddef>
22 #include <cstdint>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <limits>
26 #include <map>
27 #include <memory>
28 #include <optional>
29 #include <set>
30 #include <stack>
31 #include <string>
32 #include <string_view>
33 #include <utility>
34 #include <vector>
35
36 #include "perfetto/base/logging.h"
37 #include "perfetto/base/status.h"
38 #include "perfetto/ext/base/status_or.h"
39 #include "perfetto/ext/base/string_utils.h"
40 #include "perfetto/ext/base/string_view.h"
41 #include "perfetto/ext/base/utils.h"
42 #include "perfetto/protozero/message.h"
43 #include "perfetto/protozero/message_handle.h"
44 #include "perfetto/protozero/scattered_heap_buffer.h"
45
46 #include "protos/perfetto/common/descriptor.gen.h"
47
48 namespace protozero {
49
50 using perfetto::protos::gen::DescriptorProto;
51 using perfetto::protos::gen::EnumDescriptorProto;
52 using perfetto::protos::gen::EnumValueDescriptorProto;
53 using perfetto::protos::gen::FieldDescriptorProto;
54 using perfetto::protos::gen::FileDescriptorSet;
55
56 namespace {
57
// Returns true iff |c| is an ASCII octal digit ('0' through '7').
constexpr bool IsOct(char c) {
  return '0' <= c && c <= '7';
}
61
// Returns true iff |c| is an ASCII decimal digit ('0' through '9').
// Locale-independent, unlike isdigit().
constexpr bool IsDigit(char c) {
  return !(c < '0' || c > '9');
}
65
// Returns true iff |c| may begin an identifier: an ASCII letter or '_'.
constexpr bool IsIdentifierStart(char c) {
  if (c == '_')
    return true;
  const bool is_upper = c >= 'A' && c <= 'Z';
  const bool is_lower = c >= 'a' && c <= 'z';
  return is_upper || is_lower;
}
69
IsIdentifierBody(char c)70 constexpr bool IsIdentifierBody(char c) {
71 return IsIdentifierStart(c) || IsDigit(c);
72 }
73
FieldToTypeName(const FieldDescriptorProto * field)74 const char* FieldToTypeName(const FieldDescriptorProto* field) {
75 switch (field->type()) {
76 case FieldDescriptorProto::TYPE_UINT64:
77 return "uint64";
78 case FieldDescriptorProto::TYPE_UINT32:
79 return "uint32";
80 case FieldDescriptorProto::TYPE_INT64:
81 return "int64";
82 case FieldDescriptorProto::TYPE_SINT64:
83 return "sint64";
84 case FieldDescriptorProto::TYPE_INT32:
85 return "int32";
86 case FieldDescriptorProto::TYPE_SINT32:
87 return "sint32";
88 case FieldDescriptorProto::TYPE_FIXED64:
89 return "fixed64";
90 case FieldDescriptorProto::TYPE_SFIXED64:
91 return "sfixed64";
92 case FieldDescriptorProto::TYPE_FIXED32:
93 return "fixed32";
94 case FieldDescriptorProto::TYPE_SFIXED32:
95 return "sfixed32";
96 case FieldDescriptorProto::TYPE_DOUBLE:
97 return "double";
98 case FieldDescriptorProto::TYPE_FLOAT:
99 return "float";
100 case FieldDescriptorProto::TYPE_BOOL:
101 return "bool";
102 case FieldDescriptorProto::TYPE_STRING:
103 return "string";
104 case FieldDescriptorProto::TYPE_BYTES:
105 return "bytes";
106 case FieldDescriptorProto::TYPE_GROUP:
107 return "group";
108 case FieldDescriptorProto::TYPE_MESSAGE:
109 return "message";
110 case FieldDescriptorProto::TYPE_ENUM:
111 return "enum";
112 }
113 // For gcc
114 PERFETTO_FATAL("Non complete switch");
115 }
116
Format(const char * fmt,const std::map<std::string,std::string> & args)117 std::string Format(const char* fmt,
118 const std::map<std::string, std::string>& args) {
119 std::string result(fmt);
120 for (const auto& key_value : args) {
121 size_t start = result.find(key_value.first);
122 PERFETTO_CHECK(start != std::string::npos);
123 result.replace(start, key_value.first.size(), key_value.second);
124 PERFETTO_CHECK(result.find(key_value.first) == std::string::npos);
125 }
126 return result;
127 }
128
// States of the character-driven state machine implemented by Parse().
enum ParseState {
  // Between fields: expecting a key, '}' or an optional ';'.
  kWaitingForKey = 0,
  // Inside the identifier that names a field.
  kReadingKey = 1,
  // After a key: expecting ':', '{' or the first character of a value.
  kWaitingForValue = 2,
  // Inside a double-quoted string value.
  kReadingStringValue = 3,
  // Right after a backslash inside a string value.
  kReadingStringEscape = 4,
  // Inside a numeric value.
  kReadingNumericValue = 5,
  // Inside an identifier value (boolean or enum name).
  kReadingIdentifierValue = 6,
};
138
139 struct Token {
140 size_t offset;
141 size_t column;
142 size_t row;
143 perfetto::base::StringView txt;
144
sizeprotozero::__anon9a85fc900111::Token145 size_t size() const { return txt.size(); }
ToStdStringprotozero::__anon9a85fc900111::Token146 std::string ToStdString() const { return txt.ToStdString(); }
147 };
148
// Per-message state kept by ParserDelegate: one instance is pushed for the
// root message and one for every nested message that is currently open.
// Member order matters: instances are created with aggregate initialization.
struct ParserDelegateContext {
  // Schema of the message being populated. Not owned.
  const DescriptorProto* descriptor;
  // Output message the parsed fields are appended to. Not owned.
  protozero::Message* message;
  // Names of the fields already encountered in this message; used to detect
  // non-repeated fields that appear more than once.
  std::set<std::string> seen_fields;
};
154
// Collects parse errors and renders them compiler-style:
//
//   <file>:<row>:<column> error: <message>
//   <offending line of the input>
//   <guide line underlining the offending span with ^~~~>
//
// |config| is a non-owning view of the text being parsed; it must outlive the
// reporter and is NOT required to be NUL-terminated.
class ErrorReporter {
 public:
  ErrorReporter(std::string file_name, std::string_view config)
      : file_name_(std::move(file_name)), config_(config) {}

  // Records an error located at (|row|, |column|) spanning |length|
  // characters. |row| and |column| are 1-based; 0 is tolerated and treated as
  // the first row/column (it is used when the position cannot be located).
  // After the first call success() returns false.
  void AddError(size_t row,
                size_t column,
                size_t length,
                const std::string& message) {
    // Convert to 0-based indexes, clamping the "unknown" value 0.
    row = row > 0 ? row - 1 : 0;
    column = column > 0 ? column - 1 : 0;
    parsed_successfully_ = false;

    const std::string line(ExtractLine(row));

    // |column| spaces followed by '^' and |length| - 1 '~' characters. Empty
    // underline when |length| is 0.
    std::string guide(column, ' ');
    if (length > 0) {
      guide.push_back('^');
      guide.append(length - 1, '~');
    }

    error_ += file_name_ + ":" + std::to_string(row + 1) + ":" +
              std::to_string(column + 1) + " error: " + message + "\n";
    error_ += line + "\n";
    error_ += guide + "\n";
  }

  // True as long as no error has been added.
  bool success() const { return parsed_successfully_; }

  // All the errors added so far, concatenated.
  const std::string& error() const { return error_; }

 private:
  // Returns the 0-based |line| of the config without its trailing '\n', or an
  // empty view if the config has fewer lines. All accesses are bounded by the
  // size of |config_|: the view is never read past its end, even when it is
  // not NUL-terminated or when |line| is out of range.
  std::string_view ExtractLine(size_t line) const {
    size_t start = 0;
    for (size_t i = 0; i < line; i++) {
      const size_t newline = config_.find('\n', start);
      if (newline == std::string_view::npos)
        return std::string_view();
      start = newline + 1;
    }
    const size_t newline = config_.find('\n', start);
    const size_t end =
        newline == std::string_view::npos ? config_.size() : newline;
    return config_.substr(start, end - start);
  }

  bool parsed_successfully_ = true;
  std::string file_name_;
  std::string error_;
  std::string_view config_;
};
206
207 class ParserDelegate {
208 public:
ParserDelegate(const DescriptorProto * descriptor,protozero::Message * message,ErrorReporter * reporter,std::map<std::string,const DescriptorProto * > name_to_descriptor,std::map<std::string,const EnumDescriptorProto * > name_to_enum)209 ParserDelegate(
210 const DescriptorProto* descriptor,
211 protozero::Message* message,
212 ErrorReporter* reporter,
213 std::map<std::string, const DescriptorProto*> name_to_descriptor,
214 std::map<std::string, const EnumDescriptorProto*> name_to_enum)
215 : reporter_(reporter),
216 name_to_descriptor_(std::move(name_to_descriptor)),
217 name_to_enum_(std::move(name_to_enum)) {
218 ctx_.push(ParserDelegateContext{descriptor, message, {}});
219 }
220
NumericField(const Token & key,const Token & value)221 void NumericField(const Token& key, const Token& value) {
222 const FieldDescriptorProto* field =
223 FindFieldByName(key, value,
224 {
225 FieldDescriptorProto::TYPE_UINT64,
226 FieldDescriptorProto::TYPE_UINT32,
227 FieldDescriptorProto::TYPE_INT64,
228 FieldDescriptorProto::TYPE_SINT64,
229 FieldDescriptorProto::TYPE_INT32,
230 FieldDescriptorProto::TYPE_SINT32,
231 FieldDescriptorProto::TYPE_FIXED64,
232 FieldDescriptorProto::TYPE_SFIXED64,
233 FieldDescriptorProto::TYPE_FIXED32,
234 FieldDescriptorProto::TYPE_SFIXED32,
235 FieldDescriptorProto::TYPE_DOUBLE,
236 FieldDescriptorProto::TYPE_FLOAT,
237 });
238 if (!field)
239 return;
240 const auto& field_type = field->type();
241 switch (field_type) {
242 case FieldDescriptorProto::TYPE_UINT64:
243 return VarIntField<uint64_t>(field, value);
244 case FieldDescriptorProto::TYPE_UINT32:
245 return VarIntField<uint32_t>(field, value);
246 case FieldDescriptorProto::TYPE_INT64:
247 case FieldDescriptorProto::TYPE_SINT64:
248 return VarIntField<int64_t>(field, value);
249 case FieldDescriptorProto::TYPE_INT32:
250 case FieldDescriptorProto::TYPE_SINT32:
251 return VarIntField<int32_t>(field, value);
252
253 case FieldDescriptorProto::TYPE_FIXED64:
254 case FieldDescriptorProto::TYPE_SFIXED64:
255 return FixedField<int64_t>(field, value);
256
257 case FieldDescriptorProto::TYPE_FIXED32:
258 case FieldDescriptorProto::TYPE_SFIXED32:
259 return FixedField<int32_t>(field, value);
260
261 case FieldDescriptorProto::TYPE_DOUBLE:
262 return FixedFloatField<double>(field, value);
263 case FieldDescriptorProto::TYPE_FLOAT:
264 return FixedFloatField<float>(field, value);
265
266 case FieldDescriptorProto::TYPE_BOOL:
267 case FieldDescriptorProto::TYPE_STRING:
268 case FieldDescriptorProto::TYPE_BYTES:
269 case FieldDescriptorProto::TYPE_GROUP:
270 case FieldDescriptorProto::TYPE_MESSAGE:
271 case FieldDescriptorProto::TYPE_ENUM:
272 PERFETTO_FATAL("Invalid type");
273 }
274 }
275
StringField(const Token & key,const Token & value)276 void StringField(const Token& key, const Token& value) {
277 const FieldDescriptorProto* field =
278 FindFieldByName(key, value,
279 {
280 FieldDescriptorProto::TYPE_STRING,
281 FieldDescriptorProto::TYPE_BYTES,
282 });
283 if (!field)
284 return;
285 auto field_id = static_cast<uint32_t>(field->number());
286 const auto& field_type = field->type();
287 PERFETTO_CHECK(field_type == FieldDescriptorProto::TYPE_STRING ||
288 field_type == FieldDescriptorProto::TYPE_BYTES);
289
290 std::unique_ptr<char, perfetto::base::FreeDeleter> s(
291 static_cast<char*>(malloc(value.size())));
292 size_t j = 0;
293 const char* const txt = value.txt.data();
294 for (size_t i = 0; i < value.size(); i++) {
295 char c = txt[i];
296 if (c == '\\') {
297 if (i + 1 >= value.size()) {
298 // This should be caught by the lexer.
299 PERFETTO_FATAL("Escape at end of string.");
300 return;
301 }
302 char next = txt[++i];
303 switch (next) {
304 case '\\':
305 case '\'':
306 case '"':
307 case '?':
308 s.get()[j++] = next;
309 break;
310 case 'a':
311 s.get()[j++] = '\a';
312 break;
313 case 'b':
314 s.get()[j++] = '\b';
315 break;
316 case 'f':
317 s.get()[j++] = '\f';
318 break;
319 case 'n':
320 s.get()[j++] = '\n';
321 break;
322 case 'r':
323 s.get()[j++] = '\r';
324 break;
325 case 't':
326 s.get()[j++] = '\t';
327 break;
328 case 'v':
329 s.get()[j++] = '\v';
330 break;
331 case '0':
332 case '1':
333 case '2':
334 case '3':
335 case '4':
336 case '5':
337 case '6':
338 case '7':
339 case '8':
340 case '9': {
341 // Cases 8 and 9 are not really required and are only added for the
342 // sake of error reporting.
343 bool oct_err = false;
344 if (i + 2 >= value.size() || !IsOct(txt[i + 1]) ||
345 !IsOct(txt[i + 2])) {
346 oct_err = true;
347 } else {
348 char buf[4]{next, txt[++i], txt[++i], '\0'};
349 auto octval = perfetto::base::CStringToUInt32(buf, 8);
350 if (!octval.has_value() || *octval > 0xff) {
351 oct_err = true;
352 } else {
353 s.get()[j++] = static_cast<char>(static_cast<uint8_t>(*octval));
354 }
355 }
356 if (oct_err) {
357 AddError(value,
358 "Malformed string escape in $k in proto $n on '$v'. "
359 "\\NNN escapes must be exactly three octal digits <= "
360 "\\377 (0xff).",
361 std::map<std::string, std::string>{
362 {"$k", key.ToStdString()},
363 {"$n", descriptor_name()},
364 {"$v", value.ToStdString()},
365 });
366 }
367 break;
368 }
369 default:
370 AddError(value,
371 "Unknown string escape in $k in "
372 "proto $n: '$v'",
373 std::map<std::string, std::string>{
374 {"$k", key.ToStdString()},
375 {"$n", descriptor_name()},
376 {"$v", value.ToStdString()},
377 });
378 return;
379 }
380 } else {
381 s.get()[j++] = c;
382 }
383 }
384 msg()->AppendBytes(field_id, s.get(), j);
385 }
386
IdentifierField(const Token & key,const Token & value)387 void IdentifierField(const Token& key, const Token& value) {
388 const FieldDescriptorProto* field =
389 FindFieldByName(key, value,
390 {
391 FieldDescriptorProto::TYPE_BOOL,
392 FieldDescriptorProto::TYPE_ENUM,
393 });
394 if (!field)
395 return;
396 uint32_t field_id = static_cast<uint32_t>(field->number());
397 const auto& field_type = field->type();
398 if (field_type == FieldDescriptorProto::TYPE_BOOL) {
399 if (value.txt != "true" && value.txt != "false") {
400 AddError(value,
401 "Expected 'true' or 'false' for boolean field $k in "
402 "proto $n instead saw '$v'",
403 std::map<std::string, std::string>{
404 {"$k", key.ToStdString()},
405 {"$n", descriptor_name()},
406 {"$v", value.ToStdString()},
407 });
408 return;
409 }
410 msg()->AppendTinyVarInt(field_id, value.txt == "true" ? 1 : 0);
411 } else if (field_type == FieldDescriptorProto::TYPE_ENUM) {
412 const std::string& type_name = field->type_name();
413 const EnumDescriptorProto* enum_descriptor = name_to_enum_[type_name];
414 PERFETTO_CHECK(enum_descriptor);
415 bool found_value = false;
416 int32_t enum_value_number = 0;
417 for (const EnumValueDescriptorProto& enum_value :
418 enum_descriptor->value()) {
419 if (value.ToStdString() != enum_value.name())
420 continue;
421 found_value = true;
422 enum_value_number = enum_value.number();
423 break;
424 }
425 if (!found_value) {
426 AddError(value,
427 "Unexpected value '$v' for enum field $k in "
428 "proto $n",
429 std::map<std::string, std::string>{
430 {"$v", value.ToStdString()},
431 {"$k", key.ToStdString()},
432 {"$n", descriptor_name()},
433 });
434 return;
435 }
436 msg()->AppendVarInt<int32_t>(field_id, enum_value_number);
437 }
438 }
439
BeginNestedMessage(const Token & key,const Token & value)440 bool BeginNestedMessage(const Token& key, const Token& value) {
441 const FieldDescriptorProto* field =
442 FindFieldByName(key, value,
443 {
444 FieldDescriptorProto::TYPE_MESSAGE,
445 });
446 if (!field) {
447 // FindFieldByName adds an error.
448 return false;
449 }
450 uint32_t field_id = static_cast<uint32_t>(field->number());
451 const std::string& type_name = field->type_name();
452 const DescriptorProto* nested_descriptor = name_to_descriptor_[type_name];
453 PERFETTO_CHECK(nested_descriptor);
454 auto* nested_msg = msg()->BeginNestedMessage<protozero::Message>(field_id);
455 ctx_.push(ParserDelegateContext{nested_descriptor, nested_msg, {}});
456 return true;
457 }
458
EndNestedMessage()459 void EndNestedMessage() {
460 msg()->Finalize();
461 ctx_.pop();
462 }
463
Eof()464 void Eof() {}
465
AddError(size_t row,size_t column,const char * fmt,const std::map<std::string,std::string> & args)466 void AddError(size_t row,
467 size_t column,
468 const char* fmt,
469 const std::map<std::string, std::string>& args) {
470 reporter_->AddError(row, column, 0, Format(fmt, args));
471 }
472
AddError(const Token & token,const char * fmt,const std::map<std::string,std::string> & args)473 void AddError(const Token& token,
474 const char* fmt,
475 const std::map<std::string, std::string>& args) {
476 reporter_->AddError(token.row, token.column, token.size(),
477 Format(fmt, args));
478 }
479
480 private:
481 template <typename T>
VarIntField(const FieldDescriptorProto * field,Token t)482 void VarIntField(const FieldDescriptorProto* field, Token t) {
483 auto field_id = static_cast<uint32_t>(field->number());
484 uint64_t n = 0;
485 PERFETTO_CHECK(ParseInteger(t.txt, &n));
486 if (field->type() == FieldDescriptorProto::TYPE_SINT64 ||
487 field->type() == FieldDescriptorProto::TYPE_SINT32) {
488 msg()->AppendSignedVarInt<T>(field_id, static_cast<T>(n));
489 } else {
490 msg()->AppendVarInt<T>(field_id, static_cast<T>(n));
491 }
492 }
493
494 template <typename T>
FixedField(const FieldDescriptorProto * field,const Token & t)495 void FixedField(const FieldDescriptorProto* field, const Token& t) {
496 uint32_t field_id = static_cast<uint32_t>(field->number());
497 uint64_t n = 0;
498 PERFETTO_CHECK(ParseInteger(t.txt, &n));
499 msg()->AppendFixed<T>(field_id, static_cast<T>(n));
500 }
501
502 template <typename T>
FixedFloatField(const FieldDescriptorProto * field,const Token & t)503 void FixedFloatField(const FieldDescriptorProto* field, const Token& t) {
504 uint32_t field_id = static_cast<uint32_t>(field->number());
505 std::optional<double> opt_n =
506 perfetto::base::StringToDouble(t.ToStdString());
507 msg()->AppendFixed<T>(field_id, static_cast<T>(opt_n.value_or(0l)));
508 }
509
510 template <typename T>
ParseInteger(perfetto::base::StringView s,T * number_ptr)511 bool ParseInteger(perfetto::base::StringView s, T* number_ptr) {
512 uint64_t n = 0;
513 PERFETTO_CHECK(sscanf(s.ToStdString().c_str(), "%" PRIu64, &n) == 1);
514 PERFETTO_CHECK(n <= std::numeric_limits<T>::max());
515 *number_ptr = static_cast<T>(n);
516 return true;
517 }
518
FindFieldByName(const Token & key,const Token & value,const std::set<FieldDescriptorProto::Type> & valid_field_types)519 const FieldDescriptorProto* FindFieldByName(
520 const Token& key,
521 const Token& value,
522 const std::set<FieldDescriptorProto::Type>& valid_field_types) {
523 const std::string field_name = key.ToStdString();
524 const FieldDescriptorProto* field_descriptor = nullptr;
525 for (const auto& f : descriptor()->field()) {
526 if (f.name() == field_name) {
527 field_descriptor = &f;
528 break;
529 }
530 }
531
532 if (!field_descriptor) {
533 AddError(key, "No field named \"$n\" in proto $p",
534 {
535 {"$n", field_name},
536 {"$p", descriptor_name()},
537 });
538 return nullptr;
539 }
540
541 bool is_repeated =
542 field_descriptor->label() == FieldDescriptorProto::LABEL_REPEATED;
543 auto it_and_inserted = ctx_.top().seen_fields.emplace(field_name);
544 if (!it_and_inserted.second && !is_repeated) {
545 AddError(key, "Saw non-repeating field '$f' more than once",
546 {
547 {"$f", field_name},
548 });
549 }
550
551 if (!valid_field_types.count(field_descriptor->type())) {
552 AddError(value,
553 "Expected value of type $t for field $k in proto $n "
554 "instead saw '$v'",
555 {
556 {"$t", FieldToTypeName(field_descriptor)},
557 {"$k", field_name},
558 {"$n", descriptor_name()},
559 {"$v", value.ToStdString()},
560 });
561 return nullptr;
562 }
563
564 return field_descriptor;
565 }
566
descriptor()567 const DescriptorProto* descriptor() {
568 PERFETTO_CHECK(!ctx_.empty());
569 return ctx_.top().descriptor;
570 }
571
descriptor_name()572 const std::string& descriptor_name() { return descriptor()->name(); }
573
msg()574 protozero::Message* msg() {
575 PERFETTO_CHECK(!ctx_.empty());
576 return ctx_.top().message;
577 }
578
579 std::stack<ParserDelegateContext> ctx_;
580 ErrorReporter* reporter_;
581 std::map<std::string, const DescriptorProto*> name_to_descriptor_;
582 std::map<std::string, const EnumDescriptorProto*> name_to_enum_;
583 };
584
Parse(std::string_view input,ParserDelegate * delegate)585 void Parse(std::string_view input, ParserDelegate* delegate) {
586 ParseState state = kWaitingForKey;
587 size_t column = 0;
588 size_t row = 1;
589 size_t depth = 0;
590 bool saw_colon_for_this_key = false;
591 bool saw_semicolon_for_this_value = true;
592 bool comment_till_eol = false;
593 Token key{};
594 Token value{};
595
596 for (size_t i = 0; i < input.size(); i++, column++) {
597 bool last_character = i + 1 == input.size();
598 char c = input.at(i);
599 if (c == '\n') {
600 column = 0;
601 row++;
602 if (comment_till_eol) {
603 comment_till_eol = false;
604 continue;
605 }
606 }
607 if (comment_till_eol)
608 continue;
609
610 switch (state) {
611 case kWaitingForKey:
612 if (isspace(c))
613 continue;
614 if (c == '#') {
615 comment_till_eol = true;
616 continue;
617 }
618 if (c == '}') {
619 if (depth == 0) {
620 delegate->AddError(row, column, "Unmatched closing brace", {});
621 return;
622 }
623 saw_semicolon_for_this_value = false;
624 depth--;
625 delegate->EndNestedMessage();
626 continue;
627 }
628 if (!saw_semicolon_for_this_value && c == ';') {
629 saw_semicolon_for_this_value = true;
630 continue;
631 }
632 if (IsIdentifierStart(c)) {
633 saw_colon_for_this_key = false;
634 state = kReadingKey;
635 key.offset = i;
636 key.row = row;
637 key.column = column;
638 continue;
639 }
640 break;
641
642 case kReadingKey:
643 if (IsIdentifierBody(c))
644 continue;
645 key.txt = perfetto::base::StringView(input.data() + key.offset,
646 i - key.offset);
647 state = kWaitingForValue;
648 if (c == '#')
649 comment_till_eol = true;
650 continue;
651
652 case kWaitingForValue:
653 if (isspace(c))
654 continue;
655 if (c == '#') {
656 comment_till_eol = true;
657 continue;
658 }
659 value.offset = i;
660 value.row = row;
661 value.column = column;
662
663 if (c == ':' && !saw_colon_for_this_key) {
664 saw_colon_for_this_key = true;
665 continue;
666 }
667 if (c == '"') {
668 state = kReadingStringValue;
669 continue;
670 }
671 if (c == '-' || IsDigit(c) || c == '.') {
672 state = kReadingNumericValue;
673 continue;
674 }
675 if (IsIdentifierStart(c)) {
676 state = kReadingIdentifierValue;
677 continue;
678 }
679 if (c == '{') {
680 state = kWaitingForKey;
681 depth++;
682 value.txt =
683 perfetto::base::StringView(input.data() + value.offset, 1);
684 if (!delegate->BeginNestedMessage(key, value)) {
685 return;
686 }
687 continue;
688 }
689 break;
690
691 case kReadingNumericValue:
692 if (isspace(c) || c == ';' || last_character) {
693 bool keep_last = last_character && !isspace(c) && c != ';';
694 size_t size = i - value.offset + (keep_last ? 1 : 0);
695 value.txt =
696 perfetto::base::StringView(input.data() + value.offset, size);
697 saw_semicolon_for_this_value = c == ';';
698 state = kWaitingForKey;
699 delegate->NumericField(key, value);
700 continue;
701 }
702 if (IsDigit(c) || c == '.')
703 continue;
704 break;
705
706 case kReadingStringValue:
707 if (c == '\\') {
708 state = kReadingStringEscape;
709 } else if (c == '"') {
710 size_t size = i - value.offset - 1;
711 value.column++;
712 value.txt =
713 perfetto::base::StringView(input.data() + value.offset + 1, size);
714 saw_semicolon_for_this_value = false;
715 state = kWaitingForKey;
716 delegate->StringField(key, value);
717 }
718 continue;
719
720 case kReadingStringEscape:
721 state = kReadingStringValue;
722 continue;
723
724 case kReadingIdentifierValue:
725 if (isspace(c) || c == ';' || c == '#' || last_character) {
726 bool keep_last =
727 last_character && !isspace(c) && c != ';' && c != '#';
728 size_t size = i - value.offset + (keep_last ? 1 : 0);
729 value.txt =
730 perfetto::base::StringView(input.data() + value.offset, size);
731 comment_till_eol = c == '#';
732 saw_semicolon_for_this_value = c == ';';
733 state = kWaitingForKey;
734 delegate->IdentifierField(key, value);
735 continue;
736 }
737 if (IsIdentifierBody(c)) {
738 continue;
739 }
740 break;
741 }
742 delegate->AddError(row, column, "Unexpected character '$c'",
743 std::map<std::string, std::string>{
744 {"$c", std::string(1, c)},
745 });
746 return;
747 } // for
748 if (depth > 0)
749 delegate->AddError(row, column, "Nested message not closed", {});
750 if (state != kWaitingForKey)
751 delegate->AddError(row, column, "Unexpected end of input", {});
752 delegate->Eof();
753 }
754
// Registers, recursively, every enum and message type declared inside
// |descriptor|. Each type is keyed by |prefix| + "." + its own name, where
// |prefix| is the qualified name under which |descriptor| itself was
// registered. The maps store pointers into |descriptor|, which must therefore
// outlive them.
void AddNestedDescriptors(
    const std::string& prefix,
    const DescriptorProto* descriptor,
    std::map<std::string, const DescriptorProto*>* name_to_descriptor,
    std::map<std::string, const EnumDescriptorProto*>* name_to_enum) {
  const std::string scope = prefix + ".";

  for (const EnumDescriptorProto& nested_enum : descriptor->enum_type())
    (*name_to_enum)[scope + nested_enum.name()] = &nested_enum;

  for (const DescriptorProto& nested_msg : descriptor->nested_type()) {
    const std::string qualified_name = scope + nested_msg.name();
    (*name_to_descriptor)[qualified_name] = &nested_msg;
    AddNestedDescriptors(qualified_name, &nested_msg, name_to_descriptor,
                         name_to_enum);
  }
}
771
772 } // namespace
773
TextToProto(const uint8_t * descriptor_set_ptr,size_t descriptor_set_size,const std::string & root_type,const std::string & file_name,std::string_view input)774 perfetto::base::StatusOr<std::vector<uint8_t>> TextToProto(
775 const uint8_t* descriptor_set_ptr,
776 size_t descriptor_set_size,
777 const std::string& root_type,
778 const std::string& file_name,
779 std::string_view input) {
780 std::map<std::string, const DescriptorProto*> name_to_descriptor;
781 std::map<std::string, const EnumDescriptorProto*> name_to_enum;
782 FileDescriptorSet file_descriptor_set;
783
784 {
785 file_descriptor_set.ParseFromArray(descriptor_set_ptr, descriptor_set_size);
786 for (const auto& file_descriptor : file_descriptor_set.file()) {
787 for (const auto& enum_descriptor : file_descriptor.enum_type()) {
788 const std::string name =
789 "." + file_descriptor.package() + "." + enum_descriptor.name();
790 name_to_enum[name] = &enum_descriptor;
791 }
792 for (const auto& descriptor : file_descriptor.message_type()) {
793 const std::string name =
794 "." + file_descriptor.package() + "." + descriptor.name();
795 name_to_descriptor[name] = &descriptor;
796 AddNestedDescriptors(name, &descriptor, &name_to_descriptor,
797 &name_to_enum);
798 }
799 }
800 }
801
802 const DescriptorProto* descriptor = name_to_descriptor[root_type];
803 PERFETTO_CHECK(descriptor);
804
805 protozero::HeapBuffered<protozero::Message> message;
806 ErrorReporter reporter(file_name, input);
807 ParserDelegate delegate(descriptor, message.get(), &reporter,
808 std::move(name_to_descriptor),
809 std::move(name_to_enum));
810 Parse(input, &delegate);
811 if (!reporter.success())
812 return perfetto::base::ErrStatus("%s", reporter.error().c_str());
813 return message.SerializeAsArray();
814 }
815
816 } // namespace protozero
817