• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/util/descriptors.h"
18 
19 #include <cstddef>
20 #include <cstdint>
21 #include <optional>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 #include "perfetto/base/logging.h"
27 #include "perfetto/base/status.h"
28 #include "perfetto/ext/base/string_utils.h"
29 #include "perfetto/ext/base/string_view.h"
30 #include "perfetto/protozero/field.h"
31 #include "perfetto/protozero/message.h"
32 #include "perfetto/protozero/proto_decoder.h"
33 #include "perfetto/protozero/scattered_heap_buffer.h"
34 #include "protos/perfetto/common/descriptor.pbzero.h"
35 #include "protos/perfetto/trace_processor/trace_processor.pbzero.h"
36 #include "src/trace_processor/util/status_macros.h"
37 
38 namespace perfetto::trace_processor {
39 namespace {
CreateFieldFromDecoder(const protos::pbzero::FieldDescriptorProto::Decoder & f_decoder,bool is_extension)40 FieldDescriptor CreateFieldFromDecoder(
41     const protos::pbzero::FieldDescriptorProto::Decoder& f_decoder,
42     bool is_extension) {
43   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
44   std::string type_name =
45       f_decoder.has_type_name()
46           ? base::StringView(f_decoder.type_name()).ToStdString()
47           : "";
48   // TODO(lalitm): add support for enums here.
49   uint32_t type =
50       f_decoder.has_type()
51           ? static_cast<uint32_t>(f_decoder.type())
52           : static_cast<uint32_t>(FieldDescriptorProto::TYPE_MESSAGE);
53   protos::pbzero::FieldOptions::Decoder opt(f_decoder.options());
54   std::optional<std::string> default_value;
55   if (f_decoder.has_default_value()) {
56     default_value = f_decoder.default_value().ToStdString();
57   }
58   return {
59       base::StringView(f_decoder.name()).ToStdString(),
60       static_cast<uint32_t>(f_decoder.number()),
61       type,
62       std::move(type_name),
63       std::vector<uint8_t>(f_decoder.options().data,
64                            f_decoder.options().data + f_decoder.options().size),
65       default_value,
66       f_decoder.label() == FieldDescriptorProto::LABEL_REPEATED,
67       opt.packed(),
68       is_extension,
69   };
70 }
71 
CheckExtensionField(const ProtoDescriptor & proto_descriptor,const FieldDescriptor & field)72 base::Status CheckExtensionField(const ProtoDescriptor& proto_descriptor,
73                                  const FieldDescriptor& field) {
74   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
75   const auto* existing_field = proto_descriptor.FindFieldByTag(field.number());
76   if (existing_field) {
77     if (field.type() != existing_field->type()) {
78       return base::ErrStatus("Field %s is re-introduced with different type",
79                              field.name().c_str());
80     }
81     if ((field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
82          field.type() == FieldDescriptorProto::TYPE_ENUM) &&
83         field.raw_type_name() != existing_field->raw_type_name()) {
84       return base::ErrStatus(
85           "Field %s is re-introduced with different type %s (was %s)",
86           field.name().c_str(), field.raw_type_name().c_str(),
87           existing_field->raw_type_name().c_str());
88     }
89   }
90   return base::OkStatus();
91 }
92 
93 }  // namespace
94 
ResolveShortType(const std::string & parent_path,const std::string & short_type)95 std::optional<uint32_t> DescriptorPool::ResolveShortType(
96     const std::string& parent_path,
97     const std::string& short_type) {
98   PERFETTO_DCHECK(!short_type.empty());
99 
100   std::string search_path = short_type[0] == '.'
101                                 ? parent_path + short_type
102                                 : parent_path + '.' + short_type;
103   auto opt_idx = FindDescriptorIdx(search_path);
104   if (opt_idx)
105     return opt_idx;
106 
107   if (parent_path.empty())
108     return std::nullopt;
109 
110   auto parent_dot_idx = parent_path.rfind('.');
111   auto parent_substr = parent_dot_idx == std::string::npos
112                            ? ""
113                            : parent_path.substr(0, parent_dot_idx);
114   return ResolveShortType(parent_substr, short_type);
115 }
116 
AddExtensionField(const std::string & package_name,protozero::ConstBytes field_desc_proto)117 base::Status DescriptorPool::AddExtensionField(
118     const std::string& package_name,
119     protozero::ConstBytes field_desc_proto) {
120   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
121   FieldDescriptorProto::Decoder f_decoder(field_desc_proto);
122   auto field = CreateFieldFromDecoder(f_decoder, true);
123 
124   std::string extendee_name = f_decoder.extendee().ToStdString();
125   if (extendee_name.empty()) {
126     return base::ErrStatus("Extendee name is empty");
127   }
128 
129   if (extendee_name[0] != '.') {
130     // Only prepend if the extendee is not fully qualified
131     extendee_name = package_name + "." + extendee_name;
132   }
133   std::optional<uint32_t> extendee = FindDescriptorIdx(extendee_name);
134   if (!extendee.has_value()) {
135     return base::ErrStatus("Extendee does not exist %s", extendee_name.c_str());
136   }
137   ProtoDescriptor& extendee_desc = descriptors_[extendee.value()];
138   RETURN_IF_ERROR(CheckExtensionField(extendee_desc, field));
139   extendee_desc.AddField(field);
140   return base::OkStatus();
141 }
142 
AddNestedProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,std::vector<ExtensionInfo> * extensions,bool merge_existing_messages)143 base::Status DescriptorPool::AddNestedProtoDescriptors(
144     const std::string& file_name,
145     const std::string& package_name,
146     std::optional<uint32_t> parent_idx,
147     protozero::ConstBytes descriptor_proto,
148     std::vector<ExtensionInfo>* extensions,
149     bool merge_existing_messages) {
150   protos::pbzero::DescriptorProto::Decoder decoder(descriptor_proto);
151 
152   auto parent_name =
153       parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
154   auto full_name =
155       parent_name + "." + base::StringView(decoder.name()).ToStdString();
156 
157   auto idx = FindDescriptorIdx(full_name);
158   if (idx.has_value() && !merge_existing_messages) {
159     const auto& existing_descriptor = descriptors_[*idx];
160     return base::ErrStatus("%s: %s was already defined in file %s",
161                            file_name.c_str(), full_name.c_str(),
162                            existing_descriptor.file_name().c_str());
163   }
164   if (!idx.has_value()) {
165     ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
166                                      ProtoDescriptor::Type::kMessage,
167                                      parent_idx);
168     idx = AddProtoDescriptor(std::move(proto_descriptor));
169   }
170   ProtoDescriptor& proto_descriptor = descriptors_[*idx];
171   if (proto_descriptor.type() != ProtoDescriptor::Type::kMessage) {
172     return base::ErrStatus("%s was enum, redefined as message",
173                            full_name.c_str());
174   }
175 
176   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
177   for (auto it = decoder.field(); it; ++it) {
178     FieldDescriptorProto::Decoder f_decoder(*it);
179     auto field = CreateFieldFromDecoder(f_decoder, /*is_extension=*/false);
180     RETURN_IF_ERROR(CheckExtensionField(proto_descriptor, field));
181     proto_descriptor.AddField(std::move(field));
182   }
183 
184   for (auto it = decoder.enum_type(); it; ++it) {
185     RETURN_IF_ERROR(AddEnumProtoDescriptors(file_name, package_name, idx, *it,
186                                             merge_existing_messages));
187   }
188   for (auto it = decoder.nested_type(); it; ++it) {
189     RETURN_IF_ERROR(AddNestedProtoDescriptors(file_name, package_name, idx, *it,
190                                               extensions,
191                                               merge_existing_messages));
192   }
193   for (auto ext_it = decoder.extension(); ext_it; ++ext_it) {
194     extensions->emplace_back(package_name, *ext_it);
195   }
196   return base::OkStatus();
197 }
198 
AddEnumProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,bool merge_existing_messages)199 base::Status DescriptorPool::AddEnumProtoDescriptors(
200     const std::string& file_name,
201     const std::string& package_name,
202     std::optional<uint32_t> parent_idx,
203     protozero::ConstBytes descriptor_proto,
204     bool merge_existing_messages) {
205   protos::pbzero::EnumDescriptorProto::Decoder decoder(descriptor_proto);
206 
207   auto parent_name =
208       parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
209   auto full_name =
210       parent_name + "." + base::StringView(decoder.name()).ToStdString();
211 
212   auto prev_idx = FindDescriptorIdx(full_name);
213   if (prev_idx.has_value() && !merge_existing_messages) {
214     const auto& existing_descriptor = descriptors_[*prev_idx];
215     return base::ErrStatus("%s: %s was already defined in file %s",
216                            file_name.c_str(), full_name.c_str(),
217                            existing_descriptor.file_name().c_str());
218   }
219   if (!prev_idx.has_value()) {
220     ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
221                                      ProtoDescriptor::Type::kEnum,
222                                      std::nullopt);
223     prev_idx = AddProtoDescriptor(std::move(proto_descriptor));
224   }
225   ProtoDescriptor& proto_descriptor = descriptors_[*prev_idx];
226   if (proto_descriptor.type() != ProtoDescriptor::Type::kEnum) {
227     return base::ErrStatus("%s was message, redefined as enum",
228                            full_name.c_str());
229   }
230 
231   for (auto it = decoder.value(); it; ++it) {
232     protos::pbzero::EnumValueDescriptorProto::Decoder enum_value(it->data(),
233                                                                  it->size());
234     proto_descriptor.AddEnumValue(enum_value.number(),
235                                   enum_value.name().ToStdString());
236   }
237 
238   return base::OkStatus();
239 }
240 
AddFromFileDescriptorSet(const uint8_t * file_descriptor_set_proto,size_t size,const std::vector<std::string> & skip_prefixes,bool merge_existing_messages)241 base::Status DescriptorPool::AddFromFileDescriptorSet(
242     const uint8_t* file_descriptor_set_proto,
243     size_t size,
244     const std::vector<std::string>& skip_prefixes,
245     bool merge_existing_messages) {
246   protos::pbzero::FileDescriptorSet::Decoder proto(file_descriptor_set_proto,
247                                                    size);
248   std::vector<ExtensionInfo> extensions;
249   for (auto it = proto.file(); it; ++it) {
250     protos::pbzero::FileDescriptorProto::Decoder file(*it);
251     const std::string file_name = file.name().ToStdString();
252     if (base::StartsWithAny(file_name, skip_prefixes))
253       continue;
254     if (!merge_existing_messages &&
255         processed_files_.find(file_name) != processed_files_.end()) {
256       // This file has been loaded once already. Skip.
257       continue;
258     }
259     processed_files_.insert(file_name);
260     std::string package = "." + base::StringView(file.package()).ToStdString();
261     for (auto message_it = file.message_type(); message_it; ++message_it) {
262       RETURN_IF_ERROR(AddNestedProtoDescriptors(
263           file_name, package, std::nullopt, *message_it, &extensions,
264           merge_existing_messages));
265     }
266     for (auto enum_it = file.enum_type(); enum_it; ++enum_it) {
267       RETURN_IF_ERROR(AddEnumProtoDescriptors(
268           file_name, package, std::nullopt, *enum_it, merge_existing_messages));
269     }
270     for (auto ext_it = file.extension(); ext_it; ++ext_it) {
271       extensions.emplace_back(package, *ext_it);
272     }
273   }
274 
275   // Second pass: Add extension fields to the real protos.
276   for (const auto& extension : extensions) {
277     RETURN_IF_ERROR(AddExtensionField(extension.first, extension.second));
278   }
279 
280   // Third pass: resolve the types of all the fields.
281   using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
282   for (ProtoDescriptor& descriptor : descriptors_) {
283     for (auto& entry : *descriptor.mutable_fields()) {
284       FieldDescriptor& field = entry.second;
285       bool needs_resolution =
286           field.resolved_type_name().empty() &&
287           (field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
288            field.type() == FieldDescriptorProto::TYPE_ENUM);
289       if (needs_resolution) {
290         auto opt_desc =
291             ResolveShortType(descriptor.full_name(), field.raw_type_name());
292         if (!opt_desc.has_value()) {
293           return base::ErrStatus(
294               "Unable to find short type %s in field inside message %s",
295               field.raw_type_name().c_str(), descriptor.full_name().c_str());
296         }
297         field.set_resolved_type_name(
298             descriptors_[opt_desc.value()].full_name());
299       }
300     }
301   }
302 
303   // Fourth pass: resolve all "uninterpreted" options to real options.
304   for (ProtoDescriptor& descriptor : descriptors_) {
305     for (auto& entry : *descriptor.mutable_fields()) {
306       FieldDescriptor& field = entry.second;
307       if (field.options().empty()) {
308         continue;
309       }
310       ResolveUninterpretedOption(descriptor, field, *field.mutable_options());
311     }
312   }
313   return base::OkStatus();
314 }
315 
ResolveUninterpretedOption(const ProtoDescriptor & proto_desc,const FieldDescriptor & field_desc,std::vector<uint8_t> & options)316 base::Status DescriptorPool::ResolveUninterpretedOption(
317     const ProtoDescriptor& proto_desc,
318     const FieldDescriptor& field_desc,
319     std::vector<uint8_t>& options) {
320   auto opt_idx = FindDescriptorIdx(".google.protobuf.FieldOptions");
321   if (!opt_idx) {
322     return base::ErrStatus("Unable to find field options for field %s in %s",
323                            field_desc.name().c_str(),
324                            proto_desc.full_name().c_str());
325   }
326   ProtoDescriptor& field_options_desc = descriptors_[*opt_idx];
327 
328   protozero::ProtoDecoder decoder(field_desc.options().data(),
329                                   field_desc.options().size());
330   protozero::HeapBuffered<protozero::Message> field_options;
331   for (;;) {
332     const uint8_t* start = decoder.begin() + decoder.read_offset();
333     auto field = decoder.ReadField();
334     if (!field.valid()) {
335       break;
336     }
337     const uint8_t* end = decoder.begin() + decoder.read_offset();
338 
339     if (field.id() !=
340         protos::pbzero::FieldOptions::kUninterpretedOptionFieldNumber) {
341       field_options->AppendRawProtoBytes(start,
342                                          static_cast<size_t>(end - start));
343       continue;
344     }
345 
346     protos::pbzero::UninterpretedOption::Decoder unint(field.as_bytes());
347     auto it = unint.name();
348     if (!it) {
349       return base::ErrStatus(
350           "Option for field %s in message %s does not have a name",
351           field_desc.name().c_str(), proto_desc.full_name().c_str());
352     }
353     protos::pbzero::UninterpretedOption::NamePart::Decoder name_part(*it);
354     const auto* option_field_desc =
355         field_options_desc.FindFieldByName(name_part.name_part().ToStdString());
356 
357     // It's not immediately clear how options with multiple names should
358     // be parsed. This likely requires digging into protobuf compiler
359     // source; given we don't have any examples of this in the codebase
360     // today, defer handling of this to when we may need it.
361     if (++it) {
362       return base::ErrStatus(
363           "Option for field %s in message %s has multiple name segments",
364           field_desc.name().c_str(), proto_desc.full_name().c_str());
365     }
366     if (unint.has_identifier_value()) {
367       field_options->AppendString(option_field_desc->number(),
368                                   unint.identifier_value().ToStdString());
369     } else if (unint.has_positive_int_value()) {
370       field_options->AppendVarInt(option_field_desc->number(),
371                                   unint.positive_int_value());
372     } else if (unint.has_negative_int_value()) {
373       field_options->AppendVarInt(option_field_desc->number(),
374                                   unint.negative_int_value());
375     } else if (unint.has_double_value()) {
376       field_options->AppendFixed(option_field_desc->number(),
377                                  unint.double_value());
378     } else if (unint.has_string_value()) {
379       field_options->AppendString(option_field_desc->number(),
380                                   unint.string_value().ToStdString());
381     } else if (unint.has_aggregate_value()) {
382       field_options->AppendString(option_field_desc->number(),
383                                   unint.aggregate_value().ToStdString());
384     } else {
385       return base::ErrStatus(
386           "Unknown field set in UninterpretedOption %s for field %s in message "
387           "%s",
388           option_field_desc->name().c_str(), field_desc.name().c_str(),
389           proto_desc.full_name().c_str());
390     }
391   }
392   if (decoder.bytes_left() > 0) {
393     return base::ErrStatus("Unexpected extra bytes when parsing option %zu",
394                            decoder.bytes_left());
395   }
396   options = field_options.SerializeAsArray();
397   return base::OkStatus();
398 }
399 
FindDescriptorIdx(const std::string & full_name) const400 std::optional<uint32_t> DescriptorPool::FindDescriptorIdx(
401     const std::string& full_name) const {
402   auto it = full_name_to_descriptor_index_.find(full_name);
403   if (it == full_name_to_descriptor_index_.end()) {
404     return std::nullopt;
405   }
406   return it->second;
407 }
408 
SerializeAsDescriptorSet() const409 std::vector<uint8_t> DescriptorPool::SerializeAsDescriptorSet() const {
410   protozero::HeapBuffered<protos::pbzero::DescriptorSet> descs;
411   for (const auto& desc : descriptors()) {
412     protos::pbzero::DescriptorProto* proto_descriptor =
413         descs->add_descriptors();
414     proto_descriptor->set_name(desc.full_name());
415     for (const auto& entry : desc.fields()) {
416       const auto& field = entry.second;
417       protos::pbzero::FieldDescriptorProto* field_descriptor =
418           proto_descriptor->add_field();
419       field_descriptor->set_name(field.name());
420       field_descriptor->set_number(static_cast<int32_t>(field.number()));
421       // We do not support required fields. They will show up as
422       // optional after serialization.
423       field_descriptor->set_label(
424           field.is_repeated()
425               ? protos::pbzero::FieldDescriptorProto::LABEL_REPEATED
426               : protos::pbzero::FieldDescriptorProto::LABEL_OPTIONAL);
427       field_descriptor->set_type_name(field.resolved_type_name());
428       field_descriptor->set_type(
429           static_cast<protos::pbzero::FieldDescriptorProto_Type>(field.type()));
430     }
431   }
432   return descs.SerializeAsArray();
433 }
434 
AddProtoDescriptor(ProtoDescriptor descriptor)435 uint32_t DescriptorPool::AddProtoDescriptor(ProtoDescriptor descriptor) {
436   uint32_t idx = static_cast<uint32_t>(descriptors_.size());
437   full_name_to_descriptor_index_[descriptor.full_name()] = idx;
438   descriptors_.emplace_back(std::move(descriptor));
439   return idx;
440 }
441 
ProtoDescriptor(std::string file_name,std::string package_name,std::string full_name,Type type,std::optional<uint32_t> parent_id)442 ProtoDescriptor::ProtoDescriptor(std::string file_name,
443                                  std::string package_name,
444                                  std::string full_name,
445                                  Type type,
446                                  std::optional<uint32_t> parent_id)
447     : file_name_(std::move(file_name)),
448       package_name_(std::move(package_name)),
449       full_name_(std::move(full_name)),
450       type_(type),
451       parent_id_(parent_id) {}
452 
FieldDescriptor(std::string name,uint32_t number,uint32_t type,std::string raw_type_name,std::vector<uint8_t> options,std::optional<std::string> default_value,bool is_repeated,bool is_packed,bool is_extension)453 FieldDescriptor::FieldDescriptor(std::string name,
454                                  uint32_t number,
455                                  uint32_t type,
456                                  std::string raw_type_name,
457                                  std::vector<uint8_t> options,
458                                  std::optional<std::string> default_value,
459                                  bool is_repeated,
460                                  bool is_packed,
461                                  bool is_extension)
462     : name_(std::move(name)),
463       number_(number),
464       type_(type),
465       raw_type_name_(std::move(raw_type_name)),
466       options_(std::move(options)),
467       default_value_(std::move(default_value)),
468       is_repeated_(is_repeated),
469       is_packed_(is_packed),
470       is_extension_(is_extension) {}
471 
472 }  // namespace perfetto::trace_processor
473