1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/util/descriptors.h"
18
19 #include <cstddef>
20 #include <cstdint>
21 #include <optional>
22 #include <string>
23 #include <utility>
24 #include <vector>
25
26 #include "perfetto/base/logging.h"
27 #include "perfetto/base/status.h"
28 #include "perfetto/ext/base/string_utils.h"
29 #include "perfetto/ext/base/string_view.h"
30 #include "perfetto/protozero/field.h"
31 #include "perfetto/protozero/message.h"
32 #include "perfetto/protozero/proto_decoder.h"
33 #include "perfetto/protozero/scattered_heap_buffer.h"
34 #include "protos/perfetto/common/descriptor.pbzero.h"
35 #include "protos/perfetto/trace_processor/trace_processor.pbzero.h"
36 #include "src/trace_processor/util/status_macros.h"
37
38 namespace perfetto::trace_processor {
39 namespace {
CreateFieldFromDecoder(const protos::pbzero::FieldDescriptorProto::Decoder & f_decoder,bool is_extension)40 FieldDescriptor CreateFieldFromDecoder(
41 const protos::pbzero::FieldDescriptorProto::Decoder& f_decoder,
42 bool is_extension) {
43 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
44 std::string type_name =
45 f_decoder.has_type_name()
46 ? base::StringView(f_decoder.type_name()).ToStdString()
47 : "";
48 // TODO(lalitm): add support for enums here.
49 uint32_t type =
50 f_decoder.has_type()
51 ? static_cast<uint32_t>(f_decoder.type())
52 : static_cast<uint32_t>(FieldDescriptorProto::TYPE_MESSAGE);
53 protos::pbzero::FieldOptions::Decoder opt(f_decoder.options());
54 std::optional<std::string> default_value;
55 if (f_decoder.has_default_value()) {
56 default_value = f_decoder.default_value().ToStdString();
57 }
58 return {
59 base::StringView(f_decoder.name()).ToStdString(),
60 static_cast<uint32_t>(f_decoder.number()),
61 type,
62 std::move(type_name),
63 std::vector<uint8_t>(f_decoder.options().data,
64 f_decoder.options().data + f_decoder.options().size),
65 default_value,
66 f_decoder.label() == FieldDescriptorProto::LABEL_REPEATED,
67 opt.packed(),
68 is_extension,
69 };
70 }
71
CheckExtensionField(const ProtoDescriptor & proto_descriptor,const FieldDescriptor & field)72 base::Status CheckExtensionField(const ProtoDescriptor& proto_descriptor,
73 const FieldDescriptor& field) {
74 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
75 const auto* existing_field = proto_descriptor.FindFieldByTag(field.number());
76 if (existing_field) {
77 if (field.type() != existing_field->type()) {
78 return base::ErrStatus("Field %s is re-introduced with different type",
79 field.name().c_str());
80 }
81 if ((field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
82 field.type() == FieldDescriptorProto::TYPE_ENUM) &&
83 field.raw_type_name() != existing_field->raw_type_name()) {
84 return base::ErrStatus(
85 "Field %s is re-introduced with different type %s (was %s)",
86 field.name().c_str(), field.raw_type_name().c_str(),
87 existing_field->raw_type_name().c_str());
88 }
89 }
90 return base::OkStatus();
91 }
92
93 } // namespace
94
ResolveShortType(const std::string & parent_path,const std::string & short_type)95 std::optional<uint32_t> DescriptorPool::ResolveShortType(
96 const std::string& parent_path,
97 const std::string& short_type) {
98 PERFETTO_DCHECK(!short_type.empty());
99
100 std::string search_path = short_type[0] == '.'
101 ? parent_path + short_type
102 : parent_path + '.' + short_type;
103 auto opt_idx = FindDescriptorIdx(search_path);
104 if (opt_idx)
105 return opt_idx;
106
107 if (parent_path.empty())
108 return std::nullopt;
109
110 auto parent_dot_idx = parent_path.rfind('.');
111 auto parent_substr = parent_dot_idx == std::string::npos
112 ? ""
113 : parent_path.substr(0, parent_dot_idx);
114 return ResolveShortType(parent_substr, short_type);
115 }
116
AddExtensionField(const std::string & package_name,protozero::ConstBytes field_desc_proto)117 base::Status DescriptorPool::AddExtensionField(
118 const std::string& package_name,
119 protozero::ConstBytes field_desc_proto) {
120 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
121 FieldDescriptorProto::Decoder f_decoder(field_desc_proto);
122 auto field = CreateFieldFromDecoder(f_decoder, true);
123
124 std::string extendee_name = f_decoder.extendee().ToStdString();
125 if (extendee_name.empty()) {
126 return base::ErrStatus("Extendee name is empty");
127 }
128
129 if (extendee_name[0] != '.') {
130 // Only prepend if the extendee is not fully qualified
131 extendee_name = package_name + "." + extendee_name;
132 }
133 std::optional<uint32_t> extendee = FindDescriptorIdx(extendee_name);
134 if (!extendee.has_value()) {
135 return base::ErrStatus("Extendee does not exist %s", extendee_name.c_str());
136 }
137 ProtoDescriptor& extendee_desc = descriptors_[extendee.value()];
138 RETURN_IF_ERROR(CheckExtensionField(extendee_desc, field));
139 extendee_desc.AddField(field);
140 return base::OkStatus();
141 }
142
AddNestedProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,std::vector<ExtensionInfo> * extensions,bool merge_existing_messages)143 base::Status DescriptorPool::AddNestedProtoDescriptors(
144 const std::string& file_name,
145 const std::string& package_name,
146 std::optional<uint32_t> parent_idx,
147 protozero::ConstBytes descriptor_proto,
148 std::vector<ExtensionInfo>* extensions,
149 bool merge_existing_messages) {
150 protos::pbzero::DescriptorProto::Decoder decoder(descriptor_proto);
151
152 auto parent_name =
153 parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
154 auto full_name =
155 parent_name + "." + base::StringView(decoder.name()).ToStdString();
156
157 auto idx = FindDescriptorIdx(full_name);
158 if (idx.has_value() && !merge_existing_messages) {
159 const auto& existing_descriptor = descriptors_[*idx];
160 return base::ErrStatus("%s: %s was already defined in file %s",
161 file_name.c_str(), full_name.c_str(),
162 existing_descriptor.file_name().c_str());
163 }
164 if (!idx.has_value()) {
165 ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
166 ProtoDescriptor::Type::kMessage,
167 parent_idx);
168 idx = AddProtoDescriptor(std::move(proto_descriptor));
169 }
170 ProtoDescriptor& proto_descriptor = descriptors_[*idx];
171 if (proto_descriptor.type() != ProtoDescriptor::Type::kMessage) {
172 return base::ErrStatus("%s was enum, redefined as message",
173 full_name.c_str());
174 }
175
176 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
177 for (auto it = decoder.field(); it; ++it) {
178 FieldDescriptorProto::Decoder f_decoder(*it);
179 auto field = CreateFieldFromDecoder(f_decoder, /*is_extension=*/false);
180 RETURN_IF_ERROR(CheckExtensionField(proto_descriptor, field));
181 proto_descriptor.AddField(std::move(field));
182 }
183
184 for (auto it = decoder.enum_type(); it; ++it) {
185 RETURN_IF_ERROR(AddEnumProtoDescriptors(file_name, package_name, idx, *it,
186 merge_existing_messages));
187 }
188 for (auto it = decoder.nested_type(); it; ++it) {
189 RETURN_IF_ERROR(AddNestedProtoDescriptors(file_name, package_name, idx, *it,
190 extensions,
191 merge_existing_messages));
192 }
193 for (auto ext_it = decoder.extension(); ext_it; ++ext_it) {
194 extensions->emplace_back(package_name, *ext_it);
195 }
196 return base::OkStatus();
197 }
198
AddEnumProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,bool merge_existing_messages)199 base::Status DescriptorPool::AddEnumProtoDescriptors(
200 const std::string& file_name,
201 const std::string& package_name,
202 std::optional<uint32_t> parent_idx,
203 protozero::ConstBytes descriptor_proto,
204 bool merge_existing_messages) {
205 protos::pbzero::EnumDescriptorProto::Decoder decoder(descriptor_proto);
206
207 auto parent_name =
208 parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
209 auto full_name =
210 parent_name + "." + base::StringView(decoder.name()).ToStdString();
211
212 auto prev_idx = FindDescriptorIdx(full_name);
213 if (prev_idx.has_value() && !merge_existing_messages) {
214 const auto& existing_descriptor = descriptors_[*prev_idx];
215 return base::ErrStatus("%s: %s was already defined in file %s",
216 file_name.c_str(), full_name.c_str(),
217 existing_descriptor.file_name().c_str());
218 }
219 if (!prev_idx.has_value()) {
220 ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
221 ProtoDescriptor::Type::kEnum,
222 std::nullopt);
223 prev_idx = AddProtoDescriptor(std::move(proto_descriptor));
224 }
225 ProtoDescriptor& proto_descriptor = descriptors_[*prev_idx];
226 if (proto_descriptor.type() != ProtoDescriptor::Type::kEnum) {
227 return base::ErrStatus("%s was message, redefined as enum",
228 full_name.c_str());
229 }
230
231 for (auto it = decoder.value(); it; ++it) {
232 protos::pbzero::EnumValueDescriptorProto::Decoder enum_value(it->data(),
233 it->size());
234 proto_descriptor.AddEnumValue(enum_value.number(),
235 enum_value.name().ToStdString());
236 }
237
238 return base::OkStatus();
239 }
240
AddFromFileDescriptorSet(const uint8_t * file_descriptor_set_proto,size_t size,const std::vector<std::string> & skip_prefixes,bool merge_existing_messages)241 base::Status DescriptorPool::AddFromFileDescriptorSet(
242 const uint8_t* file_descriptor_set_proto,
243 size_t size,
244 const std::vector<std::string>& skip_prefixes,
245 bool merge_existing_messages) {
246 protos::pbzero::FileDescriptorSet::Decoder proto(file_descriptor_set_proto,
247 size);
248 std::vector<ExtensionInfo> extensions;
249 for (auto it = proto.file(); it; ++it) {
250 protos::pbzero::FileDescriptorProto::Decoder file(*it);
251 const std::string file_name = file.name().ToStdString();
252 if (base::StartsWithAny(file_name, skip_prefixes))
253 continue;
254 if (!merge_existing_messages &&
255 processed_files_.find(file_name) != processed_files_.end()) {
256 // This file has been loaded once already. Skip.
257 continue;
258 }
259 processed_files_.insert(file_name);
260 std::string package = "." + base::StringView(file.package()).ToStdString();
261 for (auto message_it = file.message_type(); message_it; ++message_it) {
262 RETURN_IF_ERROR(AddNestedProtoDescriptors(
263 file_name, package, std::nullopt, *message_it, &extensions,
264 merge_existing_messages));
265 }
266 for (auto enum_it = file.enum_type(); enum_it; ++enum_it) {
267 RETURN_IF_ERROR(AddEnumProtoDescriptors(
268 file_name, package, std::nullopt, *enum_it, merge_existing_messages));
269 }
270 for (auto ext_it = file.extension(); ext_it; ++ext_it) {
271 extensions.emplace_back(package, *ext_it);
272 }
273 }
274
275 // Second pass: Add extension fields to the real protos.
276 for (const auto& extension : extensions) {
277 RETURN_IF_ERROR(AddExtensionField(extension.first, extension.second));
278 }
279
280 // Third pass: resolve the types of all the fields.
281 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
282 for (ProtoDescriptor& descriptor : descriptors_) {
283 for (auto& entry : *descriptor.mutable_fields()) {
284 FieldDescriptor& field = entry.second;
285 bool needs_resolution =
286 field.resolved_type_name().empty() &&
287 (field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
288 field.type() == FieldDescriptorProto::TYPE_ENUM);
289 if (needs_resolution) {
290 auto opt_desc =
291 ResolveShortType(descriptor.full_name(), field.raw_type_name());
292 if (!opt_desc.has_value()) {
293 return base::ErrStatus(
294 "Unable to find short type %s in field inside message %s",
295 field.raw_type_name().c_str(), descriptor.full_name().c_str());
296 }
297 field.set_resolved_type_name(
298 descriptors_[opt_desc.value()].full_name());
299 }
300 }
301 }
302
303 // Fourth pass: resolve all "uninterpreted" options to real options.
304 for (ProtoDescriptor& descriptor : descriptors_) {
305 for (auto& entry : *descriptor.mutable_fields()) {
306 FieldDescriptor& field = entry.second;
307 if (field.options().empty()) {
308 continue;
309 }
310 ResolveUninterpretedOption(descriptor, field, *field.mutable_options());
311 }
312 }
313 return base::OkStatus();
314 }
315
ResolveUninterpretedOption(const ProtoDescriptor & proto_desc,const FieldDescriptor & field_desc,std::vector<uint8_t> & options)316 base::Status DescriptorPool::ResolveUninterpretedOption(
317 const ProtoDescriptor& proto_desc,
318 const FieldDescriptor& field_desc,
319 std::vector<uint8_t>& options) {
320 auto opt_idx = FindDescriptorIdx(".google.protobuf.FieldOptions");
321 if (!opt_idx) {
322 return base::ErrStatus("Unable to find field options for field %s in %s",
323 field_desc.name().c_str(),
324 proto_desc.full_name().c_str());
325 }
326 ProtoDescriptor& field_options_desc = descriptors_[*opt_idx];
327
328 protozero::ProtoDecoder decoder(field_desc.options().data(),
329 field_desc.options().size());
330 protozero::HeapBuffered<protozero::Message> field_options;
331 for (;;) {
332 const uint8_t* start = decoder.begin() + decoder.read_offset();
333 auto field = decoder.ReadField();
334 if (!field.valid()) {
335 break;
336 }
337 const uint8_t* end = decoder.begin() + decoder.read_offset();
338
339 if (field.id() !=
340 protos::pbzero::FieldOptions::kUninterpretedOptionFieldNumber) {
341 field_options->AppendRawProtoBytes(start,
342 static_cast<size_t>(end - start));
343 continue;
344 }
345
346 protos::pbzero::UninterpretedOption::Decoder unint(field.as_bytes());
347 auto it = unint.name();
348 if (!it) {
349 return base::ErrStatus(
350 "Option for field %s in message %s does not have a name",
351 field_desc.name().c_str(), proto_desc.full_name().c_str());
352 }
353 protos::pbzero::UninterpretedOption::NamePart::Decoder name_part(*it);
354 const auto* option_field_desc =
355 field_options_desc.FindFieldByName(name_part.name_part().ToStdString());
356
357 // It's not immediately clear how options with multiple names should
358 // be parsed. This likely requires digging into protobuf compiler
359 // source; given we don't have any examples of this in the codebase
360 // today, defer handling of this to when we may need it.
361 if (++it) {
362 return base::ErrStatus(
363 "Option for field %s in message %s has multiple name segments",
364 field_desc.name().c_str(), proto_desc.full_name().c_str());
365 }
366 if (unint.has_identifier_value()) {
367 field_options->AppendString(option_field_desc->number(),
368 unint.identifier_value().ToStdString());
369 } else if (unint.has_positive_int_value()) {
370 field_options->AppendVarInt(option_field_desc->number(),
371 unint.positive_int_value());
372 } else if (unint.has_negative_int_value()) {
373 field_options->AppendVarInt(option_field_desc->number(),
374 unint.negative_int_value());
375 } else if (unint.has_double_value()) {
376 field_options->AppendFixed(option_field_desc->number(),
377 unint.double_value());
378 } else if (unint.has_string_value()) {
379 field_options->AppendString(option_field_desc->number(),
380 unint.string_value().ToStdString());
381 } else if (unint.has_aggregate_value()) {
382 field_options->AppendString(option_field_desc->number(),
383 unint.aggregate_value().ToStdString());
384 } else {
385 return base::ErrStatus(
386 "Unknown field set in UninterpretedOption %s for field %s in message "
387 "%s",
388 option_field_desc->name().c_str(), field_desc.name().c_str(),
389 proto_desc.full_name().c_str());
390 }
391 }
392 if (decoder.bytes_left() > 0) {
393 return base::ErrStatus("Unexpected extra bytes when parsing option %zu",
394 decoder.bytes_left());
395 }
396 options = field_options.SerializeAsArray();
397 return base::OkStatus();
398 }
399
FindDescriptorIdx(const std::string & full_name) const400 std::optional<uint32_t> DescriptorPool::FindDescriptorIdx(
401 const std::string& full_name) const {
402 auto it = full_name_to_descriptor_index_.find(full_name);
403 if (it == full_name_to_descriptor_index_.end()) {
404 return std::nullopt;
405 }
406 return it->second;
407 }
408
SerializeAsDescriptorSet() const409 std::vector<uint8_t> DescriptorPool::SerializeAsDescriptorSet() const {
410 protozero::HeapBuffered<protos::pbzero::DescriptorSet> descs;
411 for (const auto& desc : descriptors()) {
412 protos::pbzero::DescriptorProto* proto_descriptor =
413 descs->add_descriptors();
414 proto_descriptor->set_name(desc.full_name());
415 for (const auto& entry : desc.fields()) {
416 const auto& field = entry.second;
417 protos::pbzero::FieldDescriptorProto* field_descriptor =
418 proto_descriptor->add_field();
419 field_descriptor->set_name(field.name());
420 field_descriptor->set_number(static_cast<int32_t>(field.number()));
421 // We do not support required fields. They will show up as
422 // optional after serialization.
423 field_descriptor->set_label(
424 field.is_repeated()
425 ? protos::pbzero::FieldDescriptorProto::LABEL_REPEATED
426 : protos::pbzero::FieldDescriptorProto::LABEL_OPTIONAL);
427 field_descriptor->set_type_name(field.resolved_type_name());
428 field_descriptor->set_type(
429 static_cast<protos::pbzero::FieldDescriptorProto_Type>(field.type()));
430 }
431 }
432 return descs.SerializeAsArray();
433 }
434
AddProtoDescriptor(ProtoDescriptor descriptor)435 uint32_t DescriptorPool::AddProtoDescriptor(ProtoDescriptor descriptor) {
436 uint32_t idx = static_cast<uint32_t>(descriptors_.size());
437 full_name_to_descriptor_index_[descriptor.full_name()] = idx;
438 descriptors_.emplace_back(std::move(descriptor));
439 return idx;
440 }
441
ProtoDescriptor(std::string file_name,std::string package_name,std::string full_name,Type type,std::optional<uint32_t> parent_id)442 ProtoDescriptor::ProtoDescriptor(std::string file_name,
443 std::string package_name,
444 std::string full_name,
445 Type type,
446 std::optional<uint32_t> parent_id)
447 : file_name_(std::move(file_name)),
448 package_name_(std::move(package_name)),
449 full_name_(std::move(full_name)),
450 type_(type),
451 parent_id_(parent_id) {}
452
FieldDescriptor(std::string name,uint32_t number,uint32_t type,std::string raw_type_name,std::vector<uint8_t> options,std::optional<std::string> default_value,bool is_repeated,bool is_packed,bool is_extension)453 FieldDescriptor::FieldDescriptor(std::string name,
454 uint32_t number,
455 uint32_t type,
456 std::string raw_type_name,
457 std::vector<uint8_t> options,
458 std::optional<std::string> default_value,
459 bool is_repeated,
460 bool is_packed,
461 bool is_extension)
462 : name_(std::move(name)),
463 number_(number),
464 type_(type),
465 raw_type_name_(std::move(raw_type_name)),
466 options_(std::move(options)),
467 default_value_(std::move(default_value)),
468 is_repeated_(is_repeated),
469 is_packed_(is_packed),
470 is_extension_(is_extension) {}
471
472 } // namespace perfetto::trace_processor
473