1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/util/descriptors.h"
18
19 #include <cstdint>
20 #include <optional>
21 #include <vector>
22
23 #include "perfetto/base/status.h"
24 #include "perfetto/ext/base/string_utils.h"
25 #include "perfetto/ext/base/string_view.h"
26 #include "perfetto/protozero/field.h"
27 #include "perfetto/protozero/message.h"
28 #include "perfetto/protozero/proto_decoder.h"
29 #include "perfetto/protozero/scattered_heap_buffer.h"
30 #include "protos/perfetto/common/descriptor.pbzero.h"
31 #include "protos/perfetto/trace_processor/trace_processor.pbzero.h"
32 #include "src/trace_processor/util/status_macros.h"
33
34 namespace perfetto {
35 namespace trace_processor {
36 namespace {
CreateFieldFromDecoder(const protos::pbzero::FieldDescriptorProto::Decoder & f_decoder,bool is_extension)37 FieldDescriptor CreateFieldFromDecoder(
38 const protos::pbzero::FieldDescriptorProto::Decoder& f_decoder,
39 bool is_extension) {
40 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
41 std::string type_name =
42 f_decoder.has_type_name()
43 ? base::StringView(f_decoder.type_name()).ToStdString()
44 : "";
45 // TODO(lalitm): add support for enums here.
46 uint32_t type =
47 f_decoder.has_type()
48 ? static_cast<uint32_t>(f_decoder.type())
49 : static_cast<uint32_t>(FieldDescriptorProto::TYPE_MESSAGE);
50 protos::pbzero::FieldOptions::Decoder opt(f_decoder.options());
51 return FieldDescriptor(
52 base::StringView(f_decoder.name()).ToStdString(),
53 static_cast<uint32_t>(f_decoder.number()), type, std::move(type_name),
54 std::vector<uint8_t>(f_decoder.options().data,
55 f_decoder.options().data + f_decoder.options().size),
56 f_decoder.label() == FieldDescriptorProto::LABEL_REPEATED, opt.packed(),
57 is_extension);
58 }
59
CheckExtensionField(const ProtoDescriptor & proto_descriptor,const FieldDescriptor & field)60 base::Status CheckExtensionField(const ProtoDescriptor& proto_descriptor,
61 const FieldDescriptor& field) {
62 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
63 auto existing_field = proto_descriptor.FindFieldByTag(field.number());
64 if (existing_field) {
65 if (field.type() != existing_field->type()) {
66 return base::ErrStatus("Field %s is re-introduced with different type",
67 field.name().c_str());
68 }
69 if ((field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
70 field.type() == FieldDescriptorProto::TYPE_ENUM) &&
71 field.raw_type_name() != existing_field->raw_type_name()) {
72 return base::ErrStatus(
73 "Field %s is re-introduced with different type %s (was %s)",
74 field.name().c_str(), field.raw_type_name().c_str(),
75 existing_field->raw_type_name().c_str());
76 }
77 }
78 return base::OkStatus();
79 }
80
81 } // namespace
82
ResolveShortType(const std::string & parent_path,const std::string & short_type)83 std::optional<uint32_t> DescriptorPool::ResolveShortType(
84 const std::string& parent_path,
85 const std::string& short_type) {
86 PERFETTO_DCHECK(!short_type.empty());
87
88 std::string search_path = short_type[0] == '.'
89 ? parent_path + short_type
90 : parent_path + '.' + short_type;
91 auto opt_idx = FindDescriptorIdx(search_path);
92 if (opt_idx)
93 return opt_idx;
94
95 if (parent_path.empty())
96 return std::nullopt;
97
98 auto parent_dot_idx = parent_path.rfind('.');
99 auto parent_substr = parent_dot_idx == std::string::npos
100 ? ""
101 : parent_path.substr(0, parent_dot_idx);
102 return ResolveShortType(parent_substr, short_type);
103 }
104
AddExtensionField(const std::string & package_name,protozero::ConstBytes field_desc_proto)105 base::Status DescriptorPool::AddExtensionField(
106 const std::string& package_name,
107 protozero::ConstBytes field_desc_proto) {
108 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
109 FieldDescriptorProto::Decoder f_decoder(field_desc_proto);
110 auto field = CreateFieldFromDecoder(f_decoder, true);
111
112 std::string extendee_name = f_decoder.extendee().ToStdString();
113 if (extendee_name.empty()) {
114 return base::ErrStatus("Extendee name is empty");
115 }
116
117 if (extendee_name[0] != '.') {
118 // Only prepend if the extendee is not fully qualified
119 extendee_name = package_name + "." + extendee_name;
120 }
121 std::optional<uint32_t> extendee = FindDescriptorIdx(extendee_name);
122 if (!extendee.has_value()) {
123 return base::ErrStatus("Extendee does not exist %s", extendee_name.c_str());
124 }
125 ProtoDescriptor& extendee_desc = descriptors_[extendee.value()];
126 RETURN_IF_ERROR(CheckExtensionField(extendee_desc, field));
127 extendee_desc.AddField(field);
128 return base::OkStatus();
129 }
130
AddNestedProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,std::vector<ExtensionInfo> * extensions,bool merge_existing_messages)131 base::Status DescriptorPool::AddNestedProtoDescriptors(
132 const std::string& file_name,
133 const std::string& package_name,
134 std::optional<uint32_t> parent_idx,
135 protozero::ConstBytes descriptor_proto,
136 std::vector<ExtensionInfo>* extensions,
137 bool merge_existing_messages) {
138 protos::pbzero::DescriptorProto::Decoder decoder(descriptor_proto);
139
140 auto parent_name =
141 parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
142 auto full_name =
143 parent_name + "." + base::StringView(decoder.name()).ToStdString();
144
145 auto idx = FindDescriptorIdx(full_name);
146 if (idx.has_value() && !merge_existing_messages) {
147 const auto& existing_descriptor = descriptors_[*idx];
148 return base::ErrStatus("%s: %s was already defined in file %s",
149 file_name.c_str(), full_name.c_str(),
150 existing_descriptor.file_name().c_str());
151 }
152 if (!idx.has_value()) {
153 ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
154 ProtoDescriptor::Type::kMessage,
155 parent_idx);
156 idx = AddProtoDescriptor(std::move(proto_descriptor));
157 }
158 ProtoDescriptor& proto_descriptor = descriptors_[*idx];
159 if (proto_descriptor.type() != ProtoDescriptor::Type::kMessage) {
160 return base::ErrStatus("%s was enum, redefined as message",
161 full_name.c_str());
162 }
163
164 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
165 for (auto it = decoder.field(); it; ++it) {
166 FieldDescriptorProto::Decoder f_decoder(*it);
167 auto field = CreateFieldFromDecoder(f_decoder, /*is_extension=*/false);
168 RETURN_IF_ERROR(CheckExtensionField(proto_descriptor, field));
169 proto_descriptor.AddField(std::move(field));
170 }
171
172 for (auto it = decoder.enum_type(); it; ++it) {
173 RETURN_IF_ERROR(AddEnumProtoDescriptors(file_name, package_name, idx, *it,
174 merge_existing_messages));
175 }
176 for (auto it = decoder.nested_type(); it; ++it) {
177 RETURN_IF_ERROR(AddNestedProtoDescriptors(file_name, package_name, idx, *it,
178 extensions,
179 merge_existing_messages));
180 }
181 for (auto ext_it = decoder.extension(); ext_it; ++ext_it) {
182 extensions->emplace_back(package_name, *ext_it);
183 }
184 return base::OkStatus();
185 }
186
AddEnumProtoDescriptors(const std::string & file_name,const std::string & package_name,std::optional<uint32_t> parent_idx,protozero::ConstBytes descriptor_proto,bool merge_existing_messages)187 base::Status DescriptorPool::AddEnumProtoDescriptors(
188 const std::string& file_name,
189 const std::string& package_name,
190 std::optional<uint32_t> parent_idx,
191 protozero::ConstBytes descriptor_proto,
192 bool merge_existing_messages) {
193 protos::pbzero::EnumDescriptorProto::Decoder decoder(descriptor_proto);
194
195 auto parent_name =
196 parent_idx ? descriptors_[*parent_idx].full_name() : package_name;
197 auto full_name =
198 parent_name + "." + base::StringView(decoder.name()).ToStdString();
199
200 auto prev_idx = FindDescriptorIdx(full_name);
201 if (prev_idx.has_value() && !merge_existing_messages) {
202 const auto& existing_descriptor = descriptors_[*prev_idx];
203 return base::ErrStatus("%s: %s was already defined in file %s",
204 file_name.c_str(), full_name.c_str(),
205 existing_descriptor.file_name().c_str());
206 }
207 if (!prev_idx.has_value()) {
208 ProtoDescriptor proto_descriptor(file_name, package_name, full_name,
209 ProtoDescriptor::Type::kEnum,
210 std::nullopt);
211 prev_idx = AddProtoDescriptor(std::move(proto_descriptor));
212 }
213 ProtoDescriptor& proto_descriptor = descriptors_[*prev_idx];
214 if (proto_descriptor.type() != ProtoDescriptor::Type::kEnum) {
215 return base::ErrStatus("%s was message, redefined as enum",
216 full_name.c_str());
217 }
218
219 for (auto it = decoder.value(); it; ++it) {
220 protos::pbzero::EnumValueDescriptorProto::Decoder enum_value(it->data(),
221 it->size());
222 proto_descriptor.AddEnumValue(enum_value.number(),
223 enum_value.name().ToStdString());
224 }
225
226 return base::OkStatus();
227 }
228
AddFromFileDescriptorSet(const uint8_t * file_descriptor_set_proto,size_t size,const std::vector<std::string> & skip_prefixes,bool merge_existing_messages)229 base::Status DescriptorPool::AddFromFileDescriptorSet(
230 const uint8_t* file_descriptor_set_proto,
231 size_t size,
232 const std::vector<std::string>& skip_prefixes,
233 bool merge_existing_messages) {
234 protos::pbzero::FileDescriptorSet::Decoder proto(file_descriptor_set_proto,
235 size);
236 std::vector<ExtensionInfo> extensions;
237 for (auto it = proto.file(); it; ++it) {
238 protos::pbzero::FileDescriptorProto::Decoder file(*it);
239 const std::string file_name = file.name().ToStdString();
240 if (base::StartsWithAny(file_name, skip_prefixes))
241 continue;
242 if (!merge_existing_messages &&
243 processed_files_.find(file_name) != processed_files_.end()) {
244 // This file has been loaded once already. Skip.
245 continue;
246 }
247 processed_files_.insert(file_name);
248 std::string package = "." + base::StringView(file.package()).ToStdString();
249 for (auto message_it = file.message_type(); message_it; ++message_it) {
250 RETURN_IF_ERROR(AddNestedProtoDescriptors(
251 file_name, package, std::nullopt, *message_it, &extensions,
252 merge_existing_messages));
253 }
254 for (auto enum_it = file.enum_type(); enum_it; ++enum_it) {
255 RETURN_IF_ERROR(AddEnumProtoDescriptors(
256 file_name, package, std::nullopt, *enum_it, merge_existing_messages));
257 }
258 for (auto ext_it = file.extension(); ext_it; ++ext_it) {
259 extensions.emplace_back(package, *ext_it);
260 }
261 }
262
263 // Second pass: Add extension fields to the real protos.
264 for (const auto& extension : extensions) {
265 RETURN_IF_ERROR(AddExtensionField(extension.first, extension.second));
266 }
267
268 // Third pass: resolve the types of all the fields.
269 using FieldDescriptorProto = protos::pbzero::FieldDescriptorProto;
270 for (ProtoDescriptor& descriptor : descriptors_) {
271 for (auto& entry : *descriptor.mutable_fields()) {
272 FieldDescriptor& field = entry.second;
273 bool needs_resolution =
274 field.resolved_type_name().empty() &&
275 (field.type() == FieldDescriptorProto::TYPE_MESSAGE ||
276 field.type() == FieldDescriptorProto::TYPE_ENUM);
277 if (needs_resolution) {
278 auto opt_desc =
279 ResolveShortType(descriptor.full_name(), field.raw_type_name());
280 if (!opt_desc.has_value()) {
281 return base::ErrStatus(
282 "Unable to find short type %s in field inside message %s",
283 field.raw_type_name().c_str(), descriptor.full_name().c_str());
284 }
285 field.set_resolved_type_name(
286 descriptors_[opt_desc.value()].full_name());
287 }
288 }
289 }
290
291 // Fourth pass: resolve all "uninterpreted" options to real options.
292 for (ProtoDescriptor& descriptor : descriptors_) {
293 for (auto& entry : *descriptor.mutable_fields()) {
294 FieldDescriptor& field = entry.second;
295 if (field.options().empty()) {
296 continue;
297 }
298 ResolveUninterpretedOption(descriptor, field, *field.mutable_options());
299 }
300 }
301 return base::OkStatus();
302 }
303
ResolveUninterpretedOption(const ProtoDescriptor & proto_desc,const FieldDescriptor & field_desc,std::vector<uint8_t> & options)304 base::Status DescriptorPool::ResolveUninterpretedOption(
305 const ProtoDescriptor& proto_desc,
306 const FieldDescriptor& field_desc,
307 std::vector<uint8_t>& options) {
308 auto opt_idx = FindDescriptorIdx(".google.protobuf.FieldOptions");
309 if (!opt_idx) {
310 return base::ErrStatus("Unable to find field options for field %s in %s",
311 field_desc.name().c_str(),
312 proto_desc.full_name().c_str());
313 }
314 ProtoDescriptor& field_options_desc = descriptors_[*opt_idx];
315
316 protozero::ProtoDecoder decoder(field_desc.options().data(),
317 field_desc.options().size());
318 protozero::HeapBuffered<protozero::Message> field_options;
319 for (;;) {
320 const uint8_t* start = decoder.begin() + decoder.read_offset();
321 auto field = decoder.ReadField();
322 if (!field.valid()) {
323 break;
324 }
325 const uint8_t* end = decoder.begin() + decoder.read_offset();
326
327 if (field.id() !=
328 protos::pbzero::FieldOptions::kUninterpretedOptionFieldNumber) {
329 field_options->AppendRawProtoBytes(start,
330 static_cast<size_t>(end - start));
331 continue;
332 }
333
334 protos::pbzero::UninterpretedOption::Decoder unint(field.as_bytes());
335 auto it = unint.name();
336 if (!it) {
337 return base::ErrStatus(
338 "Option for field %s in message %s does not have a name",
339 field_desc.name().c_str(), proto_desc.full_name().c_str());
340 }
341 protos::pbzero::UninterpretedOption::NamePart::Decoder name_part(*it);
342 auto option_field_desc =
343 field_options_desc.FindFieldByName(name_part.name_part().ToStdString());
344
345 // It's not immediately clear how options with multiple names should
346 // be parsed. This likely requires digging into protobuf compiler
347 // source; given we don't have any examples of this in the codebase
348 // today, defer handling of this to when we may need it.
349 if (++it) {
350 return base::ErrStatus(
351 "Option for field %s in message %s has multiple name segments",
352 field_desc.name().c_str(), proto_desc.full_name().c_str());
353 }
354 if (unint.has_identifier_value()) {
355 field_options->AppendString(option_field_desc->number(),
356 unint.identifier_value().ToStdString());
357 } else if (unint.has_positive_int_value()) {
358 field_options->AppendVarInt(option_field_desc->number(),
359 unint.positive_int_value());
360 } else if (unint.has_negative_int_value()) {
361 field_options->AppendVarInt(option_field_desc->number(),
362 unint.negative_int_value());
363 } else if (unint.has_double_value()) {
364 field_options->AppendFixed(option_field_desc->number(),
365 unint.double_value());
366 } else if (unint.has_string_value()) {
367 field_options->AppendString(option_field_desc->number(),
368 unint.string_value().ToStdString());
369 } else if (unint.has_aggregate_value()) {
370 field_options->AppendString(option_field_desc->number(),
371 unint.aggregate_value().ToStdString());
372 } else {
373 return base::ErrStatus(
374 "Unknown field set in UninterpretedOption %s for field %s in message "
375 "%s",
376 option_field_desc->name().c_str(), field_desc.name().c_str(),
377 proto_desc.full_name().c_str());
378 }
379 }
380 if (decoder.bytes_left() > 0) {
381 return base::ErrStatus("Unexpected extra bytes when parsing option %zu",
382 decoder.bytes_left());
383 }
384 options = field_options.SerializeAsArray();
385 return base::OkStatus();
386 }
387
FindDescriptorIdx(const std::string & full_name) const388 std::optional<uint32_t> DescriptorPool::FindDescriptorIdx(
389 const std::string& full_name) const {
390 auto it = full_name_to_descriptor_index_.find(full_name);
391 if (it == full_name_to_descriptor_index_.end()) {
392 return std::nullopt;
393 }
394 return it->second;
395 }
396
SerializeAsDescriptorSet()397 std::vector<uint8_t> DescriptorPool::SerializeAsDescriptorSet() {
398 protozero::HeapBuffered<protos::pbzero::DescriptorSet> descs;
399 for (auto& desc : descriptors()) {
400 protos::pbzero::DescriptorProto* proto_descriptor =
401 descs->add_descriptors();
402 proto_descriptor->set_name(desc.full_name());
403 for (auto& entry : desc.fields()) {
404 auto& field = entry.second;
405 protos::pbzero::FieldDescriptorProto* field_descriptor =
406 proto_descriptor->add_field();
407 field_descriptor->set_name(field.name());
408 field_descriptor->set_number(static_cast<int32_t>(field.number()));
409 // We do not support required fields. They will show up as
410 // optional after serialization.
411 field_descriptor->set_label(
412 field.is_repeated()
413 ? protos::pbzero::FieldDescriptorProto::LABEL_REPEATED
414 : protos::pbzero::FieldDescriptorProto::LABEL_OPTIONAL);
415 field_descriptor->set_type_name(field.resolved_type_name());
416 field_descriptor->set_type(
417 static_cast<protos::pbzero::FieldDescriptorProto_Type>(field.type()));
418 }
419 }
420 return descs.SerializeAsArray();
421 }
422
AddProtoDescriptor(ProtoDescriptor descriptor)423 uint32_t DescriptorPool::AddProtoDescriptor(ProtoDescriptor descriptor) {
424 uint32_t idx = static_cast<uint32_t>(descriptors_.size());
425 full_name_to_descriptor_index_[descriptor.full_name()] = idx;
426 descriptors_.emplace_back(std::move(descriptor));
427 return idx;
428 }
429
// Constructs a descriptor for a single proto type.
//
// The string arguments are taken by value and moved into the corresponding
// members; |type| and |parent_id| are stored as given. No fields or enum
// values are added here.
ProtoDescriptor::ProtoDescriptor(std::string file_name,
                                 std::string package_name,
                                 std::string full_name,
                                 Type type,
                                 std::optional<uint32_t> parent_id)
    : file_name_(std::move(file_name)),
      package_name_(std::move(package_name)),
      full_name_(std::move(full_name)),
      type_(type),
      parent_id_(parent_id) {}
440
// Constructs a descriptor for a single proto field.
//
// |name|, |raw_type_name| and |options| are taken by value and moved into the
// corresponding members; the remaining arguments are stored as given. The
// resolved type name is not set by this constructor.
FieldDescriptor::FieldDescriptor(std::string name,
                                 uint32_t number,
                                 uint32_t type,
                                 std::string raw_type_name,
                                 std::vector<uint8_t> options,
                                 bool is_repeated,
                                 bool is_packed,
                                 bool is_extension)
    : name_(std::move(name)),
      number_(number),
      type_(type),
      raw_type_name_(std::move(raw_type_name)),
      options_(std::move(options)),
      is_repeated_(is_repeated),
      is_packed_(is_packed),
      is_extension_(is_extension) {}
457
458 } // namespace trace_processor
459 } // namespace perfetto
460