1 /*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "perfetto/ext/base/file_utils.h"
18 #include "perfetto/ext/base/getopt.h"
19 #include "perfetto/ext/base/scoped_file.h"
20 #include "perfetto/ext/base/string_utils.h"
21 #include "perfetto/ext/base/version.h"
22 #include "protos/perfetto/config/trace_config.gen.h"
23 #include "src/perfetto_cmd/pbtxt_to_pb.h"
24 #include "src/protozero/filtering/filter_util.h"
25 #include "src/protozero/filtering/message_filter.h"
26
27 namespace perfetto {
28 namespace proto_filter {
29 namespace {
30
// Help text printed for -h/--help (on stdout) and on invalid invocations (on
// stderr). The long option names listed here must match the |long_options|
// table in Main().
const char kUsage[] =
    R"(Usage: proto_filter [options]

-h --help:           Print this help text and exit.
-v --version:        Print the version string and exit.
-s --schema_in:      Path to the root .proto file. Required for most operations
-I --proto_path:     Extra include directory for proto includes. If omitted assumed CWD.
-r --root_message:   Fully qualified name for the root proto message (e.g. perfetto.protos.Trace)
                     If omitted the first message defined in the schema will be used.
-i --msg_in:         Path of a binary-encoded proto message which will be filtered.
-o --msg_out:        Path of the binary-encoded filtered proto message written in output.
-c --config_in:      Path of a TraceConfig textproto (note: only trace_filter field is considered).
-f --filter_in:      Path of a filter bytecode file previously generated by this tool.
-F --filter_out:     Path of the filter bytecode file generated from the --schema_in definition.
-T --filter_oct_out: Like --filter_out, but emits an octal-escaped C string suitable for .pbtxt.
-d --dedupe:         Minimize filter size by deduping leaf messages with same field ids.
-x --passthrough:    Passthrough a nested message as an opaque bytes field.
-g --filter_string:  Filter the string using separately specified rules before passing it through.

Example usage:

# Convert a .proto schema file into a diff-friendly list of messages/fields

  proto_filter -r perfetto.protos.Trace -s protos/perfetto/trace/trace.proto

# Generate the filter bytecode from a .proto schema

  proto_filter -r perfetto.protos.Trace -s protos/perfetto/trace/trace.proto \
               -F /tmp/bytecode [--dedupe] \
               [-x protos.Message:message_field_to_pass] \
               [-g protos.Message:string_field_to_filter]

# List the used/filtered fields from a trace file

  proto_filter -r perfetto.protos.Trace -s protos/perfetto/trace/trace.proto \
               -i test/data/example_android_trace_30s.pb -f /tmp/bytecode

# Filter a trace using a filter bytecode

  proto_filter -i test/data/example_android_trace_30s.pb -f /tmp/bytecode \
               -o /tmp/filtered_trace

# Filter a trace using a TraceConfig textproto

  proto_filter -i test/data/example_android_trace_30s.pb \
               -c /tmp/config.textproto \
               -o /tmp/filtered_trace

# Show which fields are allowed by a filter bytecode

  proto_filter -r perfetto.protos.Trace -s protos/perfetto/trace/trace.proto \
               [-g protos.Message:string_field_to_filter] \
               -f /tmp/bytecode
)";
83
84 class LoggingErrorReporter : public ErrorReporter {
85 public:
LoggingErrorReporter(std::string file_name,const char * config)86 LoggingErrorReporter(std::string file_name, const char* config)
87 : file_name_(file_name), config_(config) {}
88
AddError(size_t row,size_t column,size_t length,const std::string & message)89 void AddError(size_t row,
90 size_t column,
91 size_t length,
92 const std::string& message) override {
93 parsed_successfully_ = false;
94 std::string line = ExtractLine(row - 1).ToStdString();
95 if (!line.empty() && line[line.length() - 1] == '\n') {
96 line.erase(line.length() - 1);
97 }
98
99 std::string guide(column + length, ' ');
100 for (size_t i = column; i < column + length; i++) {
101 guide[i - 1] = i == column ? '^' : '~';
102 }
103 fprintf(stderr, "%s:%zu:%zu error: %s\n", file_name_.c_str(), row, column,
104 message.c_str());
105 fprintf(stderr, "%s\n", line.c_str());
106 fprintf(stderr, "%s\n", guide.c_str());
107 }
108
Success() const109 bool Success() const { return parsed_successfully_; }
110
111 private:
ExtractLine(size_t line)112 base::StringView ExtractLine(size_t line) {
113 const char* start = config_;
114 const char* end = config_;
115
116 for (size_t i = 0; i < line + 1; i++) {
117 start = end;
118 char c;
119 while ((c = *end++) && c != '\n')
120 ;
121 }
122 return base::StringView(start, static_cast<size_t>(end - start));
123 }
124
125 bool parsed_successfully_ = true;
126 std::string file_name_;
127 const char* config_;
128 };
129
130 using TraceFilter = protos::gen::TraceConfig::TraceFilter;
ConvertPolicy(TraceFilter::StringFilterPolicy policy)131 std::optional<protozero::StringFilter::Policy> ConvertPolicy(
132 TraceFilter::StringFilterPolicy policy) {
133 switch (policy) {
134 case TraceFilter::SFP_UNSPECIFIED:
135 return std::nullopt;
136 case TraceFilter::SFP_MATCH_REDACT_GROUPS:
137 return protozero::StringFilter::Policy::kMatchRedactGroups;
138 case TraceFilter::SFP_ATRACE_MATCH_REDACT_GROUPS:
139 return protozero::StringFilter::Policy::kAtraceMatchRedactGroups;
140 case TraceFilter::SFP_MATCH_BREAK:
141 return protozero::StringFilter::Policy::kMatchBreak;
142 case TraceFilter::SFP_ATRACE_MATCH_BREAK:
143 return protozero::StringFilter::Policy::kAtraceMatchBreak;
144 case TraceFilter::SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS:
145 return protozero::StringFilter::Policy::kAtraceRepeatedSearchRedactGroups;
146 }
147 return std::nullopt;
148 }
149
Main(int argc,char ** argv)150 int Main(int argc, char** argv) {
151 static const option long_options[] = {
152 {"help", no_argument, nullptr, 'h'},
153 {"version", no_argument, nullptr, 'v'},
154 {"dedupe", no_argument, nullptr, 'd'},
155 {"proto_path", required_argument, nullptr, 'I'},
156 {"schema_in", required_argument, nullptr, 's'},
157 {"root_message", required_argument, nullptr, 'r'},
158 {"msg_in", required_argument, nullptr, 'i'},
159 {"msg_out", required_argument, nullptr, 'o'},
160 {"config_in", required_argument, nullptr, 'c'},
161 {"filter_in", required_argument, nullptr, 'f'},
162 {"filter_out", required_argument, nullptr, 'F'},
163 {"filter_oct_out", required_argument, nullptr, 'T'},
164 {"passthrough", required_argument, nullptr, 'x'},
165 {"filter_string", required_argument, nullptr, 'g'},
166 {nullptr, 0, nullptr, 0}};
167
168 std::string msg_in;
169 std::string msg_out;
170 std::string config_in;
171 std::string filter_in;
172 std::string schema_in;
173 std::string filter_out;
174 std::string filter_oct_out;
175 std::string proto_path;
176 std::string root_message_arg;
177 std::set<std::string> passthrough_fields;
178 std::set<std::string> filter_string_fields;
179 bool dedupe = false;
180
181 for (;;) {
182 int option = getopt_long(
183 argc, argv, "hvdI:s:r:i:o:f:F:T:x:g:c:", long_options, nullptr);
184
185 if (option == -1)
186 break; // EOF.
187
188 if (option == 'v') {
189 printf("%s\n", base::GetVersionString());
190 exit(0);
191 }
192
193 if (option == 'd') {
194 dedupe = true;
195 continue;
196 }
197
198 if (option == 'I') {
199 proto_path = optarg;
200 continue;
201 }
202
203 if (option == 's') {
204 schema_in = optarg;
205 continue;
206 }
207
208 if (option == 'c') {
209 config_in = optarg;
210 continue;
211 }
212
213 if (option == 'r') {
214 root_message_arg = optarg;
215 continue;
216 }
217
218 if (option == 'i') {
219 msg_in = optarg;
220 continue;
221 }
222
223 if (option == 'o') {
224 msg_out = optarg;
225 continue;
226 }
227
228 if (option == 'f') {
229 filter_in = optarg;
230 continue;
231 }
232
233 if (option == 'F') {
234 filter_out = optarg;
235 continue;
236 }
237
238 if (option == 'T') {
239 filter_oct_out = optarg;
240 continue;
241 }
242
243 if (option == 'x') {
244 passthrough_fields.insert(optarg);
245 continue;
246 }
247
248 if (option == 'g') {
249 filter_string_fields.insert(optarg);
250 continue;
251 }
252
253 if (option == 'h') {
254 fprintf(stdout, kUsage);
255 exit(0);
256 }
257
258 fprintf(stderr, kUsage);
259 exit(1);
260 }
261
262 if (msg_in.empty() && filter_in.empty() && schema_in.empty()) {
263 fprintf(stderr, kUsage);
264 return 1;
265 }
266
267 if (!filter_in.empty() && !config_in.empty()) {
268 fprintf(stderr, kUsage);
269 return 1;
270 }
271
272 std::string msg_in_data;
273 if (!msg_in.empty()) {
274 PERFETTO_LOG("Loading proto-encoded message from %s", msg_in.c_str());
275 if (!base::ReadFile(msg_in, &msg_in_data)) {
276 PERFETTO_ELOG("Could not open message file %s", msg_in.c_str());
277 return 1;
278 }
279 }
280
281 protozero::FilterUtil filter;
282 if (!schema_in.empty()) {
283 PERFETTO_LOG("Loading proto schema from %s", schema_in.c_str());
284 if (!filter.LoadMessageDefinition(schema_in, root_message_arg, proto_path,
285 passthrough_fields,
286 filter_string_fields)) {
287 PERFETTO_ELOG("Failed to parse proto schema from %s", schema_in.c_str());
288 return 1;
289 }
290 if (dedupe)
291 filter.Dedupe();
292 }
293
294 protozero::MessageFilter msg_filter;
295 std::string filter_data;
296 std::string filter_data_src;
297 if (!filter_in.empty()) {
298 PERFETTO_LOG("Loading filter bytecode from %s", filter_in.c_str());
299 if (!base::ReadFile(filter_in, &filter_data)) {
300 PERFETTO_ELOG("Could not open filter file %s", filter_in.c_str());
301 return 1;
302 }
303 filter_data_src = filter_in;
304 } else if (!config_in.empty()) {
305 PERFETTO_LOG("Loading filter bytecode and rules from %s",
306 config_in.c_str());
307 std::string config_data;
308 if (!base::ReadFile(config_in, &config_data)) {
309 PERFETTO_ELOG("Could not open config file %s", config_in.c_str());
310 return 1;
311 }
312 LoggingErrorReporter reporter(config_in, config_data.c_str());
313 auto config_bytes = PbtxtToPb(config_data, &reporter);
314 if (!reporter.Success()) {
315 return 1;
316 }
317
318 protos::gen::TraceConfig config;
319 config.ParseFromArray(config_bytes.data(), config_bytes.size());
320
321 const auto& trace_filter = config.trace_filter();
322 for (const auto& rule : trace_filter.string_filter_chain().rules()) {
323 auto opt_policy = ConvertPolicy(rule.policy());
324 if (!opt_policy) {
325 PERFETTO_ELOG("Unknown string filter policy %d", rule.policy());
326 return 1;
327 }
328 msg_filter.string_filter().AddRule(*opt_policy, rule.regex_pattern(),
329 rule.atrace_payload_starts_with());
330 }
331 filter_data = trace_filter.bytecode_v2().empty()
332 ? trace_filter.bytecode()
333 : trace_filter.bytecode_v2();
334 filter_data_src = config_in;
335 } else if (!schema_in.empty()) {
336 PERFETTO_LOG("Generating filter bytecode from %s", schema_in.c_str());
337 filter_data = filter.GenerateFilterBytecode();
338 filter_data_src = schema_in;
339 }
340
341 if (!filter_data.empty()) {
342 const uint8_t* data = reinterpret_cast<const uint8_t*>(filter_data.data());
343 if (!msg_filter.LoadFilterBytecode(data, filter_data.size())) {
344 PERFETTO_ELOG("Failed to parse filter bytecode from %s",
345 filter_data_src.c_str());
346 return 1;
347 }
348 }
349
350 // Write the filter bytecode in output.
351 if (!filter_out.empty()) {
352 auto fd = base::OpenFile(filter_out, O_WRONLY | O_TRUNC | O_CREAT, 0644);
353 if (!fd) {
354 PERFETTO_ELOG("Could not open filter out path %s", filter_out.c_str());
355 return 1;
356 }
357 PERFETTO_LOG("Writing filter bytecode (%zu bytes) into %s",
358 filter_data.size(), filter_out.c_str());
359 base::WriteAll(*fd, filter_data.data(), filter_data.size());
360 }
361
362 if (!filter_oct_out.empty()) {
363 auto fd =
364 base::OpenFile(filter_oct_out, O_WRONLY | O_TRUNC | O_CREAT, 0644);
365 if (!fd) {
366 PERFETTO_ELOG("Could not open filter out path %s",
367 filter_oct_out.c_str());
368 return 1;
369 }
370 std::string oct_str;
371 oct_str.reserve(filter_data.size() * 4 + 64);
372 oct_str.append("trace_filter {\n bytecode: \"");
373 for (char c : filter_data) {
374 uint8_t octect = static_cast<uint8_t>(c);
375 char buf[5]{'\\', '0', '0', '0', 0};
376 for (uint8_t i = 0; i < 3; ++i) {
377 buf[3 - i] = static_cast<char>('0' + static_cast<uint8_t>(octect) % 8);
378 octect /= 8;
379 }
380 oct_str.append(buf);
381 }
382 oct_str.append("\"\n}\n");
383 PERFETTO_LOG("Writing filter bytecode (%zu bytes) into %s", oct_str.size(),
384 filter_oct_out.c_str());
385 base::WriteAll(*fd, oct_str.data(), oct_str.size());
386 }
387
388 // Apply the filter to the input message (if any).
389 std::vector<uint8_t> msg_filtered_data;
390 if (!msg_in.empty()) {
391 PERFETTO_LOG("Applying filter %s to proto message %s",
392 filter_data_src.c_str(), msg_in.c_str());
393 msg_filter.enable_field_usage_tracking(true);
394 auto res = msg_filter.FilterMessage(msg_in_data.data(), msg_in_data.size());
395 if (res.error)
396 PERFETTO_FATAL("Filtering failed");
397 msg_filtered_data.insert(msg_filtered_data.end(), res.data.get(),
398 res.data.get() + res.size);
399 }
400
401 // Write out the filtered message.
402 if (!msg_out.empty()) {
403 PERFETTO_LOG("Writing filtered proto bytes (%zu bytes) into %s",
404 msg_filtered_data.size(), msg_out.c_str());
405 auto fd = base::OpenFile(msg_out, O_WRONLY | O_TRUNC | O_CREAT, 0644);
406 base::WriteAll(*fd, msg_filtered_data.data(), msg_filtered_data.size());
407 }
408
409 if (!msg_in.empty()) {
410 const auto& field_usage_map = msg_filter.field_usage();
411 for (const auto& it : field_usage_map) {
412 const std::string& field_path_varint = it.first;
413 int32_t num_occurrences = it.second;
414 std::string path_str = filter.LookupField(field_path_varint);
415 printf("%-100s %s %d\n", path_str.c_str(),
416 num_occurrences < 0 ? "DROP" : "PASS", std::abs(num_occurrences));
417 }
418 } else if (!schema_in.empty()) {
419 filter.PrintAsText(!filter_data.empty() ? std::make_optional(filter_data)
420 : std::nullopt);
421 }
422
423 if ((!filter_out.empty() || !filter_oct_out.empty()) && !dedupe) {
424 PERFETTO_ELOG(
425 "Warning: looks like you are generating a filter without --dedupe. For "
426 "production use cases, --dedupe can make the output bytecode "
427 "significantly smaller.");
428 }
429 return 0;
430 }
431
432 } // namespace
433 } // namespace proto_filter
434 } // namespace perfetto
435
main(int argc,char ** argv)436 int main(int argc, char** argv) {
437 return perfetto::proto_filter::Main(argc, argv);
438 }
439