1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/protozero/filtering/string_filter.h"
18 
19 #include <cstring>
20 #include <regex>
21 #include <string_view>
22 
23 #include "perfetto/base/compiler.h"
24 #include "perfetto/base/logging.h"
25 #include "perfetto/ext/base/string_view.h"
26 #include "perfetto/public/compiler.h"
27 
28 namespace protozero {
29 namespace {
30 
// Regex match results over a mutable character buffer: the sub-match
// iterators are plain |char*|, which lets matched text be overwritten in place.
using Matches = std::match_results<char*>;

// Text written over a redacted capture group (truncated when the group is
// shorter than it) and the filler character used for whatever is left of the
// group when it is longer.
constexpr std::string_view kRedacted{"P60REDACTED"};
constexpr char kRedactedDash{'-'};
35 
// Returns a pointer to the first character after the tgid pipe character in
// the atrace string given by [ptr, end). Returns null if no such character
// exists.
//
// Examples:
// E|1024 -> nullptr
// foobarbaz -> nullptr
// B|1024|x -> pointer to x
// B|1024| -> |end| (the pipe exists but nothing follows it)
const char* FindAtracePayloadPtr(const char* ptr, const char* end) {
  // Don't even bother checking any strings which are so short that they could
  // not contain a post-tgid section. This filters out strings like "E|" which
  // are emitted by Bionic.
  //
  // The length is checked *before* any pointer past |ptr| is formed: computing
  // |ptr + 2| for a buffer shorter than two bytes (or for an empty/null
  // buffer) is undefined behaviour, even if the result is only compared.
  static constexpr size_t kEarliestSecondPipeIndex = 2;
  if (end <= ptr) {
    return nullptr;
  }
  const size_t len = static_cast<size_t>(end - ptr);

  // Also filter out any other strings starting with "E" as they never contain
  // anything past the tgid: this removes >half of the strings for ~zero cost.
  if (len <= kEarliestSecondPipeIndex || *ptr == 'E') {
    return nullptr;
  }

  // We skipped past the first '|' character by starting at the character at
  // index 2. Just find the next pipe character (i.e. the one after tgid) using
  // memchr.
  const char* search_start = ptr + kEarliestSecondPipeIndex;
  const char* pipe = static_cast<const char*>(
      memchr(search_start, '|', len - kEarliestSecondPipeIndex));
  return pipe ? pipe + 1 : nullptr;
}
64 
// Returns true if the characters in [ptr, end) begin with |starts_with|.
// An empty |starts_with| matches any valid range, including an empty one.
bool StartsWith(const char* ptr,
                const char* end,
                const std::string& starts_with) {
  if (end < ptr) {
    return false;
  }

  // Verify that the atrace string has enough characters to match against all
  // the characters in the "starts with" string. Lengths are compared instead
  // of computing |ptr + starts_with.size()|, because that pointer would lie
  // outside the buffer (undefined behaviour) whenever the prefix is longer
  // than what remains.
  const size_t available = static_cast<size_t>(end - ptr);
  const size_t wanted = starts_with.size();
  if (available < wanted) {
    return false;
  }

  // Enough characters are present: memcmp decides whether they all match.
  return wanted == 0 || memcmp(ptr, starts_with.data(), wanted) == 0;
}
74 
RedactMatches(const Matches & matches)75 void RedactMatches(const Matches& matches) {
76   // Go through every group in the matches.
77   for (size_t i = 1; i < matches.size(); ++i) {
78     const auto& match = matches[i];
79     PERFETTO_CHECK(match.second >= match.first);
80 
81     // Overwrite the match with characters from |kRedacted|. If match is
82     // smaller, we will not use all of |kRedacted| but that's fine (i.e. we
83     // will overwrite with a truncated |kRedacted|).
84     size_t match_len = static_cast<size_t>(match.second - match.first);
85     size_t redacted_len = std::min(match_len, kRedacted.size());
86     memcpy(match.first, kRedacted.data(), redacted_len);
87 
88     // Overwrite any characters after |kRedacted| with |kRedactedDash|.
89     memset(match.first + redacted_len, kRedactedDash, match_len - redacted_len);
90   }
91 }
92 
93 }  // namespace
94 
AddRule(Policy policy,std::string_view pattern_str,std::string atrace_payload_starts_with)95 void StringFilter::AddRule(Policy policy,
96                            std::string_view pattern_str,
97                            std::string atrace_payload_starts_with) {
98   rules_.emplace_back(StringFilter::Rule{
99       policy,
100       std::regex(pattern_str.begin(), pattern_str.end(),
101                  std::regex::ECMAScript | std::regex_constants::optimize),
102       std::move(atrace_payload_starts_with)});
103 }
104 
MaybeFilterInternal(char * ptr,size_t len)105 bool StringFilter::MaybeFilterInternal(char* ptr, size_t len) {
106   std::match_results<char*> matches;
107   bool atrace_find_tried = false;
108   const char* atrace_payload_ptr = nullptr;
109   for (const Rule& rule : rules_) {
110     switch (rule.policy) {
111       case Policy::kMatchRedactGroups:
112       case Policy::kMatchBreak:
113         if (std::regex_match(ptr, ptr + len, matches, rule.pattern)) {
114           if (rule.policy == Policy::kMatchBreak) {
115             return false;
116           }
117           RedactMatches(matches);
118           return true;
119         }
120         break;
121       case Policy::kAtraceMatchRedactGroups:
122       case Policy::kAtraceMatchBreak:
123         atrace_payload_ptr = atrace_find_tried
124                                  ? atrace_payload_ptr
125                                  : FindAtracePayloadPtr(ptr, ptr + len);
126         atrace_find_tried = true;
127         if (atrace_payload_ptr &&
128             StartsWith(atrace_payload_ptr, ptr + len,
129                        rule.atrace_payload_starts_with) &&
130             std::regex_match(ptr, ptr + len, matches, rule.pattern)) {
131           if (rule.policy == Policy::kAtraceMatchBreak) {
132             return false;
133           }
134           RedactMatches(matches);
135           return true;
136         }
137         break;
138     }
139   }
140   return false;
141 }
142 
143 }  // namespace protozero
144