1 /*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "RecordFilter.h"
18
19 #include "environment.h"
20 #include "utils.h"
21
22 using android::base::Split;
23 using android::base::Trim;
24
25 namespace simpleperf {
26
27 namespace {
28
29 using TimeRange = std::pair<uint64_t, uint64_t>;
30
31 class TimeRanges {
32 public:
Begin(uint64_t timestamp)33 void Begin(uint64_t timestamp) {
34 if (!begin_time_.has_value()) {
35 begin_time_ = timestamp;
36 }
37 }
38
End(uint64_t timestamp)39 bool End(uint64_t timestamp) {
40 if (begin_time_.has_value()) {
41 if (begin_time_ >= timestamp) {
42 LOG(ERROR) << "Invalid time range in filter data: begin time " << begin_time_.value()
43 << " >= end time " << timestamp;
44 return false;
45 }
46 ranges_.emplace_back(begin_time_.value(), timestamp);
47 begin_time_.reset();
48 }
49 return true;
50 }
51
NoMoreTimestamp()52 void NoMoreTimestamp() {
53 if (begin_time_.has_value()) {
54 ranges_.emplace_back(begin_time_.value(), UINT64_MAX);
55 }
56 std::sort(ranges_.begin(), ranges_.end());
57 }
58
Empty() const59 bool Empty() const { return ranges_.empty(); }
60
InRange(uint64_t timestamp) const61 bool InRange(uint64_t timestamp) const {
62 auto it = std::upper_bound(ranges_.begin(), ranges_.end(),
63 std::pair<uint64_t, uint64_t>(timestamp, 0));
64 if (it != ranges_.end() && it->first == timestamp) {
65 return true;
66 }
67 if (it != ranges_.begin()) {
68 --it;
69 if (it->second > timestamp) {
70 return true;
71 }
72 }
73 return false;
74 }
75
76 private:
77 std::optional<uint64_t> begin_time_;
78 std::vector<TimeRange> ranges_;
79 };
80
81 } // namespace
82
83 class TimeFilter {
84 public:
GetClock() const85 const std::string& GetClock() const { return clock_; }
SetClock(const std::string & clock)86 void SetClock(const std::string& clock) { clock_ = clock; }
87
GlobalBegin(uint64_t timestamp)88 void GlobalBegin(uint64_t timestamp) { global_ranges_.Begin(timestamp); }
89
GlobalEnd(uint64_t timestamp)90 bool GlobalEnd(uint64_t timestamp) { return global_ranges_.End(timestamp); }
91
ProcessBegin(pid_t pid,uint64_t timestamp)92 void ProcessBegin(pid_t pid, uint64_t timestamp) { process_ranges_[pid].Begin(timestamp); }
93
ProcessEnd(pid_t pid,uint64_t timestamp)94 bool ProcessEnd(pid_t pid, uint64_t timestamp) { return process_ranges_[pid].End(timestamp); }
95
ThreadBegin(pid_t tid,uint64_t timestamp)96 void ThreadBegin(pid_t tid, uint64_t timestamp) { thread_ranges_[tid].Begin(timestamp); }
97
ThreadEnd(pid_t tid,uint64_t timestamp)98 bool ThreadEnd(pid_t tid, uint64_t timestamp) { return thread_ranges_[tid].End(timestamp); }
99
NoMoreTimestamp()100 void NoMoreTimestamp() {
101 global_ranges_.NoMoreTimestamp();
102 for (auto& p : process_ranges_) {
103 p.second.NoMoreTimestamp();
104 }
105 for (auto& p : thread_ranges_) {
106 p.second.NoMoreTimestamp();
107 }
108 }
109
Empty() const110 bool Empty() const {
111 return global_ranges_.Empty() && process_ranges_.empty() && thread_ranges_.empty();
112 }
113
Check(const SampleRecord & sample) const114 bool Check(const SampleRecord& sample) const {
115 uint64_t timestamp = sample.Timestamp();
116 if (!global_ranges_.Empty() && !global_ranges_.InRange(timestamp)) {
117 return false;
118 }
119 if (!process_ranges_.empty()) {
120 auto it = process_ranges_.find(sample.tid_data.pid);
121 if (it == process_ranges_.end() || !it->second.InRange(timestamp)) {
122 return false;
123 }
124 }
125 if (!thread_ranges_.empty()) {
126 auto it = thread_ranges_.find(sample.tid_data.tid);
127 if (it == thread_ranges_.end() || !it->second.InRange(timestamp)) {
128 return false;
129 }
130 }
131 return true;
132 }
133
134 private:
135 std::string clock_ = "monotonic";
136 TimeRanges global_ranges_;
137 std::unordered_map<pid_t, TimeRanges> process_ranges_;
138 std::unordered_map<pid_t, TimeRanges> thread_ranges_;
139 };
140
141 // Read filter file. The format is in doc/sample_filter.md.
142 class FilterFileReader {
143 public:
FilterFileReader(const std::string & filename)144 FilterFileReader(const std::string& filename) : filename_(filename) {}
145
Read()146 bool Read() {
147 std::string data;
148 if (!android::base::ReadFileToString(filename_, &data)) {
149 PLOG(ERROR) << "failed to read " << filename_;
150 return false;
151 }
152 line_number_ = 0;
153 time_filter_.reset(new TimeFilter);
154 std::string arg_str;
155 std::vector<std::string> args;
156 uint64_t timestamp;
157 pid_t pid;
158 for (const auto& line : Split(data, "\n")) {
159 line_number_++;
160 if (SearchCmd(line, "CLOCK", &arg_str)) {
161 if (!SplitArgs(arg_str, 1, &args)) {
162 return false;
163 }
164 time_filter_->SetClock(args[0]);
165 } else if (SearchCmd(line, "GLOBAL_BEGIN", &arg_str)) {
166 if (!SplitArgs(arg_str, 1, &args) || !ParseTimestamp(args[0], ×tamp)) {
167 return false;
168 }
169 time_filter_->GlobalBegin(timestamp);
170 } else if (SearchCmd(line, "GLOBAL_END", &arg_str)) {
171 if (!SplitArgs(arg_str, 1, &args) || !ParseTimestamp(args[0], ×tamp) ||
172 !time_filter_->GlobalEnd(timestamp)) {
173 return false;
174 }
175 } else if (SearchCmd(line, "PROCESS_BEGIN", &arg_str)) {
176 if (!SplitArgs(arg_str, 2, &args) || !ParsePid(args[0], &pid) ||
177 !ParseTimestamp(args[1], ×tamp)) {
178 return false;
179 }
180 time_filter_->ProcessBegin(pid, timestamp);
181 } else if (SearchCmd(line, "PROCESS_END", &arg_str)) {
182 if (!SplitArgs(arg_str, 2, &args) || !ParsePid(args[0], &pid) ||
183 !ParseTimestamp(args[1], ×tamp) || !time_filter_->ProcessEnd(pid, timestamp)) {
184 return false;
185 }
186 } else if (SearchCmd(line, "THREAD_BEGIN", &arg_str)) {
187 if (!SplitArgs(arg_str, 2, &args) || !ParsePid(args[0], &pid) ||
188 !ParseTimestamp(args[1], ×tamp)) {
189 return false;
190 }
191 time_filter_->ThreadBegin(pid, timestamp);
192 } else if (SearchCmd(line, "THREAD_END", &arg_str)) {
193 if (!SplitArgs(arg_str, 2, &args) || !ParsePid(args[0], &pid) ||
194 !ParseTimestamp(args[1], ×tamp) || !time_filter_->ThreadEnd(pid, timestamp)) {
195 return false;
196 }
197 }
198 }
199 return true;
200 }
201
GetTimeFilter()202 std::unique_ptr<TimeFilter>& GetTimeFilter() { return time_filter_; }
203
204 private:
SearchCmd(const std::string & s,const char * cmd,std::string * arg_str)205 bool SearchCmd(const std::string& s, const char* cmd, std::string* arg_str) {
206 auto pos = s.find(cmd);
207 if (pos == s.npos) {
208 return false;
209 }
210 *arg_str = s.substr(pos + strlen(cmd));
211 return true;
212 }
213
SplitArgs(const std::string & s,size_t nargs,std::vector<std::string> * args)214 bool SplitArgs(const std::string& s, size_t nargs, std::vector<std::string>* args) {
215 *args = Split(Trim(s), " ");
216 if (args->size() != nargs) {
217 LOG(ERROR) << "Invalid args in " << filename_ << ":" << line_number_ << ": " << s;
218 return false;
219 }
220 return true;
221 }
222
ParsePid(const std::string & s,pid_t * pid)223 bool ParsePid(const std::string& s, pid_t* pid) {
224 if (!android::base::ParseInt(s.c_str(), pid, static_cast<pid_t>(0))) {
225 LOG(ERROR) << "Invalid pid in " << filename_ << ":" << line_number_ << ": " << s;
226 return false;
227 }
228 return true;
229 }
230
ParseTimestamp(const std::string & s,uint64_t * timestamp)231 bool ParseTimestamp(const std::string& s, uint64_t* timestamp) {
232 if (!android::base::ParseUint(s.c_str(), timestamp)) {
233 LOG(ERROR) << "Invalid timestamp in " << filename_ << ":" << line_number_ << ": " << s;
234 return false;
235 }
236 return true;
237 }
238
239 const std::string filename_;
240 size_t line_number_ = 0;
241 std::unique_ptr<TimeFilter> time_filter_;
242 };
243
RecordFilter(const ThreadTree & thread_tree)244 RecordFilter::RecordFilter(const ThreadTree& thread_tree) : thread_tree_(thread_tree) {}
245
~RecordFilter()246 RecordFilter::~RecordFilter() {}
247
ParseOptions(OptionValueMap & options)248 bool RecordFilter::ParseOptions(OptionValueMap& options) {
249 for (bool exclude : {true, false}) {
250 std::string prefix = exclude ? "--exclude-" : "--include-";
251 for (const OptionValue& value : options.PullValues(prefix + "pid")) {
252 if (auto pids = GetTidsFromString(*value.str_value, false); pids) {
253 AddPids(pids.value(), exclude);
254 } else {
255 return false;
256 }
257 }
258 for (const OptionValue& value : options.PullValues(prefix + "tid")) {
259 if (auto tids = GetTidsFromString(*value.str_value, false); tids) {
260 AddTids(tids.value(), exclude);
261 } else {
262 return false;
263 }
264 }
265 for (const OptionValue& value : options.PullValues(prefix + "process-name")) {
266 AddProcessNameRegex(*value.str_value, exclude);
267 }
268 for (const OptionValue& value : options.PullValues(prefix + "thread-name")) {
269 AddThreadNameRegex(*value.str_value, exclude);
270 }
271 for (const OptionValue& value : options.PullValues(prefix + "uid")) {
272 if (auto uids = ParseUintVector<uint32_t>(*value.str_value); uids) {
273 AddUids(uids.value(), exclude);
274 } else {
275 return false;
276 }
277 }
278 }
279 if (auto value = options.PullValue("--filter-file"); value) {
280 if (!SetFilterFile(*value->str_value)) {
281 return false;
282 }
283 }
284 return true;
285 }
286
AddPids(const std::set<pid_t> & pids,bool exclude)287 void RecordFilter::AddPids(const std::set<pid_t>& pids, bool exclude) {
288 RecordFilterCondition& cond = GetCondition(exclude);
289 cond.used = true;
290 cond.pids.insert(pids.begin(), pids.end());
291 }
292
AddTids(const std::set<pid_t> & tids,bool exclude)293 void RecordFilter::AddTids(const std::set<pid_t>& tids, bool exclude) {
294 RecordFilterCondition& cond = GetCondition(exclude);
295 cond.used = true;
296 cond.tids.insert(tids.begin(), tids.end());
297 }
298
AddProcessNameRegex(const std::string & process_name,bool exclude)299 void RecordFilter::AddProcessNameRegex(const std::string& process_name, bool exclude) {
300 RecordFilterCondition& cond = GetCondition(exclude);
301 cond.used = true;
302 cond.process_name_regs.emplace_back(process_name, std::regex::optimize);
303 }
304
AddThreadNameRegex(const std::string & thread_name,bool exclude)305 void RecordFilter::AddThreadNameRegex(const std::string& thread_name, bool exclude) {
306 RecordFilterCondition& cond = GetCondition(exclude);
307 cond.used = true;
308 cond.thread_name_regs.emplace_back(thread_name, std::regex::optimize);
309 }
310
AddUids(const std::set<uint32_t> & uids,bool exclude)311 void RecordFilter::AddUids(const std::set<uint32_t>& uids, bool exclude) {
312 RecordFilterCondition& cond = GetCondition(exclude);
313 cond.used = true;
314 cond.uids.insert(uids.begin(), uids.end());
315 }
316
SetFilterFile(const std::string & filename)317 bool RecordFilter::SetFilterFile(const std::string& filename) {
318 FilterFileReader reader(filename);
319 if (!reader.Read()) {
320 return false;
321 }
322 time_filter_ = std::move(reader.GetTimeFilter());
323 return true;
324 }
325
Check(const SampleRecord * r)326 bool RecordFilter::Check(const SampleRecord* r) {
327 if (exclude_condition_.used && CheckCondition(r, exclude_condition_)) {
328 return false;
329 }
330 if (include_condition_.used && !CheckCondition(r, include_condition_)) {
331 return false;
332 }
333 if (time_filter_ && !time_filter_->Check(*r)) {
334 return false;
335 }
336 return true;
337 }
338
CheckClock(const std::string & clock)339 bool RecordFilter::CheckClock(const std::string& clock) {
340 if (time_filter_ && time_filter_->GetClock() != clock) {
341 LOG(ERROR) << "clock generating sample timestamps is " << clock
342 << ", which doesn't match clock used in time filter " << time_filter_->GetClock();
343 return false;
344 }
345 return true;
346 }
347
Clear()348 void RecordFilter::Clear() {
349 exclude_condition_ = RecordFilterCondition();
350 include_condition_ = RecordFilterCondition();
351 pid_to_uid_map_.clear();
352 }
353
CheckCondition(const SampleRecord * r,const RecordFilterCondition & condition)354 bool RecordFilter::CheckCondition(const SampleRecord* r, const RecordFilterCondition& condition) {
355 if (condition.pids.count(r->tid_data.pid) == 1) {
356 return true;
357 }
358 if (condition.tids.count(r->tid_data.tid) == 1) {
359 return true;
360 }
361 if (!condition.process_name_regs.empty()) {
362 if (ThreadEntry* process = thread_tree_.FindThread(r->tid_data.pid); process != nullptr) {
363 if (SearchInRegs(process->comm, condition.process_name_regs)) {
364 return true;
365 }
366 }
367 }
368 if (!condition.thread_name_regs.empty()) {
369 if (ThreadEntry* thread = thread_tree_.FindThread(r->tid_data.tid); thread != nullptr) {
370 if (SearchInRegs(thread->comm, condition.thread_name_regs)) {
371 return true;
372 }
373 }
374 }
375 if (!condition.uids.empty()) {
376 if (auto uid_value = GetUidForProcess(r->tid_data.pid); uid_value) {
377 if (condition.uids.count(uid_value.value()) == 1) {
378 return true;
379 }
380 }
381 }
382 return false;
383 }
384
SearchInRegs(const std::string & s,const std::vector<std::regex> & regs)385 bool RecordFilter::SearchInRegs(const std::string& s, const std::vector<std::regex>& regs) {
386 for (auto& reg : regs) {
387 if (std::regex_search(s, reg)) {
388 return true;
389 }
390 }
391 return false;
392 }
393
GetUidForProcess(pid_t pid)394 std::optional<uint32_t> RecordFilter::GetUidForProcess(pid_t pid) {
395 if (auto it = pid_to_uid_map_.find(pid); it != pid_to_uid_map_.end()) {
396 return it->second;
397 }
398 auto uid = GetProcessUid(pid);
399 pid_to_uid_map_[pid] = uid;
400 return uid;
401 }
402
403 } // namespace simpleperf
404