1 /*
2 * Copyright (C) 2020 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "report_utils.h"
18
19 #include <stdlib.h>
20
21 #include <android-base/parsebool.h>
22 #include <android-base/scopeguard.h>
23 #include <android-base/strings.h>
24
25 #include "JITDebugReader.h"
26 #include "utils.h"
27
28 namespace simpleperf {
29
AddProguardMappingFile(std::string_view mapping_file)30 bool ProguardMappingRetrace::AddProguardMappingFile(std::string_view mapping_file) {
31 // The mapping file format is described in
32 // https://www.guardsquare.com/en/products/proguard/manual/retrace.
33 // Additional info provided by R8 is described in
34 // https://r8.googlesource.com/r8/+/refs/heads/main/doc/retrace.md.
35 line_reader_.reset(new LineReader(mapping_file));
36 android::base::ScopeGuard g([&]() { line_reader_ = nullptr; });
37
38 if (!line_reader_->Ok()) {
39 PLOG(ERROR) << "failed to read " << mapping_file;
40 return false;
41 }
42
43 MoveToNextLine();
44 while (cur_line_.type != LineType::LINE_EOF) {
45 if (cur_line_.type == LineType::CLASS_LINE) {
46 // Match line "original_classname -> obfuscated_classname:".
47 std::string_view s = cur_line_.data;
48 auto arrow_pos = s.find(" -> ");
49 auto arrow_end_pos = arrow_pos + strlen(" -> ");
50 if (auto colon_pos = s.find(':', arrow_end_pos); colon_pos != s.npos) {
51 std::string_view original_classname = s.substr(0, arrow_pos);
52 std::string obfuscated_classname(s.substr(arrow_end_pos, colon_pos - arrow_end_pos));
53 MappingClass& cur_class = class_map_[obfuscated_classname];
54 cur_class.original_classname = original_classname;
55 MoveToNextLine();
56 if (cur_line_.type == LineType::SYNTHESIZED_COMMENT) {
57 cur_class.synthesized = true;
58 MoveToNextLine();
59 }
60
61 while (cur_line_.type == LineType::METHOD_LINE) {
62 ParseMethod(cur_class);
63 }
64 continue;
65 }
66 }
67
68 // Skip unparsed line.
69 MoveToNextLine();
70 }
71 return true;
72 }
73
ParseMethod(MappingClass & mapping_class)74 void ProguardMappingRetrace::ParseMethod(MappingClass& mapping_class) {
75 // Match line "... [original_classname.]original_methodname(...)... -> obfuscated_methodname".
76 std::string_view s = cur_line_.data;
77 auto arrow_pos = s.find(" -> ");
78 auto arrow_end_pos = arrow_pos + strlen(" -> ");
79 if (auto left_brace_pos = s.rfind('(', arrow_pos); left_brace_pos != s.npos) {
80 if (auto space_pos = s.rfind(' ', left_brace_pos); space_pos != s.npos) {
81 std::string_view name = s.substr(space_pos + 1, left_brace_pos - space_pos - 1);
82 bool contains_classname = name.find('.') != name.npos;
83 if (contains_classname && android::base::StartsWith(name, mapping_class.original_classname)) {
84 name.remove_prefix(mapping_class.original_classname.size() + 1);
85 contains_classname = false;
86 }
87 std::string original_methodname(name);
88 std::string obfuscated_methodname(s.substr(arrow_end_pos));
89 bool synthesized = false;
90
91 MoveToNextLine();
92 if (cur_line_.type == LineType::SYNTHESIZED_COMMENT) {
93 synthesized = true;
94 MoveToNextLine();
95 }
96
97 auto& method_map = mapping_class.method_map;
98 if (auto it = method_map.find(obfuscated_methodname); it != method_map.end()) {
99 // The obfuscated method name already exists. We don't know which one to choose.
100 // So just prefer the latter one unless it's synthesized.
101 if (!synthesized) {
102 it->second.original_name = original_methodname;
103 it->second.contains_classname = contains_classname;
104 it->second.synthesized = synthesized;
105 }
106 } else {
107 auto& method = method_map[obfuscated_methodname];
108 method.original_name = original_methodname;
109 method.contains_classname = contains_classname;
110 method.synthesized = synthesized;
111 }
112 return;
113 }
114 }
115
116 // Skip unparsed line.
117 MoveToNextLine();
118 }
119
MoveToNextLine()120 void ProguardMappingRetrace::MoveToNextLine() {
121 std::string* line;
122 while ((line = line_reader_->ReadLine()) != nullptr) {
123 std::string_view s = *line;
124 if (s.empty()) {
125 continue;
126 }
127 size_t non_space_pos = s.find_first_not_of(' ');
128 if (non_space_pos != s.npos && s[non_space_pos] == '#') {
129 // Skip all comments unless it's synthesized comment.
130 if (s.find("com.android.tools.r8.synthesized") != s.npos) {
131 cur_line_.type = SYNTHESIZED_COMMENT;
132 cur_line_.data = s;
133 return;
134 }
135 continue;
136 }
137 if (s.find(" -> ") == s.npos) {
138 // Skip unknown lines.
139 continue;
140 }
141 cur_line_.data = s;
142 if (s[0] == ' ') {
143 cur_line_.type = METHOD_LINE;
144 } else {
145 cur_line_.type = CLASS_LINE;
146 }
147 return;
148 }
149 cur_line_.type = LINE_EOF;
150 }
151
DeObfuscateJavaMethods(std::string_view obfuscated_name,std::string * original_name,bool * synthesized)152 bool ProguardMappingRetrace::DeObfuscateJavaMethods(std::string_view obfuscated_name,
153 std::string* original_name, bool* synthesized) {
154 if (auto split_pos = obfuscated_name.rfind('.'); split_pos != obfuscated_name.npos) {
155 std::string obfuscated_classname(obfuscated_name.substr(0, split_pos));
156
157 if (auto it = class_map_.find(obfuscated_classname); it != class_map_.end()) {
158 const MappingClass& mapping_class = it->second;
159 const auto& method_map = mapping_class.method_map;
160 std::string obfuscated_methodname(obfuscated_name.substr(split_pos + 1));
161
162 if (auto method_it = method_map.find(obfuscated_methodname); method_it != method_map.end()) {
163 const auto& method = method_it->second;
164 if (method.contains_classname) {
165 *original_name = method.original_name;
166 } else {
167 *original_name = mapping_class.original_classname + "." + method.original_name;
168 }
169 *synthesized = method.synthesized;
170 } else {
171 // Only the classname is obfuscated.
172 *original_name = mapping_class.original_classname + "." + obfuscated_methodname;
173 *synthesized = mapping_class.synthesized;
174 }
175 return true;
176 }
177 }
178 return false;
179 }
180
IsArtEntry(const CallChainReportEntry & entry,bool * is_jni_trampoline)181 static bool IsArtEntry(const CallChainReportEntry& entry, bool* is_jni_trampoline) {
182 if (entry.execution_type == CallChainExecutionType::NATIVE_METHOD) {
183 // art_jni_trampoline/art_quick_generic_jni_trampoline are trampolines used to call jni
184 // methods in art runtime. We want to hide them when hiding art frames.
185 *is_jni_trampoline = android::base::EndsWith(entry.symbol->Name(), "jni_trampoline");
186 return *is_jni_trampoline || android::base::EndsWith(entry.dso->Path(), "/libart.so") ||
187 android::base::EndsWith(entry.dso->Path(), "/libartd.so");
188 }
189 return false;
190 };
191
CallChainReportBuilder(ThreadTree & thread_tree)192 CallChainReportBuilder::CallChainReportBuilder(ThreadTree& thread_tree)
193 : thread_tree_(thread_tree) {
194 const char* env_name = "REMOVE_R8_SYNTHESIZED_FRAME";
195 const char* s = getenv(env_name);
196 if (s != nullptr) {
197 auto result = android::base::ParseBool(s);
198 if (result == android::base::ParseBoolResult::kError) {
199 LOG(WARNING) << "invalid value in env variable " << env_name;
200 } else if (result == android::base::ParseBoolResult::kTrue) {
201 LOG(INFO) << "R8 synthesized frames will be removed.";
202 remove_r8_synthesized_frame_ = true;
203 }
204 }
205 }
206
AddProguardMappingFile(std::string_view mapping_file)207 bool CallChainReportBuilder::AddProguardMappingFile(std::string_view mapping_file) {
208 if (!retrace_) {
209 retrace_.reset(new ProguardMappingRetrace);
210 }
211 return retrace_->AddProguardMappingFile(mapping_file);
212 }
213
Build(const ThreadEntry * thread,const std::vector<uint64_t> & ips,size_t kernel_ip_count)214 std::vector<CallChainReportEntry> CallChainReportBuilder::Build(const ThreadEntry* thread,
215 const std::vector<uint64_t>& ips,
216 size_t kernel_ip_count) {
217 std::vector<CallChainReportEntry> result;
218 result.reserve(ips.size());
219 for (size_t i = 0; i < ips.size(); i++) {
220 const MapEntry* map = thread_tree_.FindMap(thread, ips[i], i < kernel_ip_count);
221 Dso* dso = map->dso;
222 uint64_t vaddr_in_file;
223 const Symbol* symbol = thread_tree_.FindSymbol(map, ips[i], &vaddr_in_file, &dso);
224 CallChainExecutionType execution_type = CallChainExecutionType::NATIVE_METHOD;
225 if (dso->IsForJavaMethod()) {
226 if (dso->type() == DSO_DEX_FILE) {
227 execution_type = CallChainExecutionType::INTERPRETED_JVM_METHOD;
228 } else {
229 execution_type = CallChainExecutionType::JIT_JVM_METHOD;
230 }
231 }
232 result.resize(result.size() + 1);
233 auto& entry = result.back();
234 entry.ip = ips[i];
235 entry.symbol = symbol;
236 entry.dso = dso;
237 entry.vaddr_in_file = vaddr_in_file;
238 entry.map = map;
239 entry.execution_type = execution_type;
240 }
241 MarkArtFrame(result);
242 if (remove_art_frame_) {
243 auto it = std::remove_if(result.begin(), result.end(), [](const CallChainReportEntry& entry) {
244 return entry.execution_type == CallChainExecutionType::ART_METHOD;
245 });
246 result.erase(it, result.end());
247 }
248 if (convert_jit_frame_) {
249 ConvertJITFrame(result);
250 }
251 if (retrace_) {
252 DeObfuscateJavaMethods(result);
253 }
254 return result;
255 }
256
MarkArtFrame(std::vector<CallChainReportEntry> & callchain)257 void CallChainReportBuilder::MarkArtFrame(std::vector<CallChainReportEntry>& callchain) {
258 // Mark art methods before or after a JVM method.
259 bool near_java_method = false;
260 bool is_jni_trampoline = false;
261 std::vector<size_t> jni_trampoline_positions;
262 for (size_t i = 0; i < callchain.size(); ++i) {
263 auto& entry = callchain[i];
264 if (entry.execution_type == CallChainExecutionType::INTERPRETED_JVM_METHOD ||
265 entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD) {
266 near_java_method = true;
267
268 // Mark art frames before this entry.
269 for (int j = static_cast<int>(i) - 1; j >= 0; j--) {
270 if (!IsArtEntry(callchain[j], &is_jni_trampoline)) {
271 break;
272 }
273 callchain[j].execution_type = CallChainExecutionType::ART_METHOD;
274 if (is_jni_trampoline) {
275 jni_trampoline_positions.push_back(j);
276 }
277 }
278 } else if (near_java_method && IsArtEntry(entry, &is_jni_trampoline)) {
279 entry.execution_type = CallChainExecutionType::ART_METHOD;
280 if (is_jni_trampoline) {
281 jni_trampoline_positions.push_back(i);
282 }
283 } else {
284 near_java_method = false;
285 }
286 }
287 // Functions called by art_jni_trampoline are jni methods. And we don't want to hide them.
288 for (auto i : jni_trampoline_positions) {
289 if (i > 0 && callchain[i - 1].execution_type == CallChainExecutionType::ART_METHOD) {
290 callchain[i - 1].execution_type = CallChainExecutionType::NATIVE_METHOD;
291 }
292 }
293 }
294
ConvertJITFrame(std::vector<CallChainReportEntry> & callchain)295 void CallChainReportBuilder::ConvertJITFrame(std::vector<CallChainReportEntry>& callchain) {
296 CollectJavaMethods();
297 for (size_t i = 0; i < callchain.size();) {
298 auto& entry = callchain[i];
299 if (entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD) {
300 // This is a JIT java method, merge it with the interpreted java method having the same
301 // name if possible. Otherwise, merge it with other JIT java methods having the same name
302 // by assigning a common dso_name.
303 if (auto it = java_method_map_.find(std::string(entry.symbol->FunctionName()));
304 it != java_method_map_.end()) {
305 entry.dso = it->second.dso;
306 entry.symbol = it->second.symbol;
307 // Not enough info to map an offset in a JIT method to an offset in a dex file. So just
308 // use the symbol_addr.
309 entry.vaddr_in_file = entry.symbol->addr;
310
311 // ART may call from an interpreted Java method into its corresponding JIT method. To
312 // avoid showing the method calling itself, remove the JIT frame.
313 if (i + 1 < callchain.size() && callchain[i + 1].dso == entry.dso &&
314 callchain[i + 1].symbol == entry.symbol) {
315 callchain.erase(callchain.begin() + i);
316 continue;
317 }
318
319 } else if (!JITDebugReader::IsPathInJITSymFile(entry.dso->Path())) {
320 // Old JITSymFiles use names like "TemporaryFile-XXXXXX". So give them a better name.
321 entry.dso_name = "[JIT cache]";
322 }
323 }
324 i++;
325 }
326 }
327
CollectJavaMethods()328 void CallChainReportBuilder::CollectJavaMethods() {
329 if (!java_method_initialized_) {
330 java_method_initialized_ = true;
331 for (Dso* dso : thread_tree_.GetAllDsos()) {
332 if (dso->type() == DSO_DEX_FILE) {
333 dso->LoadSymbols();
334 for (auto& symbol : dso->GetSymbols()) {
335 java_method_map_.emplace(symbol.Name(), JavaMethod(dso, &symbol));
336 }
337 }
338 }
339 }
340 }
341
IsJavaEntry(const CallChainReportEntry & entry)342 static bool IsJavaEntry(const CallChainReportEntry& entry) {
343 static const char* COMPILED_JAVA_FILE_SUFFIXES[] = {".odex", ".oat", ".dex"};
344 if (entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD ||
345 entry.execution_type == CallChainExecutionType::INTERPRETED_JVM_METHOD) {
346 return true;
347 }
348 if (entry.execution_type == CallChainExecutionType::NATIVE_METHOD) {
349 const std::string& path = entry.dso->Path();
350 for (const char* suffix : COMPILED_JAVA_FILE_SUFFIXES) {
351 if (android::base::EndsWith(path, suffix)) {
352 return true;
353 }
354 }
355 }
356 return false;
357 }
358
DeObfuscateJavaMethods(std::vector<CallChainReportEntry> & callchain)359 void CallChainReportBuilder::DeObfuscateJavaMethods(std::vector<CallChainReportEntry>& callchain) {
360 for (size_t i = 0; i < callchain.size();) {
361 auto& entry = callchain[i];
362 if (!IsJavaEntry(entry)) {
363 i++;
364 continue;
365 }
366 std::string_view name = entry.symbol->FunctionName();
367 std::string original_name;
368 bool synthesized;
369 if (retrace_->DeObfuscateJavaMethods(name, &original_name, &synthesized)) {
370 if (synthesized && remove_r8_synthesized_frame_) {
371 callchain.erase(callchain.begin() + i);
372 continue;
373 }
374 entry.symbol->SetDemangledName(original_name);
375 }
376 i++;
377 }
378 }
379
AggregateThreads(const std::vector<std::string> & thread_name_regex)380 bool ThreadReportBuilder::AggregateThreads(const std::vector<std::string>& thread_name_regex) {
381 size_t i = thread_regs_.size();
382 thread_regs_.resize(i + thread_name_regex.size());
383 for (const auto& reg_str : thread_name_regex) {
384 std::unique_ptr<RegEx> re = RegEx::Create(reg_str);
385 if (!re) {
386 return false;
387 }
388 thread_regs_[i++].re = std::move(re);
389 }
390 return true;
391 }
392
Build(const ThreadEntry & thread)393 ThreadReport ThreadReportBuilder::Build(const ThreadEntry& thread) {
394 ThreadReport report(thread.pid, thread.tid, thread.comm);
395 ModifyReportToAggregateThreads(report);
396 return report;
397 }
398
ModifyReportToAggregateThreads(ThreadReport & report)399 void ThreadReportBuilder::ModifyReportToAggregateThreads(ThreadReport& report) {
400 if (thread_regs_.empty()) {
401 // No modification when there are no regular expressions.
402 return;
403 }
404 const std::string thread_name = report.thread_name;
405 if (auto it = thread_map_.find(thread_name); it != thread_map_.end()) {
406 // Found cached result in thread_map_.
407 if (it->second != -1) {
408 report = thread_regs_[it->second].report;
409 }
410 return;
411 }
412 // Run the slow path to walk through every regular expression.
413 size_t index;
414 for (index = 0; index < thread_regs_.size(); ++index) {
415 if (thread_regs_[index].re->Match(thread_name)) {
416 break;
417 }
418 }
419 if (index == thread_regs_.size()) {
420 thread_map_[thread_name] = -1;
421 } else {
422 thread_map_[thread_name] = static_cast<int>(index);
423 // Modify thread report.
424 auto& aggregated_report = thread_regs_[index].report;
425 if (aggregated_report.thread_name == nullptr) {
426 // Use regular expression as the name of the aggregated thread. So users know it's an
427 // aggregated thread.
428 aggregated_report =
429 ThreadReport(report.pid, report.tid, thread_regs_[index].re->GetPattern().c_str());
430 }
431 report = aggregated_report;
432 }
433 }
434
435 } // namespace simpleperf
436