• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18from __future__ import annotations
19import argparse
20import collections
21from concurrent.futures import Future, ThreadPoolExecutor
22from dataclasses import dataclass
23import datetime
24import json
25import logging
26import os
27from pathlib import Path
28import sys
29from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple, Union
30
31from simpleperf_report_lib import ReportLib, SymbolStruct
32from simpleperf_utils import (
33    Addr2Nearestline, BaseArgumentParser, BinaryFinder, get_script_dir, log_exit, Objdump,
34    open_report_in_browser, ReadElf, ReportLibOptions, SourceFileSearcher)
35
36MAX_CALLSTACK_LENGTH = 750
37
38
class HtmlWriter(object):
    """Small streaming HTML writer that remembers which tags are still open."""

    def __init__(self, output_path: Union[Path, str]):
        """Open output_path for writing; nothing is written yet."""
        self.fh = open(output_path, 'w')
        # Names of tags opened through open_tag() and not closed yet, innermost last.
        self.tag_stack = []

    def close(self):
        """Close the output file."""
        self.fh.close()

    def open_tag(self, tag: str, **attrs: Dict[str, str]) -> HtmlWriter:
        """Write an opening tag with the given attributes and push it on the stack.

        Attribute values are written verbatim (no escaping). Returns self so
        calls can be chained.
        """
        rendered_attrs = ''.join(' %s="%s"' % pair for pair in attrs.items())
        self.fh.write('<%s%s>' % (tag, rendered_attrs))
        self.tag_stack.append(tag)
        return self

    def close_tag(self, tag: Optional[str] = None):
        """Close the innermost open tag.

        When tag is given, it is asserted to be the innermost open tag.
        """
        if tag:
            assert self.tag_stack[-1] == tag
        innermost = self.tag_stack.pop()
        self.fh.write('</%s>\n' % innermost)

    def add(self, text: str) -> HtmlWriter:
        """Write text as-is and return self for chaining."""
        self.fh.write(text)
        return self

    def add_file(self, file_path: Union[Path, str]) -> HtmlWriter:
        """Write the whole content of file_path, resolved against the script directory."""
        full_path = os.path.join(get_script_dir(), file_path)
        with open(full_path, 'r') as source:
            self.add(source.read())
        return self
70
71
def modify_text_for_html(text: str) -> str:
    """Escape `&`, `<` and `>` in text so it can be embedded in HTML.

    `&` is replaced first so that the entities produced for `<` and `>` are
    not escaped a second time, and so that text which already contains
    entity-like sequences (e.g. a source line containing `&lt;`) is shown
    literally instead of being decoded.
    """
    return text.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')
74
75
def hex_address_for_json(addr: int) -> str:
    """Format addr as a '0x'-prefixed lowercase hex string for the JSON output.

    Addresses are stored as strings because values near uint64 max are hard
    to handle as numbers in Javascript.
    """
    return f'0x{addr:x}'
80
81
class EventScope(object):
    """All samples of one event type, grouped by process."""

    def __init__(self, name: str):
        self.name = name
        self.processes: Dict[int, ProcessScope] = {}  # keyed by pid
        self.sample_count = 0
        self.event_count = 0

    def get_process(self, pid: int) -> ProcessScope:
        """Return the ProcessScope for pid, creating it on first use."""
        if pid not in self.processes:
            self.processes[pid] = ProcessScope(pid)
        return self.processes[pid]

    def get_sample_info(self, gen_addr_hit_map: bool) -> Dict[str, Any]:
        """Build the JSON-ready dict for this event.

        Processes are listed in descending order of event_count.
        """
        ordered = sorted(self.processes.values(),
                         key=lambda proc: proc.event_count, reverse=True)
        return {
            'eventName': self.name,
            'eventCount': self.event_count,
            'processes': [proc.get_sample_info(gen_addr_hit_map) for proc in ordered],
        }

    @property
    def threads(self) -> Iterator[ThreadScope]:
        """Iterate over every thread of every process in this event."""
        for process in self.processes.values():
            yield from process.threads.values()

    @property
    def libraries(self) -> Iterator[LibScope]:
        """Iterate over every library of every thread of every process."""
        for process in self.processes.values():
            for thread in process.threads.values():
                yield from thread.libs.values()
117
118
class ProcessScope(object):
    """Samples of one process within an event, grouped by thread."""

    def __init__(self, pid: int):
        self.pid = pid
        self.name = ''
        self.event_count = 0
        self.threads: Dict[int, ThreadScope] = {}  # keyed by tid

    def get_thread(self, tid: int, thread_name: str) -> ThreadScope:
        """Return the ThreadScope for tid, creating it on first use.

        The thread is (re)named to thread_name on every call; when tid equals
        the pid, the process takes that name as well.
        """
        if tid not in self.threads:
            self.threads[tid] = ThreadScope(tid)
        thread = self.threads[tid]
        thread.name = thread_name
        if tid == self.pid:
            self.name = thread_name
        return thread

    def get_sample_info(self, gen_addr_hit_map: bool) -> Dict[str, Any]:
        """Build the JSON-ready dict for this process."""
        # Ordering by sample count (not event count) works better for profiles
        # recorded with --trace-offcpu.
        ordered = sorted(self.threads.values(),
                         key=lambda thr: thr.sample_count, reverse=True)
        return {
            'pid': self.pid,
            'eventCount': self.event_count,
            'threads': [thr.get_sample_info(gen_addr_hit_map) for thr in ordered],
        }

    def merge_by_thread_name(self, process: ProcessScope):
        """Absorb process into this one, merging threads that share a name.

        For each thread name, the first thread seen (this process's threads
        come first) is kept and later ones are merged into it. The result is
        re-keyed by the tid of the kept threads.
        """
        self.event_count += process.event_count
        by_name: Dict[str, ThreadScope] = {}
        for thread in [*self.threads.values(), *process.threads.values()]:
            if thread.name in by_name:
                by_name[thread.name].merge(thread)
            else:
                by_name[thread.name] = thread
        self.threads = {kept.tid: kept for kept in by_name.values()}
160
161
class ThreadScope(object):
    """Samples of one thread: per-library counters plus forward/reverse call graphs."""

    def __init__(self, tid: int):
        self.tid = tid
        self.name = ''
        self.event_count = 0
        self.sample_count = 0
        self.libs: Dict[int, LibScope] = {}  # keyed by lib_id
        # Both graphs hang off a placeholder root node with func_id -1.
        self.call_graph = CallNode(-1)
        self.reverse_call_graph = CallNode(-1)

    def add_callstack(
            self, event_count: int, callstack: List[Tuple[int, int, int]],
            build_addr_hit_map: bool):
        """Account one sample's callstack to this thread.

        callstack holds (lib_id, func_id, addr) tuples; entry 0 is the sampled
        frame and every entry i > 0 is the caller of entry i-1.
        """
        counted_funcs: Set[int] = set()
        for depth, (lib_id, func_id, addr) in enumerate(callstack):
            # A recursive function shows up several times in one callstack;
            # it must be counted only once per sample.
            if func_id in counted_funcs:
                continue
            counted_funcs.add(func_id)
            is_sampled_frame = depth == 0

            if lib_id not in self.libs:
                self.libs[lib_id] = LibScope(lib_id)
            lib = self.libs[lib_id]
            function = lib.get_function(func_id)
            function.subtree_event_count += event_count
            if is_sampled_frame:
                # Self cost goes only to the frame the sample landed in.
                lib.event_count += event_count
                function.event_count += event_count
                function.sample_count += 1
            if build_addr_hit_map:
                self_count = event_count if is_sampled_frame else 0
                function.build_addr_hit_map(addr, self_count, event_count)

        # Extend both graphs. The forward graph is walked from the outermost
        # caller down to the sampled frame, the reverse graph the other way.
        func_ids = [frame[1] for frame in callstack]
        forward_node = self.call_graph
        for func_id in reversed(func_ids):
            forward_node = forward_node.get_child(func_id)
        forward_node.event_count += event_count
        reverse_node = self.reverse_call_graph
        for func_id in func_ids:
            reverse_node = reverse_node.get_child(func_id)
        reverse_node.event_count += event_count

    def update_subtree_event_count(self):
        """Recompute subtree event counts of both call graphs."""
        for graph in (self.call_graph, self.reverse_call_graph):
            graph.update_subtree_event_count()

    def limit_percents(self, min_func_limit: float, min_callchain_percent: float,
                       hit_func_ids: Set[int]):
        """Drop functions and call-graph edges that are too small to report.

        Functions whose subtree_event_count is below min_func_limit (an
        absolute event count) are removed from their library. Call-graph
        children below min_callchain_percent percent of this thread's total
        are cut. Ids of everything that is kept are added to hit_func_ids.
        """
        for lib in self.libs.values():
            kept_ids, dropped_ids = [], []
            for function in lib.functions.values():
                too_small = function.subtree_event_count < min_func_limit
                (dropped_ids if too_small else kept_ids).append(function.func_id)
            hit_func_ids.update(kept_ids)
            for func_id in dropped_ids:
                del lib.functions[func_id]
        min_limit = min_callchain_percent * 0.01 * self.call_graph.subtree_event_count
        for graph in (self.call_graph, self.reverse_call_graph):
            graph.cut_edge(min_limit, hit_func_ids)

    def get_sample_info(self, gen_addr_hit_map: bool) -> Dict[str, Any]:
        """Build the JSON-ready dict for this thread."""
        return {
            'tid': self.tid,
            'eventCount': self.event_count,
            'sampleCount': self.sample_count,
            'libs': [lib.gen_sample_info(gen_addr_hit_map) for lib in self.libs.values()],
            'g': self.call_graph.gen_sample_info(),
            'rg': self.reverse_call_graph.gen_sample_info(),
        }

    def merge(self, thread: ThreadScope):
        """Add the counters, libraries and call graphs of thread into this one."""
        self.event_count += thread.event_count
        self.sample_count += thread.sample_count
        for lib_id, incoming in thread.libs.items():
            if lib_id in self.libs:
                self.libs[lib_id].merge(incoming)
            else:
                self.libs[lib_id] = incoming
        self.call_graph.merge(thread.call_graph)
        self.reverse_call_graph.merge(thread.reverse_call_graph)
248
249
class LibScope(object):
    """Samples of one library within a thread, grouped by function."""

    def __init__(self, lib_id: int):
        self.lib_id = lib_id
        self.event_count = 0
        self.functions: Dict[int, FunctionScope] = {}  # keyed by func_id

    def get_function(self, func_id: int) -> FunctionScope:
        """Return the FunctionScope for func_id, creating it on first use."""
        if func_id not in self.functions:
            self.functions[func_id] = FunctionScope(func_id)
        return self.functions[func_id]

    def gen_sample_info(self, gen_addr_hit_map: bool) -> Dict[str, Any]:
        """Build the JSON-ready dict for this library."""
        return {
            'libId': self.lib_id,
            'eventCount': self.event_count,
            'functions': [function.gen_sample_info(gen_addr_hit_map)
                          for function in self.functions.values()],
        }

    def merge(self, lib: LibScope):
        """Add the event count and functions of lib into this one."""
        self.event_count += lib.event_count
        for func_id, incoming in lib.functions.items():
            if func_id in self.functions:
                self.functions[func_id].merge(incoming)
            else:
                self.functions[func_id] = incoming
279
280
class FunctionScope(object):
    """Sample counters of one function within a library of a thread."""

    def __init__(self, func_id: int):
        self.func_id = func_id
        self.sample_count = 0
        self.event_count = 0
        self.subtree_event_count = 0
        # Both maps are created lazily and stay None until first used.
        # addr -> [event_count, subtree_event_count]
        self.addr_hit_map = None
        # (source_file_id, line) -> [event_count, subtree_event_count]
        self.line_hit_map = None

    @staticmethod
    def _add_counts(hit_map, key, event_count: int, subtree_event_count: int):
        """Add the two counts to hit_map[key], creating the entry if missing."""
        counts = hit_map.get(key)
        if counts is None:
            hit_map[key] = [event_count, subtree_event_count]
            return
        counts[0] += event_count
        counts[1] += subtree_event_count

    def build_addr_hit_map(self, addr: int, event_count: int, subtree_event_count: int):
        """Accumulate the counts for an address hit by a sample."""
        if self.addr_hit_map is None:
            self.addr_hit_map = {}
        self._add_counts(self.addr_hit_map, addr, event_count, subtree_event_count)

    def build_line_hit_map(self, source_file_id: int, line: int, event_count: int,
                           subtree_event_count: int):
        """Accumulate the counts for a source line hit by a sample."""
        if self.line_hit_map is None:
            self.line_hit_map = {}
        self._add_counts(self.line_hit_map, (source_file_id, line), event_count,
                         subtree_event_count)

    def gen_sample_info(self, gen_addr_hit_map: bool) -> Dict[str, Any]:
        """Build the JSON-ready dict for this function.

        'f' and 'c' are always present. 's' is added when line hits exist.
        'a' (sorted by address) is added when gen_addr_hit_map is set and
        address hits exist.
        """
        result = {
            'f': self.func_id,
            'c': [self.sample_count, self.event_count, self.subtree_event_count],
        }
        if self.line_hit_map:
            result['s'] = [
                {'f': file_id, 'l': line, 'e': counts[0], 's': counts[1]}
                for (file_id, line), counts in self.line_hit_map.items()]
        if gen_addr_hit_map and self.addr_hit_map:
            result['a'] = [
                {'a': hex_address_for_json(addr),
                 'e': self.addr_hit_map[addr][0],
                 's': self.addr_hit_map[addr][1]}
                for addr in sorted(self.addr_hit_map)]
        return result

    def merge(self, function: FunctionScope):
        """Add the counters and hit maps of function into this one."""
        self.sample_count += function.sample_count
        self.event_count += function.event_count
        self.subtree_event_count += function.subtree_event_count
        self.addr_hit_map = self.__merge_hit_map(self.addr_hit_map, function.addr_hit_map)
        self.line_hit_map = self.__merge_hit_map(self.line_hit_map, function.line_hit_map)

    @staticmethod
    def __merge_hit_map(map1: Optional[Dict[int, List[int]]],
                        map2: Optional[Dict[int, List[int]]]) -> Optional[Dict[int, List[int]]]:
        """Merge map2 into map1 and return the result.

        If either map is missing or empty, the other one is returned as-is
        (not copied). Otherwise map1 is updated in place.
        """
        if map1 and map2:
            for key, incoming in map2.items():
                if key in map1:
                    existing = map1[key]
                    existing[0] += incoming[0]
                    existing[1] += incoming[1]
                else:
                    map1[key] = incoming
            return map1
        return map1 if map1 else map2
358
359
class CallNode(object):
    """One node of a call graph, identified by the func_id it stands for."""

    def __init__(self, func_id: int):
        self.func_id = func_id
        self.event_count = 0
        self.subtree_event_count = 0
        # func_id -> CallNode, kept in insertion order.
        self.children: Dict[int, CallNode] = collections.OrderedDict()

    def get_child(self, func_id: int) -> CallNode:
        """Return the child for func_id, creating it on first use."""
        if func_id not in self.children:
            self.children[func_id] = CallNode(func_id)
        return self.children[func_id]

    def update_subtree_event_count(self):
        """Recompute subtree_event_count for this node and all descendants.

        Returns the new subtree_event_count of this node.
        """
        self.subtree_event_count = self.event_count
        for child in self.children.values():
            child_total = child.update_subtree_event_count()
            self.subtree_event_count += child_total
        return self.subtree_event_count

    def cut_edge(self, min_limit: float, hit_func_ids: Set[int]):
        """Remove children whose subtree_event_count is below min_limit.

        The func_id of this node and of every surviving descendant is added to
        hit_func_ids.
        """
        hit_func_ids.add(self.func_id)
        pruned_keys = []
        for key, child in self.children.items():
            if child.subtree_event_count < min_limit:
                pruned_keys.append(key)
                continue
            child.cut_edge(min_limit, hit_func_ids)
        # Deleting is deferred so the dict is not modified while iterating it.
        for key in pruned_keys:
            del self.children[key]

    def gen_sample_info(self) -> Dict[str, Any]:
        """Build the JSON-ready dict for this node and its descendants."""
        return {
            'e': self.event_count,
            's': self.subtree_event_count,
            'f': self.func_id,
            'c': [child.gen_sample_info() for child in self.children.values()],
        }

    def merge(self, node: CallNode):
        """Add the counts and children of node into this one, recursively."""
        self.event_count += node.event_count
        self.subtree_event_count += node.subtree_event_count
        for key, incoming in node.children.items():
            if key in self.children:
                self.children[key].merge(incoming)
            else:
                self.children[key] = incoming
410
411
@dataclass
class LibInfo:
    """Name and build id of one library stored in a LibSet."""
    name: str  # library name; LibSet uses it as the lookup key for the lib id
    build_id: str  # build id given to LibSet.add_lib() together with the name
416
417
class LibSet(object):
    """Shared libraries seen in perf.data; a lib id is the index into libs."""

    def __init__(self):
        self.libs: List[LibInfo] = []
        self.lib_name_to_id: Dict[str, int] = {}

    def get_lib_id(self, lib_name: str) -> Optional[int]:
        """Return the id registered for lib_name, or None if it is unknown."""
        lib_id = self.lib_name_to_id.get(lib_name)
        return lib_id

    def add_lib(self, lib_name: str, build_id: str) -> int:
        """Register a library and return its newly assigned lib_id."""
        self.libs.append(LibInfo(lib_name, build_id))
        new_id = len(self.libs) - 1
        self.lib_name_to_id[lib_name] = new_id
        return new_id

    def get_lib(self, lib_id: int) -> LibInfo:
        """Return the LibInfo registered under lib_id."""
        return self.libs[lib_id]
437
438
class Function(object):
    """A function of a shared library, as recorded in a FunctionSet."""

    def __init__(self, lib_id: int, func_name: str, func_id: int, start_addr: int, addr_len: int):
        self.lib_id: int = lib_id
        self.func_name: str = func_name
        self.func_id: int = func_id
        # The function covers addresses [start_addr, start_addr + addr_len).
        self.start_addr: int = start_addr
        self.addr_len: int = addr_len
        # (source_file_id, start_line, end_line), filled in once source code is collected.
        self.source_info: Optional[Tuple[int, int, int]] = None
        # Filled in when disassembly is collected; presumably a list of
        # (disassembly, addr) pairs as described in the RecordData docstring.
        self.disassembly: Optional[Any] = None
450
451
class FunctionSet(object):
    """Functions seen in perf.data, addressable by (lib_id, name) or by func_id."""

    def __init__(self):
        self.name_to_func: Dict[Tuple[int, str], Function] = {}
        self.id_to_func: Dict[int, Function] = {}

    def get_func_id(self, lib_id: int, symbol: SymbolStruct) -> int:
        """Return the func_id for symbol in library lib_id, registering it if new.

        A new id is the number of functions currently in id_to_func.
        """
        key = (lib_id, symbol.symbol_name)
        if key in self.name_to_func:
            return self.name_to_func[key].func_id
        new_id = len(self.id_to_func)
        function = Function(lib_id, symbol.symbol_name, new_id, symbol.symbol_addr,
                            symbol.symbol_len)
        self.name_to_func[key] = function
        self.id_to_func[new_id] = function
        return function.func_id

    def trim_functions(self, left_func_ids: Set[int]):
        """Remove every function except those whose id is in left_func_ids.

        name_to_func is dropped afterwards, so get_func_id() must not be
        called again once the set has been trimmed.
        """
        unwanted_ids = [function.func_id for function in self.name_to_func.values()
                        if function.func_id not in left_func_ids]
        for func_id in unwanted_ids:
            del self.id_to_func[func_id]
        # name_to_func will not be used.
        self.name_to_func = None
477
478
class SourceFile(object):
    """ A source file containing source code hit by samples. """

    def __init__(self, file_id: int, abstract_path: str):
        self.file_id = file_id
        self.abstract_path = abstract_path  # path reported by addr2line
        self.real_path: Optional[str] = None  # file path in the file system
        # Line numbers whose text should be kept. Set to None by
        # add_source_code(), after which request_lines() must not be called.
        self.requested_lines: Optional[Set[int]] = set()
        self.line_to_code: Dict[int, str] = {}  # map from line to code in that line.

    def request_lines(self, start_line: int, end_line: int):
        """ Mark lines start_line..end_line (both inclusive) as needed. """
        self.requested_lines.update(range(start_line, end_line + 1))

    def add_source_code(self, real_path: str):
        """ Read real_path and keep the text of every requested line that exists.

        Requested line numbers outside 1..<number of lines in the file> are
        ignored.
        """
        self.real_path = real_path
        # Source trees often contain files that are not valid in the default
        # text encoding. Replace undecodable bytes instead of raising, so that
        # one odd file can't abort the whole report.
        with open(real_path, 'r', errors='replace') as f:
            source_code = f.readlines()
        max_line = len(source_code)
        for line in self.requested_lines:
            if 0 < line <= max_line:
                self.line_to_code[line] = source_code[line - 1]
        # requested_lines is no longer used.
        self.requested_lines = None
502
503
class SourceFileSet(object):
    """Source files referenced by samples, keyed by the path addr2line reported."""

    def __init__(self):
        self.path_to_source_files: Dict[str, SourceFile] = {}

    def get_source_file(self, file_path: str) -> SourceFile:
        """Return the SourceFile for file_path, creating it on first use.

        A new file gets the number of files known so far as its file_id.
        """
        if file_path not in self.path_to_source_files:
            new_file_id = len(self.path_to_source_files)
            self.path_to_source_files[file_path] = SourceFile(new_file_id, file_path)
        return self.path_to_source_files[file_path]

    def load_source_code(self, source_dirs: List[str]):
        """Load code for every file that can be located under source_dirs.

        Files the searcher can't resolve to a real path are left without code.
        """
        searcher = SourceFileSearcher(source_dirs)
        for source_file in self.path_to_source_files.values():
            located_path = searcher.get_real_path(source_file.abstract_path)
            if not located_path:
                continue
            source_file.add_source_code(located_path)
523
524
525class RecordData(object):
526
527    """RecordData reads perf.data, and generates data used by report_html.js in json format.
528        All generated items are listed as below:
529            1. recordTime: string
530            2. machineType: string
531            3. androidVersion: string
532            4. recordCmdline: string
533            5. totalSamples: int
534            6. processNames: map from pid to processName.
535            7. threadNames: map from tid to threadName.
536            8. libList: an array of libNames, indexed by libId.
537            9. functionMap: map from functionId to funcData.
538                funcData = {
539                    l: libId
540                    f: functionName
541                    s: [sourceFileId, startLine, endLine] [optional]
542                    d: [(disassembly, addr)] [optional]
543                }
544
545            10.  sampleInfo = [eventInfo]
546                eventInfo = {
547                    eventName
548                    eventCount
549                    processes: [processInfo]
550                }
551                processInfo = {
552                    pid
553                    eventCount
554                    threads: [threadInfo]
555                }
556                threadInfo = {
557                    tid
558                    eventCount
559                    sampleCount
560                    libs: [libInfo],
561                    g: callGraph,
562                    rg: reverseCallgraph
563                }
564                libInfo = {
565                    libId,
566                    eventCount,
567                    functions: [funcInfo]
568                }
569                funcInfo = {
570                    f: functionId
571                    c: [sampleCount, eventCount, subTreeEventCount]
572                    s: [sourceCodeInfo] [optional]
573                    a: [addrInfo] (sorted by addrInfo.addr) [optional]
574                }
575                callGraph and reverseCallGraph are both of type CallNode.
576                callGraph shows how a function calls other functions.
577                reverseCallGraph shows how a function is called by other functions.
578                CallNode {
579                    e: selfEventCount
580                    s: subTreeEventCount
581                    f: functionId
582                    c: [CallNode] # children
583                }
584
585                sourceCodeInfo {
586                    f: sourceFileId
587                    l: line
588                    e: eventCount
589                    s: subtreeEventCount
590                }
591
592                addrInfo {
593                    a: addr
594                    e: eventCount
595                    s: subtreeEventCount
596                }
597
598            11. sourceFiles: an array of sourceFile, indexed by sourceFileId.
599                sourceFile {
600                    path
601                    code:  # a map from line to code for that line.
602                }
603    """
604
605    def __init__(
606            self, binary_cache_path: Optional[str],
607            ndk_path: Optional[str],
608            build_addr_hit_map: bool):
609        self.binary_cache_path = binary_cache_path
610        self.ndk_path = ndk_path
611        self.build_addr_hit_map = build_addr_hit_map
612        self.meta_info: Optional[Dict[str, str]] = None
613        self.cmdline: Optional[str] = None
614        self.arch: Optional[str] = None
615        self.events: Dict[str, EventScope] = {}
616        self.libs = LibSet()
617        self.functions = FunctionSet()
618        self.total_samples = 0
619        self.source_files = SourceFileSet()
620        self.gen_addr_hit_map_in_record_info = False
621        self.binary_finder = BinaryFinder(binary_cache_path, ReadElf(ndk_path))
622
623    def load_record_file(self, record_file: str, report_lib_options: ReportLibOptions):
624        lib = ReportLib()
625        lib.SetRecordFile(record_file)
626        # If not showing ip for unknown symbols, the percent of the unknown symbol may be
627        # accumulated to very big, and ranks first in the sample table.
628        lib.ShowIpForUnknownSymbol()
629        if self.binary_cache_path:
630            lib.SetSymfs(self.binary_cache_path)
631        lib.SetReportOptions(report_lib_options)
632        self.meta_info = lib.MetaInfo()
633        self.cmdline = lib.GetRecordCmd()
634        self.arch = lib.GetArch()
635        while True:
636            raw_sample = lib.GetNextSample()
637            if not raw_sample:
638                lib.Close()
639                break
640            raw_event = lib.GetEventOfCurrentSample()
641            symbol = lib.GetSymbolOfCurrentSample()
642            callchain = lib.GetCallChainOfCurrentSample()
643            event = self._get_event(raw_event.name)
644            self.total_samples += 1
645            event.sample_count += 1
646            event.event_count += raw_sample.period
647            process = event.get_process(raw_sample.pid)
648            process.event_count += raw_sample.period
649            thread = process.get_thread(raw_sample.tid, raw_sample.thread_comm)
650            thread.event_count += raw_sample.period
651            thread.sample_count += 1
652
653            lib_id = self.libs.get_lib_id(symbol.dso_name)
654            if lib_id is None:
655                lib_id = self.libs.add_lib(symbol.dso_name, lib.GetBuildIdForPath(symbol.dso_name))
656            func_id = self.functions.get_func_id(lib_id, symbol)
657            callstack = [(lib_id, func_id, symbol.vaddr_in_file)]
658            for i in range(callchain.nr):
659                symbol = callchain.entries[i].symbol
660                lib_id = self.libs.get_lib_id(symbol.dso_name)
661                if lib_id is None:
662                    lib_id = self.libs.add_lib(
663                        symbol.dso_name, lib.GetBuildIdForPath(symbol.dso_name))
664                func_id = self.functions.get_func_id(lib_id, symbol)
665                callstack.append((lib_id, func_id, symbol.vaddr_in_file))
666            if len(callstack) > MAX_CALLSTACK_LENGTH:
667                callstack = callstack[:MAX_CALLSTACK_LENGTH]
668            thread.add_callstack(raw_sample.period, callstack, self.build_addr_hit_map)
669
670        for event in self.events.values():
671            for thread in event.threads:
672                thread.update_subtree_event_count()
673
674    def aggregate_by_thread_name(self):
675        for event in self.events.values():
676            new_processes = {}  # from process name to ProcessScope
677            for process in event.processes.values():
678                cur_process = new_processes.get(process.name)
679                if cur_process is None:
680                    new_processes[process.name] = process
681                else:
682                    cur_process.merge_by_thread_name(process)
683            event.processes = {}
684            for process in new_processes.values():
685                event.processes[process.pid] = process
686
687    def limit_percents(self, min_func_percent: float, min_callchain_percent: float):
688        hit_func_ids: Set[int] = set()
689        for event in self.events.values():
690            min_limit = event.event_count * min_func_percent * 0.01
691            to_del_processes = []
692            for process in event.processes.values():
693                to_del_threads = []
694                for thread in process.threads.values():
695                    if thread.call_graph.subtree_event_count < min_limit:
696                        to_del_threads.append(thread.tid)
697                    else:
698                        thread.limit_percents(min_limit, min_callchain_percent, hit_func_ids)
699                for thread in to_del_threads:
700                    del process.threads[thread]
701                if not process.threads:
702                    to_del_processes.append(process.pid)
703            for process in to_del_processes:
704                del event.processes[process]
705        self.functions.trim_functions(hit_func_ids)
706
707    def _get_event(self, event_name: str) -> EventScope:
708        if event_name not in self.events:
709            self.events[event_name] = EventScope(event_name)
710        return self.events[event_name]
711
    def add_source_code(self, source_dirs: List[str], filter_lib: Callable[[str], bool], jobs: int):
        """ Collect source code information:
            1. Find line ranges for each function in FunctionSet.
            2. Find line for each addr in FunctionScope.addr_hit_map.
            3. Collect needed source code in SourceFileSet.

            source_dirs: directories searched for the real source files.
            filter_lib: called with a library name; addresses are only
                submitted to addr2line for libraries where it returns true.
            jobs: forwarded to Addr2Nearestline.convert_addrs_to_lines()
                (presumably the number of parallel jobs -- confirm there).

            NOTE(review): FunctionScope.addr_hit_map stays None unless samples
            were added with build_addr_hit_map enabled, and the loops below
            iterate it without a None check -- confirm callers only use this
            method on a RecordData created with build_addr_hit_map=True.
        """
        addr2line = Addr2Nearestline(self.ndk_path, self.binary_finder, False)
        # Request line range for each function.
        for function in self.functions.id_to_func.values():
            if function.func_name == 'unknown':
                continue
            lib_info = self.libs.get_lib(function.lib_id)
            if filter_lib(lib_info.name):
                # Submit the first and the last address of the function; their
                # lines are used below as the function's line range.
                addr2line.add_addr(lib_info.name, lib_info.build_id,
                                   function.start_addr, function.start_addr)
                addr2line.add_addr(lib_info.name, lib_info.build_id, function.start_addr,
                                   function.start_addr + function.addr_len - 1)
        # Request line for each addr in FunctionScope.addr_hit_map.
        for event in self.events.values():
            for lib in event.libraries:
                lib_info = self.libs.get_lib(lib.lib_id)
                if filter_lib(lib_info.name):
                    for function in lib.functions.values():
                        func_addr = self.functions.id_to_func[function.func_id].start_addr
                        for addr in function.addr_hit_map:
                            addr2line.add_addr(lib_info.name, lib_info.build_id, func_addr, addr)
        # All addresses are submitted first and converted in one batch.
        addr2line.convert_addrs_to_lines(jobs)

        # Set line range for each function.
        for function in self.functions.id_to_func.values():
            if function.func_name == 'unknown':
                continue
            dso = addr2line.get_dso(self.libs.get_lib(function.lib_id).name)
            if not dso:
                continue
            start_source = addr2line.get_addr_source(dso, function.start_addr)
            end_source = addr2line.get_addr_source(dso, function.start_addr + function.addr_len - 1)
            if not start_source or not end_source:
                continue
            # Only the last (file, line) entry of each result is used here.
            start_file_path, start_line = start_source[-1]
            end_file_path, end_line = end_source[-1]
            # Skip functions whose ends map to different files or to a
            # reversed line range; no source range is recorded for them.
            if start_file_path != end_file_path or start_line > end_line:
                continue
            source_file = self.source_files.get_source_file(start_file_path)
            source_file.request_lines(start_line, end_line)
            function.source_info = (source_file.file_id, start_line, end_line)

        # Build FunctionScope.line_hit_map.
        for event in self.events.values():
            for lib in event.libraries:
                dso = addr2line.get_dso(self.libs.get_lib(lib.lib_id).name)
                if not dso:
                    continue
                for function in lib.functions.values():
                    for addr in function.addr_hit_map:
                        source = addr2line.get_addr_source(dso, addr)
                        if not source:
                            continue
                        # The counts of one address are credited to every
                        # (file, line) entry returned for it.
                        for file_path, line in source:
                            source_file = self.source_files.get_source_file(file_path)
                            # Show [line - 5, line + 5] of the line hit by a sample.
                            source_file.request_lines(line - 5, line + 5)
                            count_info = function.addr_hit_map[addr]
                            function.build_line_hit_map(source_file.file_id, line, count_info[0],
                                                        count_info[1])

        # Collect needed source code in SourceFileSet.
        self.source_files.load_source_code(source_dirs)
780
781    def add_disassembly(self, filter_lib: Callable[[str], bool], jobs: int):
782        """ Collect disassembly information:
783            1. Use objdump to collect disassembly for each function in FunctionSet.
784            2. Set flag to dump addr_hit_map when generating record info.
785        """
786        objdump = Objdump(self.ndk_path, self.binary_finder)
787        lib_functions: Dict[int, List[Function]] = collections.defaultdict(list)
788
789        for function in self.functions.id_to_func.values():
790            if function.func_name == 'unknown':
791                continue
792            lib_functions[function.lib_id].append(function)
793
794        with ThreadPoolExecutor(jobs) as executor:
795            for lib_id, functions in lib_functions.items():
796                lib = self.libs.get_lib(lib_id)
797                if not filter_lib(lib.name):
798                    continue
799                dso_info = objdump.get_dso_info(lib.name, lib.build_id)
800                if not dso_info:
801                    continue
802                logging.info('Disassemble %s' % dso_info[0])
803                futures: List[Future] = []
804                for function in functions:
805                    futures.append(
806                        executor.submit(objdump.disassemble_code, dso_info,
807                                        function.start_addr, function.addr_len))
808                for i in range(len(functions)):
809                    # Call future.result() to report exceptions raised in the executor.
810                    functions[i].disassembly = futures[i].result()
811        self.gen_addr_hit_map_in_record_info = True
812
813    def gen_record_info(self) -> Dict[str, Any]:
814        """ Return json data which will be used by report_html.js. """
815        record_info = {}
816        timestamp = self.meta_info.get('timestamp')
817        if timestamp:
818            t = datetime.datetime.fromtimestamp(int(timestamp))
819        else:
820            t = datetime.datetime.now()
821        record_info['recordTime'] = t.strftime('%Y-%m-%d (%A) %H:%M:%S')
822
823        product_props = self.meta_info.get('product_props')
824        machine_type = self.arch
825        if product_props:
826            manufacturer, model, name = product_props.split(':')
827            machine_type = '%s (%s) by %s, arch %s' % (model, name, manufacturer, self.arch)
828        record_info['machineType'] = machine_type
829        record_info['androidVersion'] = self.meta_info.get('android_version', '')
830        record_info['androidBuildFingerprint'] = self.meta_info.get('android_build_fingerprint', '')
831        record_info['kernelVersion'] = self.meta_info.get('kernel_version', '')
832        record_info['recordCmdline'] = self.cmdline
833        record_info['totalSamples'] = self.total_samples
834        record_info['processNames'] = self._gen_process_names()
835        record_info['threadNames'] = self._gen_thread_names()
836        record_info['libList'] = self._gen_lib_list()
837        record_info['functionMap'] = self._gen_function_map()
838        record_info['sampleInfo'] = self._gen_sample_info()
839        record_info['sourceFiles'] = self._gen_source_files()
840        return record_info
841
842    def _gen_process_names(self) -> Dict[int, str]:
843        process_names: Dict[int, str] = {}
844        for event in self.events.values():
845            for process in event.processes.values():
846                process_names[process.pid] = process.name
847        return process_names
848
849    def _gen_thread_names(self) -> Dict[int, str]:
850        thread_names: Dict[int, str] = {}
851        for event in self.events.values():
852            for process in event.processes.values():
853                for thread in process.threads.values():
854                    thread_names[thread.tid] = thread.name
855        return thread_names
856
857    def _gen_lib_list(self) -> List[str]:
858        return [modify_text_for_html(lib.name) for lib in self.libs.libs]
859
860    def _gen_function_map(self) -> Dict[int, Any]:
861        func_map: Dict[int, Any] = {}
862        for func_id in sorted(self.functions.id_to_func):
863            function = self.functions.id_to_func[func_id]
864            func_data = {}
865            func_data['l'] = function.lib_id
866            func_data['f'] = modify_text_for_html(function.func_name)
867            if function.source_info:
868                func_data['s'] = function.source_info
869            if function.disassembly:
870                disassembly_list = []
871                for code, addr in function.disassembly:
872                    disassembly_list.append(
873                        [modify_text_for_html(code),
874                         hex_address_for_json(addr)])
875                func_data['d'] = disassembly_list
876            func_map[func_id] = func_data
877        return func_map
878
879    def _gen_sample_info(self) -> List[Dict[str, Any]]:
880        return [event.get_sample_info(self.gen_addr_hit_map_in_record_info)
881                for event in self.events.values()]
882
883    def _gen_source_files(self) -> List[Dict[str, Any]]:
884        source_files = sorted(self.source_files.path_to_source_files.values(),
885                              key=lambda x: x.file_id)
886        file_list = []
887        for source_file in source_files:
888            file_data = {}
889            if not source_file.real_path:
890                file_data['path'] = ''
891                file_data['code'] = {}
892            else:
893                file_data['path'] = source_file.real_path
894                code_map = {}
895                for line in source_file.line_to_code:
896                    code_map[line] = modify_text_for_html(source_file.line_to_code[line])
897                file_data['code'] = code_map
898            file_list.append(file_data)
899        return file_list
900
901
# Urls of the third-party css and javascript files linked from the <head> of the generated
# report. ReportGenerator looks them up by key.
URLS = {
    'jquery': 'https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js',
    'bootstrap4-css': 'https://stackpath.bootstrapcdn.com/bootstrap/4.1.2/css/bootstrap.min.css',
    'bootstrap4-popper':
        'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js',
    'bootstrap4': 'https://stackpath.bootstrapcdn.com/bootstrap/4.1.2/js/bootstrap.min.js',
    'dataTable': 'https://cdn.datatables.net/1.10.19/js/jquery.dataTables.min.js',
    'dataTable-bootstrap4': 'https://cdn.datatables.net/1.10.19/js/dataTables.bootstrap4.min.js',
    'dataTable-css': 'https://cdn.datatables.net/1.10.19/css/dataTables.bootstrap4.min.css',
    'gstatic-charts': 'https://www.gstatic.com/charts/loader.js',
}
913
914
class ReportGenerator(object):
    """ Write the html report file piece by piece through an HtmlWriter.

    The constructor opens the file and writes the <head> section. The write_*() methods
    add elements to <body>, and finish() closes the document and the file.
    """

    def __init__(self, html_path: Union[Path, str]):
        """ Open html_path and write <html>, the whole <head>, and the opening <body>. """
        self.hw = HtmlWriter(html_path)
        self.hw.open_tag('html')
        self.hw.open_tag('head')
        for css in ['bootstrap4-css', 'dataTable-css']:
            self.hw.open_tag('link', rel='stylesheet', type='text/css', href=URLS[css]).close_tag()
        for js in ['jquery', 'bootstrap4-popper', 'bootstrap4', 'dataTable', 'dataTable-bootstrap4',
                   'gstatic-charts']:
            self.hw.open_tag('script', src=URLS[js]).close_tag()

        self.hw.open_tag('script').add(
            "google.charts.load('current', {'packages': ['corechart', 'table']});").close_tag()
        self.hw.open_tag('style', type='text/css').add("""
            .colForLine { width: 50px; }
            .colForCount { width: 100px; }
            .tableCell { font-size: 17px; }
            .boldTableCell { font-weight: bold; font-size: 17px; }
            """).close_tag()
        self.hw.close_tag('head')
        self.hw.open_tag('body')

    def write_content_div(self):
        """ Write the empty <div id="report_content"> element. """
        self.hw.open_tag('div', id='report_content').close_tag()

    def write_record_data(self, record_data: Dict[str, Any]):
        """ Embed record_data as json in a <script id="record_data"> element.

        record_data: json serializable data, as returned by RecordData.gen_record_info().
        """
        self.hw.open_tag('script', id='record_data', type='application/json')
        # A '</script>' inside a string (e.g. a thread name or a file path) would end the
        # script element early and break the report. '<' can only appear inside json
        # strings, and '\u003c' is decoded back to '<' by any json parser, so escaping
        # every '<' keeps the data unchanged while making it safe to embed.
        self.hw.add(json.dumps(record_data).replace('<', '\\u003c'))
        self.hw.close_tag()

    def write_script(self):
        """ Write a <script> element holding the content of report_html.js. """
        self.hw.open_tag('script').add_file('report_html.js').close_tag()

    def finish(self):
        """ Close <body> and <html>, then close the output file. """
        self.hw.close_tag('body')
        self.hw.close_tag('html')
        self.hw.close()
953
954
def get_args() -> argparse.Namespace:
    """ Define the command line options of this script and parse the command line.

    Returns the parsed arguments. Besides the options added here, the namespace also
    carries whatever parser.add_report_lib_options() registers.
    """
    parser = BaseArgumentParser(description='report profiling data')
    parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help="""
                        Set profiling data file to report.""")
    parser.add_argument('-o', '--report_path', default='report.html', help='Set output html file')
    parser.add_argument('--min_func_percent', default=0.01, type=float, help="""
                        Set min percentage of functions shown in the report.
                        For example, when set to 0.01, only functions taking >= 0.01%% of total
                        event count are collected in the report.""")
    parser.add_argument('--min_callchain_percent', default=0.01, type=float, help="""
                        Set min percentage of callchains shown in the report.
                        It is used to limit nodes shown in the function flamegraph. For example,
                        when set to 0.01, only callchains taking >= 0.01%% of the event count of
                        the starting function are collected in the report.""")
    parser.add_argument('--add_source_code', action='store_true', help='Add source code.')
    parser.add_argument('--source_dirs', nargs='+', help='Source code directories.')
    parser.add_argument('--add_disassembly', action='store_true', help='Add disassembled code.')
    parser.add_argument('--binary_filter', nargs='+', help="""Annotate source code and disassembly
                        only for selected binaries.""")
    # os.cpu_count() returns None when the number of cpus can't be determined. Fall back to
    # one job, so the `args.jobs < 1` check in main() never compares None with an int.
    parser.add_argument(
        '-j', '--jobs', type=int, default=os.cpu_count() or 1,
        help='Use multithreading to speed up disassembly and source code annotation.')
    parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.')
    parser.add_argument('--no_browser', action='store_true', help="Don't open report in browser.")
    parser.add_argument('--aggregate-by-thread-name', action='store_true', help="""aggregate
                        samples by thread name instead of thread id. This is useful for
                        showing multiple perf.data generated for the same app.""")
    parser.add_report_lib_options()
    return parser.parse_args()
984
985
def main():
    """ Entry point: parse args, build the record data, and write the html report.

    Exits through log_exit() when binary_cache/ is missing but source code or disassembly
    is requested, when --add_source_code is given without --source_dirs, or when --jobs is
    less than 1.
    """
    sys.setrecursionlimit(MAX_CALLSTACK_LENGTH * 2 + 50)
    args = get_args()

    # 1. Process args.
    # Source code and disassembly annotation both need collected binaries and addr hit maps.
    need_annotation = args.add_source_code or args.add_disassembly
    binary_cache_path = 'binary_cache'
    if not os.path.isdir(binary_cache_path):
        if need_annotation:
            log_exit("""binary_cache/ doesn't exist. Can't add source code or disassembled code
                        without collected binaries. Please run binary_cache_builder.py to
                        collect binaries for current profiling data, or run app_profiler.py
                        without -nb option.""")
        binary_cache_path = None

    if args.add_source_code and not args.source_dirs:
        log_exit('--source_dirs is needed to add source code.')
    build_addr_hit_map = need_annotation
    # --ndk_path is declared with nargs=1, so a given value arrives as a one element list.
    ndk_path = args.ndk_path[0] if args.ndk_path else None
    if args.jobs < 1:
        log_exit('Invalid --jobs option.')

    # 2. Produce record data.
    record_data = RecordData(binary_cache_path, ndk_path, build_addr_hit_map)
    for record_file in args.record_file:
        record_data.load_record_file(record_file, args.report_lib_options)
    if args.aggregate_by_thread_name:
        record_data.aggregate_by_thread_name()
    record_data.limit_percents(args.min_func_percent, args.min_callchain_percent)

    def filter_lib(lib_name: str) -> bool:
        # Without --binary_filter every library is selected. Otherwise a library is
        # selected when any filter string is a substring of its name.
        selected = args.binary_filter
        return not selected or any(binary in lib_name for binary in selected)

    if args.add_source_code:
        record_data.add_source_code(args.source_dirs, filter_lib, args.jobs)
    if args.add_disassembly:
        record_data.add_disassembly(filter_lib, args.jobs)

    # 3. Generate report html.
    report_path = args.report_path
    report_generator = ReportGenerator(report_path)
    report_generator.write_script()
    report_generator.write_content_div()
    report_generator.write_record_data(record_data.gen_record_info())
    report_generator.finish()

    if not args.no_browser:
        open_report_in_browser(report_path)
    logging.info("Report generated at '%s'." % report_path)
1037
1038
# Generate the report only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()
1041