• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18import argparse
19import datetime
20import json
21import os
22import subprocess
23import sys
24import tempfile
25
26from simpleperf_report_lib import ReportLib
27from utils import *
28
29
class HtmlWriter(object):
    """Streaming writer for an HTML file that tracks the currently open tags."""

    def __init__(self, output_path):
        self.fh = open(output_path, 'w')
        self.tag_stack = []  # names of tags opened but not yet closed, innermost last

    def close(self):
        """Close the output file."""
        self.fh.close()

    def open_tag(self, tag, **attrs):
        """Write `<tag key="value" ...>` and remember the tag as open.

        Returns self so calls can be chained.
        """
        rendered_attrs = ''.join(' %s="%s"' % (name, attrs[name]) for name in attrs)
        self.fh.write('<%s%s>' % (tag, rendered_attrs))
        self.tag_stack.append(tag)
        return self

    def close_tag(self, tag=None):
        """Close the innermost open tag.

        When `tag` is given, it is asserted to be the innermost open tag.
        """
        if tag:
            assert tag == self.tag_stack[-1]
        innermost = self.tag_stack.pop()
        self.fh.write('</%s>\n' % innermost)

    def add(self, text):
        """Write raw text to the output and return self for chaining."""
        self.fh.write(text)
        return self

    def add_file(self, file_path):
        """Append the content of a file located relative to the script directory."""
        full_path = os.path.join(get_script_dir(), file_path)
        with open(full_path, 'r') as source:
            content = source.read()
        self.add(content)
        return self
61
def modify_text_for_html(text):
    """Escape `text` so it can be embedded in HTML as literal text.

    '&' is replaced first so that the ampersands introduced by the '&gt;' and
    '&lt;' replacements are not escaped a second time.
    """
    return text.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')
64
class EventScope(object):
    """Samples of one event type, grouped by process."""

    def __init__(self, name):
        self.name = name
        self.processes = {}  # map from pid to ProcessScope
        self.sample_count = 0
        self.event_count = 0

    def get_process(self, pid):
        """Return the ProcessScope for pid, creating it on first use."""
        if pid not in self.processes:
            self.processes[pid] = ProcessScope(pid)
        return self.processes[pid]

    def get_sample_info(self, gen_addr_hit_map):
        """Return the eventInfo dict for this event."""
        return {
            'eventName': self.name,
            'eventCount': self.event_count,
            'processes': [scope.get_sample_info(gen_addr_hit_map)
                          for scope in self.processes.values()],
        }
86
87
class ProcessScope(object):
    """Samples of one process, grouped by thread."""

    def __init__(self, pid):
        self.pid = pid
        self.name = ''
        self.event_count = 0
        self.threads = {}  # map from tid to ThreadScope

    def get_thread(self, tid, thread_name):
        """Return the ThreadScope for tid, creating it on first use.

        The thread name is refreshed on every call; the name of the thread
        whose tid equals the pid is also used as the process name.
        """
        if tid not in self.threads:
            self.threads[tid] = ThreadScope(tid)
        scope = self.threads[tid]
        scope.name = thread_name
        if tid == self.pid:
            self.name = thread_name
        return scope

    def get_sample_info(self, gen_addr_hit_map):
        """Return the processInfo dict for this process."""
        return {
            'pid': self.pid,
            'eventCount': self.event_count,
            'threads': [scope.get_sample_info(gen_addr_hit_map)
                        for scope in self.threads.values()],
        }
112
113
class ThreadScope(object):
    """Samples of one thread, grouped by shared library."""

    def __init__(self, tid):
        self.tid = tid
        self.name = ''
        self.event_count = 0
        self.libs = {}  # map from lib_id to LibScope

    def add_callstack(self, event_count, callstack, build_addr_hit_map):
        """ Record one sample's callstack in the per-function call graphs.

            callstack is a list of tuple (lib_id, func_id, addr).
            For each i > 0, callstack[i] calls callstack[i-1].
        """
        depth = len(callstack)

        # Pass 1 (leaf to root): reverse call graphs and address hits.
        seen_func_ids = set()
        for index, (lib_id, func_id, addr) in enumerate(callstack):
            # A recursive function appears several times; count it only once.
            if func_id in seen_func_ids:
                continue
            seen_func_ids.add(func_id)

            lib = self.libs.get(lib_id)
            if lib is None:
                lib = self.libs[lib_id] = LibScope(lib_id)
            function = lib.get_function(func_id)
            is_leaf = index == 0
            if is_leaf:
                # Only the sampled (leaf) function owns the self event count.
                lib.event_count += event_count
                function.sample_count += 1
            function.add_reverse_callchain(callstack, index + 1, depth, event_count)

            if build_addr_hit_map:
                self_count = event_count if is_leaf else 0
                function.build_addr_hit_map(addr, self_count, event_count)

        # Pass 2 (root to leaf): forward call graphs.
        seen_func_ids = set()
        for index in reversed(range(depth)):
            lib_id, func_id, _ = callstack[index]
            # A recursive function appears several times; count it only once.
            if func_id in seen_func_ids:
                continue
            seen_func_ids.add(func_id)
            function = self.libs.get(lib_id).get_function(func_id)
            function.add_callchain(callstack, index - 1, -1, event_count)

    def get_sample_info(self, gen_addr_hit_map):
        """Return the threadInfo dict for this thread."""
        return {
            'tid': self.tid,
            'eventCount': self.event_count,
            'libs': [scope.gen_sample_info(gen_addr_hit_map)
                     for scope in self.libs.values()],
        }
162
163
class LibScope(object):
    """Samples of one shared library within a thread, grouped by function."""

    def __init__(self, lib_id):
        self.lib_id = lib_id
        self.event_count = 0
        self.functions = {}  # map from func_id to FunctionScope.

    def get_function(self, func_id):
        """Return the FunctionScope for func_id, creating it on first use."""
        if func_id not in self.functions:
            self.functions[func_id] = FunctionScope(func_id)
        return self.functions[func_id]

    def gen_sample_info(self, gen_addr_hit_map):
        """Return the libInfo dict for this library."""
        return {
            'libId': self.lib_id,
            'eventCount': self.event_count,
            'functions': [scope.gen_sample_info(gen_addr_hit_map)
                          for scope in self.functions.values()],
        }
184
185
class FunctionScope(object):
    """Samples hitting one function: call graphs plus address/line hit maps."""

    def __init__(self, func_id):
        self.sample_count = 0
        self.call_graph = CallNode(func_id)
        self.reverse_call_graph = CallNode(func_id)
        self.addr_hit_map = None  # map from addr to [event_count, subtree_event_count].
        # map from (source_file_id, line) to [event_count, subtree_event_count].
        self.line_hit_map = None

    def add_callchain(self, callchain, start, end, event_count):
        """Walk callchain[start], callchain[start-1], ... (stopping before end)
        down the call graph and add event_count to the final node."""
        node = self.call_graph
        for index in range(start, end, -1):
            callee_id = callchain[index][1]
            node = node.get_child(callee_id)
        node.event_count += event_count

    def add_reverse_callchain(self, callchain, start, end, event_count):
        """Walk callchain[start:end] down the reverse call graph and add
        event_count to the final node."""
        node = self.reverse_call_graph
        for index in range(start, end):
            caller_id = callchain[index][1]
            node = node.get_child(caller_id)
        node.event_count += event_count

    def build_addr_hit_map(self, addr, event_count, subtree_event_count):
        """Accumulate self and subtree event counts for an address."""
        if self.addr_hit_map is None:
            self.addr_hit_map = {}
        if addr in self.addr_hit_map:
            counts = self.addr_hit_map[addr]
            counts[0] += event_count
            counts[1] += subtree_event_count
        else:
            self.addr_hit_map[addr] = [event_count, subtree_event_count]

    def build_line_hit_map(self, source_file_id, line, event_count, subtree_event_count):
        """Accumulate self and subtree event counts for a source line."""
        if self.line_hit_map is None:
            self.line_hit_map = {}
        key = (source_file_id, line)
        if key in self.line_hit_map:
            counts = self.line_hit_map[key]
            counts[0] += event_count
            counts[1] += subtree_event_count
        else:
            self.line_hit_map[key] = [event_count, subtree_event_count]

    def update_subtree_event_count(self):
        """Recompute subtree counts of both graphs; return the larger root count."""
        forward_total = self.call_graph.update_subtree_event_count()
        reverse_total = self.reverse_call_graph.update_subtree_event_count()
        return max(forward_total, reverse_total)

    def limit_callchain_percent(self, min_callchain_percent, hit_func_ids):
        """Prune call graph edges below min_callchain_percent of this function's
        subtree event count, collecting surviving function ids in hit_func_ids."""
        threshold = min_callchain_percent * 0.01 * self.call_graph.subtree_event_count
        for graph in (self.call_graph, self.reverse_call_graph):
            graph.cut_edge(threshold, hit_func_ids)

    def gen_sample_info(self, gen_addr_hit_map):
        """Return the funcInfo dict for this function."""
        info = {
            'c': self.sample_count,
            'g': self.call_graph.gen_sample_info(),
            'rg': self.reverse_call_graph.gen_sample_info(),
        }
        if self.line_hit_map:
            info['s'] = [{'f': key[0], 'l': key[1], 'e': counts[0], 's': counts[1]}
                         for key, counts in self.line_hit_map.items()]
        if gen_addr_hit_map and self.addr_hit_map:
            info['a'] = [{'a': addr,
                          'e': self.addr_hit_map[addr][0],
                          's': self.addr_hit_map[addr][1]}
                         for addr in sorted(self.addr_hit_map)]
        return info
258
259
class CallNode(object):
    """A node in a (reverse) call graph, keyed by function id."""

    def __init__(self, func_id):
        self.event_count = 0
        self.subtree_event_count = 0
        self.func_id = func_id
        self.children = {}  # map from func_id to CallNode

    def get_child(self, func_id):
        """Return the child node for func_id, creating it on first use."""
        if func_id not in self.children:
            self.children[func_id] = CallNode(func_id)
        return self.children[func_id]

    def update_subtree_event_count(self):
        """Recompute subtree_event_count for this node and all descendants."""
        total = self.event_count
        for child in self.children.values():
            total += child.update_subtree_event_count()
        self.subtree_event_count = total
        return total

    def cut_edge(self, min_limit, hit_func_ids):
        """Drop children whose subtree count is below min_limit, recursively.

        The func_id of every node that survives is added to hit_func_ids.
        """
        hit_func_ids.add(self.func_id)
        doomed = [func_id for func_id, child in self.children.items()
                  if child.subtree_event_count < min_limit]
        for func_id in doomed:
            del self.children[func_id]
        for child in self.children.values():
            child.cut_edge(min_limit, hit_func_ids)

    def gen_sample_info(self):
        """Return the CallNode dict for this node and its descendants."""
        return {
            'e': self.event_count,
            's': self.subtree_event_count,
            'f': self.func_id,
            'c': [child.gen_sample_info() for child in self.children.values()],
        }
299
300
class LibSet(object):
    """ Collection of shared libraries used in perf.data. """
    def __init__(self):
        self.lib_name_to_id = {}
        self.lib_id_to_name = []

    def get_lib_id(self, lib_name):
        """Return the id of lib_name, assigning the next free id on first use."""
        if lib_name in self.lib_name_to_id:
            return self.lib_name_to_id[lib_name]
        new_id = len(self.lib_id_to_name)
        self.lib_id_to_name.append(lib_name)
        self.lib_name_to_id[lib_name] = new_id
        return new_id

    def get_lib_name(self, lib_id):
        """Return the library name registered under lib_id."""
        return self.lib_id_to_name[lib_id]
317
318
class Function(object):
    """ Represent a function in a shared library. """
    def __init__(self, lib_id, func_name, func_id, start_addr, addr_len):
        # Identity of the function.
        self.lib_id, self.func_name, self.func_id = lib_id, func_name, func_id
        # Address range occupied by the function.
        self.start_addr, self.addr_len = start_addr, addr_len
        # Filled in later, when source code / disassembly is requested.
        self.source_info = None
        self.disassembly = None
329
330
class FunctionSet(object):
    """ Collection of functions used in perf.data. """
    def __init__(self):
        self.name_to_func = {}  # map from (lib_id, symbol_name) to Function
        self.id_to_func = {}  # map from func_id to Function

    def get_func_id(self, lib_id, symbol):
        """Return the id of the function named by symbol in lib_id,
        registering a new Function on first use."""
        name = symbol.symbol_name
        key = (lib_id, name)
        known = self.name_to_func.get(key)
        if known is not None:
            return known.func_id
        new_id = len(self.id_to_func)
        created = Function(lib_id, name, new_id, symbol.symbol_addr, symbol.symbol_len)
        self.name_to_func[key] = created
        self.id_to_func[new_id] = created
        return new_id

    def trim_functions(self, left_func_ids):
        """ Remove functions excepts those in left_func_ids. """
        stale_ids = [function.func_id for function in self.name_to_func.values()
                     if function.func_id not in left_func_ids]
        for func_id in stale_ids:
            del self.id_to_func[func_id]
        # name_to_func will not be used.
        self.name_to_func = None
355
356
class SourceFile(object):
    """ A source file containing source code hit by samples. """
    def __init__(self, file_id, abstract_path):
        self.file_id = file_id
        self.abstract_path = abstract_path  # path reported by addr2line
        self.real_path = None  # file path in the file system
        self.requested_lines = set()
        self.line_to_code = {}  # map from line to code in that line.

    def request_lines(self, start_line, end_line):
        """Mark lines start_line..end_line (inclusive) as needed."""
        self.requested_lines.update(range(start_line, end_line + 1))

    def add_source_code(self, real_path):
        """Read real_path and keep the text of every requested line it contains."""
        self.real_path = real_path
        with open(real_path, 'r') as source:
            all_lines = source.readlines()
        line_count = len(all_lines)
        for line_no in self.requested_lines:
            # Requested ranges may extend past either end of the file.
            if 1 <= line_no <= line_count:
                self.line_to_code[line_no] = all_lines[line_no - 1]
        # requested_lines is no longer used.
        self.requested_lines = None
379
380
class SourceFileSet(object):
    """ Collection of source files. """
    def __init__(self):
        self.path_to_source_files = {}  # map from file path to SourceFile.

    def get_source_file(self, file_path):
        """Return the SourceFile for file_path, creating it on first use.

        File ids are assigned sequentially in order of creation.
        """
        known = self.path_to_source_files.get(file_path)
        if known is not None:
            return known
        next_id = len(self.path_to_source_files)
        created = SourceFile(next_id, file_path)
        self.path_to_source_files[file_path] = created
        return created

    def load_source_code(self, source_dirs):
        """Load source text for every file that can be located under source_dirs."""
        searcher = SourceFileSearcher(source_dirs)
        for source_file in self.path_to_source_files.values():
            located = searcher.get_real_path(source_file.abstract_path)
            if not located:
                continue
            source_file.add_source_code(located)
399
400
class SourceFileSearcher(object):
    """ Find source file paths in the file system.
        The file paths reported by addr2line are the paths stored in debug sections
        of shared libraries. And we need to convert them to file paths in the file
        system. It is done in below steps:
        1. Collect all file paths under the provided source_dirs. The suffix of a
           source file should contain one of below:
            h: for C/C++ header files.
            c: for C/C++ source files.
            java: for Java source files.
            kt: for Kotlin source files.
        2. Given an abstract_path reported by addr2line, select the best real path
           as below:
           2.1 Find all real paths with the same file name as the abstract path.
           2.2 Select the real path having the longest common suffix with the abstract path.
    """

    SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++',
                        '.c', '.cc', '.C', '.cxx', '.cpp', '.c++',
                        '.java', '.kt'}

    @classmethod
    def is_source_filename(cls, filename):
        """Return True if filename has one of the known source file extensions."""
        ext = os.path.splitext(filename)[1]
        return ext in cls.SOURCE_FILE_EXTS

    def __init__(self, source_dirs):
        # Map from filename to a list of reversed directory path containing filename.
        self.filename_to_rparents = {}
        self._collect_paths(source_dirs)

    def _collect_paths(self, source_dirs):
        """Index every source file under source_dirs by its file name."""
        for source_dir in source_dirs:
            for parent, _, file_names in os.walk(source_dir):
                # Reverse the directory lazily, only if it holds a source file.
                rparent = None
                for file_name in file_names:
                    if not self.is_source_filename(file_name):
                        continue
                    if rparent is None:
                        rparent = parent[::-1]
                    self.filename_to_rparents.setdefault(file_name, []).append(rparent)

    def get_real_path(self, abstract_path):
        """Return the indexed path that best matches abstract_path, or None.

        Candidates are the indexed files with the same file name; the winner is
        the one whose directory shares the longest common suffix with the
        directory of abstract_path (the first such candidate on ties).
        """
        abstract_path = abstract_path.replace('/', os.sep)
        abstract_parent, file_name = os.path.split(abstract_path)
        real_rparents = self.filename_to_rparents.get(file_name)
        if not real_rparents:
            return None
        abstract_rparent = abstract_parent[::-1]
        # Paths are stored reversed, so a common prefix here is a common suffix
        # of the original directory paths.
        best_rparent = max(
            real_rparents,
            key=lambda rparent: len(os.path.commonprefix((rparent, abstract_rparent))))
        return os.path.join(best_rparent[::-1], file_name)
462
463
class RecordData(object):

    """RecordData reads perf.data, and generates data used by report.js in json format.
        All generated items are listed as below:
            1. recordTime: string
            2. machineType: string
            3. androidVersion: string
            4. recordCmdline: string
            5. totalSamples: int
            6. processNames: map from pid to processName.
            7. threadNames: map from tid to threadName.
            8. libList: an array of libNames, indexed by libId.
            9. functionMap: map from functionId to funcData.
                funcData = {
                    l: libId
                    f: functionName
                    s: [sourceFileId, startLine, endLine] [optional]
                    d: [(disassembly, addr)] [optional]
                }

            10.  sampleInfo = [eventInfo]
                eventInfo = {
                    eventName
                    eventCount
                    processes: [processInfo]
                }
                processInfo = {
                    pid
                    eventCount
                    threads: [threadInfo]
                }
                threadInfo = {
                    tid
                    eventCount
                    libs: [libInfo],
                }
                libInfo = {
                    libId,
                    eventCount,
                    functions: [funcInfo]
                }
                funcInfo = {
                    c: sampleCount
                    g: callGraph
                    rg: reverseCallgraph
                    s: [sourceCodeInfo] [optional]
                    a: [addrInfo] (sorted by addrInfo.addr) [optional]
                }
                callGraph and reverseCallGraph are both of type CallNode.
                callGraph shows how a function calls other functions.
                reverseCallGraph shows how a function is called by other functions.
                CallNode {
                    e: selfEventCount
                    s: subTreeEventCount
                    f: functionId
                    c: [CallNode] # children
                }

                sourceCodeInfo {
                    f: sourceFileId
                    l: line
                    e: eventCount
                    s: subtreeEventCount
                }

                addrInfo {
                    a: addr
                    e: eventCount
                    s: subtreeEventCount
                }

            11. sourceFiles: an array of sourceFile, indexed by sourceFileId.
                sourceFile {
                    path
                    code:  # a map from line to code for that line.
                }
    """

    def __init__(self, binary_cache_path, ndk_path, build_addr_hit_map):
        self.binary_cache_path = binary_cache_path
        self.ndk_path = ndk_path
        self.build_addr_hit_map = build_addr_hit_map
        self.meta_info = None
        self.cmdline = None
        self.arch = None
        self.events = {}  # map from event name to EventScope
        self.libs = LibSet()
        self.functions = FunctionSet()
        self.total_samples = 0
        self.source_files = SourceFileSet()
        self.gen_addr_hit_map_in_record_info = False

    def _iter_libs(self):
        """Yield every LibScope of every thread of every process of every event."""
        for event in self.events.values():
            for process in event.processes.values():
                for thread in process.threads.values():
                    for lib in thread.libs.values():
                        yield lib

    def load_record_file(self, record_file):
        """Read all samples of record_file into the event/process/thread scopes."""
        lib = ReportLib()
        try:
            lib.SetRecordFile(record_file)
            # If not showing ip for unknown symbols, the percent of the unknown symbol may be
            # accumulated to very big, and ranks first in the sample table.
            lib.ShowIpForUnknownSymbol()
            if self.binary_cache_path:
                lib.SetSymfs(self.binary_cache_path)
            self.meta_info = lib.MetaInfo()
            self.cmdline = lib.GetRecordCmd()
            self.arch = lib.GetArch()
            while True:
                raw_sample = lib.GetNextSample()
                if not raw_sample:
                    break
                raw_event = lib.GetEventOfCurrentSample()
                symbol = lib.GetSymbolOfCurrentSample()
                callchain = lib.GetCallChainOfCurrentSample()
                event = self._get_event(raw_event.name)
                self.total_samples += 1
                event.sample_count += 1
                event.event_count += raw_sample.period
                process = event.get_process(raw_sample.pid)
                process.event_count += raw_sample.period
                thread = process.get_thread(raw_sample.tid, raw_sample.thread_comm)
                thread.event_count += raw_sample.period

                # callstack[0] is the sampled function, followed by its callers.
                lib_id = self.libs.get_lib_id(symbol.dso_name)
                func_id = self.functions.get_func_id(lib_id, symbol)
                callstack = [(lib_id, func_id, symbol.vaddr_in_file)]
                for i in range(callchain.nr):
                    symbol = callchain.entries[i].symbol
                    lib_id = self.libs.get_lib_id(symbol.dso_name)
                    func_id = self.functions.get_func_id(lib_id, symbol)
                    callstack.append((lib_id, func_id, symbol.vaddr_in_file))
                thread.add_callstack(raw_sample.period, callstack, self.build_addr_hit_map)
        finally:
            # Release the report lib even when reading fails part-way.
            lib.Close()

        for lib_scope in self._iter_libs():
            for function in lib_scope.functions.values():
                function.update_subtree_event_count()

    def limit_percents(self, min_func_percent, min_callchain_percent):
        """Drop functions and call graph edges below the given percentages.

        min_func_percent is relative to the event count of each event;
        min_callchain_percent is relative to each function's subtree event count.
        Functions no longer referenced are removed from the FunctionSet.
        """
        hit_func_ids = set()
        for event in self.events.values():
            min_limit = event.event_count * min_func_percent * 0.01
            for process in event.processes.values():
                for thread in process.threads.values():
                    for lib in thread.libs.values():
                        to_del_func_ids = []
                        for func_id, function in lib.functions.items():
                            if function.call_graph.subtree_event_count < min_limit:
                                to_del_func_ids.append(func_id)
                            else:
                                function.limit_callchain_percent(min_callchain_percent,
                                                                 hit_func_ids)
                        for func_id in to_del_func_ids:
                            del lib.functions[func_id]
        self.functions.trim_functions(hit_func_ids)

    def _get_event(self, event_name):
        """Return the EventScope for event_name, creating it on first use."""
        if event_name not in self.events:
            self.events[event_name] = EventScope(event_name)
        return self.events[event_name]

    def add_source_code(self, source_dirs):
        """ Collect source code information:
            1. Find line ranges for each function in FunctionSet.
            2. Find line for each addr in FunctionScope.addr_hit_map.
            3. Collect needed source code in SourceFileSet.
        """
        addr2line = Addr2Nearestline(self.ndk_path, self.binary_cache_path)
        # Request line range for each function.
        for function in self.functions.id_to_func.values():
            if function.func_name == 'unknown':
                continue
            lib_name = self.libs.get_lib_name(function.lib_id)
            addr2line.add_addr(lib_name, function.start_addr, function.start_addr)
            addr2line.add_addr(lib_name, function.start_addr,
                               function.start_addr + function.addr_len - 1)
        # Request line for each addr in FunctionScope.addr_hit_map.
        for lib in self._iter_libs():
            lib_name = self.libs.get_lib_name(lib.lib_id)
            for function in lib.functions.values():
                # addr_hit_map stays None when no address hits were recorded.
                if not function.addr_hit_map:
                    continue
                func_addr = self.functions.id_to_func[
                    function.call_graph.func_id].start_addr
                for addr in function.addr_hit_map:
                    addr2line.add_addr(lib_name, func_addr, addr)
        addr2line.convert_addrs_to_lines()

        # Set line range for each function.
        for function in self.functions.id_to_func.values():
            if function.func_name == 'unknown':
                continue
            dso = addr2line.get_dso(self.libs.get_lib_name(function.lib_id))
            start_source = addr2line.get_addr_source(dso, function.start_addr)
            end_source = addr2line.get_addr_source(
                dso, function.start_addr + function.addr_len - 1)
            if not start_source or not end_source:
                continue
            start_file_path, start_line = start_source[-1]
            end_file_path, end_line = end_source[-1]
            if start_file_path != end_file_path or start_line > end_line:
                continue
            source_file = self.source_files.get_source_file(start_file_path)
            source_file.request_lines(start_line, end_line)
            function.source_info = (source_file.file_id, start_line, end_line)

        # Build FunctionScope.line_hit_map.
        for lib in self._iter_libs():
            dso = addr2line.get_dso(self.libs.get_lib_name(lib.lib_id))
            for function in lib.functions.values():
                # addr_hit_map stays None when no address hits were recorded.
                if not function.addr_hit_map:
                    continue
                for addr, count_info in function.addr_hit_map.items():
                    source = addr2line.get_addr_source(dso, addr)
                    if not source:
                        continue
                    for file_path, line in source:
                        source_file = self.source_files.get_source_file(file_path)
                        # Show [line - 5, line + 5] of the line hit by a sample.
                        source_file.request_lines(line - 5, line + 5)
                        function.build_line_hit_map(source_file.file_id, line,
                                                    count_info[0], count_info[1])

        # Collect needed source code in SourceFileSet.
        self.source_files.load_source_code(source_dirs)

    def add_disassembly(self):
        """ Collect disassembly information:
            1. Use objdump to collect disassembly for each function in FunctionSet.
            2. Set flag to dump addr_hit_map when generating record info.
        """
        objdump = Objdump(self.ndk_path, self.binary_cache_path)
        for function in self.functions.id_to_func.values():
            if function.func_name == 'unknown':
                continue
            lib_name = self.libs.get_lib_name(function.lib_id)
            code = objdump.disassemble_code(lib_name, function.start_addr, function.addr_len)
            function.disassembly = code

        self.gen_addr_hit_map_in_record_info = True

    def gen_record_info(self):
        """Return the complete record info dict described in the class docstring."""
        record_info = {}
        timestamp = self.meta_info.get('timestamp')
        if timestamp:
            t = datetime.datetime.fromtimestamp(int(timestamp))
        else:
            # No recorded timestamp: fall back to the report generation time.
            t = datetime.datetime.now()
        record_info['recordTime'] = t.strftime('%Y-%m-%d (%A) %H:%M:%S')

        product_props = self.meta_info.get('product_props')
        machine_type = self.arch
        if product_props:
            manufacturer, model, name = product_props.split(':')
            machine_type = '%s (%s) by %s, arch %s' % (model, name, manufacturer, self.arch)
        record_info['machineType'] = machine_type
        record_info['androidVersion'] = self.meta_info.get('android_version', '')
        record_info['recordCmdline'] = self.cmdline
        record_info['totalSamples'] = self.total_samples
        record_info['processNames'] = self._gen_process_names()
        record_info['threadNames'] = self._gen_thread_names()
        record_info['libList'] = self._gen_lib_list()
        record_info['functionMap'] = self._gen_function_map()
        record_info['sampleInfo'] = self._gen_sample_info()
        record_info['sourceFiles'] = self._gen_source_files()
        return record_info

    def _gen_process_names(self):
        """Return a map from pid to process name over all events."""
        process_names = {}
        for event in self.events.values():
            for process in event.processes.values():
                process_names[process.pid] = process.name
        return process_names

    def _gen_thread_names(self):
        """Return a map from tid to thread name over all events."""
        thread_names = {}
        for event in self.events.values():
            for process in event.processes.values():
                for thread in process.threads.values():
                    thread_names[thread.tid] = thread.name
        return thread_names

    def _gen_lib_list(self):
        """Return the HTML-escaped library names, indexed by lib id."""
        return [modify_text_for_html(x) for x in self.libs.lib_id_to_name]

    def _gen_function_map(self):
        """Return a map from func_id to funcData for all remaining functions."""
        func_map = {}
        for func_id in sorted(self.functions.id_to_func):
            function = self.functions.id_to_func[func_id]
            func_data = {}
            func_data['l'] = function.lib_id
            func_data['f'] = modify_text_for_html(function.func_name)
            if function.source_info:
                func_data['s'] = function.source_info
            if function.disassembly:
                func_data['d'] = [[modify_text_for_html(code), addr]
                                  for code, addr in function.disassembly]
            func_map[func_id] = func_data
        return func_map

    def _gen_sample_info(self):
        """Return the list of eventInfo dicts, one per event."""
        return [event.get_sample_info(self.gen_addr_hit_map_in_record_info)
                for event in self.events.values()]

    def _gen_source_files(self):
        """Return the list of sourceFile dicts, ordered by source file id."""
        source_files = sorted(self.source_files.path_to_source_files.values(),
                              key=lambda x: x.file_id)
        file_list = []
        for source_file in source_files:
            file_data = {}
            if not source_file.real_path:
                # The file was not found under the source dirs: keep an empty
                # placeholder so that list indexes still match file ids.
                file_data['path'] = ''
                file_data['code'] = {}
            else:
                file_data['path'] = source_file.real_path
                file_data['code'] = {
                    line: modify_text_for_html(code)
                    for line, code in source_file.line_to_code.items()}
            file_list.append(file_data)
        return file_list
791
792
class ReportGenerator(object):
    """Writes the html report piece by piece through an HtmlWriter.

    The constructor opens the <html> element, writes the complete <head>
    section and opens <body>. The write_* methods append to the body in the
    order they are called, and finish() closes the open elements and the file.
    """

    def __init__(self, html_path):
        """Open the report at html_path and write everything up to <body>.

        Args:
            html_path: path of the html file to write.
        """
        self.hw = HtmlWriter(html_path)
        self.hw.open_tag('html')
        self.hw.open_tag('head')

        # Stylesheets.
        self.hw.open_tag(
            'link', rel='stylesheet', type='text/css',
            href='https://code.jquery.com/ui/1.12.0/themes/smoothness/jquery-ui.css'
        ).close_tag()
        self.hw.open_tag(
            'link', rel='stylesheet', type='text/css',
            href='https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css'
        ).close_tag()

        # Scripts: Google Charts, jQuery, jQuery UI and DataTables.
        self.hw.open_tag('script', src='https://www.gstatic.com/charts/loader.js').close_tag()
        self.hw.open_tag('script').add(
            "google.charts.load('current', {'packages': ['corechart', 'table']});").close_tag()
        self.hw.open_tag('script', src='https://code.jquery.com/jquery-3.2.1.js').close_tag()
        self.hw.open_tag(
            'script', src='https://code.jquery.com/ui/1.12.1/jquery-ui.js').close_tag()
        self.hw.open_tag(
            'script',
            src='https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js').close_tag()
        self.hw.open_tag(
            'script',
            src='https://cdn.datatables.net/1.10.16/js/dataTables.jqueryui.min.js').close_tag()

        # Inline styles used by the report tables.
        self.hw.open_tag('style', type='text/css').add("""
            .colForLine { width: 50px; }
            .colForCount { width: 100px; }
            .tableCell { font-size: 17px; }
            .boldTableCell { font-weight: bold; font-size: 17px; }
            """).close_tag()
        self.hw.close_tag('head')
        self.hw.open_tag('body')
        self.record_info = {}

    def write_content_div(self):
        """Write the empty <div id="report_content"> element."""
        self.hw.open_tag('div', id='report_content').close_tag()

    def write_record_data(self, record_data):
        """Embed record_data as JSON in a <script id="record_data"> element.

        Args:
            record_data: a JSON-serializable object.
        """
        # In json.dumps() output '<' can only appear inside string values, so
        # swapping it for the JSON escape \u003c leaves the parsed data unchanged
        # while guaranteeing that the payload never contains '</script>' or
        # '<!--', either of which would end or corrupt the enclosing element.
        payload = json.dumps(record_data).replace('<', '\\u003c')
        self.hw.open_tag('script', id='record_data', type='application/json')
        self.hw.add(payload)
        self.hw.close_tag()

    def write_flamegraph(self, flamegraph):
        """Append the flamegraph text to the report without modification.

        Args:
            flamegraph: html text to insert at the current position.
        """
        self.hw.add(flamegraph)

    def write_script(self):
        """Write a <script> element holding the contents of report_html.js."""
        self.hw.open_tag('script').add_file('report_html.js').close_tag()

    def finish(self):
        """Close the <body> and <html> elements, then close the output file."""
        self.hw.close_tag('body')
        self.hw.close_tag('html')
        self.hw.close()
844
845
def gen_flamegraph(record_file):
    """Run inferno.py on record_file and return the flamegraph it produces.

    inferno.py is started with the current Python interpreter and told to write
    an embedded flamegraph into a temporary file, which is read back and then
    removed.

    Args:
        record_file: path of the profiling data file handed to inferno.py.

    Returns:
        The text inferno.py wrote to its output file.

    Raises:
        subprocess.CalledProcessError: if inferno.py exits with a non-zero status.
    """
    fd, flamegraph_path = tempfile.mkstemp()
    # Only the path is needed here; inferno.py opens the file by itself.
    os.close(fd)
    try:
        inferno_script_path = os.path.join(get_script_dir(), 'inferno', 'inferno.py')
        subprocess.check_call([sys.executable, inferno_script_path, '-sc', '-o',
                               flamegraph_path, '--record_file', record_file,
                               '--embedded_flamegraph', '--no_browser'])
        with open(flamegraph_path, 'r') as fh:
            return fh.read()
    finally:
        # mkstemp() never deletes the file on its own, so clean it up even when
        # inferno.py fails or the output cannot be read.
        remove(flamegraph_path)
856
857
def main():
    """Parse the command line, build the record data and write the html report.

    The steps are: validate the arguments, load every record file into a
    RecordData, optionally attach source code and disassembly, write the report
    (content div, record data, script, flamegraph) and finally open it in a
    browser unless --no_browser was given.
    """
    arg_parser = argparse.ArgumentParser(description='report profiling data')
    add_arg = arg_parser.add_argument
    add_arg('-i', '--record_file', nargs='+', default=['perf.data'], help="""
                        Set profiling data file to report. Default is perf.data.""")
    add_arg('-o', '--report_path', default='report.html', help="""
                        Set output html file. Default is report.html.""")
    add_arg('--min_func_percent', default=0.01, type=float, help="""
                        Set min percentage of functions shown in the report.
                        For example, when set to 0.01, only functions taking >= 0.01%% of total
                        event count are collected in the report. Default is 0.01.""")
    add_arg('--min_callchain_percent', default=0.01, type=float, help="""
                        Set min percentage of callchains shown in the report.
                        It is used to limit nodes shown in the function flamegraph. For example,
                        when set to 0.01, only callchains taking >= 0.01%% of the event count of
                        the starting function are collected in the report. Default is 0.01.""")
    add_arg('--add_source_code', action='store_true', help='Add source code.')
    add_arg('--source_dirs', nargs='+', help='Source code directories.')
    add_arg('--add_disassembly', action='store_true', help='Add disassembled code.')
    add_arg('--ndk_path', nargs=1, help='Find tools in the ndk path.')
    add_arg('--no_browser', action='store_true', help="Don't open report in browser.")
    args = arg_parser.parse_args()

    # 1. Process args.
    # Source code and disassembly both need the address hit map and the binaries.
    build_addr_hit_map = args.add_source_code or args.add_disassembly
    binary_cache_path = 'binary_cache' if os.path.isdir('binary_cache') else None
    if binary_cache_path is None and build_addr_hit_map:
        log_exit("""binary_cache/ doesn't exist. Can't add source code or disassembled code
                        without collected binaries. Please run binary_cache_builder.py to
                        collect binaries for current profiling data, or run app_profiler.py
                        without -nb option.""")

    if args.add_source_code and not args.source_dirs:
        log_exit('--source_dirs is needed to add source code.')
    # --ndk_path uses nargs=1, so a given value arrives as a one-element list.
    ndk_path = args.ndk_path[0] if args.ndk_path else None

    # 2. Produce record data.
    data = RecordData(binary_cache_path, ndk_path, build_addr_hit_map)
    for path in args.record_file:
        data.load_record_file(path)
    data.limit_percents(args.min_func_percent, args.min_callchain_percent)
    if args.add_source_code:
        data.add_source_code(args.source_dirs)
    if args.add_disassembly:
        data.add_disassembly()

    # 3. Generate report html.
    report = ReportGenerator(args.report_path)
    report.write_content_div()
    report.write_record_data(data.gen_record_info())
    report.write_script()
    # TODO: support multiple perf.data in flamegraph.
    flamegraph_source = args.record_file[0]
    if len(args.record_file) > 1:
        log_warning('flamegraph will only be shown for %s' % flamegraph_source)
    report.write_flamegraph(gen_flamegraph(flamegraph_source))
    report.finish()

    if not args.no_browser:
        open_report_in_browser(args.report_path)
    log_info("Report generated at '%s'." % args.report_path)
920
921
# Generate the report only when this file is run as a script, not on import.
if __name__ == '__main__':
    main()
924