• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""annotate.py: annotate source files based on perf.data.
19"""
20
21import logging
22import os
23import os.path
24import shutil
25from texttable import Texttable
26from typing import Dict, Union
27
28from simpleperf_report_lib import ReportLib
29from simpleperf_utils import (
30    Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir, flatten_arg_list, is_windows,
31    log_exit, ReadElf, SourceFileSearcher)
32
33
class SourceLine(object):
    """A source location: a file, a function in it, and a line number.

    The three *_key properties expose the values under which this location
    is identified at file, function and line granularity.
    """

    def __init__(self, file_id, function, line):
        self.file, self.function, self.line = file_id, function, line

    @property
    def file_key(self):
        """The file identifier itself."""
        return self.file

    @property
    def function_key(self):
        """A (file, function) tuple."""
        return self.file, self.function

    @property
    def line_key(self):
        """A (file, line) tuple."""
        return self.file, self.line
51
52
class Addr2Line(object):
    """collect information of how to map [dso_name, vaddr] to [source_file:line].

    Wraps an Addr2Nearestline instance for the address lookup and a
    SourceFileSearcher for mapping reported source paths to real paths.
    """

    def __init__(self, ndk_path, binary_cache_path, source_dirs):
        readelf = ReadElf(ndk_path)
        finder = BinaryFinder(binary_cache_path, readelf)
        self.addr2line = Addr2Nearestline(ndk_path, finder, True)
        self.source_searcher = SourceFileSearcher(source_dirs)

    def add_addr(self, dso_path: str, build_id: str, func_addr: int, addr: int):
        """Register an address to be resolved by convert_addrs_to_lines()."""
        self.addr2line.add_addr(dso_path, build_id, func_addr, addr)

    def convert_addrs_to_lines(self):
        """Resolve all registered addresses, passing os.cpu_count() as the job count."""
        job_count = os.cpu_count()
        self.addr2line.convert_addrs_to_lines(jobs=job_count)

    def get_sources(self, dso_path, addr):
        """Return a list of SourceLine for addr in dso_path.

        The list is empty when the dso is unknown or the address has no
        source information. Each reported file path is replaced by the path
        found by the source searcher when there is one.
        """
        dso = self.addr2line.get_dso(dso_path)
        entries = self.addr2line.get_addr_source(dso, addr) if dso else None
        if not entries:
            return []
        real_path_of = self.source_searcher.get_real_path
        return [SourceLine(real_path_of(path) or path, function_name, line_no)
                for (path, line_no, function_name) in entries]
82
83
class Period(object):
    """event count information. It can be used to represent event count
       of a line, a function, a source file, or a binary. It contains two
       parts: period and acc_period.
       When used for a line, period is the event count occurred when running
       that line, acc_period is the accumulated event count occurred when
       running that line and functions called by that line. Same thing applies
       when it is used for a function, a source file, or a binary.
    """

    def __init__(self, period=0, acc_period=0):
        self.period, self.acc_period = period, acc_period

    def __iadd__(self, other):
        """Add other's two counts into this object in place and return self."""
        # Keep the two updates as separate statements so a failure on the
        # second one leaves the first already applied, as before.
        self.period += other.period
        self.acc_period += other.acc_period
        return self
102
103
class DsoPeriod(object):
    """Period for each shared library"""

    def __init__(self, dso_name):
        # Start from a zeroed Period; counts are accumulated via add_period().
        self.period = Period()
        self.dso_name = dso_name

    def add_period(self, period):
        """Accumulate period into this dso's total."""
        self.period += period
113
114
class FilePeriod(object):
    """Period for each source file"""

    def __init__(self, file_id):
        self.file = file_id
        self.period = Period()
        # line number -> Period of that line.
        self.line_dict = {}
        # function name -> [function start line, Period of that function].
        self.function_dict = {}

    def add_period(self, period):
        """Accumulate period into the file-level total."""
        self.period += period

    def add_line_period(self, line, period):
        """Accumulate period into the entry for line, creating it on first use."""
        line_period = self.line_dict.get(line)
        if line_period is None:
            line_period = Period()
            self.line_dict[line] = line_period
        line_period += period

    def add_function_period(self, function_name, function_start_line, period):
        """Accumulate period into the entry for function_name.

        The start line is recorded only when the entry is first created; a
        start line of None is stored as -1.
        """
        entry = self.function_dict.get(function_name)
        if not entry:
            start_line = -1 if function_start_line is None else function_start_line
            entry = [start_line, Period()]
            self.function_dict[function_name] = entry
        entry[1] += period
142
143
144class SourceFileAnnotator(object):
145    """group code for annotating source files"""
146
147    def __init__(self, config):
148        # check config variables
149        config_names = ['perf_data_list', 'source_dirs', 'dso_filters', 'ndk_path']
150        for name in config_names:
151            if name not in config:
152                log_exit('config [%s] is missing' % name)
153        symfs_dir = 'binary_cache'
154        if not os.path.isdir(symfs_dir):
155            symfs_dir = None
156        kallsyms = 'binary_cache/kallsyms'
157        if not os.path.isfile(kallsyms):
158            kallsyms = None
159
160        # init member variables
161        self.config = config
162        self.symfs_dir = symfs_dir
163        self.kallsyms = kallsyms
164        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
165
166        config['annotate_dest_dir'] = 'annotated_files'
167        output_dir = config['annotate_dest_dir']
168        if os.path.isdir(output_dir):
169            shutil.rmtree(output_dir)
170        os.makedirs(output_dir)
171
172        self.addr2line = Addr2Line(self.config['ndk_path'], symfs_dir, config.get('source_dirs'))
173        self.period = 0
174        self.dso_periods = {}
175        self.file_periods = {}
176
177    def annotate(self):
178        self._collect_addrs()
179        self._convert_addrs_to_lines()
180        self._generate_periods()
181        self._write_summary()
182        self._annotate_files()
183
184    def _collect_addrs(self):
185        """Read perf.data, collect all addresses we need to convert to
186           source file:line.
187        """
188        for perf_data in self.config['perf_data_list']:
189            lib = ReportLib()
190            lib.SetRecordFile(perf_data)
191            if self.symfs_dir:
192                lib.SetSymfs(self.symfs_dir)
193            if self.kallsyms:
194                lib.SetKallsymsFile(self.kallsyms)
195            lib.SetReportOptions(self.config['report_lib_options'])
196            while True:
197                sample = lib.GetNextSample()
198                if sample is None:
199                    lib.Close()
200                    break
201                symbols = []
202                symbols.append(lib.GetSymbolOfCurrentSample())
203                callchain = lib.GetCallChainOfCurrentSample()
204                for i in range(callchain.nr):
205                    symbols.append(callchain.entries[i].symbol)
206                for symbol in symbols:
207                    if self._filter_symbol(symbol):
208                        build_id = lib.GetBuildIdForPath(symbol.dso_name)
209                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
210                                                symbol.vaddr_in_file)
211                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
212                                                symbol.symbol_addr)
213
214    def _filter_symbol(self, symbol):
215        if not self.dso_filter or symbol.dso_name in self.dso_filter:
216            return True
217        return False
218
    def _convert_addrs_to_lines(self):
        """Resolve the addresses registered with self.addr2line to source lines."""
        self.addr2line.convert_addrs_to_lines()
221
222    def _generate_periods(self):
223        """read perf.data, collect Period for all types:
224            binaries, source files, functions, lines.
225        """
226        for perf_data in self.config['perf_data_list']:
227            lib = ReportLib()
228            lib.SetRecordFile(perf_data)
229            if self.symfs_dir:
230                lib.SetSymfs(self.symfs_dir)
231            if self.kallsyms:
232                lib.SetKallsymsFile(self.kallsyms)
233            lib.SetReportOptions(self.config['report_lib_options'])
234            while True:
235                sample = lib.GetNextSample()
236                if sample is None:
237                    lib.Close()
238                    break
239                self._generate_periods_for_sample(lib, sample)
240
241    def _generate_periods_for_sample(self, lib, sample):
242        symbols = []
243        symbols.append(lib.GetSymbolOfCurrentSample())
244        callchain = lib.GetCallChainOfCurrentSample()
245        for i in range(callchain.nr):
246            symbols.append(callchain.entries[i].symbol)
247        # Each sample has a callchain, but its period is only used once
248        # to add period for each function/source_line/source_file/binary.
249        # For example, if more than one entry in the callchain hits a
250        # function, the event count of that function is only increased once.
251        # Otherwise, we may get periods > 100%.
252        is_sample_used = False
253        used_dso_dict = {}
254        used_file_dict = {}
255        used_function_dict = {}
256        used_line_dict = {}
257        period = Period(sample.period, sample.period)
258        for j, symbol in enumerate(symbols):
259            if j == 1:
260                period = Period(0, sample.period)
261            if not self._filter_symbol(symbol):
262                continue
263            is_sample_used = True
264            # Add period to dso.
265            self._add_dso_period(symbol.dso_name, period, used_dso_dict)
266            # Add period to source file.
267            sources = self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file)
268            for source in sources:
269                if source.file:
270                    self._add_file_period(source, period, used_file_dict)
271                    # Add period to line.
272                    if source.line:
273                        self._add_line_period(source, period, used_line_dict)
274            # Add period to function.
275            sources = self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr)
276            for source in sources:
277                if source.file:
278                    self._add_file_period(source, period, used_file_dict)
279                    if source.function:
280                        self._add_function_period(source, period, used_function_dict)
281
282        if is_sample_used:
283            self.period += sample.period
284
285    def _add_dso_period(self, dso_name: str, period: Period, used_dso_dict: Dict[str, bool]):
286        if dso_name not in used_dso_dict:
287            used_dso_dict[dso_name] = True
288            dso_period = self.dso_periods.get(dso_name)
289            if dso_period is None:
290                dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
291            dso_period.add_period(period)
292
293    def _add_file_period(self, source, period, used_file_dict):
294        if source.file_key not in used_file_dict:
295            used_file_dict[source.file_key] = True
296            file_period = self.file_periods.get(source.file)
297            if file_period is None:
298                file_period = self.file_periods[source.file] = FilePeriod(source.file)
299            file_period.add_period(period)
300
301    def _add_line_period(self, source, period, used_line_dict):
302        if source.line_key not in used_line_dict:
303            used_line_dict[source.line_key] = True
304            file_period = self.file_periods[source.file]
305            file_period.add_line_period(source.line, period)
306
307    def _add_function_period(self, source, period, used_function_dict):
308        if source.function_key not in used_function_dict:
309            used_function_dict[source.function_key] = True
310            file_period = self.file_periods[source.file]
311            file_period.add_function_period(source.function, source.line, period)
312
313    def _write_summary(self):
314        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
315        with open(summary, 'w') as f:
316            f.write('total period: %d\n\n' % self.period)
317            self._write_dso_summary(f)
318            self._write_file_summary(f)
319
320            file_periods = sorted(self.file_periods.values(),
321                                  key=lambda x: x.period.acc_period, reverse=True)
322            for file_period in file_periods:
323                self._write_function_line_summary(f, file_period)
324
325    def _write_dso_summary(self, summary_fh):
326        dso_periods = sorted(self.dso_periods.values(),
327                             key=lambda x: x.period.acc_period, reverse=True)
328        table = Texttable(max_width=self.config['summary_width'])
329        table.set_cols_align(['l', 'l', 'l'])
330        table.add_row(['Total', 'Self', 'DSO'])
331        for dso_period in dso_periods:
332            total_str = self._get_period_str(dso_period.period.acc_period)
333            self_str = self._get_period_str(dso_period.period.period)
334            table.add_row([total_str, self_str, dso_period.dso_name])
335        print(table.draw(), file=summary_fh)
336        print(file=summary_fh)
337
338    def _write_file_summary(self, summary_fh):
339        file_periods = sorted(self.file_periods.values(),
340                              key=lambda x: x.period.acc_period, reverse=True)
341        table = Texttable(max_width=self.config['summary_width'])
342        table.set_cols_align(['l', 'l', 'l'])
343        table.add_row(['Total', 'Self', 'Source File'])
344        for file_period in file_periods:
345            total_str = self._get_period_str(file_period.period.acc_period)
346            self_str = self._get_period_str(file_period.period.period)
347            table.add_row([total_str, self_str, file_period.file])
348        print(table.draw(), file=summary_fh)
349        print(file=summary_fh)
350
351    def _write_function_line_summary(self, summary_fh, file_period: FilePeriod):
352        table = Texttable(max_width=self.config['summary_width'])
353        table.set_cols_align(['l', 'l', 'l'])
354        table.add_row(['Total', 'Self', 'Function/Line in ' + file_period.file])
355        values = []
356        for func_name in file_period.function_dict.keys():
357            func_start_line, period = file_period.function_dict[func_name]
358            values.append((func_name, func_start_line, period))
359        values.sort(key=lambda x: x[2].acc_period, reverse=True)
360        for func_name, func_start_line, period in values:
361            total_str = self._get_period_str(period.acc_period)
362            self_str = self._get_period_str(period.period)
363            name = func_name + ' (line %d)' % func_start_line
364            table.add_row([total_str, self_str, name])
365        for line in sorted(file_period.line_dict.keys()):
366            period = file_period.line_dict[line]
367            total_str = self._get_period_str(period.acc_period)
368            self_str = self._get_period_str(period.period)
369            name = 'line %d' % line
370            table.add_row([total_str, self_str, name])
371
372        print(table.draw(), file=summary_fh)
373        print(file=summary_fh)
374
375    def _get_period_str(self, period: Union[Period, int]) -> str:
376        if isinstance(period, Period):
377            return 'Total %s, Self %s' % (
378                self._get_period_str(period.acc_period),
379                self._get_period_str(period.period))
380        if self.config['raw_period'] or self.period == 0:
381            return str(period)
382        return '%.2f%%' % (100.0 * period / self.period)
383
384    def _annotate_files(self):
385        """Annotate Source files: add acc_period/period for each source file.
386           1. Annotate java source files, which have $JAVA_SRC_ROOT prefix.
387           2. Annotate c++ source files.
388        """
389        dest_dir = self.config['annotate_dest_dir']
390        for key in self.file_periods:
391            from_path = key
392            if not os.path.isfile(from_path):
393                logging.warning("can't find source file for path %s" % from_path)
394                continue
395            if from_path.startswith('/'):
396                to_path = os.path.join(dest_dir, from_path[1:])
397            elif is_windows() and ':\\' in from_path:
398                to_path = os.path.join(dest_dir, from_path.replace(':\\', os.sep))
399            else:
400                to_path = os.path.join(dest_dir, from_path)
401            is_java = from_path.endswith('.java')
402            self._annotate_file(from_path, to_path, self.file_periods[key], is_java)
403
404    def _annotate_file(self, from_path, to_path, file_period, is_java):
405        """Annotate a source file.
406
407        Annotate a source file in three steps:
408          1. In the first line, show periods of this file.
409          2. For each function, show periods of this function.
410          3. For each line not hitting the same line as functions, show
411             line periods.
412        """
413        logging.info('annotate file %s' % from_path)
414        with open(from_path, 'r') as rf:
415            lines = rf.readlines()
416
417        annotates = {}
418        for line in file_period.line_dict.keys():
419            annotates[line] = self._get_period_str(file_period.line_dict[line])
420        for func_name in file_period.function_dict.keys():
421            func_start_line, period = file_period.function_dict[func_name]
422            if func_start_line == -1:
423                continue
424            line = func_start_line - 1 if is_java else func_start_line
425            annotates[line] = '[func] ' + self._get_period_str(period)
426        annotates[1] = '[file] ' + self._get_period_str(file_period.period)
427
428        max_annotate_cols = 0
429        for key in annotates:
430            max_annotate_cols = max(max_annotate_cols, len(annotates[key]))
431
432        empty_annotate = ' ' * (max_annotate_cols + 6)
433
434        dirname = os.path.dirname(to_path)
435        if not os.path.isdir(dirname):
436            os.makedirs(dirname)
437        with open(to_path, 'w') as wf:
438            for line in range(1, len(lines) + 1):
439                annotate = annotates.get(line)
440                if annotate is None:
441                    if not lines[line-1].strip():
442                        annotate = ''
443                    else:
444                        annotate = empty_annotate
445                else:
446                    annotate = '/* ' + annotate + (
447                        ' ' * (max_annotate_cols - len(annotate))) + ' */'
448                wf.write(annotate)
449                wf.write(lines[line-1])
450
451
def main():
    """Parse the command line, build the config, and run the annotator."""
    parser = BaseArgumentParser(description="""
        Annotate source files based on profiling data. It reads line information from binary_cache
        generated by app_profiler.py or binary_cache_builder.py, and generate annotated source
        files in annotated_files directory.""")
    parser.add_argument('-i', '--perf_data_list', nargs='+', action='append', help="""
        The paths of profiling data. Default is perf.data.""")
    parser.add_argument('-s', '--source_dirs', type=extant_dir, nargs='+', action='append', help="""
        Directories to find source files.""")
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    parser.add_argument('--raw-period', action='store_true',
                        help='show raw period instead of percentage')
    parser.add_argument('--summary-width', type=int, default=80, help='max width of summary file')
    sample_filter_group = parser.add_argument_group('Sample filter options')
    sample_filter_group.add_argument('--dso', nargs='+', action='append', help="""
        Use samples only in selected binaries.""")
    parser.add_report_lib_options(sample_filter_group=sample_filter_group)
    args = parser.parse_args()

    # Fall back to perf.data when no profiling data file was given.
    perf_data_list = flatten_arg_list(args.perf_data_list)
    if not perf_data_list:
        perf_data_list.append('perf.data')

    config = {
        'perf_data_list': perf_data_list,
        'source_dirs': flatten_arg_list(args.source_dirs),
        'dso_filters': flatten_arg_list(args.dso),
        'ndk_path': args.ndk_path,
        'raw_period': args.raw_period,
        'summary_width': args.summary_width,
        'report_lib_options': args.report_lib_options,
    }

    SourceFileAnnotator(config).annotate()
    logging.info('annotate finish successfully, please check result in annotated_files/.')
485
486
# Run the annotator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
489