#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""annotate.py: annotate source files based on perf.data.
"""


import argparse
import os
import os.path
import shutil
import subprocess
import sys

from simpleperf_report_lib import *
from utils import *

class SourceLine(object):
    """One source position: a file, a function name and a line number.

    The *_key properties return hashable values identifying the file, the
    function within the file, and the line within the file.
    """
    def __init__(self, file, function, line):
        self.file, self.function, self.line = file, function, line

    @property
    def file_key(self):
        """Key of the source file: the file itself."""
        return self.file

    @property
    def function_key(self):
        """Key of the function: a (file, function) tuple."""
        return self.file, self.function

    @property
    def line_key(self):
        """Key of the line: a (file, line) tuple."""
        return self.file, self.line


# TODO: addr2line can't convert function_start_address to source_file:line
# very well for java code, because in the .debug_line section there is some
# distance between function_start_address and the address of the first
# instruction that can be mapped to a source line.
class Addr2Line(object):
    """Collect information of how to map [dso_name, vaddr] to [source_file:line].

    Usage: register every address of interest with add_addr(), call
    convert_addrs_to_lines() once to run addr2line on each binary, then look
    up the results with get_sources().
    """
    def __init__(self, addr2line_path, symfs_dir=None):
        """Select the addr2line executable to use.

        Args:
            addr2line_path: preferred addr2line executable. When it is empty
                or not available, find_tool_path('addr2line') is used, and
                log_exit() is called if that finds nothing either.
            symfs_dir: optional directory which is searched for binaries
                before the dso path itself (see _find_dso_path).
        """
        # dso_name -> {addr: None until converted, afterwards a list of
        # SourceLine whose file attribute is an id into self.file_list}.
        self.dso_dict = dict()
        # Source file table. It is rebuilt by convert_addrs_to_lines(), but is
        # created here too so get_sources() is safe to call at any time.
        # Id 0 is reserved for the unknown file ''.
        self.file_list = ['']
        self.file_dict = {'': 0}
        if addr2line_path and is_executable_available(addr2line_path):
            self.addr2line_path = addr2line_path
        else:
            self.addr2line_path = find_tool_path('addr2line')
            if not self.addr2line_path:
                log_exit("Can't find addr2line.")
        self.symfs_dir = symfs_dir


    def add_addr(self, dso_name, addr):
        """Register addr of dso_name for conversion; duplicates are ignored."""
        self.dso_dict.setdefault(dso_name, dict()).setdefault(addr, None)


    def convert_addrs_to_lines(self):
        """Run addr2line for every registered binary and store the results."""
        # store a list of source files
        self.file_list = ['']
        # map from file to id with file_list[id] == file
        self.file_dict = {'': 0}

        for dso_name, dso in self.dso_dict.items():
            self._convert_addrs_to_lines(dso_name, dso)
        self._combine_source_files()


    def _convert_addrs_to_lines(self, dso_name, dso):
        """Fill dso (addr -> list of SourceLine) using addr2line output.

        If the binary can't be found, a warning is logged and all addresses of
        this dso are dropped.
        """
        dso_path = self._find_dso_path(dso_name)
        if dso_path is None:
            log_warning("can't find dso '%s'" % dso_name)
            dso.clear()
            return
        addrs = sorted(dso.keys())
        addr_str = '\n'.join(['0x%x' % addr for addr in addrs])
        subproc = subprocess.Popen([self.addr2line_path, '-e', dso_path, '-aifC'],
                                   stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_str))
        stdoutdata = bytes_to_str(stdoutdata)
        stdoutdata = stdoutdata.strip().split('\n')
        if len(stdoutdata) < len(addrs):
            log_fatal("addr2line didn't output enough lines")
        # The output is parsed as: one line starting with "0x" per address,
        # followed by (function, file:line) line pairs for that address.
        addr_pos = 0
        out_pos = 0
        while addr_pos < len(addrs) and out_pos < len(stdoutdata):
            addr_line = stdoutdata[out_pos]
            out_pos += 1
            assert addr_line[:2] == "0x"
            assert out_pos < len(stdoutdata)
            source_lines = []
            while out_pos < len(stdoutdata) and stdoutdata[out_pos][:2] != "0x":
                function = stdoutdata[out_pos]
                out_pos += 1
                assert out_pos < len(stdoutdata)
                # Handle lines like "C:\Users\...\file:32".
                items = stdoutdata[out_pos].rsplit(':', 1)
                if len(items) != 2:
                    # Not a file:line entry. The line is deliberately not
                    # consumed, so it is re-examined by the loop condition and
                    # a following address line still ends this address.
                    continue
                (file, line) = items
                line = line.split()[0]  # Remove comments after line number
                out_pos += 1
                # '?' marks an unknown file or line; both are stored as 0.
                file = 0 if '?' in file else self._get_file_id(file)
                line = 0 if '?' in line else int(line)
                source_lines.append(SourceLine(file, function, line))
            dso[addrs[addr_pos]] = source_lines
            addr_pos += 1
        assert addr_pos == len(addrs)


    def _get_file_id(self, file):
        """Return the id of file in file_list, adding it when it is new."""
        file_id = self.file_dict.get(file)
        if file_id is None:
            file_id = len(self.file_list)
            self.file_list.append(file)
            self.file_dict[file] = file_id
        return file_id

    @staticmethod
    def _is_path_suffix(short_path, long_path):
        """Return True if short_path is the tail of long_path, starting at a
           path component boundary. The empty path never matches.
        """
        if not short_path or len(long_path) <= len(short_path):
            return False
        if not long_path.endswith(short_path):
            return False
        separators = ('/', os.sep)
        if short_path[0] in separators:
            return True
        return long_path[len(long_path) - len(short_path) - 1] in separators

    def _combine_source_files(self):
        """It is possible that addr2line gives us different names for the same
           file, like:
            /usr/local/.../src/main/jni/sudo-game-jni.cpp
            sudo-game-jni.cpp
           We'd better combine these two files. We can do it by combining
           source files with no conflicts in path: a file is merged into the
           longest path that ends with it at a directory boundary, so
           "jni/a.cpp" is merged into "/src/jni/a.cpp" but not into
           "/src/myjni/a.cpp".
        """
        # Collect files having the same filename.
        filename_dict = dict()
        for file in self.file_list:
            index = max(file.rfind('/'), file.rfind(os.sep))
            filename_dict.setdefault(file[index+1:], []).append(file)

        # Combine files having the same filename and having no conflicts in path.
        for files in filename_dict.values():
            if len(files) == 1:
                continue
            for file in files:
                to_file = file
                # Test if we can merge file with another file having a longer
                # path.
                for f in files:
                    if len(f) > len(to_file) and self._is_path_suffix(file, f):
                        to_file = f
                if to_file != file:
                    from_id = self.file_dict[file]
                    to_id = self.file_dict[to_file]
                    self.file_list[from_id] = self.file_list[to_id]


    def get_sources(self, dso_name, addr):
        """Return a list of SourceLine (with file names, not ids) for addr.

        The list is empty when the dso or the address is unknown, or when the
        address has not been converted.
        """
        dso = self.dso_dict.get(dso_name)
        if dso is None:
            return []
        # An address which was added but not converted is stored as None.
        item = dso.get(addr) or []
        return [SourceLine(self.file_list[source.file], source.function, source.line)
                for source in item]


    def _find_dso_path(self, dso):
        """Return the host path of the binary for dso, or None.

        Only names starting with '/' (except '//anon') are looked up, first
        below symfs_dir when it is set, then at the path itself.
        """
        if not dso.startswith('/') or dso == '//anon':
            return None
        if self.symfs_dir:
            dso_path = os.path.join(self.symfs_dir, dso[1:])
            if os.path.isfile(dso_path):
                return dso_path
        if os.path.isfile(dso):
            return dso
        return None


class Period(object):
    """Event count of a line, a function, a source file, or a binary.

       It has two parts:
         period: the event count occurred when running the item itself.
         acc_period: the accumulated event count occurred when running the
                     item and the functions called by it.
    """
    def __init__(self, period=0, acc_period=0):
        self.period, self.acc_period = period, acc_period


    def __iadd__(self, other):
        """Add both counts of other to this object in place."""
        self.acc_period += other.acc_period
        self.period += other.period
        return self


class DsoPeriod(object):
    """Event count accumulated for one shared library."""
    def __init__(self, dso_name):
        self.period = Period()
        self.dso_name = dso_name


    def add_period(self, period):
        """Accumulate period into the total of this library."""
        self.period += period


class FilePeriod(object):
    """Event counts of one source file, and of its lines and functions."""
    def __init__(self, file):
        self.file = file
        self.period = Period()
        # line number -> Period of that line.
        self.line_dict = {}
        # function name -> [function start line, Period of that function].
        self.function_dict = {}


    def add_period(self, period):
        """Accumulate period into the total of the file."""
        self.period += period


    def add_line_period(self, line, period):
        """Accumulate period into the entry of line, creating it if needed."""
        line_period = self.line_dict.get(line)
        if line_period is None:
            line_period = Period()
            self.line_dict[line] = line_period
        line_period += period


    def add_function_period(self, function_name, function_start_line, period):
        """Accumulate period into the entry of function_name.

        The start line is recorded when the entry is created; -1 is stored
        when function_start_line is None.
        """
        entry = self.function_dict.get(function_name)
        if not entry:
            start_line = -1 if function_start_line is None else function_start_line
            entry = [start_line, Period()]
            self.function_dict[function_name] = entry
        entry[1] += period


class SourceFileAnnotator(object):
    """Group code for annotating source files.

    annotate() reads the configured perf.data files, maps the sampled
    addresses to source lines with addr2line, writes a summary file, and
    writes annotated copies of the source files it can find.
    """
    def __init__(self, config):
        """Validate config and prepare the output directory.

        Args:
            config: dict with keys 'perf_data_list', 'source_dirs',
                'comm_filters', 'pid_filters', 'tid_filters', 'dso_filters'
                and 'addr2line_path'. The key 'annotate_dest_dir' is set here.

        An existing 'annotated_files' directory is removed and recreated.
        """
        # check config variables
        config_names = ['perf_data_list', 'source_dirs', 'comm_filters',
                        'pid_filters', 'tid_filters', 'dso_filters', 'addr2line_path']
        for name in config_names:
            if name not in config:
                log_exit('config [%s] is missing' % name)
        symfs_dir = 'binary_cache'
        if not os.path.isdir(symfs_dir):
            symfs_dir = None
        kallsyms = 'binary_cache/kallsyms'
        if not os.path.isfile(kallsyms):
            kallsyms = None
        # Check for missing source dirs before iterating, so an unset value
        # is reported with this message.
        if not config['source_dirs']:
            log_exit('Please set source directories.')
        for source_dir in config['source_dirs']:
            if not os.path.isdir(source_dir):
                log_exit('[source_dirs] "%s" is not a dir' % source_dir)

        # init member variables
        self.config = config
        self.symfs_dir = symfs_dir
        self.kallsyms = kallsyms
        # Each filter is None when it is not configured, meaning "accept all".
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        config['annotate_dest_dir'] = 'annotated_files'
        output_dir = config['annotate_dest_dir']
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        self.addr2line = Addr2Line(self.config['addr2line_path'], symfs_dir)


    def annotate(self):
        """Run all annotation steps in order."""
        self._collect_addrs()
        self._convert_addrs_to_lines()
        self._generate_periods()
        self._write_summary()
        self._collect_source_files()
        self._annotate_files()


    def _open_report_lib(self, perf_data):
        """Return a ReportLib reading perf_data, set up with symfs/kallsyms."""
        lib = ReportLib()
        lib.SetRecordFile(perf_data)
        if self.symfs_dir:
            lib.SetSymfs(self.symfs_dir)
        if self.kallsyms:
            lib.SetKallsymsFile(self.kallsyms)
        return lib


    def _read_samples(self):
        """Yield (sample, symbols) for each sample passing the sample filters.

        symbols holds the symbol of the sample followed by the symbols of its
        callchain entries. Each pair is yielded before the next sample is
        read. The ReportLib of each file is closed even if reading or the
        consumer of the generator fails.
        """
        for perf_data in self.config['perf_data_list']:
            lib = self._open_report_lib(perf_data)
            try:
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    if not self._filter_sample(sample):
                        continue
                    symbols = [lib.GetSymbolOfCurrentSample()]
                    callchain = lib.GetCallChainOfCurrentSample()
                    for i in range(callchain.nr):
                        symbols.append(callchain.entries[i].symbol)
                    yield sample, symbols
            finally:
                lib.Close()


    def _collect_addrs(self):
        """Read perf.data, collect all addresses we need to convert to
           source file:line.
        """
        for _, symbols in self._read_samples():
            for symbol in symbols:
                if self._filter_symbol(symbol):
                    self.addr2line.add_addr(symbol.dso_name, symbol.vaddr_in_file)
                    self.addr2line.add_addr(symbol.dso_name, symbol.symbol_addr)


    def _filter_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter:
            if sample.thread_comm not in self.comm_filter:
                return False
        if self.pid_filter:
            if sample.pid not in self.pid_filter:
                return False
        if self.tid_filter:
            if sample.tid not in self.tid_filter:
                return False
        return True


    def _filter_symbol(self, symbol):
        """Return true if the symbol belongs to a selected binary."""
        if not self.dso_filter or symbol.dso_name in self.dso_filter:
            return True
        return False


    def _convert_addrs_to_lines(self):
        """Convert all collected addresses to source lines."""
        self.addr2line.convert_addrs_to_lines()


    def _generate_periods(self):
        """read perf.data, collect Period for all types:
            binaries, source files, functions, lines.
        """
        self.period = 0
        self.dso_periods = dict()
        self.file_periods = dict()
        for sample, symbols in self._read_samples():
            # Each sample has a callchain, but its period is only used once
            # to add period for each function/source_line/source_file/binary.
            # For example, if more than one entry in the callchain hits a
            # function, the event count of that function is only increased once.
            # Otherwise, we may get periods > 100%.
            is_sample_used = False
            used_dso_dict = dict()
            used_file_dict = dict()
            used_function_dict = dict()
            used_line_dict = dict()
            period = Period(sample.period, sample.period)
            for i, symbol in enumerate(symbols):
                if i == 1:
                    # Callchain entries only get the accumulated period.
                    period = Period(0, sample.period)
                if not self._filter_symbol(symbol):
                    continue
                is_sample_used = True
                # Add period to dso.
                self._add_dso_period(symbol.dso_name, period, used_dso_dict)
                # Add period to source file.
                sources = self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file)
                for source in sources:
                    if source.file:
                        self._add_file_period(source, period, used_file_dict)
                        # Add period to line.
                        if source.line:
                            self._add_line_period(source, period, used_line_dict)
                # Add period to function.
                sources = self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr)
                for source in sources:
                    if source.file:
                        self._add_file_period(source, period, used_file_dict)
                        if source.function:
                            self._add_function_period(source, period, used_function_dict)

            if is_sample_used:
                self.period += sample.period


    def _add_dso_period(self, dso_name, period, used_dso_dict):
        """Add period to dso_name, at most once per used_dso_dict."""
        if dso_name not in used_dso_dict:
            used_dso_dict[dso_name] = True
            dso_period = self.dso_periods.get(dso_name)
            if dso_period is None:
                dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
            dso_period.add_period(period)


    def _add_file_period(self, source, period, used_file_dict):
        """Add period to the file of source, at most once per used_file_dict."""
        if source.file_key not in used_file_dict:
            used_file_dict[source.file_key] = True
            file_period = self.file_periods.get(source.file)
            if file_period is None:
                file_period = self.file_periods[source.file] = FilePeriod(source.file)
            file_period.add_period(period)


    def _add_line_period(self, source, period, used_line_dict):
        """Add period to the line of source, at most once per used_line_dict."""
        if source.line_key not in used_line_dict:
            used_line_dict[source.line_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_line_period(source.line, period)


    def _add_function_period(self, source, period, used_function_dict):
        """Add period to the function of source, at most once per
           used_function_dict.
        """
        if source.function_key not in used_function_dict:
            used_function_dict[source.function_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_function_period(source.function, source.line, period)


    def _write_summary(self):
        """Write the 'summary' file: periods of binaries and source files,
           then for each file the periods of its functions and lines.
        """
        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
        with open(summary, 'w') as f:
            f.write('total period: %d\n\n' % self.period)
            dso_periods = sorted(self.dso_periods.values(),
                                 key=lambda x: x.period.acc_period, reverse=True)
            for dso_period in dso_periods:
                f.write('dso %s: %s\n' % (dso_period.dso_name,
                                          self._get_percentage_str(dso_period.period)))
            f.write('\n')

            file_periods = sorted(self.file_periods.values(),
                                  key=lambda x: x.period.acc_period, reverse=True)
            for file_period in file_periods:
                f.write('file %s: %s\n' % (file_period.file,
                                           self._get_percentage_str(file_period.period)))
            for file_period in file_periods:
                f.write('\n\n%s: %s\n' % (file_period.file,
                                          self._get_percentage_str(file_period.period)))
                values = []
                for func_name in file_period.function_dict.keys():
                    func_start_line, period = file_period.function_dict[func_name]
                    values.append((func_name, func_start_line, period))
                values = sorted(values, key=lambda x: x[2].acc_period, reverse=True)
                for value in values:
                    f.write('\tfunction (%s): line %d, %s\n' % (
                        value[0], value[1], self._get_percentage_str(value[2])))
                f.write('\n')
                for line in sorted(file_period.line_dict.keys()):
                    f.write('\tline %d: %s\n' % (
                        line, self._get_percentage_str(file_period.line_dict[line])))


    def _get_percentage_str(self, period, short=False):
        """Format period as percentages of the total, in long or short form."""
        s = 'acc_p: %f%%, p: %f%%' if short else 'accumulated_period: %f%%, period: %f%%'
        return s % self._get_percentage(period)


    def _get_percentage(self, period):
        """Return (acc_period, period) of period as percentages of the total
           period, or (0, 0) when the total period is 0.
        """
        if self.period == 0:
            return (0, 0)
        acc_p = 100.0 * period.acc_period / self.period
        p = 100.0 * period.period / self.period
        return (acc_p, p)


    def _collect_source_files(self):
        """Build source_file_dict: file name -> paths of all files with that
           name and a source file suffix below the source directories.
        """
        self.source_file_dict = dict()
        source_file_suffix = ['h', 'c', 'cpp', 'cc', 'java', 'kt']
        for source_dir in self.config['source_dirs']:
            for root, _, files in os.walk(source_dir):
                for file in files:
                    dot_pos = file.rfind('.')
                    # A name without '.' has no suffix, so a file that is
                    # merely named 'c' or 'java' is not a source file.
                    if dot_pos == -1 or file[dot_pos+1:] not in source_file_suffix:
                        continue
                    entry = self.source_file_dict.get(file)
                    if entry is None:
                        entry = self.source_file_dict[file] = []
                    entry.append(os.path.join(root, file))


    def _find_source_file(self, file):
        """Return the collected source path sharing the longest trailing
           part with file, or None if no collected file has the same name.
           A warning is logged when several paths match equally well.
        """
        filename = file[file.rfind(os.sep)+1:]
        source_files = self.source_file_dict.get(filename)
        if source_files is None:
            return None
        best_path_count = 0
        best_path = None
        best_suffix_len = 0
        for path in source_files:
            # Common prefix of the reversed strings == common suffix.
            suffix_len = len(os.path.commonprefix((path[::-1], file[::-1])))
            if suffix_len > best_suffix_len:
                best_suffix_len = suffix_len
                best_path = path
                best_path_count = 1
            elif suffix_len == best_suffix_len:
                best_path_count += 1
        if best_path_count > 1:
            log_warning('multiple source for %s, select %s' % (file, best_path))
        return best_path


    def _annotate_files(self):
        """Annotate Source files: add acc_period/period for each source file.
           1. Annotate java source files, which have $JAVA_SRC_ROOT prefix.
           2. Annotate c++ source files.
        """
        dest_dir = self.config['annotate_dest_dir']
        for key in self.file_periods.keys():
            is_java = False
            if key.startswith('$JAVA_SRC_ROOT/'):
                path = key[len('$JAVA_SRC_ROOT/'):]
                items = path.split('/')
                path = os.sep.join(items)
                from_path = self._find_source_file(path)
                to_path = os.path.join(dest_dir, 'java', path)
                is_java = True
            elif key.startswith('/') and os.path.isfile(key):
                path = key
                from_path = path
                to_path = os.path.join(dest_dir, path[1:])
            elif is_windows() and ':\\' in key and os.path.isfile(key):
                from_path = key
                to_path = os.path.join(dest_dir, key.replace(':\\', '\\'))
            else:
                path = key[1:] if key.startswith('/') else key
                # Change path on device to path on host
                path = os.sep.join(path.split('/'))
                from_path = self._find_source_file(path)
                to_path = os.path.join(dest_dir, path)
            if from_path is None:
                log_warning("can't find source file for path %s" % key)
                continue
            self._annotate_file(from_path, to_path, self.file_periods[key], is_java)


    def _annotate_file(self, from_path, to_path, file_period, is_java):
        """Annotate a source file.

        Annotate a source file in three steps:
          1. In the first line, show periods of this file.
          2. For each function, show periods of this function.
          3. For each line not hitting the same line as functions, show
             line periods.
        """
        log_info('annotate file %s' % from_path)
        with open(from_path, 'r') as rf:
            lines = rf.readlines()

        # line number (1-based) -> annotation text. Later assignments win:
        # a function overrides a line, and the file summary overrides line 1.
        annotates = dict()
        for line in file_period.line_dict.keys():
            annotates[line] = self._get_percentage_str(file_period.line_dict[line], True)
        for func_name in file_period.function_dict.keys():
            func_start_line, period = file_period.function_dict[func_name]
            if func_start_line == -1:
                continue
            # For java files the annotation goes one line above the recorded
            # start line.
            line = func_start_line - 1 if is_java else func_start_line
            annotates[line] = '[func] ' + self._get_percentage_str(period, True)
        annotates[1] = '[file] ' + self._get_percentage_str(file_period.period, True)

        max_annotate_cols = max(len(text) for text in annotates.values())

        # Same width as an annotation wrapped in '/* ' and ' */'.
        empty_annotate = ' ' * (max_annotate_cols + 6)

        dirname = os.path.dirname(to_path)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(to_path, 'w') as wf:
            for line_no, text in enumerate(lines, 1):
                annotate = annotates.get(line_no)
                if annotate is None:
                    # Blank source lines get no padding.
                    annotate = empty_annotate if text.strip() else ''
                else:
                    annotate = '/* ' + annotate + (
                        ' ' * (max_annotate_cols - len(annotate))) + ' */'
                wf.write(annotate)
                wf.write(text)

def main():
    """Parse the command line, build the config and annotate source files."""
    parser = argparse.ArgumentParser(description=(
        'Annotate source files based on profiling data. It reads line information from\n'
        'binary_cache generated by app_profiler.py or binary_cache_builder.py, and\n'
        'generate annotated source files in annotated_files directory.'))
    # Options which take one or more values and may be given several times.
    multi_value_options = [
        (('-i', '--perf_data_list'), 'The paths of profiling data. Default is perf.data.'),
        (('-s', '--source_dirs'), 'Directories to find source files.'),
        (('--comm',), 'Use samples only in threads with selected names.'),
        (('--pid',), 'Use samples only in processes with selected process ids.'),
        (('--tid',), 'Use samples only in threads with selected thread ids.'),
        (('--dso',), 'Use samples only in selected binaries.'),
    ]
    for flags, help_text in multi_value_options:
        parser.add_argument(*flags, nargs='+', action='append', help=help_text)
    parser.add_argument('--addr2line', help='Set the path of addr2line.')

    args = parser.parse_args()
    perf_data_list = flatten_arg_list(args.perf_data_list)
    if not perf_data_list:
        perf_data_list.append('perf.data')
    config = {'perf_data_list': perf_data_list}
    config['source_dirs'] = flatten_arg_list(args.source_dirs)
    config['comm_filters'] = flatten_arg_list(args.comm)
    config['pid_filters'] = flatten_arg_list(args.pid)
    config['tid_filters'] = flatten_arg_list(args.tid)
    config['dso_filters'] = flatten_arg_list(args.dso)
    config['addr2line_path'] = args.addr2line

    SourceFileAnnotator(config).annotate()
    log_info('annotate finish successfully, please check result in annotated_files/.')


if __name__ == '__main__':
    main()