• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
19    used by pprof.
20
21  Example:
22    python app_profiler.py
23    python pprof_proto_generator.py
24    pprof -text pprof.profile
25"""
26
27from __future__ import print_function
28import argparse
29import os
30import os.path
31
32from simpleperf_report_lib import ReportLib
33from utils import Addr2Nearestline, bytes_to_str, extant_dir, find_tool_path, flatten_arg_list
34from utils import log_info, log_exit, str_to_bytes
35try:
36    import profile_pb2
37except ImportError:
38    log_exit('google.protobuf module is missing. Please install it first.')
39
def load_pprof_profile(filename):
    """Read a serialized pprof profile from a file.

    Args:
        filename: path of a file containing a serialized profile_pb2.Profile.

    Returns:
        The parsed profile_pb2.Profile message.
    """
    profile = profile_pb2.Profile()
    with open(filename, "rb") as f:
        # ParseFromString() consumes the binary wire format, so the bytes read
        # from the file are passed through untouched (no text decoding step).
        profile.ParseFromString(f.read())
    return profile
45
46
def store_pprof_profile(filename, profile):
    """Serialize a pprof profile and write it to a file.

    Args:
        filename: path of the file to create or overwrite.
        profile: message object providing SerializeToString().
    """
    with open(filename, 'wb') as f:
        # SerializeToString() already returns the binary payload, and the file
        # is opened in binary mode, so it is written without re-encoding.
        f.write(profile.SerializeToString())
50
51
class PprofProfilePrinter(object):
    """Print a human-readable dump of a pprof profile to stdout.

    Ids stored in the profile are resolved as 1-based positions in the
    matching repeated field: id N refers to element N - 1.
    """

    def __init__(self, profile):
        """Keep a reference to `profile` and to its string table."""
        self.profile = profile
        self.string_table = profile.string_table

    def show(self):
        """Print every section of the profile, one item per line."""
        p = self.profile
        sub_space = '  '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        # Count header added so this section matches all the other sections.
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, text in enumerate(p.string_table):
            print('string[%d]: %s' % (i, text))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('} // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print the string ids of a value type followed by the strings themselves."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print the locations, values and label slots of one sample."""
        sub_space = space + '  '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i in range(len(sample.label)):
            # The arguments must be applied with '%'; passing them as a second
            # print() argument would emit the raw format string and a tuple.
            print('%slabel[%d] = ' % (space, i))

    def show_location_id(self, location_id, space=''):
        """Print the location referenced by `location_id`; ids <= 0 print nothing."""
        if location_id <= 0:
            # Index (id - 1) would be negative and silently pick a wrong entry.
            return
        self.show_location(self.profile.location[location_id - 1], space)

    def show_location(self, location, space=''):
        """Print one location, its mapping and its line entries."""
        sub_space = space + '  '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping referenced by `mapping_id`; ids <= 0 print nothing."""
        if mapping_id <= 0:
            # Index (id - 1) would be negative and silently pick a wrong entry.
            return
        self.show_mapping(self.profile.mapping[mapping_id - 1], space)

    def show_mapping(self, mapping, space=''):
        """Print all fields of one mapping."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print one line entry and the function it refers to."""
        sub_space = space + '  '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function referenced by `function_id`; ids <= 0 print nothing."""
        if function_id <= 0:
            # Index (id - 1) would be negative and silently pick a wrong entry.
            return
        self.show_function(self.profile.function[function_id - 1], space)

    def show_function(self, function, space=''):
        """Print all fields of one function."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def string(self, string_id):
        """Return the string table entry at index `string_id`."""
        return self.string_table[string_id]
163
164
class Sample(object):
    """Ordered location ids plus values accumulated per sample type id."""

    def __init__(self):
        self.values = {}
        self.location_ids = []

    def add_location_id(self, location_id):
        """Append one location id to the end of the list."""
        self.location_ids.append(location_id)

    def add_value(self, sample_type_id, value):
        """Add `value` to the running total kept for `sample_type_id`."""
        self.values.setdefault(sample_type_id, 0)
        self.values[sample_type_id] += value

    def add_values(self, values):
        """Merge a {sample_type_id: value} dict into the running totals."""
        for sample_type_id in values:
            self.add_value(sample_type_id, values[sample_type_id])

    @property
    def key(self):
        """Hashable identity of the sample: its location ids as a tuple."""
        return tuple(self.location_ids)
184
185
class Location(object):
    """Record for one location, identified by its mapping id and address."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.mapping_id = mapping_id
        self.address = address
        self.vaddr_in_dso = vaddr_in_dso
        self.lines = []
        # -1 marks a location that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Hashable identity of the location: (mapping_id, address)."""
        return self.mapping_id, self.address
198
199
class Line(object):
    """Pair of a function id and a line number; both start at 0."""

    def __init__(self):
        self.function_id, self.line = 0, 0
205
206
class Mapping(object):
    """Record for one memory mapping: address range, file offset, name and build id."""

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.memory_start, self.memory_limit, self.file_offset = start, end, pgoff
        self.filename_id = filename_id
        self.build_id_id = build_id_id
        # -1 marks a mapping that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Hashable identity of the mapping: every field except `id`."""
        identity = (self.memory_start, self.memory_limit, self.file_offset,
                    self.filename_id, self.build_id_id)
        return identity
225
226
class Function(object):
    """Record for one function, identified by its name id and dso name id."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.name_id = name_id
        self.dso_name_id = dso_name_id
        self.vaddr_in_dso = vaddr_in_dso
        # Source information defaults to 0 until it is filled in.
        self.source_filename_id = self.start_line = 0
        # -1 marks a function that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Hashable identity of the function: (name_id, dso_name_id)."""
        return self.name_id, self.dso_name_id
240
241
242# pylint: disable=no-member
class PprofProfileGenerator(object):
    """Convert the samples of a perf.data file into a pprof Profile message.

    Samples, locations, mappings and functions are first collected and
    de-duplicated as plain Python objects; gen() then copies them into
    self.profile. Location, mapping and function ids start at 1 and equal the
    object's position in the matching *_list plus one.
    """

    def __init__(self, config):
        """Set up the report library, the filters and the empty profile.

        Args:
            config: dict of settings. 'perf_data_path', 'comm_filters',
                'pid_filters', 'tid_filters' and 'dso_filters' are read here
                and 'ndk_path' is read by gen_source_lines().
                'binary_cache_dir' is overwritten: it becomes 'binary_cache'
                when that directory exists, otherwise None.
        """
        self.config = config
        self.lib = ReportLib()

        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        else:
            self.lib.SetSymfs(config['binary_cache_dir'])
        if config.get('perf_data_path'):
            self.lib.SetRecordFile(config['perf_data_path'])
        kallsyms = 'binary_cache/kallsyms'
        if os.path.isfile(kallsyms):
            self.lib.SetKallsymsFile(kallsyms)

        # Each filter is None (accept everything) or a set of accepted values.
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        self.profile = profile_pb2.Profile()
        # Index 0 of the profile string table is the empty string.
        self.profile.string_table.append('')
        self.string_table = {}  # string -> index in profile.string_table
        self.sample_types = {}  # event name -> first of its two sample type ids
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

    def gen(self):
        """Read all samples, aggregate them and return the filled profile."""
        # 1. Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            # Slot sample_type_id counts samples, the next slot sums periods.
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            if self._filter_symbol(symbol):
                location_id = self.get_location_id(symbol.vaddr_in_file, symbol)
                sample.add_location_id(location_id)
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                # Filter each frame on its own symbol, not on the sample's
                # leaf symbol, so the dso filter applies per frame.
                if self._filter_symbol(entry.symbol):
                    location_id = self.get_location_id(entry.ip, entry.symbol)
                    sample.add_location_id(location_id)
            if sample.location_ids:
                self.add_sample(sample)

        # 2. Generate line info for locations and functions.
        self.gen_source_lines()

        # 3. Produce samples/locations/functions in profile
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used.

        The comm, pid and tid filters are checked independently of each
        other; a filter that is not set accepts every sample.
        """
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return True if no dso filter is set or the symbol's dso is accepted."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str_value):
        """Return the string table index of `str_value`, adding it if new.

        An empty or None value maps to index 0.
        """
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        # + 1 because index 0 of profile.string_table is taken by ''.
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        """Return the string stored at index `str_id` of the profile string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the first sample type id of event `name`, registering it if new.

        Two consecutive sample types are registered per event:
        'event_<name>_samples' at the returned id and 'event_<name>_count' at
        the returned id + 1, both with unit 'count'.
        """
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        for suffix in ('_samples', '_count'):
            sample_type = self.profile.sample_type.add()
            sample_type.type = self.get_string_id('event_' + name + suffix)
            sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the id of the location for (`symbol`'s mapping, `ip`).

        The mapping and function of `symbol` are registered as a side effect.
        """
        mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name,
                                           symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename):
        """Return the id of the mapping described by `report_mapping` and `filename`."""
        filename_id = self.get_string_id(filename)
        build_id = self.lib.GetBuildIdForPath(filename)
        # Store the build id without a leading "0x".
        if build_id and build_id[0:2] == "0x":
            build_id = build_id[2:]
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_mapping(self, mapping_id):
        """Return the Mapping with id `mapping_id`, or None for ids <= 0."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of function (`name`, `dso_name`), or 0 for 'unknown'."""
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the Function with id `function_id`, or None for ids <= 0."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge `sample` into an existing sample with the same key, or store it."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Attach source file and line information to locations and functions.

        Does nothing except log a message when the binary cache directory or
        the addr2line tool is not available.
        """
        # 1. Create Addr2line instance
        binary_cache_dir = self.config.get('binary_cache_dir')
        if not binary_cache_dir:
            log_info("Can't generate line information because binary_cache is missing.")
            return
        # .get() so that a config without an 'ndk_path' entry behaves like None.
        ndk_path = self.config.get('ndk_path')
        if not find_tool_path('addr2line', ndk_path):
            log_info("Can't generate line information because can't find addr2line.")
            return
        addr2line = Addr2Nearestline(ndk_path, binary_cache_dir, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                function = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, function.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, function.vaddr_in_dso, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for (source_id, source) in enumerate(sources):
                source_file, source_line, function_name = source
                function_id = self.get_function_id(function_name, dso_name, 0)
                if function_id == 0:
                    continue
                if source_id == 0:
                    # Clear default line info
                    location.lines = []
                location.lines.append(self.add_line(source_file, source_line, function_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if function.vaddr_in_dso:
                dso = addr2line.get_dso(dso_name)
                if not dso:
                    continue
                sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
                if sources:
                    source_file, source_line, _ = sources[0]
                    function.source_filename_id = self.get_string_id(source_file)
                    function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Return a new Line and record `source_file` on the referenced function."""
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append `sample` to the profile with one value slot per sample type."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event name owns two sample types; slots without data stay 0.
        values = [0] * (len(self.sample_types) * 2)
        for sample_type_id, value in sample.values.items():
            values[sample_type_id] = value
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append `mapping` to the profile."""
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        # Line and inline information is only produced with a binary cache.
        has_line_info = bool(self.config.get('binary_cache_dir'))
        profile_mapping.has_line_numbers = has_line_info
        profile_mapping.has_inline_frames = has_line_info

    def gen_profile_location(self, location):
        """Append `location` and its line entries to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for location_line in location.lines:
            profile_line = profile_location.line.add()
            profile_line.function_id = location_line.function_id
            profile_line.line = location_line.line

    def gen_profile_function(self, function):
        """Append `function` to the profile; system_name repeats the name id."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line
546
547
def main():
    """Command-line entry point.

    With --show, print an existing profile (default file 'pprof.profile') and
    return. Otherwise generate a profile from the perf data file and store it
    in the output file.
    """
    parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
    parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.')
    parser.add_argument('-i', '--perf_data_path', default='perf.data', help="""
        The path of profiling data.""")
    parser.add_argument('-o', '--output_file', default='pprof.profile', help="""
        The path of generated pprof profile data.""")
    # The four sample filters share the same argument shape.
    filter_options = (
        ('--comm', """
        Use samples only in threads with selected names."""),
        ('--pid', """
        Use samples only in processes with selected process ids."""),
        ('--tid', """
        Use samples only in threads with selected thread ids."""),
        ('--dso', """
        Use samples only in selected binaries."""),
    )
    for flag, help_text in filter_options:
        parser.add_argument(flag, nargs='+', action='append', help=help_text)
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')

    args = parser.parse_args()
    if args.show:
        # A bare --show stores None, which selects the default file name.
        show_file = args.show[0] or 'pprof.profile'
        PprofProfilePrinter(load_pprof_profile(show_file)).show()
        return

    config = {
        'perf_data_path': args.perf_data_path,
        'output_file': args.output_file,
        'comm_filters': flatten_arg_list(args.comm),
        'pid_filters': flatten_arg_list(args.pid),
        'tid_filters': flatten_arg_list(args.tid),
        'dso_filters': flatten_arg_list(args.dso),
        'ndk_path': args.ndk_path,
    }
    profile = PprofProfileGenerator(config).gen()
    store_pprof_profile(config['output_file'], profile)
584
585
# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()
588