• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17# Super simplistic printer of a perfprofd output proto. Illustrates
18# how to parse and traverse a perfprofd output proto in Python.
19
20# This relies on libunwindstack's unwind_symbol. Build with
21#   mmma system/core/libunwindstack
22
23import argparse
24from datetime import datetime
25import itertools
26import json
27
28import logging
29logging.basicConfig(format = "%(message)s")
30
31from multiprocessing.dummy import Pool as ThreadPool
32import os.path
33from sorted_collection import SortedCollection
34import subprocess
35from threading import Timer
36
37
38# Generate with:
39#  aprotoc -I=external/perf_data_converter/src/quipper \
40#      --python_out=system/extras/perfprofd/scripts \
41#      external/perf_data_converter/src/quipper/perf_data.proto
42#  aprotoc -I=external/perf_data_converter/src/quipper -I=system/extras/perfprofd \
43#      --python_out=system/extras/perfprofd/scripts \
44#      system/extras/perfprofd/perfprofd_record.proto
45import perfprofd_record_pb2
46
47# Make sure that symbol is on the PYTHONPATH, e.g., run as
48# PYTHONPATH=$PYTHONPATH:$ANDROID_BUILD_TOP/development/scripts python ...
49import symbol
50from symbol import SymbolInformation
51
52# This is wrong. But then the symbol module is a bad quagmire.
53# TODO: Check build IDs.
54symbol.SetAbi(["ABI: 'arm64'"])
55
56class MmapState(object):
57    def __init__(self):
58        self._list = SortedCollection((), lambda x : x[0])
59
60    def add_map(self, start, length, pgoff, name):
61        tuple = (start, length, pgoff, name)
62        self._list.insert(tuple)
63
64    def find(self, addr):
65        try:
66            tuple = self._list.find_le(addr)
67            if addr < tuple[0] + tuple[1]:
68                return tuple
69            return None
70        except ValueError:
71            return None
72
73    def copy(self):
74        ret = MmapState()
75        ret._list = self._list.copy()
76        return ret
77
78    def __str__(self):
79        return "MmapState: " + self._list.__str__()
80    def __repr__(self):
81        return self.__str__()
82
83class SymbolMap(object):
84    def __init__(self, min_v):
85        self._list = SortedCollection((), lambda x : x[0])
86        self._min_vaddr = min_v
87
88    def add_symbol(self, start, length, name):
89        tuple = (start, length, name)
90        self._list.insert(tuple)
91
92    def find(self, addr):
93        try:
94            tuple = self._list.find_le(addr)
95            if addr < tuple[0] + tuple[1]:
96                return tuple[2]
97            return None
98        except ValueError:
99            return None
100
101    def copy(self):
102        ret = SymbolMap()
103        ret._list = self._list.copy()
104        return ret
105
106    def __str__(self):
107        return "SymbolMap: " + self._list.__str__()
108    def __repr__(self):
109        return self.__str__()
110
111def intern_uni(u):
112    return intern(u.encode('ascii', 'replace'))
113
114def collect_tid_names(perf_data):
115    tid_name_map = {}
116    for event in perf_data.events:
117        if event.HasField('comm_event'):
118            tid_name_map[event.comm_event.tid] = intern_uni(event.comm_event.comm)
119    return tid_name_map
120
121def create_symbol_maps(profile):
122    symbol_maps = {}
123    for si in profile.symbol_info:
124        map = SymbolMap(si.min_vaddr)
125        symbol_maps[si.filename] = map
126        for sym in si.symbols:
127            map.add_symbol(sym.addr, sym.size, intern_uni(sym.name))
128    return symbol_maps
129
130def update_mmap_states(event, state_map):
131    if event.HasField('mmap_event'):
132        mmap_event = event.mmap_event
133        # Skip kernel stuff.
134        if mmap_event.tid == 0:
135            return
136        # Create new map, if necessary.
137        if not mmap_event.pid in state_map:
138            state_map[mmap_event.pid] = MmapState()
139        state_map[mmap_event.pid].add_map(mmap_event.start, mmap_event.len, mmap_event.pgoff,
140            intern_uni(mmap_event.filename))
141    elif event.HasField('fork_event'):
142        fork_event = event.fork_event
143        # Skip threads
144        if fork_event.pid == fork_event.ppid:
145            return
146        if fork_event.ppid not in state_map:
147            logging.warn("fork from %d without map", fork_event.ppid)
148            return
149        state_map[fork_event.pid] = state_map[fork_event.ppid].copy()
150
151skip_dso = set()
152vaddr = {}
153
154def find_vaddr(vaddr_map, filename):
155    if filename in vaddr_map:
156        return vaddr_map[filename]
157
158    path = "%s/%s" % (symbol.SYMBOLS_DIR, filename)
159    if not os.path.isfile(path):
160        logging.warn('Cannot find %s for min_vaddr', filename)
161        vaddr_map[filename] = 0
162        return 0
163
164    try:
165        # Use "-W" to have single-line format.
166        res = subprocess.check_output(['readelf', '-lW', path])
167        lines = res.split("\n")
168        reading_headers = False
169        min_vaddr = None
170        min_fn = lambda x, y: y if x is None else min(x, y)
171        # Using counting loop for access to next line.
172        for i in range(0, len(lines) - 1):
173            line = lines[i].strip()
174            if reading_headers:
175                if line == "":
176                    # Block is done, won't find anything else.
177                    break
178                if line.startswith("LOAD"):
179                    # Look at the current line to distinguish 32-bit from 64-bit
180                    line_split = line.split()
181                    if len(line_split) >= 8:
182                        if " R E " in line:
183                            # Found something expected. So parse VirtAddr.
184                            try:
185                                min_vaddr = min_fn(min_vaddr, int(line_split[2], 0))
186                            except ValueError:
187                                pass
188                    else:
189                        logging.warn('Could not parse readelf line %s', line)
190            else:
191                if line.strip() == "Program Headers:":
192                    reading_headers = True
193
194        if min_vaddr is None:
195            min_vaddr = 0
196        logging.debug("min_vaddr for %s is %d", filename, min_vaddr)
197        vaddr_map[filename] = min_vaddr
198    except subprocess.CalledProcessError:
199        logging.warn('Error finding min_vaddr for %s', filename)
200        vaddr_map[filename] = 0
201    return vaddr_map[filename]
202
203unwind_symbols_cache = {}
204unwind_symbols_warn_missing_cache = set()
205def run_unwind_symbols(filename, offset_hex):
206    path = "%s/%s" % (symbol.SYMBOLS_DIR, filename)
207    if not os.path.isfile(path):
208        if path not in unwind_symbols_warn_missing_cache:
209            logging.warn('Cannot find %s for unwind_symbols', filename)
210            unwind_symbols_warn_missing_cache.add(path)
211        return None
212
213    if (path, offset_hex) in unwind_symbols_cache:
214        pair = unwind_symbols_cache[(path, offset_hex)]
215        if pair is None:
216            return None
217        return [(pair[0], pair[1], filename)]
218
219    try:
220        res = subprocess.check_output(['unwind_symbols', path, offset_hex])
221        lines = res.split("\n")
222        for line in lines:
223            if line.startswith('<0x'):
224                parts = line.split(' ', 1)
225                if len(parts) == 2:
226                    # Get offset, too.
227                    offset = 0
228                    plus_index = parts[0].find('>+')
229                    if plus_index > 0:
230                        offset_str = parts[0][plus_index + 2:-1]
231                        try:
232                            offset = int(offset_str)
233                        except ValueError:
234                            logging.warn('error parsing offset from %s', parts[0])
235
236                    # TODO C++ demangling necessary.
237                    logging.debug('unwind_symbols: %s %s -> %s +%d', filename, offset_hex, parts[1],
238                        offset)
239                    sym = intern(parts[1])
240                    unwind_symbols_cache[(path, offset_hex)] = (sym, offset)
241                    return [(sym, offset, filename)]
242    except subprocess.CalledProcessError:
243        logging.warn('Failed running unwind_symbols for %s', filename)
244        unwind_symbols_cache[(path, offset_hex)] = None
245        return None
246
247
248def decode_with_symbol_lib(name, addr_rel_hex):
249    info = SymbolInformation(name, addr_rel_hex)
250    # As-is, only info[0] (inner-most inlined function) is recognized.
251    (source_symbol, source_location, object_symbol_with_offset) = info[0]
252
253    def parse_symbol_lib_output(s):
254        i = s.rfind('+')
255        if i > 0:
256            try:
257                off = int(s[i+1:])
258                return (s[0:i], off)
259            except ValueError:
260                pass
261        return (s, 0)
262
263    ret = []
264
265    if object_symbol_with_offset is not None:
266        pair = parse_symbol_lib_output(object_symbol_with_offset)
267        ret.append((intern(pair[0]), pair[1], name))
268    if source_symbol is not None:
269        iterinfo = iter(info)
270        next(iterinfo)
271        for (sym_inlined, loc_inlined, _) in iterinfo:
272            # TODO: Figure out what's going on here:
273            if sym_inlined is not None:
274                pair = parse_symbol_lib_output(sym_inlined)
275                ret.insert(0, (intern(pair[0]), pair[1], name))
276    if len(ret) > 0:
277        return ret
278    return None
279
280def decode_addr(addr, mmap_state, device_symbols):
281    """Try to decode the given address against the current mmap table and device symbols.
282
283    First, look up the address in the mmap state. If none is found, use a simple address
284    heuristic to guess kernel frames on 64-bit devices.
285
286    Next, check on-device symbolization for a hit.
287
288    Last, try to symbolize against host information. First try the symbol module. However,
289    as it is based on addr2line, it will not work for pure-gnu_debugdata DSOs (e.g., ART
290    preopt artifacts). For that case, use libunwindstack's unwind_symbols.
291    """
292
293    map = mmap_state.find(addr)
294    if map is None:
295        # If it looks large enough, assume it's from
296        # the kernel.
297        if addr > 18000000000000000000:
298            return [("[kernel]", 0, "[kernel]")]
299        return [("%d (no mapped segment)" % addr, 0, None)]
300    name = map[3]
301    logging.debug('%d is %s (%d +%d)', addr, name, map[0], map[1])
302
303    # Once relocation packer is off, it would be:
304    #   offset = addr - map.start + map.pgoff
305    # Right now it is
306    #   offset = addr - map.start (+ min_vaddr)
307    # Note that on-device symbolization doesn't include min_vaddr but
308    # does include pgoff.
309    offset = addr - map[0]
310
311    if name in device_symbols:
312        offset = offset + map[2]
313        symbol = device_symbols[name].find(offset)
314        if symbol is None:
315            return [("%s (missing on-device symbol)" % (name), offset, name)]
316        else:
317            # TODO: Should we change the format?
318            return [(symbol, 0, name)]
319    offset = offset + find_vaddr(vaddr, name)
320    if (name, offset) in skip_dso:
321        # We already failed, skip symbol finding.
322        return [(name, offset, name)]
323    else:
324        addr_rel_hex = intern("%x" % offset)
325        ret = decode_with_symbol_lib(name, addr_rel_hex)
326        if ret is not None and len(ret) != 0:
327            # Addr2line may report oatexec+xyz. Let unwind_symbols take care of that.
328            if len(ret) != 1 or ret[0][0] != 'oatexec':
329                logging.debug('Got result from symbol module: %s', str(ret))
330                return ret
331        # Try unwind_symbols
332        ret = run_unwind_symbols(name, addr_rel_hex)
333        if ret is not None and len(ret) != 0:
334            return ret
335        logging.warn("Failed to find symbol for %s +%d (%d)", name, offset, addr)
336        # Remember the fail.
337        skip_dso.add((name, offset))
338        return [(name, offset, name)]
339
340
341def print_sample(sample, tid_name_map):
342    if sample[0] in tid_name_map:
343        pid_name = "%s (%d)" % (tid_name_map[sample[0]], sample[0])
344    elif sample[0] == 0:
345        pid_name = "kernel (0)"
346    else:
347        pid_name = "unknown (%d)" % (sample[0])
348    if sample[1] in tid_name_map:
349        tid_name = "%s (%d)" % (tid_name_map[sample[1]], sample[1])
350    elif sample[1] == 0:
351        tid_name = "kernel (0)"
352    else:
353        tid_name = "unknown (%d)" % (sample[1])
354    print " %s - %s:" % (pid_name, tid_name)
355    for sym in sample[2]:
356        print "   %s +%d (%s)" % (sym[0], sym[1], sym[2])
357
358def print_samples(samples, tid_name_map):
359    for sample in samples:
360        print_sample(sample, tid_name_map)
361
362def symbolize_events(perf_data, device_symbols, tid_name_map, printSamples = False,
363        removeKernelTop = False):
364    samples = []
365    mmap_states = {}
366    for event in perf_data.events:
367        update_mmap_states(event, mmap_states)
368        if event.HasField('sample_event'):
369            sample_ev = event.sample_event
370            # Handle sample.
371            new_sample = None
372            if sample_ev.pid in mmap_states:
373                mmap_state = mmap_states[sample_ev.pid]
374                ip_sym = decode_addr(sample_ev.ip, mmap_state, device_symbols)
375                stack = ip_sym
376                for cc_ip in sample_ev.callchain:
377                    cc_sym = decode_addr(cc_ip, mmap_state, device_symbols)
378                    stack.extend(cc_sym)
379                if removeKernelTop:
380                    while len(stack) > 1 and stack[0][0] == "[kernel]":
381                        stack.pop(0)
382                new_sample = (sample_ev.pid, sample_ev.tid, stack)
383            else:
384                # Handle kernel symbols specially.
385                if sample_ev.pid == 0:
386                    samples.append((0, sample_ev.tid, [("[kernel]", 0, "[kernel]")]))
387                elif sample_ev.pid in tid_name_map:
388                    samples.append((sample_ev.pid, sample_ev.tid, [(tid_name_map[sample_ev.pid], 0,
389                        None)]))
390                else:
391                    samples.append((sample_ev.pid, sample_ev.tid, [("[unknown]", 0, None)]))
392            if new_sample is not None:
393                samples.append(new_sample)
394                if printSamples:
395                    print_sample(new_sample, tid_name_map)
396    return samples
397
398def count_key_reduce_function(x, y, key_fn):
399    key = key_fn(y)
400    if key not in x:
401        x[key] = 0
402    x[key] = x[key] + 1
403    return x
404
405def print_histogram(samples, reduce_key_fn, label_key_fn, size):
406    # Create a sorted list of top samples.
407    sorted_count_list = sorted(
408        reduce(lambda x, y: count_key_reduce_function(x, y, reduce_key_fn), samples, {}).
409            iteritems(),
410        cmp=lambda x,y: cmp(x[1], y[1]),
411        reverse=True)
412    sorted_count_topX = list(itertools.islice(sorted_count_list, size))
413
414    # Print top-size samples.
415    print 'Histogram top-%d:' % (size)
416    for i in xrange(0, len(sorted_count_topX)):
417        print '  %d: %s (%s)' % (i+1, label_key_fn(sorted_count_topX[i][0]),
418            sorted_count_topX[i][1])
419
420def get_name(pid):
421    if pid in tid_name_map:
422        return tid_name_map[pid]
423    if pid == 0:
424        return "[kernel]"
425    return "[unknown]"
426
427def create_cmd(args, f):
428    ret = ['python', '-u', 'system/extras/perfprofd/scripts/perf_proto_stack.py']
429    if args.syms is not None:
430        ret.extend(['--syms', args.syms[0]])
431    if args.print_samples is not None:
432        ret.append('--print-samples')
433    if args.skip_kernel_syms is not None:
434        ret.append('--skip-kernel-syms')
435    if args.print_pid_histogram is not None:
436        ret.append('--print-pid-histogram')
437    if args.print_sym_histogram is not None:
438        ret.append('--print-sym-histogram')
439    if args.print_dso_histogram is not None:
440        ret.append('--print-dso-histogram')
441    ret.extend(['--json-out', '%s.json' % (f)])
442    ret.append(f)
443    return ret
444
445def run_cmd(x):
446    args = x[0]
447    f = x[1]
448    cmd = create_cmd(args,f)
449    logging.warn('Running on %s', f)
450    success = False
451    logging.debug('%r', cmd)
452    err_out = open('%s.err' % (f), 'w')
453    kill = lambda process: process.kill()
454    start = datetime.now()
455    p = subprocess.Popen(cmd, stderr=err_out)
456    kill_timer = Timer(3600, kill, [p])
457    try:
458        kill_timer.start()
459        stdout, stderr = p.communicate()
460        success = True
461    finally:
462        kill_timer.cancel()
463    err_out.close()
464    end = datetime.now()
465    logging.warn('Ended %s (%s)', f, str(end-start))
466    return '%s: %r' % (f, success)
467
468def parallel_runner(args):
469    pool = ThreadPool(args.parallel)
470    map_args = map(lambda f: (args, f), args.file)
471    result = pool.map(run_cmd, map_args)
472    pool.close()
473    pool.join()
474    print result
475
476def run(args):
477    if args.syms is not None:
478        symbol.SYMBOLS_DIR = args.syms[0]
479    print_symbols = args.print_samples is not None
480    skip_kernel_syms = args.skip_kernel_syms is not None
481
482    # TODO: accept argument for parsing.
483    file = open(args.file[0], 'rb')
484    data = file.read()
485    file.close()
486
487    profile = perfprofd_record_pb2.PerfprofdRecord()
488    profile.ParseFromString(data)
489
490    perf_data = profile.perf_data
491
492    print "Stats: ", perf_data.stats
493
494    tid_name_map = collect_tid_names(perf_data)
495    symbol_maps = create_symbol_maps(profile)
496
497    samples = symbolize_events(perf_data, symbol_maps, tid_name_map, printSamples=print_symbols,
498        removeKernelTop=skip_kernel_syms)
499
500    if args.print_pid_histogram is not None:
501        print_histogram(samples, lambda x: x[0], lambda x: get_name(x), 25)
502    if args.print_sym_histogram is not None:
503        print_histogram(samples, lambda x: x[2][0][0], lambda x: x, 100)
504    if args.print_dso_histogram is not None:
505        print_histogram(samples, lambda x: x[2][0][2], lambda x: x, 25)
506
507    if args.json_out is not None:
508        json_file = open(args.json_out[0], 'w')
509        json_data = { 'samples': samples, 'names': tid_name_map }
510        json.dump(json_data, json_file)
511        json_file.close()
512
513if __name__ == "__main__":
514    parser = argparse.ArgumentParser(description='Process a perfprofd record.')
515
516    parser.add_argument('file', help='proto file to parse', metavar='file', nargs='+')
517    parser.add_argument('--syms', help='directory for symbols', nargs=1)
518    parser.add_argument('--json-out', help='output file for JSON', nargs=1)
519    parser.add_argument('--print-samples', help='print samples', action='store_const', const=True)
520    parser.add_argument('--skip-kernel-syms', help='skip kernel symbols at the top of stack',
521        action='store_const', const=True)
522    parser.add_argument('--print-pid-histogram', help='print a top-25 histogram of processes',
523        action='store_const', const=True)
524    parser.add_argument('--print-sym-histogram', help='print a top-100 histogram of symbols',
525        action='store_const', const=True)
526    parser.add_argument('--print-dso-histogram', help='print a top-25 histogram of maps',
527        action='store_const', const=True)
528    parser.add_argument('--parallel', help='run parallel jobs', type=int)
529
530    args = parser.parse_args()
531    if args is not None:
532        if args.parallel is not None:
533            parallel_runner(args)
534        else:
535            run(args)
536