1#!/usr/bin/python 2# Copyright 2013 The Chromium Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6# Use: ../mergetraces.py `ls cyglog.* -Sr` > merged_cyglog 7 8""""Merge multiple logs files from different processes into a single log. 9 10Given two log files of execution traces, merge the traces into a single trace. 11Merging will use timestamps (i.e. the first two columns of logged calls) to 12create a single log that is an ordered trace of calls by both processes. 13""" 14 15import optparse 16import string 17import sys 18 19def ParseLogLines(lines): 20 """Parse log file lines. 21 22 Args: 23 lines: lines from log file produced by profiled run 24 25 Below is an example of a small log file: 26 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so 27 secs usecs pid:threadid func 28 START 29 1314897086 795828 3587:1074648168 0x509e105c 30 1314897086 795874 3587:1074648168 0x509e0eb4 31 1314897086 796326 3587:1074648168 0x509e0e3c 32 1314897086 796552 3587:1074648168 0x509e07bc 33 END 34 35 Returns: 36 tuple conisiting of 1) an ordered list of the logged calls, as an array of 37 fields, 2) the virtual start address of the library, used to compute the 38 offset of the symbol in the library and 3) the virtual end address 39 """ 40 call_lines = [] 41 vm_start = 0 42 vm_end = 0 43 dash_index = lines[0].find ('-') 44 space_index = lines[0].find (' ') 45 vm_start = int (lines[0][:dash_index], 16) 46 vm_end = int (lines[0][dash_index+1:space_index], 16) 47 for line in lines[2:]: 48 line = line.strip() 49 # print hex (vm_start) 50 fields = line.split() 51 call_lines.append (fields) 52 53 return (call_lines, vm_start, vm_end) 54 55def HasDuplicates(calls): 56 """Makes sure that calls are only logged once. 57 58 Args: 59 calls: list of calls logged 60 61 Returns: 62 boolean indicating if calls has duplicate calls 63 """ 64 seen = set([]) 65 for call in calls: 66 if call[3] in seen: 67 return True 68 seen.add(call[3]) 69 return False 70 71def CheckTimestamps(calls): 72 """Prints warning to stderr if the call timestamps are not in order. 73 74 Args: 75 calls: list of calls logged 76 """ 77 index = 0 78 last_timestamp_secs = -1 79 last_timestamp_us = -1 80 while (index < len (calls)): 81 timestamp_secs = int (calls[index][0]) 82 timestamp_us = int (calls[index][1]) 83 timestamp = (timestamp_secs * 1000000) + timestamp_us 84 last_timestamp = (last_timestamp_secs * 1000000) + last_timestamp_us 85 if (timestamp < last_timestamp): 86 raise Exception("last_timestamp: " + str(last_timestamp_secs) 87 + " " + str(last_timestamp_us) + " timestamp: " 88 + str(timestamp_secs) + " " + str(timestamp_us) + "\n") 89 last_timestamp_secs = timestamp_secs 90 last_timestamp_us = timestamp_us 91 index = index + 1 92 93def Convert (call_lines, startAddr, endAddr): 94 """Converts the call addresses to static offsets and removes invalid calls. 95 96 Removes profiled calls not in shared library using start and end virtual 97 addresses, converts strings to integer values, coverts virtual addresses to 98 address in shared library. 99 100 Returns: 101 list of calls as tuples (sec, usec, pid:tid, callee) 102 """ 103 converted_calls = [] 104 call_addresses = [] 105 for fields in call_lines: 106 secs = int (fields[0]) 107 usecs = int (fields[1]) 108 callee = int (fields[3], 16) 109 # print ("callee: " + hex (callee) + " start: " + hex (startAddr) + " end: " 110 # + hex (endAddr)) 111 if (callee >= startAddr and callee < endAddr 112 and (not callee in call_addresses)): 113 converted_calls.append((secs, usecs, fields[2], (callee - startAddr))) 114 call_addresses.append(callee) 115 return converted_calls 116 117def Timestamp(trace_entry): 118 return int (trace_entry[0]) * 1000000 + int(trace_entry[1]) 119 120def AddTrace (tracemap, trace): 121 """Adds a trace to the tracemap. 122 123 Adds entries in the trace to the tracemap. All new calls will be added to 124 the tracemap. If the calls already exist in the tracemap then they will be 125 replaced if they happened sooner in the new trace. 126 127 Args: 128 tracemap: the tracemap 129 trace: the trace 130 131 """ 132 for trace_entry in trace: 133 call = trace_entry[3] 134 if (not call in tracemap) or ( 135 Timestamp(tracemap[call]) > Timestamp(trace_entry)): 136 tracemap[call] = trace_entry 137 138def GroupByProcessAndThreadId(input_trace): 139 """Returns an array of traces grouped by pid and tid. 140 141 This is used to make the order of functions not depend on thread scheduling 142 which can be greatly impacted when profiling is done with cygprofile. As a 143 result each thread has its own contiguous segment of code (ordered by 144 timestamp) and processes also have their code isolated (i.e. not interleaved). 145 """ 146 def MakeTimestamp(sec, usec): 147 return sec * 1000000 + usec 148 149 def PidAndTidFromString(pid_and_tid): 150 strings = pid_and_tid.split(':') 151 return (int(strings[0]), int(strings[1])) 152 153 tid_to_pid_map = {} 154 pid_first_seen = {} 155 tid_first_seen = {} 156 157 for (sec, usec, pid_and_tid, _) in input_trace: 158 (pid, tid) = PidAndTidFromString(pid_and_tid) 159 160 # Make sure that thread IDs are unique since this is a property we rely on. 161 if tid_to_pid_map.setdefault(tid, pid) != pid: 162 raise Exception( 163 'Seen PIDs %d and %d for TID=%d. Thread-IDs must be unique' % ( 164 tid_to_pid_map[tid], pid, tid)) 165 166 if not pid in pid_first_seen: 167 pid_first_seen[pid] = MakeTimestamp(sec, usec) 168 if not tid in tid_first_seen: 169 tid_first_seen[tid] = MakeTimestamp(sec, usec) 170 171 def CompareEvents(event1, event2): 172 (sec1, usec1, pid_and_tid, _) = event1 173 (pid1, tid1) = PidAndTidFromString(pid_and_tid) 174 (sec2, usec2, pid_and_tid, _) = event2 175 (pid2, tid2) = PidAndTidFromString(pid_and_tid) 176 177 pid_cmp = cmp(pid_first_seen[pid1], pid_first_seen[pid2]) 178 if pid_cmp != 0: 179 return pid_cmp 180 tid_cmp = cmp(tid_first_seen[tid1], tid_first_seen[tid2]) 181 if tid_cmp != 0: 182 return tid_cmp 183 return cmp(MakeTimestamp(sec1, usec1), MakeTimestamp(sec2, usec2)) 184 185 return sorted(input_trace, cmp=CompareEvents) 186 187def main(): 188 """Merge two traces for code in specified library and write to stdout. 189 190 Merges the two traces and coverts the virtual addresses to the offsets in the 191 library. First line of merged trace has dummy virtual address of 0-ffffffff 192 so that symbolizing the addresses uses the addresses in the log, since the 193 addresses have already been converted to static offsets. 194 """ 195 parser = optparse.OptionParser('usage: %prog trace1 ... traceN') 196 (_, args) = parser.parse_args() 197 if len(args) <= 1: 198 parser.error('expected at least the following args: trace1 trace2') 199 200 step = 0 201 202 # Maps function addresses to their corresponding trace entry. 203 tracemap = dict() 204 205 for trace_file in args: 206 step += 1 207 sys.stderr.write(" " + str(step) + "/" + str(len(args)) + 208 ": " + trace_file + ":\n") 209 210 trace_lines = map(string.rstrip, open(trace_file).readlines()) 211 (trace_calls, trace_start, trace_end) = ParseLogLines(trace_lines) 212 CheckTimestamps(trace_calls) 213 sys.stderr.write("Len: " + str(len(trace_calls)) + 214 ". Start: " + hex(trace_start) + 215 ", end: " + hex(trace_end) + '\n') 216 217 trace_calls = Convert(trace_calls, trace_start, trace_end) 218 sys.stderr.write("Converted len: " + str(len(trace_calls)) + "\n") 219 220 AddTrace(tracemap, trace_calls) 221 sys.stderr.write("Merged len: " + str(len(tracemap)) + "\n") 222 223 # Extract the resulting trace from the tracemap 224 merged_trace = [] 225 for call in tracemap: 226 merged_trace.append(tracemap[call]) 227 merged_trace.sort(key=Timestamp) 228 229 grouped_trace = GroupByProcessAndThreadId(merged_trace) 230 231 print "0-ffffffff r-xp 00000000 xx:00 00000 ./" 232 print "secs\tusecs\tpid:threadid\tfunc" 233 for call in grouped_trace: 234 print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t" + 235 hex(call[3])) 236 237if __name__ == '__main__': 238 main() 239