1#!/usr/bin/env python3 2# 3# Copyright 2018 - The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17"""Processes profiling data to output latency numbers.""" 18# 19# Type "python latency.py -h" for help 20# 21# Currently the log data is assumed to be in the following format: 22# PROF:<event-id> <timestamp> 23# The <event-id> and <timestamp> can be in the form of any valid 24# (positive)integer literal in Python 25# Examples: 26# PROF:0x0006 0x00000155e0d043f1 27# PROF:6 1468355593201 28 29import argparse 30from collections import defaultdict 31import csv 32import logging 33import math 34import os 35import string 36import xml.etree.ElementTree as ET 37 38valid_fname_chars = '-_.()%s%s' % (string.ascii_letters, string.digits) 39PERCENTILE_STEP = 1 40PROFILER_DATA_PREFIX = 'PROF:' 41 42 43class EventPair(object): 44 45 def __init__(self, pair_id, latency, name): 46 self.pair_id = pair_id 47 self.latency = latency 48 self.name = name 49 50 51class LatencyEntry(object): 52 53 def __init__(self, start_timestamp, latency): 54 self.start_timestamp = start_timestamp 55 self.latency = latency 56 57 58def parse_xml(xml_file): 59 """ 60 Parse the configuration xml file. 61 62 Returns: 63 event_pairs_by_pair_id: dict mapping event id to event pair object 64 event_pairs_by_start_id: dict mapping starting event to list of event pairs 65 with that starting event. 66 event_pairs_by_end_id: dict mapping ending event to list of event pairs 67 with that ending event. 68 """ 69 root = ET.parse(xml_file).getroot() 70 event_pairs = root.findall('event-pair') 71 event_pairs_by_pair_id = {} 72 event_pairs_by_start_id = defaultdict(list) 73 event_pairs_by_end_id = defaultdict(list) 74 75 for event_pair in event_pairs: 76 start_evt = root.find( 77 "./event[@id='{0:}']".format(event_pair.attrib['start-event'])) 78 end_evt = root.find( 79 "./event[@id='{0:}']".format(event_pair.attrib['end-event'])) 80 start = int(start_evt.attrib['id'], 0) 81 end = int(end_evt.attrib['id'], 0) 82 paird_id = start << 32 | end 83 if paird_id in event_pairs_by_pair_id: 84 logging.error('Latency event repeated: start id = %d, end id = %d', 85 start, 86 end) 87 continue 88 # Create the output file name base by concatenating: 89 # "input file name base" + start event name + "_to_" + end event name 90 evt_pair_name = start_evt.attrib['name'] + '_to_' + end_evt.attrib[ 91 'name'] 92 evt_pair_name = [ 93 c if c in valid_fname_chars else '_' for c in evt_pair_name 94 ] 95 evt_pair_name = ''.join(evt_pair_name) 96 evt_list = EventPair(paird_id, 0, evt_pair_name) 97 event_pairs_by_pair_id[paird_id] = evt_list 98 event_pairs_by_start_id[start].append(evt_list) 99 event_pairs_by_end_id[end].append(evt_list) 100 return (event_pairs_by_pair_id, event_pairs_by_start_id, 101 event_pairs_by_end_id) 102 103 104def percentile_to_index(num_entries, percentile): 105 """ 106 Returns the index in an array corresponding to a percentile. 107 108 Arguments: 109 num_entries: the number of entries in the array. 110 percentile: which percentile to calculate the index for. 111 Returns: 112 ind: the index in the array corresponding to the percentile. 113 """ 114 ind = int(math.floor(float(num_entries) * percentile / 100)) 115 if ind > 0: 116 ind -= 1 117 return ind 118 119 120def compute_latencies(input_file, event_pairs_by_start_id, 121 event_pairs_by_end_id): 122 """Parse the input data file and compute latencies.""" 123 line_num = 0 124 lat_tables_by_pair_id = defaultdict(list) 125 while True: 126 line_num += 1 127 line = input_file.readline() 128 if not line: 129 break 130 data = line.partition(PROFILER_DATA_PREFIX)[2] 131 if not data: 132 continue 133 try: 134 event_id, timestamp = [int(x, 0) for x in data.split()] 135 except ValueError: 136 logging.error('Badly formed event entry at line #%s: %s', line_num, 137 line) 138 continue 139 # We use event_pair.latency to temporarily store the timestamp 140 # of the start event 141 for event_pair in event_pairs_by_start_id[event_id]: 142 event_pair.latency = timestamp 143 for event_pair in event_pairs_by_end_id[event_id]: 144 # compute the latency only if we have seen the corresponding 145 # start event already 146 if event_pair.latency: 147 lat_tables_by_pair_id[event_pair.pair_id].append( 148 LatencyEntry(event_pair.latency, 149 timestamp - event_pair.latency)) 150 event_pair.latency = 0 151 return lat_tables_by_pair_id 152 153 154def write_data(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id): 155 for event_id, lat_table in lat_tables_by_pair_id.items(): 156 event_pair = event_pairs_by_pair_id[event_id] 157 with open(fname_base + '_' + event_pair.name + '_data.csv', 158 'wb') as out_file: 159 csv_writer = csv.writer(out_file) 160 for dat in lat_table: 161 csv_writer.writerow([dat.start_timestamp, dat.latency]) 162 163 164def write_summary(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id): 165 summaries = get_summaries(event_pairs_by_pair_id, lat_tables_by_pair_id) 166 for event_id, lat_table in lat_tables_by_pair_id.items(): 167 event_pair = event_pairs_by_pair_id[event_id] 168 summary = summaries[event_pair.name] 169 latencies = summary['latencies'] 170 num_latencies = summary['num_latencies'] 171 with open(fname_base + '_' + event_pair.name + '_summary.txt', 172 'wb') as out_file: 173 csv_writer = csv.writer(out_file) 174 csv_writer.writerow(['Percentile', 'Latency']) 175 # Write percentile table 176 for percentile in range(1, 101): 177 ind = percentile_to_index(num_latencies, percentile) 178 csv_writer.writerow([percentile, latencies[ind]]) 179 180 # Write summary 181 print('\n\nTotal number of samples = {}'.format(num_latencies), 182 file=out_file) 183 print('Min = {}'.format(summary['min_lat']), file=out_file) 184 print('Max = {}'.format(summary['max_lat']), file=out_file) 185 print('Average = {}'.format(summary['average_lat']), file=out_file) 186 print('Median = {}'.format(summary['median_lat']), file=out_file) 187 print('90 %ile = {}'.format(summary['90pctile']), file=out_file) 188 print('95 %ile = {}'.format(summary['95pctile']), file=out_file) 189 190 191def process_latencies(config_xml, input_file): 192 """ 193 End to end function to compute latencies and summaries from input file. 194 Writes latency results to files in current directory. 195 196 Arguments: 197 config_xml: xml file specifying which event pairs to compute latency 198 btwn. 199 input_file: text file containing the timestamped events, like a log file. 200 """ 201 # Parse the event configuration file 202 (event_pairs_by_pair_id, event_pairs_by_start_id, 203 event_pairs_by_end_id) = parse_xml(config_xml) 204 # Compute latencies 205 lat_tables_by_pair_id = compute_latencies(input_file, 206 event_pairs_by_start_id, 207 event_pairs_by_end_id) 208 fname_base = os.path.splitext(os.path.basename(input_file.name))[0] 209 # Write the latency data and summary to respective files 210 write_data(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id) 211 write_summary(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id) 212 213 214def get_summaries(event_pairs_by_pair_id, lat_tables_by_pair_id): 215 """ 216 Process significant summaries from a table of latencies. 217 218 Arguments: 219 event_pairs_by_pair_id: dict mapping event id to event pair object 220 lat_tables_by_pair_id: dict mapping event id to latency table 221 Returns: 222 summaries: dict mapping event pair name to significant summary metrics. 223 """ 224 summaries = {} 225 for event_id, lat_table in lat_tables_by_pair_id.items(): 226 event_summary = {} 227 event_pair = event_pairs_by_pair_id[event_id] 228 latencies = [entry.latency for entry in lat_table] 229 latencies.sort() 230 event_summary['latencies'] = latencies 231 event_summary['num_latencies'] = len(latencies) 232 event_summary['min_lat'] = latencies[0] 233 event_summary['max_lat'] = latencies[-1] 234 event_summary['average_lat'] = sum(latencies) / len(latencies) 235 event_summary['median'] = latencies[len(latencies) // 2] 236 event_summary['90pctile'] = latencies[percentile_to_index( 237 len(latencies), 90)] 238 event_summary['95pctile'] = latencies[percentile_to_index( 239 len(latencies), 95)] 240 summaries[event_pair.name] = event_summary 241 return summaries 242 243 244def get_summaries_from_log(input_file_name, config_xml=None): 245 """ 246 End to end function to compute latencies and summaries from input file. 247 Returns a summary dictionary. 248 249 Arguments: 250 input_file_name: text file containing the timestamped events, like a 251 log file. 252 config_xml: xml file specifying which event pairs to compute latency btwn. 253 Returns: 254 summaries: dict mapping event pair name to significant summary metrics. 255 """ 256 config_xml = config_xml or os.path.join(os.path.dirname(__file__), 257 'latency.xml') 258 (event_pairs_by_pair_id, event_pairs_by_start_id, 259 event_pairs_by_end_id) = parse_xml(config_xml) 260 # Compute latencies 261 input_file = open(input_file_name, 'r') 262 lat_tables_by_pair_id = compute_latencies(input_file, 263 event_pairs_by_start_id, 264 event_pairs_by_end_id) 265 return get_summaries(event_pairs_by_pair_id, lat_tables_by_pair_id) 266 267 268if __name__ == '__main__': 269 # Parse command-line arguments 270 parser = argparse.ArgumentParser( 271 description='Processes profiling data to output latency numbers') 272 parser.add_argument( 273 '--events-config', 274 type=argparse.FileType('r'), 275 default=os.path.join(os.path.dirname(__file__), 'latency.xml'), 276 help='The configuration XML file for events.' 277 ' If not specified uses latency.xml from current folder') 278 parser.add_argument( 279 'input', type=argparse.FileType('r'), help='The input log') 280 args = parser.parse_args() 281 process_latencies(args.events_config, args.input) 282