#!/usr/bin/python
#
# fiologparser.py
#
# This tool lets you parse multiple fio log files and look at interval
# statistics even when samples are non-uniform.  For instance:
#
# fiologparser.py -s *bw*
#
# to see per-interval sums for all bandwidth logs or:
#
# fiologparser.py -a *clat*
#
# to see per-interval average completion latency.

import argparse
import math


def parse_args():
    """Parse the command line and return the argparse namespace.

    The returned namespace is used as the ``ctx`` object throughout this
    file; it carries ``interval``, ``divisor``, ``full``, ``allstats``,
    ``average``, ``sum`` and ``FILE``.

    Exits through ``parser.error`` when ``--interval`` is not positive
    (the reporting loops would never terminate) or ``--divisor`` is zero
    (every sample contribution is divided by it).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in seconds.')
    parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
    parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
    args = parser.parse_args()

    if args.interval <= 0:
        parser.error('--interval must be a positive integer')
    if args.divisor == 0:
        parser.error('--divisor must not be zero')

    return args


def get_ftime(series):
    """Return the smallest final sample end time found across ``series``.

    A value of 0 doubles as the "not set yet" marker, exactly as in the
    original loop.  Series that contain no samples (``last`` is None) are
    skipped instead of raising AttributeError.  Returns 0 when no series
    has any sample.
    """
    ftime = 0
    for ts in series:
        if ts.last is None:
            continue
        if ftime == 0 or ts.last.end < ftime:
            ftime = ts.last.end
    return ftime


def _intervals(ctx, ftime):
    """Yield (start, end) windows of ``ctx.interval`` width up to ``ftime``.

    The last window is clamped so that it ends at ``ftime``.
    """
    start = 0
    end = ctx.interval
    while start < ftime:
        end = min(end, ftime)
        yield start, end
        start += ctx.interval
        end += ctx.interval


def print_full(ctx, series):
    """Print, for each interval, the end time and every series' value."""
    for start, end in _intervals(ctx, get_ftime(series)):
        results = [ts.get_value(start, end) for ts in series]
        print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))


def print_sums(ctx, series):
    """Print, for each interval, the end time and the sum over all series."""
    for start, end in _intervals(ctx, get_ftime(series)):
        results = [ts.get_value(start, end) for ts in series]
        print("%s, %0.3f" % (end, sum(results)))


def print_averages(ctx, series):
    """Print, for each interval, the end time and the mean over all series."""
    for start, end in _intervals(ctx, get_ftime(series)):
        results = [ts.get_value(start, end) for ts in series]
        print("%s, %0.3f" % (end, float(sum(results)) / len(results)))


# FIXME: this routine is computationally inefficient
# and has O(N^2) behavior
# it would be better to make one pass through samples
# to segment them into a series of time intervals, and
# then compute stats on each time interval instead.
# to debug this routine, use
#   # sort -n -t ',' -k 2 small.log
# on your input.

def my_extend(vlist, val):
    """Extend ``vlist`` in place with ``val`` and return ``vlist``.

    Kept for backward compatibility; no longer used inside this file.
    """
    vlist.extend(val)
    return vlist


# Kept for backward compatibility; no longer used inside this file.
array_collapser = lambda vlist, val: my_extend(vlist, val)


def print_all_stats(ctx, series):
    """Print min/avg/median/percentiles/max of raw samples per interval.

    Only samples that lie entirely inside an interval are counted (see
    ``TimeSeries.get_samples``).  An interval without any such sample is
    reported with a sample count of 0 and ``nan`` statistics instead of
    crashing on ``min()`` of an empty list.
    """
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    for start, end in _intervals(ctx, get_ftime(series)):
        # Flatten the per-series sample lists into one list of values.
        # (Replaces the bare ``reduce`` call, which is not a builtin on
        # Python 3.)
        samplevalues = [sample.value
                        for s in series
                        for sample in s.get_samples(start, end)]

        if samplevalues:
            mymin = min(samplevalues)
            myavg = sum(samplevalues) / float(len(samplevalues))
            mymedian = median(samplevalues)
            my90th = percentile(samplevalues, 0.90)
            my95th = percentile(samplevalues, 0.95)
            my99th = percentile(samplevalues, 0.99)
            mymax = max(samplevalues)
        else:
            nan = float('nan')
            mymin = myavg = mymedian = nan
            my90th = my95th = my99th = mymax = nan

        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            start, len(samplevalues),
            mymin, myavg, mymedian, my90th, my95th, my99th, mymax))


def median(values):
    """Return the median of ``values`` as a float.

    For an even number of values the two middle elements are averaged.
    Raises IndexError when ``values`` is empty.
    """
    s = sorted(values)
    # Floor division keeps the indices integral on Python 3 as well.
    return float(s[(len(s) - 1) // 2] + s[len(s) // 2]) / 2


def percentile(values, p):
    """Return the ``p`` quantile (0 <= p <= 1) of ``values``.

    Uses linear interpolation between the two closest ranks of the sorted
    values.  Raises IndexError when ``values`` is empty.
    """
    s = sorted(values)
    k = (len(s) - 1) * p
    f = math.floor(k)
    c = math.ceil(k)
    if f == c:
        return s[int(k)]
    return (s[int(f)] * (c - k)) + (s[int(c)] * (k - f))


def print_default(ctx, series):
    """Print one number: the interval-length-weighted mean of the sums.

    Each interval contributes the sum of all series' values, weighted by
    the interval's length.  Prints nothing when there is no data at all
    (previously a ZeroDivisionError).
    """
    averages = []
    weights = []

    for start, end in _intervals(ctx, get_ftime(series)):
        results = [ts.get_value(start, end) for ts in series]
        averages.append(sum(results))
        weights.append(end - start)

    if not weights:
        return

    total = sum(avg * weight for avg, weight in zip(averages, weights))
    print('%0.3f' % (total / sum(weights)))


class TimeSeries(object):
    """The samples of one log file, in file order."""

    def __init__(self, ctx, fn):
        """Remember ``ctx`` and load all samples from the file ``fn``."""
        self.ctx = ctx
        self.last = None      # sample with the greatest end time seen so far
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Read ``fn`` and add one sample per non-blank line.

        The first two ', '-separated fields of a line are taken as integer
        time and value; any further fields are ignored.  Each sample spans
        from the previous line's time (0 for the first line) to its own.

        Raises ValueError for a line with fewer than two fields or with
        non-integer time/value.
        """
        p_time = 0
        with open(fn, 'r') as f:
            for lineno, line in enumerate(f, 1):
                line = line.rstrip('\r\n')
                if not line.strip():
                    continue
                fields = line.split(', ')
                if len(fields) < 2:
                    raise ValueError('%s:%d: expected "time, value, ...", got %r'
                                     % (fn, lineno, line))
                time = int(fields[0])
                value = int(fields[1])
                self.add_sample(p_time, time, value)
                p_time = time

    def add_sample(self, start, end, value):
        """Append a sample and update ``self.last`` if it ends later."""
        # Use the context stored on the instance rather than a module-level
        # global, so the class also works when this file is imported.
        sample = Sample(self.ctx, start, end, value)
        if not self.last or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples lying entirely within [start, end]."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the summed contribution of all samples to [start, end]."""
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value


class Sample(object):
    """A single logged value covering the time span [start, end]."""

    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the window [start, end].

        The value is scaled by the fraction of the window that the sample
        overlaps and divided by ``ctx.divisor``.  Returns 0 when the
        sample lies outside the window.
        """
        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0

        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        return self.value * ratio / self.ctx.divisor


def main():
    """Parse arguments, load every log file and print the chosen report."""
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]
    if ctx.sum:
        print_sums(ctx, series)
    elif ctx.average:
        print_averages(ctx, series)
    elif ctx.full:
        print_full(ctx, series)
    elif ctx.allstats:
        print_all_stats(ctx, series)
    else:
        print_default(ctx, series)


if __name__ == '__main__':
    main()