#!/usr/bin/python
#
# fiologparser.py
#
# This tool lets you parse multiple fio log files and look at interval
# statistics even when samples are non-uniform. For instance:
#
# fiologparser.py -s *bw*
#
# to see per-interval sums for all bandwidth logs or:
#
# fiologparser.py -a *clat*
#
# to see per-interval average completion latency.

import argparse
import math


def parse_args():
    """Parse command-line options and return the argparse namespace.

    The namespace doubles as the script-wide context ("ctx"): it carries
    the reporting interval, the value divisor, the output-mode flags and
    the list of input log files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--interval', required=False, type=int,
                        default=1000, help='interval of time in seconds.')
    parser.add_argument('-d', '--divisor', required=False, type=int,
                        default=1, help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true',
                        default=False, help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true',
                        default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average',
                        action='store_true', default=False,
                        help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true',
                        default=False, help='print the sum for each interval.')
    parser.add_argument("FILE", help="collectl log output files to parse",
                        nargs="+")
    args = parser.parse_args()
    return args


def get_ftime(series):
    """Return the earliest end-timestamp among the series' last samples.

    Reporting stops at the point where the shortest log runs out, so all
    printed intervals are covered by every input file.
    """
    ftime = 0
    for ts in series:
        if ftime == 0 or ts.last.end < ftime:
            ftime = ts.last.end
    return ftime


def _intervals(ctx, ftime):
    """Yield (start, end) interval bounds stepping by ctx.interval.

    Covers [0, ftime); the final interval's end is clamped to ftime so no
    interval extends past the end of the shortest log.
    """
    start = 0
    end = ctx.interval
    while start < ftime:
        end = ftime if ftime < end else end
        yield start, end
        start += ctx.interval
        end += ctx.interval


def print_full(ctx, series):
    """Print one comma-separated value per series for each interval."""
    ftime = get_ftime(series)
    for start, end in _intervals(ctx, ftime):
        results = [ts.get_value(start, end) for ts in series]
        print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))


def print_sums(ctx, series):
    """Print the sum across all series for each interval."""
    ftime = get_ftime(series)
    for start, end in _intervals(ctx, ftime):
        results = [ts.get_value(start, end) for ts in series]
        print("%s, %0.3f" % (end, sum(results)))


def print_averages(ctx, series):
    """Print the per-series average for each interval."""
    ftime = get_ftime(series)
    for start, end in _intervals(ctx, ftime):
        results = [ts.get_value(start, end) for ts in series]
        print("%s, %0.3f" % (end, float(sum(results)) / len(results)))


# FIXME: this routine is computationally inefficient
# and has O(N^2) behavior
# it would be better to make one pass through samples
# to segment them into a series of time intervals, and
# then compute stats on each time interval instead.
# to debug this routine, use
#   sort -n -t ',' -k 2 small.log
# on your input.

def my_extend(vlist, val):
    """Extend vlist with val in place and return it (kept for compat)."""
    vlist.extend(val)
    return vlist

array_collapser = lambda vlist, val: my_extend(vlist, val)


def print_all_stats(ctx, series):
    """Print min/avg/median/percentiles/max of raw samples per interval.

    Unlike the other printers this works on the individual sample values
    falling inside each interval, not on time-weighted contributions.
    """
    ftime = get_ftime(series)
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    for start, end in _intervals(ctx, ftime):  # for each time interval
        sample_arrays = [s.get_samples(start, end) for s in series]
        # collapse list of lists of sample values into one flat list
        # (was reduce(array_collapser, ...); reduce is not a builtin in py3)
        samplevalues = [sample.value
                        for sample_array in sample_arrays
                        for sample in sample_array]
        # compute all stats and print them
        mymin = min(samplevalues)
        myavg = sum(samplevalues) / float(len(samplevalues))
        mymedian = median(samplevalues)
        my90th = percentile(samplevalues, 0.90)
        my95th = percentile(samplevalues, 0.95)
        my99th = percentile(samplevalues, 0.99)
        mymax = max(samplevalues)
        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            start, len(samplevalues),
            mymin, myavg, mymedian, my90th, my95th, my99th, mymax))


def median(values):
    """Return the median of values (mean of the two middle elements).

    Uses floor division for the indices; the original '/' produced float
    indices under Python 3 and raised TypeError.
    """
    s = sorted(values)
    return float(s[(len(s) - 1) // 2] + s[len(s) // 2]) / 2


def percentile(values, p):
    """Return the p-th quantile (0 <= p <= 1) with linear interpolation."""
    s = sorted(values)
    k = (len(s) - 1) * p
    f = math.floor(k)
    c = math.ceil(k)
    if f == c:
        # k landed exactly on a sample; no interpolation needed
        return s[int(k)]
    return (s[int(f)] * (c - k)) + (s[int(c)] * (k - f))


def print_default(ctx, series):
    """Print a single time-weighted overall average across all intervals."""
    ftime = get_ftime(series)
    averages = []
    weights = []
    for start, end in _intervals(ctx, ftime):
        results = [ts.get_value(start, end) for ts in series]
        averages.append(sum(results))
        weights.append(end - start)
    total = 0
    for i in range(0, len(averages)):
        total += averages[i] * weights[i]
    print('%0.3f' % (total / sum(weights)))


class TimeSeries(object):
    """All samples parsed from one fio log file, in file order."""

    def __init__(self, ctx, fn):
        self.ctx = ctx
        self.last = None      # sample with the greatest end timestamp
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Parse fio's 4-column 'time, value, ddir, bs' log format.

        Each line's time is the sample's end; the previous line's time is
        its start.  Uses a context manager so the handle is always closed.
        """
        with open(fn, 'r') as f:
            p_time = 0
            for line in f:
                (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
                self.add_sample(p_time, int(time), int(value))
                p_time = int(time)

    def add_sample(self, start, end, value):
        """Append a sample and track the latest-ending one in self.last."""
        # was Sample(ctx, ...): referenced the script-global ctx, which
        # does not exist when this module is imported -> NameError
        sample = Sample(self.ctx, start, end, value)
        if not self.last or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return samples lying entirely within [start, end]."""
        sample_list = []
        for s in self.samples:
            if s.start >= start and s.end <= end:
                sample_list.append(s)
        return sample_list

    def get_value(self, start, end):
        """Return the summed, time-weighted contribution over [start, end]."""
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value


class Sample(object):
    """One log sample: a value observed over the span [start, end]."""

    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's value prorated onto [start, end].

        The value is scaled by the fraction of the query window that this
        sample overlaps, then divided by the configured divisor.
        """
        # short circuit if not within the bound
        if (end < self.start or start > self.end):
            return 0
        sbound = self.start if start < self.start else start
        ebound = self.end if end > self.end else end
        ratio = float(ebound - sbound) / (end - start)
        # was ctx.divisor (global) -> NameError outside __main__
        return self.value * ratio / self.ctx.divisor


if __name__ == '__main__':
    ctx = parse_args()
    series = []
    for fn in ctx.FILE:
        series.append(TimeSeries(ctx, fn))
    if ctx.sum:
        print_sums(ctx, series)
    elif ctx.average:
        print_averages(ctx, series)
    elif ctx.full:
        print_full(ctx, series)
    elif ctx.allstats:
        print_all_stats(ctx, series)
    else:
        print_default(ctx, series)