#!/usr/bin/python
#
# fiologparser.py
#
# This tool lets you parse multiple fio log files and look at interval
# statistics even when samples are non-uniform.  For instance:
#
# fiologparser.py -s *bw*
#
# to see per-interval sums for all bandwidth logs or:
#
# fiologparser.py -a *clat*
#
# to see per-interval average completion latency.

import argparse
import math
# reduce() lives in functools on Python 3 (and is available there on
# Python 2.6+ as well); import it explicitly for print_all_stats below.
from functools import reduce

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in milliseconds (fio log timestamps are in msec).')
    parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
    parser.add_argument("FILE", help="fio log output files to parse", nargs="+")
    args = parser.parse_args()

    return args

def get_ftime(series):
    # Find the earliest final timestamp across all series, so the intervals
    # only cover the range where every log still has data.
    ftime = 0
    for ts in series:
        if ftime == 0 or ts.last.end < ftime:
            ftime = ts.last.end
    return ftime

def print_full(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))
        start += ctx.interval
        end += ctx.interval

def print_sums(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        print("%s, %0.3f" % (end, sum(results)))
        start += ctx.interval
        end += ctx.interval

def print_averages(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        print("%s, %0.3f" % (end, float(sum(results))/len(results)))
        start += ctx.interval
        end += ctx.interval

# FIXME: this routine is computationally inefficient
# and has O(N^2) behavior
# it would be better to make one pass through samples
# to segment them into a series of time intervals, and
# then compute stats on each time interval instead.
# to debug this routine, use
#   # sort -n -t ',' -k 2 small.log
# on your input.
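#
# A rough sketch of that single-pass idea (illustrative only, not wired in;
# the bucketing key below is an assumption, and samples spanning interval
# boundaries would still need to be split proportionally):
#
#   from collections import defaultdict
#   buckets = defaultdict(list)
#   for ts in series:
#       for s in ts.samples:
#           buckets[s.end // ctx.interval].append(s.value)
#   # ...then compute min/avg/median/percentiles once per bucket.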
def my_extend(vlist, val):
    vlist.extend(val)
    return vlist

array_collapser = lambda vlist, val: my_extend(vlist, val)
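# e.g. reduce(array_collapser, [[1, 2], [3]], []) collapses to [1, 2, 3]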
def print_all_stats(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while (start < ftime):  # for each time interval
        end = ftime if ftime < end else end
        sample_arrays = [s.get_samples(start, end) for s in series]
        samplevalue_arrays = []
        for sample_array in sample_arrays:
            samplevalue_arrays.append(
                [sample.value for sample in sample_array])
        # collapse list of lists of sample values into list of sample values
        samplevalues = reduce(array_collapser, samplevalue_arrays, [])
        # compute all stats and print them
        mymin = min(samplevalues)
        myavg = sum(samplevalues) / float(len(samplevalues))
        mymedian = median(samplevalues)
        my90th = percentile(samplevalues, 0.90)
        my95th = percentile(samplevalues, 0.95)
        my99th = percentile(samplevalues, 0.99)
        mymax = max(samplevalues)
        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            start, len(samplevalues),
            mymin, myavg, mymedian, my90th, my95th, my99th, mymax))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval

def median(values):
    s = sorted(values)
    # integer (floor) division for the indices so this also works on Python 3
    return float(s[(len(s)-1)//2] + s[len(s)//2]) / 2

def percentile(values, p):
    s = sorted(values)
    k = (len(s)-1) * p
    f = math.floor(k)
    c = math.ceil(k)
    if f == c:
        return s[int(k)]
    # linearly interpolate between the two nearest ranks
    return (s[int(f)] * (c-k)) + (s[int(c)] * (k-f))
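
# Illustrative values (assumed inputs, not taken from any fio log):
#   median([1, 2, 3, 4])           -> 2.5
#   percentile([1, 2, 3, 4], 0.90) -> 3.7   (k = 2.7, between s[2] and s[3])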
def print_default(ctx, series):
    # Print a single number: the time-weighted average, over the whole run,
    # of the per-interval sums across all series.
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    averages = []
    weights = []

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        averages.append(sum(results))
        weights.append(end-start)
        start += ctx.interval
        end += ctx.interval

    total = 0
    for i in range(0, len(averages)):
        total += averages[i]*weights[i]
    print('%0.3f' % (total/sum(weights)))

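# For example (made-up numbers): two 1000 msec intervals whose summed values
# are 10.0 and 20.0 give (10.0*1000 + 20.0*1000) / 2000 = 15.0, so the
# default output would be "15.000".
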
class TimeSeries(object):
    def __init__(self, ctx, fn):
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        # Each fio log line is expected to look like
        # "time, value, direction, blocksize" (time in msec); only the first
        # two fields are used here.
        with open(fn, 'r') as f:
            p_time = 0
            for line in f:
                (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
                self.add_sample(p_time, int(time), int(value))
                p_time = int(time)

    def add_sample(self, start, end, value):
        # use self.ctx rather than relying on the global ctx
        sample = Sample(self.ctx, start, end, value)
        if not self.last or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        sample_list = []
        for s in self.samples:
            if s.start >= start and s.end <= end:
                sample_list.append(s)
        return sample_list

    def get_value(self, start, end):
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value

class Sample(object):
    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        # short circuit if the sample lies entirely outside the interval
        if (end < self.start or start > self.end):
            return 0

        # clamp the overlap to the sample's own bounds, then weight the
        # sample's value by the fraction of the interval it covers
        sbound = self.start if start < self.start else start
        ebound = self.end if end > self.end else end
        ratio = float(ebound-sbound) / (end-start)
        return self.value*ratio/self.ctx.divisor
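
    # Worked example (made-up numbers): a sample covering [1000, 2000) with
    # value 100, asked for its contribution to the interval [1500, 2500),
    # overlaps 500 of the 1000 msec interval, so it contributes
    # 100 * 0.5 / divisor.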


if __name__ == '__main__':
    ctx = parse_args()
    series = []
    for fn in ctx.FILE:
        series.append(TimeSeries(ctx, fn))
    if ctx.sum:
        print_sums(ctx, series)
    elif ctx.average:
        print_averages(ctx, series)
    elif ctx.full:
        print_full(ctx, series)
    elif ctx.allstats:
        print_all_stats(ctx, series)
    else:
        print_default(ctx, series)