• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1'''
2Created on May 19, 2011
3
4@author: bungeman
5'''
6
7import re
8import math
9
10# bench representation algorithm constant names
11ALGORITHM_AVERAGE = 'avg'
12ALGORITHM_MEDIAN = 'med'
13ALGORITHM_MINIMUM = 'min'
14ALGORITHM_25TH_PERCENTILE = '25th'
15
16class BenchDataPoint:
17    """A single data point produced by bench.
18
19    (str, str, str, float, {str:str})"""
20    def __init__(self, bench, config, time_type, time, settings):
21        self.bench = bench
22        self.config = config
23        self.time_type = time_type
24        self.time = time
25        self.settings = settings
26
27    def __repr__(self):
28        return "BenchDataPoint(%s, %s, %s, %s, %s)" % (
29                   str(self.bench),
30                   str(self.config),
31                   str(self.time_type),
32                   str(self.time),
33                   str(self.settings),
34               )
35
36class _ExtremeType(object):
37    """Instances of this class compare greater or less than other objects."""
38    def __init__(self, cmpr, rep):
39        object.__init__(self)
40        self._cmpr = cmpr
41        self._rep = rep
42
43    def __cmp__(self, other):
44        if isinstance(other, self.__class__) and other._cmpr == self._cmpr:
45            return 0
46        return self._cmpr
47
48    def __repr__(self):
49        return self._rep
50
51Max = _ExtremeType(1, "Max")
52Min = _ExtremeType(-1, "Min")
53
54class _ListAlgorithm(object):
55    """Algorithm for selecting the representation value from a given list.
56    representation is one of the ALGORITHM_XXX representation types."""
57    def __init__(self, data, representation=None):
58        if not representation:
59            representation = ALGORITHM_AVERAGE  # default algorithm
60        self._data = data
61        self._len = len(data)
62        if representation == ALGORITHM_AVERAGE:
63            self._rep = sum(self._data) / self._len
64        else:
65            self._data.sort()
66            if representation == ALGORITHM_MINIMUM:
67                self._rep = self._data[0]
68            else:
69                # for percentiles, we use the value below which x% of values are
70                # found, which allows for better detection of quantum behaviors.
71                if representation == ALGORITHM_MEDIAN:
72                    x = int(round(0.5 * self._len + 0.5))
73                elif representation == ALGORITHM_25TH_PERCENTILE:
74                    x = int(round(0.25 * self._len + 0.5))
75                else:
76                    raise Exception("invalid representation algorithm %s!" %
77                                    representation)
78                self._rep = self._data[x - 1]
79
80    def compute(self):
81        return self._rep
82
83def _ParseAndStoreTimes(config_re, time_re, line, bench, dic,
84                        representation=None):
85    """Parses given bench time line with regex and adds data to the given dic.
86    config_re: regular expression for parsing the config line.
87    time_re: regular expression for parsing bench time.
88    line: input string line to parse.
89    bench: name of bench for the time values.
90    dic: dictionary to store bench values. See bench_dic in parse() below.
91    representation: should match one of the ALGORITHM_XXX types."""
92
93    for config in re.finditer(config_re, line):
94        current_config = config.group(1)
95        if config_re.startswith('  tile_'):  # per-tile bench, add name prefix
96            current_config = 'tile_' + current_config
97        times = config.group(2)
98        for new_time in re.finditer(time_re, times):
99            current_time_type = new_time.group(1)
100            iters = [float(i) for i in
101                     new_time.group(2).strip().split(',')]
102            dic.setdefault(bench, {}).setdefault(current_config, {}).setdefault(
103                current_time_type, []).append(_ListAlgorithm(
104                    iters, representation).compute())
105
106def parse(settings, lines, representation=None):
107    """Parses bench output into a useful data structure.
108
109    ({str:str}, __iter__ -> str) -> [BenchDataPoint]
110    representation is one of the ALGORITHM_XXX types."""
111
112    benches = []
113    current_bench = None
114    bench_dic = {}  # [bench][config][time_type] -> [list of bench values]
115    setting_re = '([^\s=]+)(?:=(\S+))?'
116    settings_re = 'skia bench:((?:\s+' + setting_re + ')*)'
117    bench_re = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)'
118    time_re = '(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\d+\.\d+)*)'
119    # non-per-tile benches have configs that don't end with ']'
120    config_re = '(\S+[^\]]): ((?:' + time_re + '\s+)+)'
121    # per-tile bench lines are in the following format. Note that there are
122    # non-averaged bench numbers in separate lines, which we ignore now due to
123    # their inaccuracy.
124    tile_re = ('  tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'
125               ' ((?:' + time_re + '\s+)+)')
126
127    for line in lines:
128
129        # see if this line is a settings line
130        settingsMatch = re.search(settings_re, line)
131        if (settingsMatch):
132            settings = dict(settings)
133            for settingMatch in re.finditer(setting_re, settingsMatch.group(1)):
134                if (settingMatch.group(2)):
135                    settings[settingMatch.group(1)] = settingMatch.group(2)
136                else:
137                    settings[settingMatch.group(1)] = True
138
139        # see if this line starts a new bench
140        new_bench = re.search(bench_re, line)
141        if new_bench:
142            current_bench = new_bench.group(1)
143
144        # add configs on this line to the bench_dic
145        if current_bench:
146            for regex in [config_re, tile_re]:
147                 _ParseAndStoreTimes(regex, time_re, line, current_bench,
148                                     bench_dic, representation)
149
150    # append benches to list, use the total time as final bench value.
151    for bench in bench_dic:
152        for config in bench_dic[bench]:
153            for time_type in bench_dic[bench][config]:
154                benches.append(BenchDataPoint(
155                    bench,
156                    config,
157                    time_type,
158                    sum(bench_dic[bench][config][time_type]),
159                    settings))
160
161    return benches
162
163class LinearRegression:
164    """Linear regression data based on a set of data points.
165
166    ([(Number,Number)])
167    There must be at least two points for this to make sense."""
168    def __init__(self, points):
169        n = len(points)
170        max_x = Min
171        min_x = Max
172
173        Sx = 0.0
174        Sy = 0.0
175        Sxx = 0.0
176        Sxy = 0.0
177        Syy = 0.0
178        for point in points:
179            x = point[0]
180            y = point[1]
181            max_x = max(max_x, x)
182            min_x = min(min_x, x)
183
184            Sx += x
185            Sy += y
186            Sxx += x*x
187            Sxy += x*y
188            Syy += y*y
189
190        denom = n*Sxx - Sx*Sx
191        if (denom != 0.0):
192            B = (n*Sxy - Sx*Sy) / denom
193        else:
194            B = 0.0
195        a = (1.0/n)*(Sy - B*Sx)
196
197        se2 = 0
198        sB2 = 0
199        sa2 = 0
200        if (n >= 3 and denom != 0.0):
201            se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom))
202            sB2 = (n*se2) / denom
203            sa2 = sB2 * (1.0/n) * Sxx
204
205
206        self.slope = B
207        self.intercept = a
208        self.serror = math.sqrt(max(0, se2))
209        self.serror_slope = math.sqrt(max(0, sB2))
210        self.serror_intercept = math.sqrt(max(0, sa2))
211        self.max_x = max_x
212        self.min_x = min_x
213
214    def __repr__(self):
215        return "LinearRegression(%s, %s, %s, %s, %s)" % (
216                   str(self.slope),
217                   str(self.intercept),
218                   str(self.serror),
219                   str(self.serror_slope),
220                   str(self.serror_intercept),
221               )
222
223    def find_min_slope(self):
224        """Finds the minimal slope given one standard deviation."""
225        slope = self.slope
226        intercept = self.intercept
227        error = self.serror
228        regr_start = self.min_x
229        regr_end = self.max_x
230        regr_width = regr_end - regr_start
231
232        if slope < 0:
233            lower_left_y = slope*regr_start + intercept - error
234            upper_right_y = slope*regr_end + intercept + error
235            return min(0, (upper_right_y - lower_left_y) / regr_width)
236
237        elif slope > 0:
238            upper_left_y = slope*regr_start + intercept + error
239            lower_right_y = slope*regr_end + intercept - error
240            return max(0, (lower_right_y - upper_left_y) / regr_width)
241
242        return 0
243
244def CreateRevisionLink(revision_number):
245    """Returns HTML displaying the given revision number and linking to
246    that revision's change page at code.google.com, e.g.
247    http://code.google.com/p/skia/source/detail?r=2056
248    """
249    return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%(
250        revision_number, revision_number)
251
252def main():
253    foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]]
254    LinearRegression(foo)
255
256if __name__ == "__main__":
257    main()
258