#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# For instructions see:
# http://www.chromium.org/developers/tree-sheriffs/perf-sheriffs

import hashlib
import math
import optparse
import os
import re
import subprocess
import sys
import time

try:
  import urllib2
except ImportError:
  # Python 3: urllib2 was split into urllib.request and urllib.error.
  urllib2 = None
  import urllib.error
  import urllib.request

try:
  import json
except ImportError:
  import simplejson as json


__version__ = '1.0'
EXPECTATIONS_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_CONFIG_FILE = os.path.join(EXPECTATIONS_DIR,
                                   'chromium_perf_expectations.cfg')
DEFAULT_TOLERANCE = 0.05
USAGE = ''

# Resolve the URL helpers once so the rest of the file does not care which
# Python major version it runs under.
if urllib2 is not None:
  _urlopen = urllib2.urlopen
  _HTTPError = urllib2.HTTPError
else:
  _urlopen = urllib.request.urlopen
  _HTTPError = urllib.error.HTTPError


def ReadFile(filename):
  """Return the raw contents of |filename|, read in binary mode.

  Raises:
    IOError: if the file cannot be opened or read.  A diagnostic is written
        to stderr before the exception is re-raised.
  """
  try:
    # The with-statement closes the handle even if read() fails.
    with open(filename, 'rb') as handle:
      return handle.read()
  except IOError as e:
    sys.stderr.write('I/O Error reading file %s(%s): %s\n' %
                     (filename, e.errno, e.strerror))
    # Bare raise keeps the original traceback.
    raise


def ConvertJsonIntoDict(string):
  """Read a JSON string and convert its contents into a Python datatype.

  Raises:
    Exception: if |string| is empty.
    ValueError: if |string| is not valid JSON.
  """
  if len(string) == 0:
    sys.stderr.write('Error could not parse empty string\n')
    raise Exception('JSON data missing')

  # ReadFile() returns bytes on Python 3; decode so that both parsing and the
  # error message below operate on text.  On Python 2 bytes is str, so this
  # branch is never taken.
  if isinstance(string, bytes) and not isinstance(string, str):
    string = string.decode('utf-8')

  try:
    jsondata = json.loads(string)
  except ValueError:
    sys.stderr.write('Error parsing string: "%s"\n' % string)
    raise
  return jsondata


# Floating point representation of last time we fetched a URL.
last_fetched_at = None
def FetchUrlContents(url):
  """Fetch |url| and return its body as stripped text.

  Returns:
    The response body with surrounding whitespace removed, or None if the
    server answered with HTTP 404.

  Raises:
    Any HTTP error other than 404 is re-raised.
  """
  global last_fetched_at
  if last_fetched_at and ((time.time() - last_fetched_at) <= 0.5):
    # Sleep for half a second to avoid overloading the server.
    time.sleep(0.5)
  last_fetched_at = time.time()
  try:
    connection = _urlopen(url)
  except _HTTPError as e:
    if e.code == 404:
      return None
    raise
  try:
    text = connection.read().strip()
  finally:
    # Release the connection even when read() fails.
    connection.close()
  if not isinstance(text, str):
    # Python 3 returns bytes; callers split the body on '\n' as text.
    text = text.decode('utf-8')
  return text


def GetRowData(data, key):
  """Return data[key] as a list of '"name": value' JSON fragments.

  Only the known fields are emitted, always in the same order, so that the
  output (and the checksum derived from it) is stable.
  """
  row = data[key]
  rowdata = []
  # reva and revb always come first.
  for subkey in ['reva', 'revb']:
    if subkey in row:
      rowdata.append('"%s": %s' % (subkey, row[subkey]))
  # Strings, like type, come next.
  for subkey in ['type', 'better']:
    if subkey in row:
      rowdata.append('"%s": "%s"' % (subkey, row[subkey]))
  # Finally the main numbers come last.
  for subkey in ['improve', 'regress', 'tolerance']:
    if subkey in row:
      rowdata.append('"%s": %s' % (subkey, row[subkey]))
  return rowdata


def GetRowDigest(rowdata, key):
  """Return the first 8 hex digits of the SHA-1 of |rowdata| plus |key|."""
  # Normalise every fragment to the native str type so the list's string
  # form does not carry u'' or b'' prefixes into the hashed text.
  fragments = [str(fragment) for fragment in rowdata]
  text = str(fragments) + key
  if not isinstance(text, bytes):
    # hashlib needs bytes; the digest input is expected to be pure ASCII.
    text = text.encode('ascii')
  return hashlib.sha1(text).hexdigest()[0:8]


def WriteJson(filename, data, keys, calculate_sha1=True):
  """Write a list of |keys| in |data| to the file specified in |filename|.

  Args:
    filename: path of the file to (over)write.
    data: dict mapping each key to its expectation dict.
    keys: the keys to write, in output order.
    calculate_sha1: when True a fresh checksum is computed for every row;
        otherwise an existing 'sha1' entry is copied through unchanged.

  Returns:
    True on success, False if the file could not be opened for writing.
  """
  # Build the complete text before touching the file, so a failure while
  # formatting a row cannot leave a truncated expectations file behind.
  jsondata = []
  for key in keys:
    rowdata = GetRowData(data, key)
    if calculate_sha1:
      # Include an updated checksum.
      rowdata.append('"sha1": "%s"' % GetRowDigest(rowdata, key))
    elif 'sha1' in data[key]:
      rowdata.append('"sha1": "%s"' % (data[key]['sha1']))
    jsondata.append('"%s": {%s}' % (key, ', '.join(rowdata)))
  jsondata.append('"load": true')
  payload = '{%s\n}' % ',\n '.join(jsondata) + '\n'
  if not isinstance(payload, bytes):
    # The file is opened in binary mode.
    payload = payload.encode('utf-8')

  try:
    handle = open(filename, 'wb')
  except IOError as e:
    sys.stderr.write('I/O Error writing file %s(%s): %s\n' %
                     (filename, e.errno, e.strerror))
    return False
  try:
    handle.write(payload)
  finally:
    handle.close()
  return True


def FloatIsInt(f):
  """Return True if |f| is within 1e-10 above its truncated integer value."""
  epsilon = 1.0e-10
  return abs(f - int(f)) <= epsilon


def _ScanTraceValues(summarylist, traces, reva, revb, report):
  """Collect the high and low value of each trace between two revisions.

  Args:
    summarylist: lines of a summary file, one JSON object per line.
    traces: names of the traces to look for.
    reva: lowest revision of the range.
    revb: highest revision of the range.
    report: callable taking a message; used for per-trace diagnostics.

  Returns:
    A dict mapping each trace name to a dict that holds 'high' and 'low' when
    at least one usable value was found, and is empty otherwise.
  """
  trace_values = dict((trace, {}) for trace in traces)
  scanning = False
  for line in summarylist:
    if not line.strip():
      # A blank line carries no data; parsing it would raise.
      continue
    row = ConvertJsonIntoDict(line)
    rev = int(row['rev'])
    if rev <= revb:
      scanning = True
    if rev < reva:
      break
    if not scanning:
      continue

    # We found the upper revision in the range.  Scan for trace data until we
    # find the lower revision in the range.
    for trace in traces:
      if trace not in row['traces']:
        report('trace %s missing' % trace)
        continue
      samples = row['traces'][trace]
      if not isinstance(samples, list) or not samples:
        report('trace %s format not recognized' % trace)
        continue
      try:
        tracevalue = float(samples[0])
      except (TypeError, ValueError):
        report('trace %s value error: %s' % (trace, str(samples[0])))
        continue

      bounds = trace_values[trace]
      bounds['high'] = max(bounds.get('high', tracevalue), tracevalue)
      bounds['low'] = min(bounds.get('low', tracevalue), tracevalue)
  return trace_values


def _ApplyTolerance(regress, improve, tolerance, better):
  """Widen the regress/improve pair by |tolerance| and return the new pair.

  When |better| is 'higher' regress is lowered and improve is raised; in
  every other case regress is raised and improve is lowered.  If both inputs
  are integral the results are rounded outwards and returned as ints.
  """
  keep_ints = FloatIsInt(regress) and FloatIsInt(improve)

  def Lower(number):
    lowered = number - abs(number * tolerance)
    if keep_ints:
      return int(math.floor(lowered))
    return lowered

  def Raise(number):
    raised = number + abs(number * tolerance)
    if keep_ints:
      return int(math.ceil(raised))
    return raised

  if better == 'higher':
    return Lower(regress), Raise(improve)
  return Raise(regress), Lower(improve)


last_key_printed = None
def Main(args):
  """Recompute perf expectations whose checksum no longer matches.

  Args:
    args: command-line argument list handed to optparse.

  Returns:
    The process exit code.  1 is returned when --checksum finds a mismatch,
    when an expectation is ambiguous because "better" is unset and the
    computed regress and improve are equal, or when the expectations file
    cannot be written.  0 is returned otherwise.
  """
  def OutputMessage(message, verbose_message=True):
    # Prints |message| under a header naming the key currently being
    # processed; the header is only emitted when the key changes.
    global last_key_printed
    if not options.verbose and verbose_message:
      return

    if key != last_key_printed:
      last_key_printed = key
      print('\n' + key + ':')
    print(' %s' % message)

  parser = optparse.OptionParser(usage=USAGE, version=__version__)
  parser.add_option('-v', '--verbose', action='store_true', default=False,
                    help='enable verbose output')
  parser.add_option('-s', '--checksum', action='store_true',
                    help='test if any changes are pending')
  parser.add_option('-c', '--config', dest='config_file',
                    default=DEFAULT_CONFIG_FILE,
                    help='set the config file to FILE', metavar='FILE')
  options, args = parser.parse_args(args)

  if options.verbose:
    print('Verbose output enabled.')

  config = ConvertJsonIntoDict(ReadFile(options.config_file))

  # Get the list of summaries for a test.
  base_url = config['base_url']
  # Make the perf expectations file relative to the path of the config file.
  perf_file = os.path.join(
      os.path.dirname(options.config_file), config['perf_file'])
  perf = ConvertJsonIntoDict(ReadFile(perf_file))

  # In perf_expectations.json, ignore the 'load' key.
  perfkeys = sorted(name for name in perf if name != 'load')

  write_new_expectations = False
  found_checksum_mismatch = False
  for key in perfkeys:
    value = perf[key]
    tolerance = value.get('tolerance', DEFAULT_TOLERANCE)
    better = value.get('better', None)

    # Verify the checksum.  The stored checksum is removed from the row so
    # that it is not part of the data being hashed.
    original_checksum = value.pop('sha1', '')
    computed_checksum = GetRowDigest(GetRowData(perf, key), key)
    if original_checksum == computed_checksum:
      OutputMessage('checksum matches, skipping')
      continue
    elif options.checksum:
      found_checksum_mismatch = True
      continue

    # Skip expectations that are missing a reva or revb.  We can't generate
    # expectations for those.
    if not ('reva' in value and 'revb' in value):
      OutputMessage('missing revision range, skipping')
      continue
    revb = int(value['revb'])
    reva = int(value['reva'])

    # Ensure that reva is less than revb.
    if reva > revb:
      reva, revb = revb, reva

    # Get the system/test/graph/tracename and reftracename for the current
    # key.
    matchData = re.match(r'^([^/]+)\/([^/]+)\/([^/]+)\/([^/]+)$', key)
    if not matchData:
      OutputMessage('cannot parse key, skipping')
      continue
    system, test, graph, tracename = matchData.groups()
    reftracename = tracename + '_ref'

    # Create the summary_url and get the json data for that URL.
    # FetchUrlContents() may sleep to avoid overloading the server with
    # requests.
    summary_url = '%s/%s/%s/%s-summary.dat' % (base_url, system, test, graph)
    summaryjson = FetchUrlContents(summary_url)
    if not summaryjson:
      OutputMessage('ERROR: cannot find json data, please verify',
                    verbose_message=False)
      # NOTE(review): this aborts the whole run with a success exit code and
      # without writing anything computed so far.  Kept as-is because callers
      # may depend on it; confirm whether that is intended.
      return 0

    # Set value's type to 'relative' by default.
    value_type = value.get('type', 'relative')
    if value_type not in ('relative', 'absolute'):
      # Without this guard regress/improve would be undefined, or left over
      # from the previously processed key.
      OutputMessage('unknown type "%s", skipping' % value_type,
                    verbose_message=False)
      continue

    traces = [tracename]
    if value_type == 'relative':
      traces += [reftracename]

    # Find the high and low values for each of the traces.
    trace_values = _ScanTraceValues(summaryjson.split('\n'), traces,
                                    reva, revb, OutputMessage)

    # Every requested trace is needed below; a relative expectation whose
    # reference trace is absent cannot be computed.
    if [trace for trace in traces if 'high' not in trace_values[trace]]:
      OutputMessage('no suitable traces matched, skipping')
      continue

    if value_type == 'relative':
      # Calculate assuming high deltas are regressions and low deltas are
      # improvements.
      regress = (float(trace_values[tracename]['high']) -
                 float(trace_values[reftracename]['low']))
      improve = (float(trace_values[tracename]['low']) -
                 float(trace_values[reftracename]['high']))
    else:
      # Calculate assuming high absolutes are regressions and low absolutes
      # are improvements.
      regress = float(trace_values[tracename]['high'])
      improve = float(trace_values[tracename]['low'])

    # So far we've assumed better is lower (regress > improve).  If the actual
    # values for regress and improve are equal, though, and better was not
    # specified, alert the user so we don't let them create a new file with
    # ambiguous rules.
    if better is None and regress == improve:
      OutputMessage('regress (%s) is equal to improve (%s), and "better" is '
                    'unspecified, please fix by setting "better": "lower" or '
                    '"better": "higher" in this perf trace\'s expectation' % (
                    regress, improve), verbose_message=False)
      return 1

    # If the existing values assume regressions are low deltas relative to
    # improvements, swap our regress and improve.  This value must be a
    # scores-like result.
    if 'regress' in value and 'improve' in value:
      if value['regress'] < value['improve']:
        assert(better != 'lower')
        better = 'higher'
        regress, improve = improve, regress
      else:
        # Sometimes values are equal, e.g., when they are both 0,
        # 'better' may still be set to 'higher'.
        assert(better != 'higher' or
               value['regress'] == value['improve'])
        better = 'lower'

    # If both were ints keep as int, otherwise use the float version.
    regress, improve = _ApplyTolerance(regress, improve, tolerance, better)

    # Calculate the new checksum to test if this is the only thing that may
    # have changed.
    # NOTE(review): the row has not been modified since computed_checksum was
    # taken, and that one is already known to differ from original_checksum,
    # so the 'no change' branch below cannot be taken.  Confirm the intent.
    new_checksum = GetRowDigest(GetRowData(perf, key), key)

    if ('regress' in value and 'improve' in value and
        value['regress'] == regress and value['improve'] == improve and
        original_checksum == new_checksum):
      OutputMessage('no change')
      continue

    write_new_expectations = True
    OutputMessage('traces: %s' % trace_values, verbose_message=False)
    OutputMessage('before: %s' % value, verbose_message=False)
    value['regress'] = regress
    value['improve'] = improve
    OutputMessage('after: %s' % value, verbose_message=False)

  if options.checksum:
    if found_checksum_mismatch:
      return 1
    return 0

  if write_new_expectations:
    sys.stdout.write('\nWriting expectations... ')
    sys.stdout.flush()
    if not WriteJson(perf_file, perf, perfkeys):
      # WriteJson() has already explained the failure on stderr.
      print('failed')
      return 1
    print('done')
  else:
    if options.verbose:
      print('')
    print('No changes.')
  return 0


if __name__ == '__main__':
  sys.exit(Main(sys.argv))