#!/usr/bin/python2
#
# Copyright 2019 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Helper script that takes as input 2 CSVs downloaded from perf.skia.org and
# outputs a CSV with test_name, avg_value1 (from CSV1), avg_value2 (from CSV2),
# perc_diff between avg_value1 and avg_value2.
# This script also discards NUM_OUTLIERS_TO_REMOVE min values and
# NUM_OUTLIERS_TO_REMOVE max values.
#
# The code below only uses constructs available in both Python 2 and Python 3.


import csv
import optparse
import sys
import re


# Placeholder written to the output when a test is absent from one of the CSVs.
MISSING_STR = 'N/A'
# Default number of samples dropped from each end of a row's sorted values.
NUM_OUTLIERS_TO_REMOVE = 2

# Pulls the test name out of a row id that ends in ',test=NAME,'.
_TEST_NAME_RE = re.compile(r'^.*,test=(.*),$')


def _open_csv(path, mode):
    """Open path with the flags the csv module expects on this interpreter.

    Args:
        path: File to open.
        mode: Either 'r' or 'w'.

    Returns:
        An open file object; the caller is responsible for closing it.
    """
    if sys.version_info[0] >= 3:
        # Python 3's csv module works on text files opened with newline=''.
        return open(path, mode, newline='')
    # Python 2's csv module reads from binary-mode files.
    return open(path, (mode + 'b') if mode == 'r' else mode)


def _trimmed_mean(vals, num_outliers):
    """Return the mean of vals after dropping num_outliers from each end.

    If vals has too few entries to survive trimming (2 * num_outliers or
    fewer), nothing is discarded so that an average can still be reported.

    Args:
        vals: Sequence of floats.
        num_outliers: Non-negative count to drop from each end once sorted.

    Returns:
        The mean as a float, or None when vals is empty.
    """
    if not vals:
        return None
    ordered = sorted(vals)
    if len(ordered) > 2 * num_outliers:
        # Explicit end index: a plain [n:-n] slice would be empty for n == 0.
        ordered = ordered[num_outliers:len(ordered) - num_outliers]
    return sum(ordered) / float(len(ordered))


def read_from_csv(csv_file, num_outliers=None):
    """Read a CSV and return the outlier-trimmed average for each test.

    The first row must be a header whose first cell is 'id'. In every other
    row the first cell is an id ending in ',test=NAME,' and the remaining
    cells are numeric samples. Blank rows and blank cells are skipped, and a
    row without any numeric sample is left out of the result.

    Args:
        csv_file: Path of the CSV file to read.
        num_outliers: How many of the smallest and of the largest samples to
            discard per row. Defaults to NUM_OUTLIERS_TO_REMOVE.

    Returns:
        Dict mapping test name to its average value (float).

    Raises:
        Exception: If the header is missing or wrong, or a row id does not
            contain a test name.
        ValueError: If num_outliers is negative or a cell is not numeric.
    """
    if num_outliers is None:
        num_outliers = NUM_OUTLIERS_TO_REMOVE
    if num_outliers < 0:
        raise ValueError('num_outliers must be >= 0, got %r' % (num_outliers,))

    test_to_avg = {}
    with _open_csv(csv_file, 'r') as f:
        csv_reader = csv.reader(f, delimiter=',')
        # First row should contain headers. Validate that it does.
        header_row = next(csv_reader, None)
        if not header_row or header_row[0] != 'id':
            raise Exception('%s in unexpected format' % csv_file)
        for row in csv_reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            match = _TEST_NAME_RE.search(row[0])
            if match is None:
                raise Exception(
                    '%s: could not find a test name in id %r'
                    % (csv_file, row[0]))
            samples = [float(cell) for cell in row[1:] if cell.strip()]
            avg_val = _trimmed_mean(samples, num_outliers)
            if avg_val is None:
                continue
            test_to_avg[match.group(1)] = avg_val
    return test_to_avg


def combine_results(d1, d2):
    """Merge two test -> average dicts into per-test comparison rows.

    Args:
        d1: Dict of test name to average from the first CSV.
        d2: Dict of test name to average from the second CSV.

    Returns:
        Dict mapping each test name found in either input to a dict with the
        keys 'test_name', 'csv1', 'csv2' and 'perc_diff'. A side that lacks
        the test holds MISSING_STR, and so does 'perc_diff' in that case.
        Otherwise 'perc_diff' is (csv2 - csv1) divided by the mean of the two
        values, times 100, or 0 when that mean is 0.
    """
    test_to_result = {}
    for test_name, v1 in d1.items():
        if test_name in d2:
            v2 = d2[test_name]
            diff = v2 - v1
            avg = (v2 + v1) / 2.0
            perc_diff = 0 if avg == 0 else diff / avg * 100
        else:
            v2 = MISSING_STR
            perc_diff = MISSING_STR
        test_to_result[test_name] = {
            'test_name': test_name,
            'csv1': v1,
            'csv2': v2,
            'perc_diff': perc_diff,
        }

    # Also add keys in d2 and not d1.
    for test_name, v2 in d2.items():
        if test_name in test_to_result:
            continue
        test_to_result[test_name] = {
            'test_name': test_name,
            'csv1': MISSING_STR,
            'csv2': v2,
            'perc_diff': MISSING_STR,
        }

    return test_to_result


def write_to_csv(output_dict, output_csv):
    """Write the comparison rows to output_csv, sorted by test name.

    Args:
        output_dict: Dict of test name to row dict, as returned by
            combine_results().
        output_csv: Path of the CSV file to create or overwrite.
    """
    fieldnames = ['test_name', 'csv1', 'csv2', 'perc_diff']
    with _open_csv(output_csv, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        # sorted() works on both a Python 2 key list and a Python 3 key view.
        for test in sorted(output_dict):
            writer.writerow(output_dict[test])


def parse_and_output(csv1, csv2, output_csv):
    """Average both input CSVs, compare them and write the result.

    Args:
        csv1: Path of the first input CSV.
        csv2: Path of the second input CSV.
        output_csv: Path of the CSV file to write.
    """
    test_to_avg1 = read_from_csv(csv1)
    test_to_avg2 = read_from_csv(csv2)
    output_dict = combine_results(test_to_avg1, test_to_avg2)
    write_to_csv(output_dict, output_csv)


def main():
    """Parse the command-line flags and run the comparison."""
    option_parser = optparse.OptionParser()
    option_parser.add_option(
        '--csv1', type='string',
        help='The first CSV to parse.')
    option_parser.add_option(
        '--csv2', type='string',
        help='The second CSV to parse.')
    option_parser.add_option(
        '--output_csv', type='string',
        help='The file to write the output CSV to.')
    options, _ = option_parser.parse_args()
    if not (options.csv1 and options.csv2 and options.output_csv):
        # Prints usage and exits with status 2 rather than failing in open().
        option_parser.error('--csv1, --csv2 and --output_csv are all required.')
    sys.exit(parse_and_output(options.csv1, options.csv2, options.output_csv))


if __name__ == '__main__':
    main()