# Copyright 2017 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Classes that draw conclusions out of a comparison and represent them."""

from collections import Counter


FORMAT_RED = '\033[01;31m{0}\033[00m'
FORMAT_GREEN = '\033[01;32m{0}\033[00m'
FORMAT_MAGENTA = '\033[01;35m{0}\033[00m'
FORMAT_CYAN = '\033[01;36m{0}\033[00m'
FORMAT_NORMAL = '{0}'

RATING_FAILURE = 'failure'
RATING_REGRESSION = 'regression'
RATING_IMPROVEMENT = 'improvement'
RATING_NO_CHANGE = 'no_change'
RATING_SMALL_CHANGE = 'small_change'

RATINGS = [
    RATING_FAILURE,
    RATING_REGRESSION,
    RATING_IMPROVEMENT,
    RATING_NO_CHANGE,
    RATING_SMALL_CHANGE
]

RATING_TO_COLOR = {
    RATING_FAILURE: FORMAT_MAGENTA,
    RATING_REGRESSION: FORMAT_RED,
    RATING_IMPROVEMENT: FORMAT_CYAN,
    RATING_NO_CHANGE: FORMAT_GREEN,
    RATING_SMALL_CHANGE: FORMAT_NORMAL,
}


class ComparisonConclusions(object):
  """All conclusions drawn from a comparison.

  This is initialized empty and then processes pairs of results for each test
  case, determining the rating for that case, which can be:
    "failure" if either or both runs for the case failed.
    "regression" if there is a significant increase in time for the test case.
    "improvement" if there is a significant decrease in time for the test
        case.
    "no_change" if the time for the test case did not change at all.
    "small_change" if the time for the test case changed, but stayed within
        the threshold.
  """

  def __init__(self, threshold_significant):
    """Initializes an empty ComparisonConclusions.

    Args:
      threshold_significant: Float with the tolerance beyond which changes in
          measurements are considered significant.

          The change is treated as multiplicative rather than additive, that
          is, a threshold_significant of 1.0 will flag test cases that became
          over 100% slower (> 200% of the previous time measured) or over
          100% faster (< 50% of the previous time measured).

          threshold_significant 0.02 -> 98.04% to 102% is not significant
          threshold_significant 0.1  -> 90.9% to 110% is not significant
          threshold_significant 0.25 -> 80% to 125% is not significant
          threshold_significant 1    -> 50% to 200% is not significant
          threshold_significant 4    -> 20% to 500% is not significant
    """
    self.threshold_significant = threshold_significant
    # Use 1.0 to force float division even if threshold_significant is an int.
    self.threshold_significant_negative = (
        (1.0 / (1 + threshold_significant)) - 1)

    self.params = {'threshold': threshold_significant}
    self.summary = ComparisonSummary()
    self.case_results = {}

  def ProcessCase(self, case_name, before, after):
    """Feeds the results of a test case to the ComparisonConclusions.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
    """

    # Switch 0 to None to simplify the json dict output. All zeros are
    # considered failed runs, so they will be represented by "null".
    if not before:
      before = None
    if not after:
      after = None

    if not before or not after:
      ratio = None
      rating = RATING_FAILURE
    else:
      ratio = (float(after) / before) - 1.0
      if ratio > self.threshold_significant:
        rating = RATING_REGRESSION
      elif ratio < self.threshold_significant_negative:
        rating = RATING_IMPROVEMENT
      elif ratio == 0:
        rating = RATING_NO_CHANGE
      else:
        rating = RATING_SMALL_CHANGE

    case_result = CaseResult(case_name, before, after, ratio, rating)

    self.summary.ProcessCaseResult(case_result)
    self.case_results[case_name] = case_result

  def GetSummary(self):
    """Gets the ComparisonSummary with consolidated totals."""
    return self.summary

  def GetCaseResults(self):
    """Gets a dict mapping each test case identifier to its CaseResult."""
    return self.case_results

  def GetOutputDict(self):
    """Returns a conclusions dict with all the conclusions drawn.

    Returns:
      A serializable dict with the format illustrated below:
      {
        "version": 1,
        "params": {
          "threshold": 0.02
        },
        "summary": {
          "total": 123,
          "failure": 1,
          "regression": 2,
          "improvement": 1,
          "no_change": 100,
          "small_change": 19
        },
        "comparison_by_case": {
          "testing/resources/new_test.pdf": {
            "before": None,
            "after": 1000,
            "ratio": None,
            "rating": "failure"
          },
          "testing/resources/test1.pdf": {
            "before": 100,
            "after": 120,
            "ratio": 0.2,
            "rating": "regression"
          },
          "testing/resources/test2.pdf": {
            "before": 100,
            "after": 2000,
            "ratio": 19.0,
            "rating": "regression"
          },
          "testing/resources/test3.pdf": {
            "before": 1000,
            "after": 1005,
            "ratio": 0.005,
            "rating": "small_change"
          },
          "testing/resources/test4.pdf": {
            "before": 1000,
            "after": 1000,
            "ratio": 0.0,
            "rating": "no_change"
          },
          "testing/resources/test5.pdf": {
            "before": 1000,
            "after": 600,
            "ratio": -0.4,
            "rating": "improvement"
          }
        }
      }
    """
    output_dict = {}
    output_dict['version'] = 1
    output_dict['params'] = {'threshold': self.threshold_significant}
    output_dict['summary'] = self.summary.GetOutputDict()
    output_dict['comparison_by_case'] = {
        cr.case_name.decode('utf-8'): cr.GetOutputDict()
        for cr in self.GetCaseResults().values()
    }
    return output_dict
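

# A worked example of the rating math above (hypothetical numbers, not from a
# real run): with threshold_significant=0.25 the "not significant" band for
# |ratio| is (1.0 / 1.25 - 1, 0.25) = (-0.2, 0.25). A case going from 100 to
# 130 has ratio = 130.0 / 100 - 1 = 0.3 > 0.25, so it is rated "regression";
# a case going from 100 to 85 has ratio = -0.15, which falls inside the band,
# so it is rated "small_change".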


class ComparisonSummary(object):
  """Totals computed for a comparison."""

  def __init__(self):
    self.rating_counter = Counter()

  def ProcessCaseResult(self, case_result):
    """Tallies the rating of a single CaseResult into the totals."""
    self.rating_counter[case_result.rating] += 1

  def GetTotal(self):
    """Gets the number of test cases processed."""
    return sum(self.rating_counter.values())

  def GetCount(self, rating):
    """Gets the number of test cases processed with a given rating."""
    return self.rating_counter[rating]

  def GetOutputDict(self):
    """Returns a dict that can be serialized with all the totals."""
    result = {'total': self.GetTotal()}
    for rating in RATINGS:
      result[rating] = self.GetCount(rating)
    return result
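

# A minimal sketch of driving ComparisonSummary by hand (the file name,
# measurements and rating below are hypothetical):
#
#   summary = ComparisonSummary()
#   summary.ProcessCaseResult(
#       CaseResult('test1.pdf', 100, 120, 0.2, RATING_REGRESSION))
#   summary.GetOutputDict()
#   # -> {'total': 1, 'failure': 0, 'regression': 1, 'improvement': 0,
#   #     'no_change': 0, 'small_change': 0}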


class CaseResult(object):
  """The conclusion for the comparison of a single test case."""

  def __init__(self, case_name, before, after, ratio, rating):
    """Initializes a CaseResult.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
      ratio: Difference between |after| and |before| as a fraction of
          |before|.
      rating: Rating for this test case.
    """
    self.case_name = case_name
    self.before = before
    self.after = after
    self.ratio = ratio
    self.rating = rating

  def GetOutputDict(self):
    """Returns a dict with the test case's conclusions."""
    return {'before': self.before,
            'after': self.after,
            'ratio': self.ratio,
            'rating': self.rating}


def PrintConclusionsDictHumanReadable(conclusions_dict, colored, key=None):
  """Prints a conclusions dict in a human-readable way.

  Args:
    conclusions_dict: Dict to print.
    colored: Whether to color the output to highlight significant changes.
    key: String with the CaseResult dictionary key to sort the cases by.
  """
  # Print the header.
  print '=' * 80
  print '{0:>11s} {1:>15s} {2}'.format(
      '% Change',
      'Time after',
      'Test case')
  print '-' * 80

  color = FORMAT_NORMAL

  # Print the individual cases.
  if key is not None:
    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems(),
                        key=lambda kv: kv[1][key])
  else:
    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems())

  for case_name, case_dict in case_pairs:
    if colored:
      color = RATING_TO_COLOR[case_dict['rating']]

    if case_dict['rating'] == RATING_FAILURE:
      print u'{} to measure time for {}'.format(
          color.format('Failed'),
          case_name).encode('utf-8')
      continue

    print u'{0} {1:15,d} {2}'.format(
        color.format('{:+11.4%}'.format(case_dict['ratio'])),
        case_dict['after'],
        case_name).encode('utf-8')

  # Print the totals.
  totals = conclusions_dict['summary']
  print '=' * 80
  print 'Test cases run: %d' % totals['total']

  if colored:
    color = FORMAT_MAGENTA if totals[RATING_FAILURE] else FORMAT_GREEN
  print ('Failed to measure: %s'
         % color.format(totals[RATING_FAILURE]))

  if colored:
    color = FORMAT_RED if totals[RATING_REGRESSION] else FORMAT_GREEN
  print ('Regressions: %s'
         % color.format(totals[RATING_REGRESSION]))

  if colored:
    color = FORMAT_CYAN if totals[RATING_IMPROVEMENT] else FORMAT_GREEN
  print ('Improvements: %s'
         % color.format(totals[RATING_IMPROVEMENT]))
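

if __name__ == '__main__':
  # A self-contained demo of the flow above (a minimal sketch; the test case
  # names and timings are hypothetical, not measurements from a real run).
  demo = ComparisonConclusions(threshold_significant=0.02)
  demo.ProcessCase('testing/resources/test1.pdf', 100, 120)    # regression
  demo.ProcessCase('testing/resources/test4.pdf', 1000, 1000)  # no_change
  demo.ProcessCase('testing/resources/new_test.pdf', 0, 1000)  # failure
  PrintConclusionsDictHumanReadable(
      demo.GetOutputDict(), colored=False, key='ratio')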