# Copyright 2017 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Classes that draw conclusions out of a comparison and represent them."""

from collections import Counter

FORMAT_RED = '\033[01;31m{0}\033[00m'
FORMAT_GREEN = '\033[01;32m{0}\033[00m'
FORMAT_MAGENTA = '\033[01;35m{0}\033[00m'
FORMAT_CYAN = '\033[01;36m{0}\033[00m'
FORMAT_NORMAL = '{0}'

RATING_FAILURE = 'failure'
RATING_REGRESSION = 'regression'
RATING_IMPROVEMENT = 'improvement'
RATING_NO_CHANGE = 'no_change'
RATING_SMALL_CHANGE = 'small_change'

RATINGS = [
    RATING_FAILURE, RATING_REGRESSION, RATING_IMPROVEMENT, RATING_NO_CHANGE,
    RATING_SMALL_CHANGE
]

RATING_TO_COLOR = {
    RATING_FAILURE: FORMAT_MAGENTA,
    RATING_REGRESSION: FORMAT_RED,
    RATING_IMPROVEMENT: FORMAT_CYAN,
    RATING_NO_CHANGE: FORMAT_GREEN,
    RATING_SMALL_CHANGE: FORMAT_NORMAL,
}


class ComparisonConclusions(object):
  """All conclusions drawn from a comparison.

  This is initialized empty and then processes pairs of results for each test
  case, determining the rating for that case, which can be:
    "failure" if either or both runs for the case failed.
    "regression" if there is a significant increase in time for the test case.
    "improvement" if there is a significant decrease in time for the test
        case.
    "no_change" if the time for the test case did not change at all.
    "small_change" if the time for the test case changed, but within the
        threshold.
  """

  def __init__(self, threshold_significant):
    """Initializes an empty ComparisonConclusions.

    Args:
      threshold_significant: Float with the tolerance beyond which changes in
          measurements are considered significant.

          The threshold is multiplicative rather than additive, that is, a
          threshold_significant of 1.0 flags test cases that became over 100%
          slower (> 200% of the previous time measured) or over 100% faster
          (< 50% of the previous time measured).

          threshold_significant 0.02 -> 98.04% to 102% is not significant
          threshold_significant 0.1  -> 90.9% to 110% is not significant
          threshold_significant 0.25 -> 80% to 125% is not significant
          threshold_significant 1    -> 50% to 200% is not significant
          threshold_significant 4    -> 20% to 500% is not significant
    """
    self.threshold_significant = threshold_significant
    # Use float arithmetic so that an integer threshold (as in the docstring
    # examples above) does not truncate to -1 under Python 2 integer division.
    self.threshold_significant_negative = (
        1.0 / (1.0 + threshold_significant)) - 1.0

    self.params = {'threshold': threshold_significant}
    self.summary = ComparisonSummary()
    self.case_results = {}

  def ProcessCase(self, case_name, before, after):
    """Feeds the results of one test case to the ComparisonConclusions.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
    """

    # Switch 0 to None to simplify the json dict output. All zeros are
    # considered failed runs, so they will be represented by "null".
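    # Worked example of the rating logic below, using values from the
    # GetOutputDict docstring: with threshold_significant=0.02, before=100 and
    # after=120 give ratio = 120/100 - 1 = 0.2 > 0.02, so the case is rated
    # "regression"; before=1000 and after=1005 give ratio = 0.005, within the
    # threshold, so the case is rated "small_change".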
    if not before:
      before = None
    if not after:
      after = None

    if not before or not after:
      ratio = None
      rating = RATING_FAILURE
    else:
      ratio = (float(after) / before) - 1.0
      if ratio > self.threshold_significant:
        rating = RATING_REGRESSION
      elif ratio < self.threshold_significant_negative:
        rating = RATING_IMPROVEMENT
      elif ratio == 0:
        rating = RATING_NO_CHANGE
      else:
        rating = RATING_SMALL_CHANGE

    case_result = CaseResult(case_name, before, after, ratio, rating)

    self.summary.ProcessCaseResult(case_result)
    self.case_results[case_name] = case_result

  def GetSummary(self):
    """Gets the ComparisonSummary with consolidated totals."""
    return self.summary

  def GetCaseResults(self):
    """Gets a dict mapping each test case identifier to its CaseResult."""
    return self.case_results

  def GetOutputDict(self):
    """Returns a conclusions dict with all the conclusions drawn.

    Returns:
      A serializable dict with the format illustrated below:
      {
        "version": 1,
        "params": {
          "threshold": 0.02
        },
        "summary": {
          "total": 123,
          "failure": 1,
          "regression": 2,
          "improvement": 1,
          "no_change": 100,
          "small_change": 19
        },
        "comparison_by_case": {
          "testing/resources/new_test.pdf": {
            "before": None,
            "after": 1000,
            "ratio": None,
            "rating": "failure"
          },
          "testing/resources/test1.pdf": {
            "before": 100,
            "after": 120,
            "ratio": 0.2,
            "rating": "regression"
          },
          "testing/resources/test2.pdf": {
            "before": 100,
            "after": 2000,
            "ratio": 19.0,
            "rating": "regression"
          },
          "testing/resources/test3.pdf": {
            "before": 1000,
            "after": 1005,
            "ratio": 0.005,
            "rating": "small_change"
          },
          "testing/resources/test4.pdf": {
            "before": 1000,
            "after": 1000,
            "ratio": 0.0,
            "rating": "no_change"
          },
          "testing/resources/test5.pdf": {
            "before": 1000,
            "after": 600,
            "ratio": -0.4,
            "rating": "improvement"
          }
        }
      }
    """
    output_dict = {}
    output_dict['version'] = 1
    output_dict['params'] = {'threshold': self.threshold_significant}
    output_dict['summary'] = self.summary.GetOutputDict()
    output_dict['comparison_by_case'] = {
        cr.case_name.decode('utf-8'): cr.GetOutputDict()
        for cr in self.GetCaseResults().values()
    }
    return output_dict


class ComparisonSummary(object):
  """Totals computed for a comparison."""

  def __init__(self):
    self.rating_counter = Counter()

  def ProcessCaseResult(self, case_result):
    """Tallies the rating of a single CaseResult into the totals."""
    self.rating_counter[case_result.rating] += 1

  def GetTotal(self):
    """Gets the number of test cases processed."""
    return sum(self.rating_counter.values())

  def GetCount(self, rating):
    """Gets the number of test cases processed with a given rating."""
    return self.rating_counter[rating]

  def GetOutputDict(self):
    """Returns a dict that can be serialized with all the totals."""
    result = {'total': self.GetTotal()}
    for rating in RATINGS:
      result[rating] = self.GetCount(rating)
    return result
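

# A minimal sketch of how ComparisonSummary is driven (not part of the
# original module; the values come from the GetOutputDict docstring above):
#
#   summary = ComparisonSummary()
#   summary.ProcessCaseResult(
#       CaseResult('testing/resources/test1.pdf', 100, 120, 0.2,
#                  RATING_REGRESSION))
#   summary.GetTotal()                   # -> 1
#   summary.GetCount(RATING_REGRESSION)  # -> 1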


class CaseResult(object):
  """The conclusion for the comparison of a single test case."""

  def __init__(self, case_name, before, after, ratio, rating):
    """Initializes a CaseResult.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
      ratio: Difference between |after| and |before| as a fraction of
          |before|.
      rating: Rating for this test case.
    """
    self.case_name = case_name
    self.before = before
    self.after = after
    self.ratio = ratio
    self.rating = rating

  def GetOutputDict(self):
    """Returns a dict with the test case's conclusions."""
    return {
        'before': self.before,
        'after': self.after,
        'ratio': self.ratio,
        'rating': self.rating
    }


def PrintConclusionsDictHumanReadable(conclusions_dict, colored, key=None):
  """Prints a conclusions dict in a human-readable way.

  Args:
    conclusions_dict: Dict to print.
    colored: Whether to color the output to highlight significant changes.
    key: String with the CaseResult dictionary key by which to sort the cases.
  """
  # Print header
  print '=' * 80
  print '{0:>11s} {1:>15s} {2}'.format('% Change', 'Time after', 'Test case')
  print '-' * 80

  color = FORMAT_NORMAL

  # Print cases
  if key is not None:
    case_pairs = sorted(
        conclusions_dict['comparison_by_case'].iteritems(),
        key=lambda kv: kv[1][key])
  else:
    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems())

  for case_name, case_dict in case_pairs:
    if colored:
      color = RATING_TO_COLOR[case_dict['rating']]

    if case_dict['rating'] == RATING_FAILURE:
      print u'{} to measure time for {}'.format(
          color.format('Failed'), case_name).encode('utf-8')
      continue

    print u'{0} {1:15,d} {2}'.format(
        color.format('{:+11.4%}'.format(case_dict['ratio'])),
        case_dict['after'], case_name).encode('utf-8')

  # Print totals
  totals = conclusions_dict['summary']
  print '=' * 80
  print 'Test cases run: %d' % totals['total']

  if colored:
    color = FORMAT_MAGENTA if totals[RATING_FAILURE] else FORMAT_GREEN
  print 'Failed to measure: %s' % color.format(totals[RATING_FAILURE])

  if colored:
    color = FORMAT_RED if totals[RATING_REGRESSION] else FORMAT_GREEN
  print 'Regressions: %s' % color.format(totals[RATING_REGRESSION])

  if colored:
    color = FORMAT_CYAN if totals[RATING_IMPROVEMENT] else FORMAT_GREEN
  print 'Improvements: %s' % color.format(totals[RATING_IMPROVEMENT])
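

# The block below is not part of the original module; it is a minimal usage
# sketch that exercises the classes above end to end with made-up
# measurements (a zero measurement stands for a failed run).
if __name__ == '__main__':
  conclusions = ComparisonConclusions(threshold_significant=0.02)
  conclusions.ProcessCase('testing/resources/test1.pdf', 100, 120)
  conclusions.ProcessCase('testing/resources/test4.pdf', 1000, 1000)
  conclusions.ProcessCase('testing/resources/new_test.pdf', 0, 1000)
  PrintConclusionsDictHumanReadable(conclusions.GetOutputDict(), colored=False)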