#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    # root - the name of the root directory, which will be disregarded when
    # determining the source file name
    #
    resultsA = loadResults(dirA, opts, root, deleteEmpty)
    resultsB = loadResults(dirB, opts, root, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""

import os
import plistlib


#
class AnalysisDiagnostic:
    """A single diagnostic entry taken from one analyzer plist report.

    data       - the diagnostic dictionary from the plist; it must contain a
                 'location' entry.
    report     - the AnalysisReport this diagnostic belongs to.
    htmlReport - name of the matching HTML report file (relative to the run
                 directory), or None when there is none.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        run = self._report.run
        return run.getSourceName(self._report.files[self._loc['file']])

    def getLine(self):
        """Return the 'line' field of the diagnostic location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' field of the diagnostic location."""
        return self._loc['col']

    def getCategory(self):
        """Return the diagnostic's 'category' field."""
        return self._data['category']

    def getDescription(self):
        """Return the diagnostic's 'description' field."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the string used to match this diagnostic across runs.

        It is built from the optional 'issue_context' and 'issue_hash'
        fields (each followed by ':'), then ':' and the source file name.
        """
        identifier = ''
        if 'issue_context' in self._data:
            identifier += self._data['issue_context'] + ":"
        if 'issue_hash' in self._data:
            identifier += str(self._data['issue_hash']) + ":"
        return identifier + ":" + self.getFileName()

    def getReport(self):
        """Return the path of the HTML report, or " " if there is none."""
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return 'file:line:col, category: description' for display."""
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())


class multidict:
    """A mapping in which every key holds the list of values assigned to it.

    Assigning to a key appends to that key's list instead of replacing it.
    """

    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Append rather than overwrite; create the list on first use.
        self.data.setdefault(key, []).append(value)

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)


#

class CmpOptions:
    """Options for a comparison: root prefixes and optional verbose log."""

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA = rootA
        self.rootB = rootB
        self.verboseLog = verboseLog


class AnalysisReport:
    """One plist report: the owning run plus the report's 'files' list."""

    def __init__(self, run, files):
        self.run = run
        self.files = files


class AnalysisRun:
    """All reports and diagnostics loaded from one results directory."""

    def __init__(self, path, root, opts):
        self.path = path
        self.root = root
        self.reports = []
        self.diagnostics = []
        self.opts = opts

    def getSourceName(self, path):
        """Return path with the leading root prefix removed, if present."""
        if path.startswith(self.root):
            return path[len(self.root):]
        return path


def _readPlist(path):
    """Parse the plist file at path and return its top-level object.

    plistlib.readPlist() was removed in Python 3.9, so plistlib.load() is
    used whenever it is available; older interpreters fall back to
    readPlist().
    """
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as fp:
            return plistlib.load(fp)
    return plistlib.readPlist(path)


def loadResults(path, opts, root="", deleteEmpty=True):
    """Load every 'report*plist' file in path into a new AnalysisRun.

    path        - directory containing the plist reports.
    opts        - options object stored on the returned run.
    root        - prefix to strip from source file names.
    deleteEmpty - when true, reports with an empty 'files' list are removed
                  from disk; such reports are skipped either way.

    Returns the populated AnalysisRun.
    """
    run = AnalysisRun(path, root, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = _readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        diagnosticsData = data.pop('diagnostics')

        # Extract the HTML reports, if they exist.  The emptiness check
        # keeps a report that lists files but no diagnostics from raising
        # IndexError.
        if diagnosticsData and 'HTMLDiagnostics_files' in diagnosticsData[0]:
            htmlFiles = []
            for d in diagnosticsData:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(diagnosticsData)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(diagnosticsData, htmlFiles)]

        # Sanity check: 'files' and 'diagnostics' are the only keys expected.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run


def cmpAnalysisDiagnostic(d):
    """Sort key for diagnostics: the issue identifier."""
    return d.getIssueIdentifier()


def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=cmpAnalysisDiagnostic)
    eltsB = sorted(B.diagnostics, key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            res.append((a, b, 0))
        elif idA > idB:
            # The merge must advance on the same key the lists were sorted
            # by; a has the larger identifier, so nothing left in B can
            # match it.
            neqA.append(a)
            eltsB.append(b)
        else:
            neqB.append(b)
            eltsA.append(a)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res


def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Compare the results in dirA and dirB and print the differences.

    dirA, dirB  - result directories of the two runs.
    opts        - object providing rootA, rootB and verboseLog.
    deleteEmpty - forwarded to loadResults().

    Prints one line per added/removed/changed report plus totals to stdout,
    mirrors them as tuples into opts.verboseLog when it is set, and returns
    the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.  Only text is written to it, so it is
    # opened in text mode.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(), b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(), a.getReport()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()))
            # Otherwise the two diagnostics matched exactly: nothing to do.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        # Always release the log file, even if comparing or printing fails.
        if auxLog:
            auxLog.close()

    return foundDiffs


def main():
    """Parse the command line and print the diff of the two given runs."""
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type="string", default="")
    parser.add_option("--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type="string", default="")
    parser.add_option("--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG "
                           "[default=None]",
                      action="store", type="string", default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)


if __name__ == '__main__':
    main()