#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""

import os
import plistlib

try:
    # Nothing in this module uses the name; the import is kept only so that
    # existing behaviour is preserved, and guarded so the module still loads
    # when no module called CmpRuns is importable.
    import CmpRuns  # noqa: F401
except ImportError:
    pass


def _loadPlist(path):
    """Parse the plist file at 'path' and return its top-level object.

    plistlib.readPlist() was deprecated in Python 3.4 and removed in 3.9, so
    prefer plistlib.load() and only fall back on interpreters that lack it.
    """
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as stream:
            return plistlib.load(stream)
    return plistlib.readPlist(path)


# Information about analysis run:
# path - the analysis output directory
# root - the name of the root directory, which will be disregarded when
# determining the source file name
class SingleRunInfo(object):
    def __init__(self, path, root="", verboseLog=None):
        self.path = path
        # Trailing separators are dropped so that prefix stripping in
        # AnalysisDiagnostic.getFileName() can skip exactly one separator.
        self.root = root.rstrip("/\\")
        self.verboseLog = verboseLog


class AnalysisDiagnostic(object):
    """A single diagnostic entry taken from an analyzer plist report."""

    def __init__(self, data, report, htmlReport):
        # data       - the raw diagnostic dictionary from the plist
        # report     - the AnalysisReport this diagnostic belongs to
        # htmlReport - file name of the matching HTML report, or None
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if root and fileName.startswith(root):
            # The extra character skipped is the separator following root.
            return fileName[len(root) + 1:]
        return fileName

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the key used to match this diagnostic across runs.

        The key is the file name followed by the issue context and the
        content hash of the line, for whichever of those the data provides.
        """
        identifier = self.getFileName() + "+"
        if 'issue_context' in self._data:
            identifier += self._data['issue_context'] + "+"
        if 'issue_hash_content_of_line_in_context' in self._data:
            identifier += str(
                self._data['issue_hash_content_of_line_in_context'])
        return identifier

    def getReport(self):
        """Return the path of the HTML report, or " " when there is none."""
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data


class multidict(object):
    """A mapping in which assigning to a key appends to that key's list."""

    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Assignment never overwrites: values accumulate per key.
        if key in self.data:
            self.data[key].append(value)
        else:
            self.data[key] = [value]

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)


class CmpOptions(object):
    """Option holder with the attributes read by dumpScanBuildResultsDiff."""

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA = rootA
        self.rootB = rootB
        self.verboseLog = verboseLog


class AnalysisReport(object):
    """The contents of one plist file: its file table and its diagnostics."""

    def __init__(self, run, files):
        self.run = run
        self.files = files
        self.diagnostics = []


class AnalysisRun(object):
    """All reports and diagnostics loaded for one analyzer run."""

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """Load the plist file 'p' and record its report and diagnostics.

        A plist with an empty 'files' entry is skipped, and is also removed
        from disk when deleteEmpty is true.
        """
        data = _loadPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        version = data.pop('clang_version', None)
        if version is not None and self.clang_version is None:
            self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides for the whole file; an empty diagnostics list is tolerated
        # instead of raising IndexError.
        rawDiagnostics = data['diagnostics']
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        # Every key is expected to have been consumed by now.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)


# Backward compatibility API.
def loadResults(path, opts, root="", deleteEmpty=True):
    return loadResultsFromSingleRun(SingleRunInfo(path, root, opts.verboseLog),
                                    deleteEmpty)

# Load results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Build an AnalysisRun from info.path.

    info.path may be a single file, which is read directly, or a directory,
    which is walked recursively reading every file whose name ends in
    'plist'.
    """
    path = info.path
    run = AnalysisRun(info)

    if os.path.isfile(path):
        run.readSingleFile(path, deleteEmpty)
    else:
        for (dirpath, dirnames, filenames) in os.walk(path):
            for f in filenames:
                if not f.endswith('plist'):
                    continue
                run.readSingleFile(os.path.join(dirpath, f), deleteEmpty)

    return run


def cmpAnalysisDiagnostic(d):
    """Sort key for diagnostics: their issue identifier."""
    return d.getIssueIdentifier()


def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Quickly eliminate equal elements. Both lists are sorted by identifier
    # and consumed from the high end; whichever side holds the larger
    # identifier cannot have a partner on the other side.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=cmpAnalysisDiagnostic)
    eltsB = sorted(B.diagnostics, key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        # Compute each identifier once per comparison.
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            res.append((a, b, 0))
        elif idA > idB:
            eltsB.append(b)
            neqA.append(a)
        else:
            eltsA.append(a)
            neqB.append(b)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    res.extend((a, None, None) for a in neqA)
    res.extend((None, b, None) for b in neqB)

    return res


def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the runs stored in dirA and dirB.

    opts must provide rootA, rootB and verboseLog. When opts.verboseLog is
    set, one tuple-formatted line per difference is also written to that
    file.

    Returns (number of differences, diagnostics in A, diagnostics in B).
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. Text mode is required for writing str
    # on Python 3.
    auxLog = open(opts.verboseLog, "w") if opts.verboseLog else None

    try:
        foundDiffs = 0
        for a, b, confidence in compareResults(resultsA, resultsB):
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReport()) + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReport()) + "\n")
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()) + "\n")
            # Exact matches (confidence 0) are not reported.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)" % TotalReports + "\n")
            auxLog.write("('TOTAL DIFFERENCES', %r)" % foundDiffs + "\n")
    finally:
        # Close the log even if reporting fails part-way through.
        if auxLog:
            auxLog.close()

    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)


def main():
    """Command-line entry point: compare the two directories given as args."""
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)


if __name__ == '__main__':
    main()