#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.
"""

import os
import plistlib

#

class multidict:
    # A minimal dictionary-like container that maps each key to the list of
    # all values assigned to it.
    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]
    def __setitem__(self, key, value):
        if key in self.data:
            self.data[key].append(value)
        else:
            self.data[key] = [value]
    def items(self):
        return self.data.items()
    def values(self):
        return self.data.values()
    def keys(self):
        return self.data.keys()
    def __len__(self):
        return len(self.data)
    def get(self, key, default=None):
        return self.data.get(key, default)
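# Illustrative sketch, not part of the original tool: multidict accumulates
# every value assigned to a key, so repeated assignments build up a list.
# The keys and values below are made up for the example.
#
#   m = multidict([('a.c', 1), ('a.c', 2), ('b.c', 3)])
#   m['a.c']        # -> [1, 2]
#   m.get('b.c')    # -> [3]
#   len(m)          # -> 2 (number of distinct keys)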
#

class CmpOptions:
    def __init__(self, verboseLog=None, root=""):
        self.root = root
        self.verboseLog = verboseLog

class AnalysisReport:
    def __init__(self, run, files):
        self.run = run
        self.files = files

class AnalysisDiagnostic:
    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        loc = self.data['location']
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']
        category = self.data['category']
        description = self.data['description']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d, %s: %s' % (filename, line, column, category,
                                     description)

    def getReportData(self):
        if self.htmlReport is None:
            return " "
        return os.path.join(self.report.run.path, self.htmlReport)
        # We could also dump the report with:
        # return open(os.path.join(self.report.run.path,
        #                          self.htmlReport), "rb").read()

class AnalysisRun:
    def __init__(self, path, opts):
        self.path = path
        self.reports = []
        self.diagnostics = []
        self.opts = opts

    def getSourceName(self, path):
        # Strip the user-provided root prefix, if any, to get a shorter name
        # for the source file.
        if path.startswith(self.opts.root):
            return path[len(self.opts.root):]
        return path

def loadResults(path, opts, deleteEmpty=True):
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if (not f.startswith('report') or
            not f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        # Extract the HTML reports, if they exist.
        if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
            htmlFiles = []
            for d in data['diagnostics']:
                # FIXME: Why is this named files, and when does it have
                # multiple files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(data['diagnostics'])

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'),
                                       htmlFiles)]

        # Everything else in the plist should have been consumed by now.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a, b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = list(A.diagnostics)
    eltsB = list(B.diagnostics)
    eltsA.sort(key=lambda d: d.data)
    eltsB.sort(key=lambda d: d.data)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        if a.data['location'] == b.data['location']:
            res.append((a, b, 0))
        elif a.data > b.data:
            neqA.append(a)
            eltsB.append(b)
        else:
            neqB.append(b)
            eltsA.append(a)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
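# Illustrative sketch, not part of the original tool: per the docstring above,
# comparing two hypothetical runs might yield a relation like the following,
# where a1, a2 are diagnostics from run A and b1, b3 are from run B:
#
#   [(a1, b1, 0),        # same location in both runs, matched exactly
#    (a2, None, None),   # present only in run A (reported as REMOVED below)
#    (None, b3, None)]   # present only in run B (reported as ADDED below)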
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    diff = compareResults(resultsA, resultsB)
    foundDiffs = 0
    for res in diff:
        a, b, confidence = res
        if a is None:
            print "ADDED: %r" % b.getReadableName()
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReportData()))
        elif b is None:
            print "REMOVED: %r" % a.getReadableName()
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReportData()))
        elif confidence:
            print "CHANGED: %r to %r" % (a.getReadableName(),
                                         b.getReadableName())
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData()))
        else:
            pass

    TotalReports = len(resultsB.diagnostics)
    print "TOTAL REPORTS: %r" % TotalReports
    print "TOTAL DIFFERENCES: %r" % foundDiffs
    if auxLog:
        print >>auxLog, "('TOTAL NEW REPORTS', %r)" % TotalReports
        print >>auxLog, "('TOTAL DIFFERENCES', %r)" % foundDiffs

    return foundDiffs

def main():
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root", dest="root",
                      help="Prefix to ignore on source files",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    cmpScanBuildResults(dirA, dirB, opts)

if __name__ == '__main__':
    main()
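# Example usage (illustrative only; the directory and path names below are
# hypothetical analyzer output directories containing report*.plist files):
#
#   CmpRuns.py --root=/home/user/project/ scan-build-old/ scan-build-new/
#   CmpRuns.py --verbose-log=diffs.log scan-build-old/ scan-build-new/
#
# The tool prints an ADDED or REMOVED line for each differing report, followed
# by the total report and difference counts; with --verbose-log, the same
# information is also written to the given log file as tuple-formatted lines.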