#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    # root - the name of the root directory, which will be disregarded when
    # determining the source file name.
    #
    resultsA = loadResults(dirA, opts, root, deleteEmpty)
    resultsB = loadResults(dirB, opts, root, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""

import os
import plistlib

class AnalysisDiagnostic:
    """A single diagnostic from one analyzer run, with its report context."""

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        return self._report.run.getSourceName(
            self._report.files[self._loc['file']])

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        # Build a stable key for matching diagnostics across runs:
        # "<context>:<hash>::<file>", with missing parts omitted.
        id = ''
        if 'issue_context' in self._data:
            id += self._data['issue_context'] + ":"
        if 'issue_hash' in self._data:
            id += str(self._data['issue_hash']) + ":"
        return id + ":" + self.getFileName()

    def getReport(self):
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())
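
    # Illustration only (hypothetical values): getReadableName() renders a
    # diagnostic as '<file>:<line>:<col>, <category>: <description>', e.g.
    #
    #   'lib/foo.c:12:5, Logic error: Dereference of null pointer'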

class multidict:
    """A dict-like container mapping each key to the list of all values
    assigned to it."""

    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        if key in self.data:
            self.data[key].append(value)
        else:
            self.data[key] = [value]

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)
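
# A quick sketch of multidict's behavior (illustration only):
#
#   md = multidict([('a', 1), ('a', 2)])
#   md['a']      # -> [1, 2]
#   md['a'] = 3  # appends, so md['a'] -> [1, 2, 3]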

class CmpOptions:
    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA = rootA
        self.rootB = rootB
        self.verboseLog = verboseLog

class AnalysisReport:
    def __init__(self, run, files):
        self.run = run
        self.files = files

class AnalysisRun:
    def __init__(self, path, root, opts):
        self.path = path
        self.root = root
        self.reports = []
        self.diagnostics = []
        self.opts = opts

    def getSourceName(self, path):
        # Strip the run's root prefix, so the same file compares equal
        # across runs built in different directories.
        if path.startswith(self.root):
            return path[len(self.root):]
        return path

def loadResults(path, opts, root="", deleteEmpty=True):
    run = AnalysisRun(path, root, opts)

    for f in os.listdir(path):
        if (not f.startswith('report') or
            not f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        # Extract the HTML reports, if they exist.
        if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
            htmlFiles = []
            for d in data['diagnostics']:
                # FIXME: Why is this named files, and when does it have
                # multiple files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(data['diagnostics'])

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'),
                                       htmlFiles)]

        # Every key in the plist should have been consumed by now.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
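
# For reference, the plist layout this loader consumes (inferred from the
# keys used above; newer analyzer versions may emit additional keys):
#
#   {'files': [<source paths>],
#    'diagnostics': [
#        {'location': {'file': <index into files>, 'line': ..., 'col': ...},
#         'category': ..., 'description': ...,
#         'issue_context': ...,              # optional
#         'issue_hash': ...,                 # optional
#         'HTMLDiagnostics_files': [<path>]  # optional
#        }, ...]}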

# Sort key for diagnostics; despite the historical name, this is a key
# function, not an old-style cmp function.
def cmpAnalysisDiagnostic(d):
    return d.getIssueIdentifier()

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Quickly eliminate equal elements by walking both runs in sorted order.
    neqA = []
    neqB = []
    eltsA = list(A.diagnostics)
    eltsB = list(B.diagnostics)
    eltsA.sort(key=cmpAnalysisDiagnostic)
    eltsB.sort(key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        if a.getIssueIdentifier() == b.getIssueIdentifier():
            res.append((a, b, 0))
        elif a.getIssueIdentifier() > b.getIssueIdentifier():
            # a sorts after b, so a cannot match anything left in B; put b
            # back for the next comparison.
            neqA.append(a)
            eltsB.append(b)
        else:
            neqB.append(b)
            eltsA.append(a)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
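
# Typical use of the relation (sketch; assumes runA and runB were produced by
# loadResults):
#
#   diff = compareResults(runA, runB)
#   added   = [b for a, b, c in diff if a is None]
#   removed = [a for a, b, c in diff if b is None]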

def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    diff = compareResults(resultsA, resultsB)
    foundDiffs = 0
    for a, b, confidence in diff:
        if a is None:
            print "ADDED: %r" % b.getReadableName()
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReport()))
        elif b is None:
            print "REMOVED: %r" % a.getReadableName()
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReport()))
        elif confidence:
            print "CHANGED: %r to %r" % (a.getReadableName(),
                                         b.getReadableName())
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()))
        else:
            # Exact match (confidence 0); nothing to report.
            pass

    totalReports = len(resultsB.diagnostics)
    print "TOTAL REPORTS: %r" % totalReports
    print "TOTAL DIFFERENCES: %r" % foundDiffs
    if auxLog:
        print >>auxLog, "('TOTAL NEW REPORTS', %r)" % totalReports
        print >>auxLog, "('TOTAL DIFFERENCES', %r)" % foundDiffs
        auxLog.close()

    return foundDiffs

def main():
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    parser.add_option("--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)

if __name__ == '__main__':
    main()
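
# Example invocation (directory names are hypothetical scan-build output
# directories):
#
#   python CmpRuns.py --rootA /src/old --rootB /src/new outA outB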