#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

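    # Each triple classifies one report; for example (illustrative sketch,
    # mirroring how dumpScanBuildResultsDiff below consumes the result):
    for a, b, confidence in diff:
        if a is None:
            print "ADDED: %s" % b.getReadableName()
        elif b is None:
            print "REMOVED: %s" % a.getReadableName()
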
"""

import os
import plistlib

# Information about an analysis run:
# path - the analysis output directory
# root - the name of the root directory, which will be disregarded when
#        determining the source file name
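# Example (hypothetical paths):
#   SingleRunInfo('/tmp/scan-build-output-A', root='/src/projectA')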
class SingleRunInfo:
    def __init__(self, path, root="", verboseLog=None):
        self.path = path
        self.root = root
        self.verboseLog = verboseLog

class AnalysisDiagnostic:
    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if fileName.startswith(root):
            return fileName[len(root):]
        return fileName

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

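    # Build a key that identifies an issue independently of its position in
    # the plist output: the (root-stripped) file name plus, when present, the
    # analyzer's issue_context and issue_hash fields.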
    def getIssueIdentifier(self):
        id = self.getFileName() + "+"
        if 'issue_context' in self._data:
            id += self._data['issue_context'] + "+"
        if 'issue_hash' in self._data:
            id += str(self._data['issue_hash'])
        return id

    def getReport(self):
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note: the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data

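# Dictionary-like helper that keeps every value inserted for a key.
# Example (illustrative): after m = multidict([(1, 'a'), (1, 'b')]),
# m[1] == ['a', 'b'] -- repeated keys accumulate their values in a list
# rather than overwriting the previous one.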
class multidict:
    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]
    def __setitem__(self, key, value):
        if key in self.data:
            self.data[key].append(value)
        else:
            self.data[key] = [value]
    def items(self):
        return self.data.items()
    def values(self):
        return self.data.values()
    def keys(self):
        return self.data.keys()
    def __len__(self):
        return len(self.data)
    def get(self, key, default=None):
        return self.data.get(key, default)

class CmpOptions:
    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA = rootA
        self.rootB = rootB
        self.verboseLog = verboseLog

class AnalysisReport:
    def __init__(self, run, files):
        self.run = run
        self.files = files
        self.diagnostics = []

class AnalysisRun:
    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        return self.clang_version


# Backward compatibility API.
def loadResults(path, opts, root="", deleteEmpty=True):
    return loadResultsFromSingleRun(SingleRunInfo(path, root, opts.verboseLog),
                                    deleteEmpty)

# Load results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies whether empty plist files should be deleted
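# The output folder is assumed to follow the usual scan-build layout: the
# directory tree is walked and every *.plist file found is treated as one
# analysis report (this assumption simply mirrors the walk below).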
def loadResultsFromSingleRun(info, deleteEmpty=True):
    path = info.path
    run = AnalysisRun(info)

    for (dirpath, dirnames, filenames) in os.walk(path):
        for f in filenames:
            if not f.endswith('plist'):
                continue

            p = os.path.join(dirpath, f)
            data = plistlib.readPlist(p)

            # We want to retrieve the clang version even if there are no
            # reports. Assume that all reports were created using the same
            # clang version (this is always true and is more efficient).
            if 'clang_version' in data:
                if run.clang_version is None:
                    run.clang_version = data.pop('clang_version')
                else:
                    data.pop('clang_version')

            # Ignore/delete empty reports.
            if not data['files']:
                if deleteEmpty:
                    os.remove(p)
                continue

            # Extract the HTML reports, if they exist.
            if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
                htmlFiles = []
                for d in data['diagnostics']:
                    # FIXME: Why is this key named "files" when it only ever
                    # seems to contain a single file?
                    assert len(d['HTMLDiagnostics_files']) == 1
                    htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
            else:
                htmlFiles = [None] * len(data['diagnostics'])

            report = AnalysisReport(run, data.pop('files'))
            diagnostics = [AnalysisDiagnostic(d, report, h)
                           for d, h in zip(data.pop('diagnostics'),
                                           htmlFiles)]

            assert not data

            report.diagnostics.extend(diagnostics)
            run.reports.append(report)
            run.diagnostics.extend(diagnostics)

    return run

def cmpAnalysisDiagnostic(d):
    return d.getIssueIdentifier()

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
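
    For example, (diagA, diagB, 0) records an exact match between the two runs,
    (diagA, None, None) a report present only in run A, and (None, diagB, None)
    a report present only in run B.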
    """

    res = []

    # Quickly eliminate equal elements.
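    # Both lists are sorted by issue identifier and consumed from the back, so
    # the loop below is effectively a merge step: equal identifiers are paired
    # off with confidence 0, and whichever side currently holds the strictly
    # larger identifier is recorded as unmatched while the other element is
    # pushed back for the next iteration.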
    neqA = []
    neqB = []
    eltsA = list(A.diagnostics)
    eltsB = list(B.diagnostics)
    eltsA.sort(key=cmpAnalysisDiagnostic)
    eltsB.sort(key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        if a.getIssueIdentifier() == b.getIssueIdentifier():
            res.append((a, b, 0))
        elif a.getIssueIdentifier() > b.getIssueIdentifier():
            eltsB.append(b)
            neqA.append(a)
        else:
            eltsA.append(a)
            neqB.append(b)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res

def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    diff = compareResults(resultsA, resultsB)
    foundDiffs = 0
    for res in diff:
        a, b, confidence = res
        if a is None:
            print "ADDED: %r" % b.getReadableName()
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReport()))
        elif b is None:
            print "REMOVED: %r" % a.getReadableName()
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReport()))
        elif confidence:
            print "CHANGED: %r to %r" % (a.getReadableName(),
                                         b.getReadableName())
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()))
        else:
            pass

    TotalReports = len(resultsB.diagnostics)
    print "TOTAL REPORTS: %r" % TotalReports
    print "TOTAL DIFFERENCES: %r" % foundDiffs
    if auxLog:
        print >>auxLog, "('TOTAL NEW REPORTS', %r)" % TotalReports
        print >>auxLog, "('TOTAL DIFFERENCES', %r)" % foundDiffs

    return foundDiffs

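# Example invocation (hypothetical paths):
#   python CmpRuns.py --rootA /src/projA --rootB /src/projB \
#       scan-build-out-A/ scan-build-out-B/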
def main():
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)

if __name__ == '__main__':
    main()