• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2"""
3    Author: Bryan Gillespie
4
5    A massively parallel gcov wrapper for generating intermediate coverage formats fast
6
7    The goal of fastcov is to generate code coverage intermediate formats as fast as possible
8    (ideally < 1 second), even for large projects with hundreds of gcda objects. The intermediate
9    formats may then be consumed by a report generator such as lcov's genhtml, or a dedicated front
10    end such as coveralls.
11
12    Sample Usage:
13        $ cd build_dir
14        $ ./fastcov.py --zerocounters
15        $ <run unit tests>
16        $ ./fastcov.py --exclude /usr/include test/ --lcov -o report.info
17        $ genhtml -o code_coverage report.info
18"""
19
20import re
21import os
22import sys
23import glob
24import json
25import time
26import argparse
27import threading
28import subprocess
29import multiprocessing
30
# Oldest gcov release accepted by main(); compared as a (major, minor, patch) tuple
MINIMUM_GCOV = (9,0,0)
# Floor used when computing how many items each worker thread is handed
MINIMUM_CHUNK_SIZE = 5

# Interesting metrics
START_TIME = time.time()  # Reference point for stopwatch(); replaced on every stopwatch() call
GCOVS_TOTAL = []          # One entry per parsed gcov JSON line: number of files it reported
GCOVS_SKIPPED = []        # One entry per parsed gcov JSON line: number of files dropped by the filters
38
def chunks(l, n):
    """Generate consecutive slices of l holding n items each (the final slice may be shorter)."""
    yield from (l[start:start + n] for start in range(0, len(l), n))
43
def stopwatch():
    """Report the seconds elapsed since the previous call and restart the timer."""
    global START_TIME
    now = time.time()
    # Swap in the new reference point while keeping the old one for the subtraction
    previous, START_TIME = START_TIME, now
    return now - previous
51
def parseVersionFromLine(version_str):
    """Extract the first dotted three-part integer version found in version_str.

    Returns the version as a tuple of ints, e.g. "gcov (GCC) 9.1.0" -> (9, 1, 0),
    or (0,0,0) when the string contains no such version.
    """
    # A lookahead is used instead of consuming a trailing character so that a
    # version sitting at the very end of the string still matches. Refusing a
    # following dot or digit keeps the match from being a truncated prefix
    # (e.g. "9.1.1" out of "9.1.10") or the head of a longer dotted sequence.
    version = re.search(r'(\d+\.\d+\.\d+)(?![.\d])', version_str)

    if not version:
        return (0,0,0)

    return tuple(int(part) for part in version.group(1).split("."))
60
def getGcovVersion(gcov):
    """Run `<gcov> -v` and return the version parsed from the first line of its stdout."""
    proc = subprocess.Popen([gcov, "-v"], stdout=subprocess.PIPE)
    stdout_bytes, _ = proc.communicate()
    proc.wait()
    first_line = stdout_bytes.decode('UTF-8').split("\n")[0]
    return parseVersionFromLine(first_line)
66
def removeFiles(files):
    """Delete each path listed in files from disk, in the order given."""
    for path in files:
        os.remove(path)
70
def getFilteredGcdaFiles(gcda_files, exclude):
    """Return the gcda paths that contain none of the substrings listed in exclude."""
    return [
        gcda for gcda in gcda_files
        if not any(pattern in gcda for pattern in exclude)
    ]
78
def getGcdaFiles(cwd, gcda_files):
    """Return gcda_files unchanged when it is non-empty; otherwise glob recursively for *.gcda under cwd."""
    if gcda_files:
        return gcda_files
    search_root = os.path.abspath(cwd)
    pattern = os.path.join(search_root, "**/*.gcda")
    return glob.glob(pattern, recursive=True)
83
def gcovWorker(cwd, gcov, files, chunk, gcov_filter_options, branch_coverage):
    """Run one gcov process over chunk and collect the filtered results into files.

    gcov is launched from cwd with "-it" (plus "b" when branch_coverage is set) and the
    gcda paths in chunk. Every line it prints on stdout is parsed as one JSON document;
    the entries of its "files" list are filtered through processGcovs and the survivors
    are added to the shared files list. Totals and skip counts are recorded in the
    module-level GCOVS_TOTAL / GCOVS_SKIPPED lists.
    """
    flags = "-itb" if branch_coverage else "-it"
    command = [gcov, flags] + chunk

    proc = subprocess.Popen(command, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    while True:
        raw_line = proc.stdout.readline()
        if raw_line == b'':
            break  # EOF: gcov closed its stdout

        report = json.loads(raw_line.decode(sys.stdout.encoding))
        reported = report["files"]
        kept = processGcovs(cwd, reported, gcov_filter_options)

        files.extend(kept)  # shared with the other worker threads
        GCOVS_TOTAL.append(len(reported))
        GCOVS_SKIPPED.append(len(reported) - len(kept))
    proc.wait()
98
def processGcdas(cwd, gcov, jobs, gcda_files, gcov_filter_options, branch_coverage):
    """Split gcda_files across gcovWorker threads and return everything they collected.

    Each thread receives at most max(MINIMUM_CHUNK_SIZE, len(gcda_files) / jobs + 1)
    gcda files. The call blocks until every thread has finished.
    """
    per_thread = max(MINIMUM_CHUNK_SIZE, int(len(gcda_files) / jobs) + 1)

    collected = []  # filled in by the workers
    workers = [
        threading.Thread(target=gcovWorker, args=(cwd, gcov, collected, batch, gcov_filter_options, branch_coverage))
        for batch in chunks(gcda_files, per_thread)
    ]
    for worker in workers:
        worker.start()

    log("Spawned %d gcov threads, each processing at most %d gcda files" % (len(workers), per_thread))
    for worker in workers:
        worker.join()

    return collected
114
def processGcov(cwd, gcov, files, gcov_filter_options):
    """Annotate one gcov file entry with its absolute path and append it to files if it passes the filters.

    Exactly one filter is applied, chosen in this order of precedence:
      1. "sources": keep only entries whose absolute path is in the set
      2. "include": keep only entries whose "file" contains one of the substrings
      3. "exclude": drop entries whose "file" contains one of the substrings
    """
    rel_path = gcov["file"]
    gcov["file_abs"] = os.path.abspath(os.path.join(cwd, rel_path))

    if gcov_filter_options["sources"]:
        keep = gcov["file_abs"] in gcov_filter_options["sources"]
    elif gcov_filter_options["include"]:
        keep = any(pattern in rel_path for pattern in gcov_filter_options["include"])
    else:
        keep = not any(pattern in rel_path for pattern in gcov_filter_options["exclude"])

    if keep:
        files.append(gcov)
139
def processGcovs(cwd, gcov_files, gcov_filter_options):
    """Run processGcov over every entry of gcov_files and return the list of entries that were kept."""
    kept = []
    for entry in gcov_files:
        processGcov(cwd, entry, kept, gcov_filter_options)
    return kept
145
def dumpBranchCoverageToLcovInfo(f, branches):
    """Write the BRDA/BRF/BRH branch records of one source file to f.

    f        -- writable text file object
    branches -- dict mapping line number (str) to the list of branch counts on that line

    BRF/BRH are totals over individual branches. A line can carry several
    branches, so the number of dict keys must not be used for them.
    """
    branch_found = 0
    branch_miss = 0
    for line_num, branch_counts in branches.items():
        for i, count in enumerate(branch_counts):
            #Branch (<line number>, <block number>, <branch number>, <taken>)
            f.write("BRDA:%s,%d,%d,%d\n" % (line_num, i // 2, i, count))
            branch_found += 1
            branch_miss += int(count == 0)
    f.write("BRF:%d\n" % branch_found)                 #Branches Found
    f.write("BRH:%d\n" % (branch_found - branch_miss)) #Branches Hit
155
def dumpToLcovInfo(fastcov_json, output):
    """Write the fastcov report to the file named output as lcov info records, one record per source."""
    with open(output, "w") as f:
        for sf, data in fastcov_json["sources"].items():
            f.write("SF:%s\n" % sf) #Source File

            # Function records followed by their found/hit totals
            functions = data["functions"]
            functions_hit = 0
            for name, fdata in functions.items():
                hits = fdata["execution_count"]
                f.write("FN:%d,%s\n" % (fdata["start_line"], name))   #Function Start Line
                f.write("FNDA:%d,%s\n" % (hits, name))                #Function Hits
                if hits != 0:
                    functions_hit += 1
            f.write("FNF:%d\n" % len(functions))   #Functions Found
            f.write("FNH:%d\n" % functions_hit)    #Functions Hit

            # Branch records are emitted only when branch data is present
            if data["branches"]:
                dumpBranchCoverageToLcovInfo(f, data["branches"])

            # Line records followed by their found/hit totals
            line_counts = data["lines"]
            lines_hit = 0
            for line_num, count in line_counts.items():
                f.write("DA:%s,%d\n" % (line_num, count)) #Line
                if count != 0:
                    lines_hit += 1
            f.write("LF:%d\n" % len(line_counts))  #Lines Found
            f.write("LH:%d\n" % lines_hit)         #Lines Hit
            f.write("end_of_record\n")
179
def exclMarkerWorker(fastcov_sources, chunk):
    """Remove coverage data excluded by LCOV_EXCL markers from the given sources, in place.

    fastcov_sources -- dict mapping source path to its {"lines": ..., "branches": ...} data
    chunk           -- the source paths (keys of fastcov_sources) this worker should scan

    Supported markers:
      LCOV_EXCL_LINE               drops the line carrying the marker
      LCOV_EXCL_START / _STOP      drops every line in the inclusive range between them
    A STOP without a preceding START is ignored.
    """
    for source in chunk:
        coverage = fastcov_sources[source]

        # Nothing can be excluded when the source has no line data
        if not coverage["lines"]:
            continue

        start_line = 0  # 0 means "not inside an exclusion region"
        # Only ASCII markers are searched for, so bytes that cannot be decoded are
        # irrelevant; ignoring them keeps a stray non-UTF-8 character in a source
        # file from aborting the whole worker with UnicodeDecodeError.
        with open(source, errors="ignore") as f:
            for i, line in enumerate(f, 1): #Start enumeration at line 1
                if "LCOV_EXCL" not in line:
                    continue

                if "LCOV_EXCL_LINE" in line:
                    key = str(i)
                    coverage["lines"].pop(key, None)
                    coverage["branches"].pop(key, None)
                elif "LCOV_EXCL_START" in line:
                    start_line = i
                elif "LCOV_EXCL_STOP" in line:
                    if not start_line:
                        continue

                    for key in ("lines", "branches"):
                        # Copy the keys: entries are deleted while walking them
                        for line_num in list(coverage[key]):
                            if start_line <= int(line_num) <= i:
                                del coverage[key][line_num]

                    start_line = 0
213
def scanExclusionMarkers(fastcov_json, jobs):
    """Scan every source in the report for exclusion markers using exclMarkerWorker threads.

    Each thread receives at most max(MINIMUM_CHUNK_SIZE, number_of_sources / jobs + 1)
    source paths. The report is modified in place and the call blocks until all threads finish.
    """
    sources = fastcov_json["sources"]
    per_thread = max(MINIMUM_CHUNK_SIZE, int(len(sources) / jobs) + 1)

    workers = [
        threading.Thread(target=exclMarkerWorker, args=(fastcov_json["sources"], batch))
        for batch in chunks(list(sources.keys()), per_thread)
    ]
    for worker in workers:
        worker.start()

    log("Spawned %d threads each scanning at most %d source files" % (len(workers), per_thread))
    for worker in workers:
        worker.join()
226
def distillFunction(function_raw, functions):
    """Merge one raw gcov function record into functions, summing execution counts per function name."""
    name = function_raw["name"]
    hits = function_raw["execution_count"]

    if name in functions:
        functions[name]["execution_count"] += hits
        return

    # First sighting of this function: remember where it starts
    functions[name] = {
        "start_line": function_raw["start_line"],
        "execution_count": hits
    }
236
def distillLine(line_raw, lines, branches):
    """Merge one raw gcov line record into the lines and branches dicts.

    Line counts are summed under the stringified line number. Branch counts are summed
    position by position; a line only gets a branches entry if the record carries branches.
    """
    key = str(line_raw["line_number"])
    if key in lines:
        lines[key] += line_raw["count"]
    else:
        lines[key] = line_raw["count"]

    raw_branches = line_raw["branches"]
    if not raw_branches:
        return

    counts = branches.setdefault(key, [])
    # Grow the stored list when this record reports more branches than seen so far
    shortfall = len(raw_branches) - len(counts)
    if shortfall > 0:
        counts.extend([0] * shortfall)
    for position, branch in enumerate(raw_branches):
        counts[position] += branch["count"]
252
def distillSource(source_raw, sources):
    """Merge one raw gcov file entry into sources, keyed by the entry's absolute path."""
    path = source_raw["file_abs"]
    if path not in sources:
        sources[path] = {"functions": {}, "branches": {}, "lines": {}}
    entry = sources[path]

    for function_raw in source_raw["functions"]:
        distillFunction(function_raw, entry["functions"])

    for line_raw in source_raw["lines"]:
        distillLine(line_raw, entry["lines"], entry["branches"])
267
def distillReport(report_raw):
    """Build the fastcov report ({"sources": {...}}) by merging every raw gcov file entry in report_raw."""
    sources = {}
    for source_raw in report_raw:
        distillSource(source_raw, sources)
    return {"sources": sources}
277
def dumpToJson(intermediate, output):
    """Serialize intermediate as JSON into the file named output, replacing any existing content."""
    with open(output, "w") as json_file:
        json.dump(intermediate, json_file)
281
def log(line):
    """Print line prefixed with the seconds elapsed since the last stopwatch() call.

    Nothing is printed (and the stopwatch is not reset) when the module-level
    command line options have quiet set. The options object only exists at module
    level when the file runs as a script, so it is looked up defensively: if it is
    missing, e.g. because main() was called from an importing module, logging stays
    enabled instead of failing with NameError.
    """
    cli_args = globals().get("args")
    if getattr(cli_args, "quiet", False):
        return
    print("[{:.3f}s] {}".format(stopwatch(), line))
285
def getGcovFilterOptions(args):
    """Collect the source filtering options from the parsed command line into a dict.

    "sources" holds the absolute form of every explicitly requested source path as a set
    (for fast membership tests); "include" and "exclude" are passed through unchanged.
    """
    absolute_sources = {os.path.abspath(path) for path in args.sources}
    return {
        "sources": absolute_sources,
        "include": args.includepost,
        "exclude": args.excludepost,
    }
292
def main(args):
    """Run the fastcov pipeline for the parsed command line options in args.

    Steps: verify the gcov version, collect (and optionally pre-filter) the gcda
    files, then either delete them (--zerocounters) or run gcov over them,
    aggregate the results and write the report to args.output in the requested
    format (lcov info, raw gcov json, or fastcov json).

    Exits the process with status 1 when the gcov binary is older than MINIMUM_GCOV.
    """
    global MINIMUM_CHUNK_SIZE

    # Need at least gcov 9.0.0 because that's when gcov JSON and stdout streaming was introduced
    current_gcov_version = getGcovVersion(args.gcov)
    if current_gcov_version < MINIMUM_GCOV:
        sys.stderr.write("Minimum gcov version {} required, found {}\n".format(".".join(map(str, MINIMUM_GCOV)), ".".join(map(str, current_gcov_version))))
        # sys.exit rather than the site-provided exit(), which is not guaranteed to exist
        sys.exit(1)

    # Apply -m/--minimum-chunk-size: the chunking helpers read the module-level
    # MINIMUM_CHUNK_SIZE when they are called, so updating it here is what makes
    # the option take effect. Clamp to 1 because a chunk size of 0 cannot be sliced.
    MINIMUM_CHUNK_SIZE = max(1, getattr(args, "minimum_chunk", MINIMUM_CHUNK_SIZE))

    # The chunk size is derived by dividing by the job count, so it must be at least 1
    jobs = max(1, args.jobs)

    # Get list of gcda files to process
    gcda_files = getGcdaFiles(args.directory, args.gcda_files)
    log("Found {} .gcda files ".format(len(gcda_files)))

    # If gcda filtering is enabled, filter them out now
    if args.excludepre:
        gcda_files = getFilteredGcdaFiles(gcda_files, args.excludepre)
        log("{} .gcda files after filtering".format(len(gcda_files)))

    # We "zero" the "counters" by simply deleting all gcda files
    if args.zerocounters:
        removeFiles(gcda_files)
        log("{} .gcda files removed".format(len(gcda_files)))
        return

    # Fire up one gcov per cpu and start processing gcdas
    gcov_filter_options = getGcovFilterOptions(args)
    intermediate_json_files = processGcdas(args.cdirectory, args.gcov, jobs, gcda_files, gcov_filter_options, args.branchcoverage)

    # Summarize processing results
    gcov_total = sum(GCOVS_TOTAL)
    gcov_skipped = sum(GCOVS_SKIPPED)
    log("Processed {} .gcov files ({} total, {} skipped)".format(gcov_total - gcov_skipped, gcov_total, gcov_skipped))

    # Distill all the extraneous info gcov gives us down to the core report
    fastcov_json = distillReport(intermediate_json_files)
    log("Aggregated raw gcov JSON into fastcov JSON report")

    # Dump to desired file format
    if args.lcov:
        scanExclusionMarkers(fastcov_json, jobs)
        log("Scanned {} source files for exclusion markers".format(len(fastcov_json["sources"])))
        dumpToLcovInfo(fastcov_json, args.output)
        log("Created lcov info file '{}'".format(args.output))
    elif args.gcov_raw:
        dumpToJson(intermediate_json_files, args.output)
        log("Created gcov raw json file '{}'".format(args.output))
    else:
        dumpToJson(fastcov_json, args.output)
        log("Created fastcov json file '{}'".format(args.output))
340
341
# Script entry point: build the command line interface, parse it and hand over to main().
# The parsed options are kept in the module-level name `args`, which log() reads.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='A parallel gcov wrapper for fast coverage report generation')
    parser.add_argument('-z', '--zerocounters', dest='zerocounters', action="store_true", help='Recursively delete all gcda files')

    # Enable Branch Coverage
    parser.add_argument('-b', '--branch-coverage', dest='branchcoverage', action="store_true", help='Include branch counts in the coverage report')

    # Filtering Options
    parser.add_argument('-s', '--source-files', dest='sources',     nargs="+", metavar='', default=[], help='Filter: Specify exactly which source files should be included in the final report. Paths must be either absolute or relative to current directory.')
    parser.add_argument('-e', '--exclude',      dest='excludepost', nargs="+", metavar='', default=[], help='Filter: Exclude source files from final report if they contain one of the provided substrings (i.e. /usr/include test/, etc.)')
    parser.add_argument('-i', '--include',      dest='includepost', nargs="+", metavar='', default=[], help='Filter: Only include source files in final report that contain one of the provided substrings (i.e. src/ etc.)')
    parser.add_argument('-f', '--gcda-files',   dest='gcda_files',  nargs="+", metavar='', default=[], help='Filter: Specify exactly which gcda files should be processed instead of recursively searching the search directory.')
    parser.add_argument('-E', '--exclude-gcda', dest='excludepre',  nargs="+", metavar='', default=[], help='Filter: Exclude gcda files from being processed via simple find matching (not regex)')

    parser.add_argument('-g', '--gcov', dest='gcov', default='gcov', help='Which gcov binary to use')

    parser.add_argument('-d', '--search-directory', dest='directory', default=".", help='Base directory to recursively search for gcda files (default: .)')
    # Adjacent string literals keep the source line breaks out of the help text itself
    parser.add_argument('-c', '--compiler-directory', dest='cdirectory', default=".",
                        help='Base directory compiler was invoked from (default: .) '
                             'This needs to be set if invoking fastcov from somewhere other than the base compiler directory.')

    parser.add_argument('-j', '--jobs', dest='jobs', type=int, default=multiprocessing.cpu_count(), help='Number of parallel gcov to spawn (default: %d).' % multiprocessing.cpu_count())
    # Default and help text both come from MINIMUM_CHUNK_SIZE so they cannot drift apart
    parser.add_argument('-m', '--minimum-chunk-size', dest='minimum_chunk', type=int, default=MINIMUM_CHUNK_SIZE,
                        help=('Minimum number of files a thread should process (default: %d). ' % MINIMUM_CHUNK_SIZE) +
                             'If you have only 4 gcda files but they are monstrously huge, you could change this value to a 1 '
                             'so that each thread will only process 1 gcda. Otherwise fastcov will spawn only 1 thread to process all of them.')

    parser.add_argument('-l', '--lcov',     dest='lcov',     action="store_true", help='Output in lcov info format instead of fastcov json')
    parser.add_argument('-r', '--gcov-raw', dest='gcov_raw', action="store_true", help='Output in gcov raw json instead of fastcov json')
    parser.add_argument('-o', '--output',  dest='output', default="coverage.json", help='Name of output file (default: coverage.json)')
    parser.add_argument('-q', '--quiet', dest='quiet', action="store_true", help='Suppress output to stdout')

    args = parser.parse_args()
    main(args)