#!/usr/bin/env python3
"""
    Author: Bryan Gillespie

    A massively parallel gcov wrapper for generating intermediate coverage formats fast

    The goal of fastcov is to generate code coverage intermediate formats as fast as possible
    (ideally < 1 second), even for large projects with hundreds of gcda objects. The intermediate
    formats may then be consumed by a report generator such as lcov's genhtml, or a dedicated front
    end such as coveralls.

    Sample Usage:
        $ cd build_dir
        $ ./fastcov.py --zerocounters
        $ <run unit tests>
        $ ./fastcov.py --exclude /usr/include test/ --lcov -o report.info
        $ genhtml -o code_coverage report.info
"""

import re
import os
import sys
import glob
import json
import time
import argparse
import threading
import subprocess
import multiprocessing

MINIMUM_GCOV = (9,0,0)
MINIMUM_CHUNK_SIZE = 5

# Interesting metrics
START_TIME = time.time()
GCOVS_TOTAL = []
GCOVS_SKIPPED = []

# When True, log() prints nothing. Set by main() from the --quiet option.
QUIET = False

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

def _chunkSize(total, jobs, minimum_chunk):
    """Return how many items each worker thread should process (always at least 1)."""
    # Clamp jobs so that "-j 0" cannot cause a division by zero
    return max(1, minimum_chunk, total // max(1, jobs) + 1)

def stopwatch():
    """Return number of seconds since last time this was called"""
    global START_TIME
    end_time = time.time()
    delta = end_time - START_TIME
    START_TIME = end_time
    return delta

def parseVersionFromLine(version_str):
    """Given a string containing a dotted integer version, parse out integers and return as tuple

    The first "X.Y.Z" occurrence in the string is used. Returns (0,0,0) when
    the string contains no three-component dotted version.
    """
    # No trailing character is required after the version, so a version at the
    # very end of the string (e.g. "gcov (GCC) 9.1.0") is parsed correctly.
    version = re.search(r'(\d+\.\d+\.\d+)', version_str)

    if not version:
        return (0,0,0)

    return tuple(map(int, version.group(1).split(".")))

def getGcovVersion(gcov):
    """Run "<gcov> -v" and return the version parsed from the first line of its output."""
    p = subprocess.Popen([gcov, "-v"], stdout=subprocess.PIPE)
    # communicate() reads all output and waits for the process to terminate
    output = p.communicate()[0].decode('UTF-8')
    return parseVersionFromLine(output.split("\n")[0])

def removeFiles(files):
    """Delete every path in files from the filesystem."""
    for path in files:
        os.remove(path)

def getFilteredGcdaFiles(gcda_files, exclude):
    """Return the gcda paths that do not contain any of the exclude substrings."""
    return [gcda for gcda in gcda_files if not any(ex in gcda for ex in exclude)]

def getGcdaFiles(cwd, gcda_files):
    """Return gcda_files if non-empty, otherwise every *.gcda found recursively under cwd."""
    if not gcda_files:
        gcda_files = glob.glob(os.path.join(os.path.abspath(cwd), "**/*.gcda"), recursive=True)
    return gcda_files

def gcovWorker(cwd, gcov, files, chunk, gcov_filter_options, branch_coverage):
    """Thread body: run one gcov process over a chunk of gcda files and collect the results.

    Each line gcov writes to stdout is parsed as one JSON document. The entries
    of its "files" list that pass the filters are appended to the shared list
    `files`, and the per-document totals are recorded in GCOVS_TOTAL and
    GCOVS_SKIPPED.
    """
    gcov_args = "-it"
    if branch_coverage:
        gcov_args += "b"

    # sys.stdout.encoding may be None (e.g. when stdout has been replaced)
    encoding = sys.stdout.encoding or "utf-8"

    # The context manager closes the pipe and waits for gcov, even on error
    with subprocess.Popen([gcov, gcov_args] + chunk, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) as p:
        for line in iter(p.stdout.readline, b''):
            intermediate_json = json.loads(line.decode(encoding))
            intermediate_json_files = processGcovs(cwd, intermediate_json["files"], gcov_filter_options)
            for f in intermediate_json_files:
                files.append(f) #thread safe, there might be a better way to do this though
            GCOVS_TOTAL.append(len(intermediate_json["files"]))
            GCOVS_SKIPPED.append(len(intermediate_json["files"])-len(intermediate_json_files))

def processGcdas(cwd, gcov, jobs, gcda_files, gcov_filter_options, branch_coverage, minimum_chunk=MINIMUM_CHUNK_SIZE):
    """Split gcda_files into chunks, run one gcovWorker thread per chunk, and return the merged results.

    minimum_chunk is the smallest number of gcda files a single thread is
    given; it defaults to MINIMUM_CHUNK_SIZE.
    """
    chunk_size = _chunkSize(len(gcda_files), jobs, minimum_chunk)

    threads = []
    intermediate_json_files = []
    for chunk in chunks(gcda_files, chunk_size):
        t = threading.Thread(target=gcovWorker, args=(cwd, gcov, intermediate_json_files, chunk, gcov_filter_options, branch_coverage))
        threads.append(t)
        t.start()

    log("Spawned %d gcov threads, each processing at most %d gcda files" % (len(threads), chunk_size))
    for t in threads:
        t.join()

    return intermediate_json_files

def processGcov(cwd, gcov, files, gcov_filter_options):
    """Append the gcov file entry to `files` if it passes the filters.

    Always adds a "file_abs" key (absolute path of gcov["file"] relative to cwd)
    to the entry. Filter precedence: explicit sources, then include substrings,
    then exclude substrings. The first filter that is configured decides alone.
    """
    # Add absolute path
    gcov["file_abs"] = os.path.abspath(os.path.join(cwd, gcov["file"]))

    # If explicit sources were passed, check for match
    if gcov_filter_options["sources"]:
        if gcov["file_abs"] in gcov_filter_options["sources"]:
            files.append(gcov)
        return

    # Check include filter
    if gcov_filter_options["include"]:
        if any(inc in gcov["file"] for inc in gcov_filter_options["include"]):
            files.append(gcov)
        return

    # Check exclude filter
    if any(ex in gcov["file"] for ex in gcov_filter_options["exclude"]):
        return

    files.append(gcov)

def processGcovs(cwd, gcov_files, gcov_filter_options):
    """Return the entries of gcov_files that pass the filters (see processGcov)."""
    files = []
    for gcov in gcov_files:
        processGcov(cwd, gcov, files, gcov_filter_options)
    return files

def dumpBranchCoverageToLcovInfo(f, branches):
    """Write the BRDA/BRF/BRH records for one source file to the open file object f.

    branches maps a line number to the list of branch counts on that line.
    """
    branch_found = 0
    branch_miss = 0
    for line_num, branch_counts in branches.items():
        for i, count in enumerate(branch_counts):
            #Branch (<line number>, <block number>, <branch number>, <taken>)
            f.write("BRDA:%s,%d,%d,%d\n" % (line_num, i // 2, i, count))
            branch_miss += int(count == 0)
        # Count individual branches, not the lines that contain them
        branch_found += len(branch_counts)
    f.write("BRF:%d\n" % branch_found)                  #Branches Found
    f.write("BRH:%d\n" % (branch_found - branch_miss))  #Branches Hit

def dumpToLcovInfo(fastcov_json, output):
    """Write the fastcov report to the file `output` in lcov info format."""
    with open(output, "w") as f:
        for sf, data in fastcov_json["sources"].items():
            f.write("SF:%s\n" % sf) #Source File

            fn_miss = 0
            for function, fdata in data["functions"].items():
                f.write("FN:%d,%s\n" % (fdata["start_line"], function)) #Function Start Line
                f.write("FNDA:%d,%s\n" % (fdata["execution_count"], function)) #Function Hits
                fn_miss += int(fdata["execution_count"] == 0)
            f.write("FNF:%d\n" % len(data["functions"])) #Functions Found
            f.write("FNH:%d\n" % (len(data["functions"]) - fn_miss)) #Functions Hit

            if data["branches"]:
                dumpBranchCoverageToLcovInfo(f, data["branches"])

            line_miss = 0
            for line_num, count in data["lines"].items():
                f.write("DA:%s,%d\n" % (line_num, count)) #Line
                line_miss += int(count == 0)
            f.write("LF:%d\n" % len(data["lines"])) #Lines Found
            f.write("LH:%d\n" % (len(data["lines"]) - line_miss)) #Lines Hit
            f.write("end_of_record\n")

def exclMarkerWorker(fastcov_sources, chunk):
    """Thread body: strip lines covered by LCOV_EXCL markers from the given sources.

    For every source path in chunk the file is read and:
      * a line containing LCOV_EXCL_LINE is removed from "lines" and "branches"
      * every line between LCOV_EXCL_START and LCOV_EXCL_STOP (inclusive) is
        removed from "lines" and "branches"; a STOP without a START is ignored
    fastcov_sources is modified in place.
    """
    for source in chunk:
        lines = fastcov_sources[source]["lines"]
        branches = fastcov_sources[source]["branches"]

        # If there are no covered lines, skip
        if not lines:
            continue

        start_line = 0 # 0 means "not inside an exclusion region"
        with open(source) as f:
            for i, line in enumerate(f, 1): #Start enumeration at line 1
                # Cheap pre-check so most lines need only one substring search
                if "LCOV_EXCL" not in line:
                    continue

                if "LCOV_EXCL_LINE" in line:
                    lines.pop(str(i), None)
                    branches.pop(str(i), None)
                elif "LCOV_EXCL_START" in line:
                    start_line = i
                elif "LCOV_EXCL_STOP" in line:
                    if start_line:
                        for table in (lines, branches):
                            # Copy the keys since we delete while scanning
                            for line_num in list(table):
                                if start_line <= int(line_num) <= i:
                                    del table[line_num]
                    start_line = 0

def scanExclusionMarkers(fastcov_json, jobs, minimum_chunk=MINIMUM_CHUNK_SIZE):
    """Scan all report sources for LCOV_EXCL markers using one exclMarkerWorker thread per chunk.

    minimum_chunk is the smallest number of source files a single thread is
    given; it defaults to MINIMUM_CHUNK_SIZE.
    """
    chunk_size = _chunkSize(len(fastcov_json["sources"]), jobs, minimum_chunk)

    threads = []
    for chunk in chunks(list(fastcov_json["sources"].keys()), chunk_size):
        t = threading.Thread(target=exclMarkerWorker, args=(fastcov_json["sources"], chunk))
        threads.append(t)
        t.start()

    log("Spawned %d threads each scanning at most %d source files" % (len(threads), chunk_size))
    for t in threads:
        t.join()

def distillFunction(function_raw, functions):
    """Merge one raw gcov function record into `functions`, summing execution counts by name."""
    function_name = function_raw["name"]
    if function_name not in functions:
        functions[function_name] = {
            "start_line": function_raw["start_line"],
            "execution_count": function_raw["execution_count"]
        }
    else:
        functions[function_name]["execution_count"] += function_raw["execution_count"]

def distillLine(line_raw, lines, branches):
    """Merge one raw gcov line record into `lines` and `branches`, summing counts.

    Line numbers are stored as string keys. A `branches` entry is only created
    for lines that actually report branches.
    """
    line_number = str(line_raw["line_number"])
    lines[line_number] = lines.get(line_number, 0) + line_raw["count"]

    raw_branches = line_raw["branches"]
    if not raw_branches:
        return

    counts = branches.setdefault(line_number, [])
    # Grow the accumulator if this record reports more branches than seen so far
    if len(counts) < len(raw_branches):
        counts.extend([0] * (len(raw_branches) - len(counts)))
    for i, branch in enumerate(raw_branches):
        counts[i] += branch["count"]

def distillSource(source_raw, sources):
    """Merge one raw gcov file record into `sources`, keyed by its absolute path."""
    source_name = source_raw["file_abs"]
    if source_name not in sources:
        sources[source_name] = {
            "functions": {},
            "branches": {},
            "lines": {},
        }

    for function in source_raw["functions"]:
        distillFunction(function, sources[source_name]["functions"])

    for line in source_raw["lines"]:
        distillLine(line, sources[source_name]["lines"], sources[source_name]["branches"])

def distillReport(report_raw):
    """Aggregate a list of raw gcov file records into a fastcov report dictionary."""
    report_json = {
        "sources": {}
    }

    for source in report_raw:
        distillSource(source, report_json["sources"])

    return report_json

def dumpToJson(intermediate, output):
    """Serialize `intermediate` as JSON into the file `output`."""
    with open(output, "w") as f:
        json.dump(intermediate, f)

def log(line):
    """Print line prefixed with the seconds elapsed since the previous log call, unless QUIET is set."""
    if not QUIET:
        print("[{:.3f}s] {}".format(stopwatch(), line))

def getGcovFilterOptions(args):
    """Build the filter dictionary consumed by processGcov from the parsed arguments."""
    return {
        "sources": {os.path.abspath(s) for s in args.sources}, #Make paths absolute, use set for fast lookups
        "include": args.includepost,
        "exclude": args.excludepost,
    }

def main(args):
    """Run fastcov with the parsed command line arguments."""
    global QUIET
    QUIET = args.quiet

    # Need at least gcov 9.0.0 because that's when gcov JSON and stdout streaming was introduced
    current_gcov_version = getGcovVersion(args.gcov)
    if current_gcov_version < MINIMUM_GCOV:
        sys.stderr.write("Minimum gcov version {} required, found {}\n".format(".".join(map(str, MINIMUM_GCOV)), ".".join(map(str, current_gcov_version))))
        sys.exit(1)

    # Get list of gcda files to process
    gcda_files = getGcdaFiles(args.directory, args.gcda_files)
    log("Found {} .gcda files ".format(len(gcda_files)))

    # If gcda filtering is enabled, filter them out now
    if args.excludepre:
        gcda_files = getFilteredGcdaFiles(gcda_files, args.excludepre)
        log("{} .gcda files after filtering".format(len(gcda_files)))

    # We "zero" the "counters" by simply deleting all gcda files
    if args.zerocounters:
        removeFiles(gcda_files)
        log("{} .gcda files removed".format(len(gcda_files)))
        return

    # Fire up one gcov per cpu and start processing gcdas
    gcov_filter_options = getGcovFilterOptions(args)
    intermediate_json_files = processGcdas(args.cdirectory, args.gcov, args.jobs, gcda_files, gcov_filter_options, args.branchcoverage, args.minimum_chunk)

    # Summarize processing results
    gcov_total = sum(GCOVS_TOTAL)
    gcov_skipped = sum(GCOVS_SKIPPED)
    log("Processed {} .gcov files ({} total, {} skipped)".format(gcov_total - gcov_skipped, gcov_total, gcov_skipped))

    # Distill all the extraneous info gcov gives us down to the core report
    fastcov_json = distillReport(intermediate_json_files)
    log("Aggregated raw gcov JSON into fastcov JSON report")

    # Dump to desired file format
    if args.lcov:
        scanExclusionMarkers(fastcov_json, args.jobs, args.minimum_chunk)
        log("Scanned {} source files for exclusion markers".format(len(fastcov_json["sources"])))
        dumpToLcovInfo(fastcov_json, args.output)
        log("Created lcov info file '{}'".format(args.output))
    elif args.gcov_raw:
        dumpToJson(intermediate_json_files, args.output)
        log("Created gcov raw json file '{}'".format(args.output))
    else:
        dumpToJson(fastcov_json, args.output)
        log("Created fastcov json file '{}'".format(args.output))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='A parallel gcov wrapper for fast coverage report generation')
    parser.add_argument('-z', '--zerocounters', dest='zerocounters', action="store_true", help='Recursively delete all gcda files')

    # Enable Branch Coverage
    parser.add_argument('-b', '--branch-coverage', dest='branchcoverage', action="store_true", help='Include branch counts in the coverage report')

    # Filtering Options
    parser.add_argument('-s', '--source-files', dest='sources', nargs="+", metavar='', default=[], help='Filter: Specify exactly which source files should be included in the final report. Paths must be either absolute or relative to current directory.')
    parser.add_argument('-e', '--exclude', dest='excludepost', nargs="+", metavar='', default=[], help='Filter: Exclude source files from final report if they contain one of the provided substrings (i.e. /usr/include test/, etc.)')
    parser.add_argument('-i', '--include', dest='includepost', nargs="+", metavar='', default=[], help='Filter: Only include source files in final report that contain one of the provided substrings (i.e. src/ etc.)')
    parser.add_argument('-f', '--gcda-files', dest='gcda_files', nargs="+", metavar='', default=[], help='Filter: Specify exactly which gcda files should be processed instead of recursively searching the search directory.')
    parser.add_argument('-E', '--exclude-gcda', dest='excludepre', nargs="+", metavar='', default=[], help='Filter: Exclude gcda files from being processed via simple find matching (not regex)')

    parser.add_argument('-g', '--gcov', dest='gcov', default='gcov', help='Which gcov binary to use')

    parser.add_argument('-d', '--search-directory', dest='directory', default=".", help='Base directory to recursively search for gcda files (default: .)')
    parser.add_argument('-c', '--compiler-directory', dest='cdirectory', default=".", help='Base directory compiler was invoked from (default: .) '
                        'This needs to be set if invoking fastcov from somewhere other than the base compiler directory.')

    parser.add_argument('-j', '--jobs', dest='jobs', type=int, default=multiprocessing.cpu_count(), help='Number of parallel gcov to spawn (default: %d).' % multiprocessing.cpu_count())
    parser.add_argument('-m', '--minimum-chunk-size', dest='minimum_chunk', type=int, default=5, help='Minimum number of files a thread should process (default: 5). '
                        'If you have only 4 gcda files but they are monstrously huge, you could change this value to a 1 so that each thread will only process 1 gcda. Otherwise fastcov will spawn only 1 thread to process all of them.')

    parser.add_argument('-l', '--lcov', dest='lcov', action="store_true", help='Output in lcov info format instead of fastcov json')
    parser.add_argument('-r', '--gcov-raw', dest='gcov_raw', action="store_true", help='Output in gcov raw json instead of fastcov json')
    parser.add_argument('-o', '--output', dest='output', default="coverage.json", help='Name of output file (default: coverage.json)')
    parser.add_argument('-q', '--quiet', dest='quiet', action="store_true", help='Suppress output to stdout')

    args = parser.parse_args()
    main(args)