#!/usr/bin/env python
# Copyright (c) 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


"""Parse an LLVM coverage report to generate useable results."""


import argparse
import json
import os
import re
import subprocess
import sys


# Matches the "<path>:" header line which introduces each file in the report.
_FILE_HEADER_RE = re.compile(r'([a-zA-Z0-9\./_-]+):')

# Matches rows which are ignored by the parser: two whitespace characters
# followed either by nothing but dashes, or by a '|' and further text.
_SKIP_LINE_RE = re.compile(r'^\s{2}-+$|^\s{2}\|.+$')

# Only split the report on real newlines. str.splitlines() on Python 3 text
# also splits on characters such as form feed, which can legitimately appear
# inside the source code echoed in the report.
_NEWLINE_RE = re.compile(r'\r\n|\r|\n')

# Path fragment marking directories whose files are never matched.
_EXCLUDED_PATH_FRAGMENT = 'third_party/externals'


def _fix_filename(filename):
    """Return a filename which we can use to identify the file.

    The file paths printed by llvm-cov take the form:

        /path/to/repo/out/dir/../../src/filename.cpp

    And then they're truncated to 22 characters with leading ellipses:

        ...../../src/filename.cpp

    This makes it really tough to determine whether the file actually belongs
    in the Skia repo. This function strips out the leading junk so that, if
    the file exists in the repo, the returned string matches the end of some
    relative path in the repo. This doesn't guarantee correctness, but it's
    about as close as we can get.
    """
    # Keep what follows the last '..', then drop any leading '.' and '/'.
    return filename.split('..')[-1].lstrip('./')


def _file_in_repo(filename, all_files):
    """Return the name of the checked-in file matching the given filename.

    Use suffix matching to determine which checked-in files the given
    filename matches.

    Args:
        filename: file path as printed in the coverage report.
        all_files: iterable of repo-relative paths of candidate files.

    Returns:
        The single matching entry of all_files, or None if there are no
        matches or multiple matches. A warning is written to stderr in the
        multiple-match case.
    """
    new_file = _fix_filename(filename)
    matched = [f for f in all_files if f.endswith(new_file)]
    if len(matched) == 1:
        return matched[0]
    if len(matched) > 1:
        sys.stderr.write(
            'WARNING: multiple matches for %s; skipping:\n\t%s\n'
            % (new_file, '\n\t'.join(matched)))
    return None


def _list_repo_files(repo_root):
    """Return the sorted paths, relative to repo_root, of candidate files.

    Hidden files, hidden directories, '.pyc' files and every directory whose
    path contains 'third_party/externals' are left out.
    """
    all_files = []
    for root, dirs, files in os.walk(repo_root):
        if _EXCLUDED_PATH_FRAGMENT in root:
            # Every descendant path contains the fragment too, so there is no
            # point in descending any further.
            dirs[:] = []
            continue
        # Prune in place so that os.walk does not descend into hidden dirs.
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        rel_root = os.path.relpath(root, repo_root)
        if rel_root == os.curdir:
            # Files directly in repo_root are listed without a './' prefix.
            rel_root = ''
        for name in files:
            if name.startswith('.') or name.endswith('.pyc'):
                continue
            all_files.append(os.path.join(rel_root, name))
    all_files.sort()
    return all_files


def _get_per_file_per_line_coverage(report):
    """Return a dict whose keys are file names and values are coverage data.

    Files are looked up under the current working directory; report sections
    for files which cannot be matched to exactly one file there are ignored,
    as are any lines which precede the first file header.

    Args:
        report: contents of the llvm coverage report, as text or as bytes.
            Bytes are decoded as UTF-8 with undecodable bytes replaced.

    Returns:
        A dict mapping repo-relative file names to lists of tuples which take
        the form (lineno, coverage, code). coverage is None for lines which
        have no count in the report.

    Raises:
        AssertionError: if the line numbers of a file are not consecutive
            starting at 1.
        ValueError: if a data row does not have the 'count|lineno|code' shape.
    """
    if isinstance(report, bytes):
        report = report.decode('utf-8', 'replace')

    all_files = _list_repo_files(os.getcwd())

    files = {}
    current_file = None  # None until a header matches a checked-in file.
    file_lines = []
    for line in _NEWLINE_RE.split(report):
        header = _FILE_HEADER_RE.match(line)
        if header:
            # A new file starts here; store what was collected for the
            # previous one.
            if current_file is not None:
                files[current_file] = file_lines
            current_file = _file_in_repo(header.group(1), all_files)
            file_lines = []
            continue

        if current_file is None or not line or _SKIP_LINE_RE.match(line):
            continue

        # The code column may itself contain '|', so split at most twice.
        cov, linenum, code = line.split('|', 2)
        cov = cov.strip()
        # An empty count means we don't care about coverage for this line.
        cov = int(cov) if cov else None
        linenum = int(linenum.strip())
        if linenum != len(file_lines) + 1:
            raise AssertionError(
                'Unexpected line number %d for %s; expected %d'
                % (linenum, current_file, len(file_lines) + 1))
        file_lines.append((linenum, cov, code))

    # The last file in the report has no following header to trigger the
    # store above.
    if current_file is not None:
        files[current_file] = file_lines
    return files


def _testname(filename):
    """Transform the file name into an ingestible test name.

    Every character which is not an ASCII letter or digit becomes '_'.
    """
    return re.sub(r'[^a-zA-Z0-9]', '_', filename)


def _nanobench_json(results, properties, key):
    """Return the results in JSON format like that produced by nanobench.

    Args:
        results: iterable of (percent, lines_not_covered, filename) tuples.
        properties: dict of build properties copied into the top level.
        key: dict stored under the 'key' entry.

    Returns:
        A new dict; neither properties nor key is modified.
    """
    rv = {}
    # Copy over the properties first, then set the 'key' and 'results' keys,
    # in order to avoid bad formatting in case the user passes in a
    # properties dict containing those keys.
    rv.update(properties)
    rv['key'] = key
    rv['results'] = {
        _testname(f): {
            'coverage': {
                'percent': percent,
                'lines_not_covered': not_covered_lines,
                'options': {
                    'fullname': f,
                    'dir': os.path.dirname(f),
                    'source_type': 'coverage',
                },
            },
        } for percent, not_covered_lines, f in results
    }
    return rv


def _parse_key_value(kv_list):
    """Return a dict whose key/value pairs are derived from the given list.

    For example:

        ['k1', 'v1', 'k2', 'v2']

    becomes:

        {'k1': 'v1',
         'k2': 'v2'}

    Raises:
        Exception: if the list has an odd number of elements.
    """
    if len(kv_list) % 2 != 0:
        raise Exception('Invalid key/value pairs: %s' % (kv_list,))
    # Even positions are keys, odd positions are the corresponding values.
    return dict(zip(kv_list[::2], kv_list[1::2]))


def _get_per_file_summaries(line_by_line):
    """Summarize the full line-by-line coverage report by file.

    Args:
        line_by_line: dict as returned by _get_per_file_per_line_coverage.

    Returns:
        A list of (percent_covered, lines_not_covered, filepath) tuples, one
        for each file which has at least one line with coverage data.
    """
    per_file = []
    for filepath, lines in line_by_line.items():
        total_lines = 0
        covered_lines = 0
        for _, cov, _ in lines:
            if cov is not None:
                total_lines += 1
                if cov > 0:
                    covered_lines += 1
        if total_lines > 0:
            per_file.append((float(covered_lines) / float(total_lines) * 100.0,
                             total_lines - covered_lines,
                             filepath))
    return per_file


def main(argv=None):
    """Generate useful data from a coverage report.

    Args:
        argv: optional list of command-line arguments; defaults to
            sys.argv[1:] when None.

    Raises:
        Exception: if --nanobench is given without --key and --properties.
    """
    # Parse args.
    parser = argparse.ArgumentParser()
    parser.add_argument('--report',
                        help='input file; an llvm coverage report.',
                        required=True)
    parser.add_argument('--nanobench', help='output file for nanobench data.')
    parser.add_argument(
        '--key', metavar='key_or_value', nargs='+',
        help='key/value pairs identifying this bot.')
    parser.add_argument(
        '--properties', metavar='key_or_value', nargs='+',
        help='key/value pairs representing properties of this build.')
    parser.add_argument('--linebyline',
                        help='output file for line-by-line JSON data.')
    args = parser.parse_args(argv)

    if args.nanobench and not (args.key and args.properties):
        raise Exception('--key and --properties are required with --nanobench')

    # Read raw bytes so that decoding does not depend on the locale; the
    # parser decodes them as UTF-8 and replaces anything undecodable.
    with open(args.report, 'rb') as f:
        report = f.read()

    line_by_line = _get_per_file_per_line_coverage(report)

    if args.linebyline:
        with open(args.linebyline, 'w') as f:
            json.dump(line_by_line, f)

    if args.nanobench:
        # Parse the key and properties for use in the nanobench JSON output.
        key = _parse_key_value(args.key)
        properties = _parse_key_value(args.properties)

        # Get per-file summaries.
        per_file = _get_per_file_summaries(line_by_line)

        # Write results.
        format_results = _nanobench_json(per_file, properties, key)
        with open(args.nanobench, 'w') as f:
            json.dump(format_results, f)


if __name__ == '__main__':
    main()