#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script for merging sancov files in parallel.

When merging test runner output, the sancov files are expected
to be located in one directory with the file-name pattern:
<executable name>.test.<id>.<attempt>.sancov

For each executable, this script writes a new file:
<executable name>.result.sancov

When --swarming-output-dir is specified, this script will merge the result
files found there into the coverage folder.

The sancov tool is expected to be in the llvm compiler-rt third-party
directory. It's not checked out by default and must be added as a custom deps:
'v8/third_party/llvm/projects/compiler-rt':
    'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
"""

import argparse
import logging
import math
import os
import re
import subprocess
import sys

from multiprocessing import Pool, cpu_count


logging.basicConfig(level=logging.INFO)

# V8 checkout directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# The sancov tool location.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Number of cpus.
CPUS = cpu_count()

# Regexp to find sancov file as output by the v8 test runner. Also grabs the
# executable name in group 1.
SANCOV_FILE_RE = re.compile(r'^(.*)\.test\.\d+\.\d+\.sancov$')

# Regexp to find sancov result files as returned from swarming.
SANCOV_RESULTS_FILE_RE = re.compile(r'^.*\.result\.sancov$')


def _run_sancov_merge(files, cwd=None):
  """Run 'sancov merge' on the given files and return the merged data.

  Args:
    files: List of sancov file names passed to the tool.
    cwd: Working directory for the tool, or None for the current one.
  Returns: The bytes the tool wrote to stdout.
  Raises:
    AssertionError: If the tool exits with a non-zero code. This is raised
        explicitly (not via an assert statement), so the check also holds
        when Python runs with -O. The tool's stderr is part of the message.
  """
  process = subprocess.Popen(
      [SANCOV_TOOL, 'merge'] + list(files),
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=cwd,
  )
  output, errors = process.communicate()
  if process.returncode != 0:
    raise AssertionError(
        'sancov merge failed with exit code %d: %s' %
        (process.returncode, errors.decode('utf-8', 'replace')))
  return output


def _check_is_dir(path):
  """Raise AssertionError unless path is an existing directory."""
  if not os.path.isdir(path):
    raise AssertionError('Not an existing directory: %s' % path)


def merge(args):
  """Merge several sancov files into one.

  Called through multiprocessing pool. The args are expected to unpack to:
    keep: Option if source and intermediate sancov files should be kept.
    coverage_dir: Folder where to find the sancov files.
    executable: Name of the executable whose sancov files should be merged.
    index: A number to be put into the intermediate result file name.
           If None, this is a final result.
    bucket: The list of sancov files to be merged.
  Returns: A tuple with the executable name and the result file name.
  Raises:
    AssertionError: If the sancov tool fails. In that case no result file is
        written and no input file is removed.
  """
  keep, coverage_dir, executable, index, bucket = args
  output = _run_sancov_merge(bucket, cwd=coverage_dir)
  if index is not None:
    # This is an intermediate result, add the bucket index to the file name.
    result_file_name = '%s.result.%d.sancov' % (executable, index)
  else:
    # This is the final result without bucket index.
    result_file_name = '%s.result.sancov' % executable
  with open(os.path.join(coverage_dir, result_file_name), "wb") as f:
    f.write(output)
  if not keep:
    # The inputs are only dropped after the merged data has been written.
    for name in bucket:
      os.remove(os.path.join(coverage_dir, name))
  return executable, result_file_name


def generate_inputs(keep, coverage_dir, file_map, cpus):
  """Generate inputs for multiprocessed merging.

  Splits the sancov files into several buckets, so that each bucket can be
  merged in a separate process. We have only few executables in total with
  mostly lots of associated files. In the general case, with many executables
  we might need to avoid splitting buckets of executables with few files.

  Args:
    keep: Keep-files option, passed through into each input tuple.
    coverage_dir: Base path, passed through into each input tuple.
    file_map: Dict mapping executable names to lists of sancov file names.
    cpus: Number of buckets to aim for per executable.
  Returns: List of args as expected by merge above.
  """
  inputs = []
  # items() rather than iteritems(), so this runs on Python 2 and 3 alike.
  for executable, files in file_map.items():
    # What's the bucket size for distributing files for merging? E.g. with
    # 2 cpus and 9 files we want bucket size 5.
    n = max(2, int(math.ceil(len(files) / float(cpus))))

    # Chop files into buckets.
    buckets = [files[i:i+n] for i in range(0, len(files), n)]

    # Inputs for multiprocessing. List of tuples containing:
    # Keep-files option, base path, executable name, index of bucket,
    # list of files.
    inputs.extend([(keep, coverage_dir, executable, i, b)
                   for i, b in enumerate(buckets)])
  return inputs


def merge_parallel(inputs, merge_fun=merge):
  """Process several merge jobs in parallel.

  Args:
    inputs: List of argument tuples, one per job.
    merge_fun: Function called with each element of inputs.
  Returns: List with the return values of merge_fun, in input order.
  """
  pool = Pool(CPUS)
  try:
    return pool.map(merge_fun, inputs)
  finally:
    pool.close()


def merge_test_runner_output(options):
  """Merge all test runner sancov files in options.coverage_dir.

  Works in two rounds: first the files of each executable are merged
  bucket-wise into intermediate results, then the intermediate results of
  each executable are merged into the final result file.
  """
  # Map executable names to their respective sancov files.
  file_map = {}
  for f in os.listdir(options.coverage_dir):
    match = SANCOV_FILE_RE.match(f)
    if match:
      file_map.setdefault(match.group(1), []).append(f)

  inputs = generate_inputs(
      options.keep, options.coverage_dir, file_map, CPUS)

  logging.info('Executing %d merge jobs in parallel for %d executables.',
               len(inputs), len(file_map))

  results = merge_parallel(inputs)

  # Map executable names to intermediate bucket result files.
  file_map = {}
  for executable, f in results:
    file_map.setdefault(executable, []).append(f)

  # Merge the bucket results for each executable.
  # The final result has index None, so no index will appear in the
  # file name.
  inputs = [(options.keep, options.coverage_dir, executable, None, files)
            for executable, files in file_map.items()]

  logging.info('Merging %d intermediate results.', len(inputs))

  merge_parallel(inputs)


def merge_two(args):
  """Merge two sancov files.

  Called through multiprocessing pool. The args are expected to unpack to:
    swarming_output_dir: Folder where to find the new file.
    coverage_dir: Folder where to find the existing file.
    f: File name of the file to be merged.

  The merged data replaces the existing file in coverage_dir.

  Raises:
    AssertionError: If the sancov tool fails. The existing file is left
        untouched in that case.
  """
  swarming_output_dir, coverage_dir, f = args
  input_file = os.path.join(swarming_output_dir, f)
  output_file = os.path.join(coverage_dir, f)
  output = _run_sancov_merge([input_file, output_file])
  with open(output_file, "wb") as out:
    out.write(output)


def merge_swarming_output(options):
  """Merge result files from options.swarming_output_dir into the coverage dir.

  Result files without a counterpart in options.coverage_dir are moved there.
  Files that exist in both folders are merged in parallel.
  """
  # Iterate sancov files from swarming.
  files = []
  for f in os.listdir(options.swarming_output_dir):
    match = SANCOV_RESULTS_FILE_RE.match(f)
    if match:
      if os.path.exists(os.path.join(options.coverage_dir, f)):
        # If the same file already exists, we'll merge the data.
        files.append(f)
      else:
        # No file yet? Just move it.
        os.rename(os.path.join(options.swarming_output_dir, f),
                  os.path.join(options.coverage_dir, f))

  inputs = [(options.swarming_output_dir, options.coverage_dir, f)
            for f in files]

  logging.info('Executing %d merge jobs in parallel.', len(inputs))
  merge_parallel(inputs, merge_two)


def main():
  """Parse the command line and run the requested merge. Returns 0."""
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir', required=True,
                      help='Path to the sancov output files.')
  parser.add_argument('--keep', default=False, action='store_true',
                      help='Keep sancov output files after merging.')
  parser.add_argument('--swarming-output-dir',
                      help='Folder containing a results shard from swarming.')
  options = parser.parse_args()

  # Check if folder with coverage output exists.
  _check_is_dir(options.coverage_dir)

  if options.swarming_output_dir:
    # Check if folder with swarming output exists.
    _check_is_dir(options.swarming_output_dir)
    merge_swarming_output(options)
  else:
    merge_test_runner_output(options)

  return 0


if __name__ == '__main__':
  sys.exit(main())