#!/usr/bin/env python
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for interacting with llvm-profdata

This script is taken from the chromium build tools and is synced
manually on an as-needed basis:
https://source.chromium.org/chromium/chromium/src/+/master:testing/merge_scripts/code_coverage/merge_lib.py
"""

import logging
import multiprocessing
import os
import re
import shutil
import subprocess

_DIR_SOURCE_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(__file__), '..', '..', '..'))

_JAVA_PATH = os.path.join(_DIR_SOURCE_ROOT, 'third_party', 'jdk', 'current',
                          'bin', 'java')

logging.basicConfig(
    format='[%(asctime)s %(levelname)s] %(message)s', level=logging.DEBUG)


def _call_profdata_tool(profile_input_file_paths,
                        profile_output_file_path,
                        profdata_tool_path,
                        sparse=True):
  """Calls the llvm-profdata tool to merge profiles into a single file.

  Args:
    profile_input_file_paths: A list of relative paths to the files that
        are to be merged.
    profile_output_file_path: The path to the merged file to write.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A list of paths to profiles that had to be excluded to get the merge to
    succeed, suspected of being corrupted or malformed. This implementation
    never excludes anything, so the list is always empty.

  Raises:
    CalledProcessError: An error occurred merging profiles.
  """
  logging.debug('Profile input paths: %r', profile_input_file_paths)
  logging.debug('Profile output path: %r', profile_output_file_path)

  subprocess_cmd = [
      profdata_tool_path, 'merge', '-o', profile_output_file_path,
  ]
  if sparse:
    subprocess_cmd.append('-sparse=true')
  subprocess_cmd.extend(profile_input_file_paths)
  logging.info('profdata command: %r', ' '.join(subprocess_cmd))

  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    output = subprocess.check_output(subprocess_cmd, stderr=subprocess.STDOUT)
  except subprocess.CalledProcessError as error:
    logging.error('Failed to merge profiles, return code (%d), output: %r',
                  error.returncode, error.output)
    # Bare raise keeps the original traceback intact.
    raise

  logging.info('Merge succeeded with output: %r', output)
  logging.info('Profile data is created as: "%r".', profile_output_file_path)
  return []


def _get_profile_paths(input_dir,
                       input_extension,
                       input_filename_pattern='.*'):
  """Finds all the profiles in the given directory (recursively).

  Args:
    input_dir (str): The directory to walk.
    input_extension (str): Only file names ending with this suffix are kept.
    input_filename_pattern (str): Regex that must match (re.search) somewhere
        in the file name for the file to be kept.

  Returns:
    A list of paths (input_dir joined with the relative location) of the
    matching files.
  """
  # Compile once instead of once per file name.
  filename_regex = re.compile(input_filename_pattern)
  paths = []
  for dir_path, _sub_dirs, file_names in os.walk(input_dir):
    paths.extend(
        os.path.join(dir_path, fn)
        for fn in file_names
        if fn.endswith(input_extension) and filename_regex.search(fn))
  return paths


def _validate_and_convert_profraws(profraw_files,
                                   profdata_tool_path,
                                   sparse=True):
  """Validates and converts profraws to profdatas.

  For each given .profraw file in the input, this method first validates it by
  trying to convert it to an indexed .profdata file, and if the validation and
  conversion succeeds, the generated .profdata file will be included in the
  output, otherwise, won't.

  This method is mainly used to filter out invalid profraw files. The input
  .profraw files are deleted once every conversion has been attempted.

  Args:
    profraw_files: A list of .profraw paths.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A tuple:
      A list of converted .profdata files of *valid* profraw files.
      A list of *invalid* profraw files.
      A list of profraw files that have counter overflows.

  Raises:
    RuntimeError: One of the inputs does not end with '.profraw'.
  """
  for profraw_file in profraw_files:
    if not profraw_file.endswith('.profraw'):
      raise RuntimeError('%r is expected to be a .profraw file.' % profraw_file)

  # Nothing to do: avoid spawning a manager and a process pool for no work.
  if not profraw_files:
    return [], [], []

  cpu_count = multiprocessing.cpu_count()
  counts = max(10, cpu_count - 5)  # Use 10+ processes, but leave 5 cpu cores.
  # There is no point in having more workers than files to convert.
  counts = min(counts, len(profraw_files))

  # A single manager process is enough to host all three shared lists.
  manager = multiprocessing.Manager()
  pool = multiprocessing.Pool(counts)
  try:
    output_profdata_files = manager.list()
    invalid_profraw_files = manager.list()
    counter_overflows = manager.list()

    pending = []
    for profraw_file in profraw_files:
      logging.info('Converting profraw file: %r', profraw_file)
      async_result = pool.apply_async(
          _validate_and_convert_profraw,
          (profraw_file, output_profdata_files, invalid_profraw_files,
           counter_overflows, profdata_tool_path, sparse))
      pending.append((profraw_file, async_result))

    pool.close()
    pool.join()

    # Copy out of the proxies before the manager goes away.
    converted = list(output_profdata_files)
    invalid = list(invalid_profraw_files)
    overflows = list(counter_overflows)
  finally:
    pool.terminate()
    manager.shutdown()

  # apply_async() swallows worker exceptions unless the result is fetched.
  # Surface them, and treat the affected profile as invalid so that it is
  # reported rather than silently dropped.
  for profraw_file, async_result in pending:
    try:
      async_result.get()
    except Exception as error:  # pylint: disable=broad-except
      logging.error('Unexpected error while converting %r: %r', profraw_file,
                    error)
      if profraw_file not in invalid:
        invalid.append(profraw_file)

  # Remove inputs, as they won't be needed and they can be pretty large.
  for input_file in profraw_files:
    os.remove(input_file)

  return converted, invalid, overflows


def _validate_and_convert_profraw(profraw_file, output_profdata_files,
                                  invalid_profraw_files, counter_overflows,
                                  profdata_tool_path, sparse=True):
  """Validates one .profraw by converting it to .profdata with llvm-profdata.

  The result is reported by appending to the list-like arguments rather than
  through the return value, so that it can run inside a worker process.

  Args:
    profraw_file: Path of the .profraw file to convert.
    output_profdata_files: List receiving the generated .profdata path when
        the conversion succeeds without a counter overflow.
    invalid_profraw_files: List receiving profraw_file when the conversion
        fails or reports a counter overflow.
    counter_overflows: List receiving profraw_file when the tool output
        mentions 'Counter overflow'.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
  """
  # Only swap the extension; str.replace() would also rewrite '.profraw'
  # occurring elsewhere in the path (e.g. in a directory name).
  output_profdata_file = os.path.splitext(profraw_file)[0] + '.profdata'
  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      output_profdata_file,
  ]
  if sparse:
    subprocess_cmd.append('--sparse')

  subprocess_cmd.append(profraw_file)

  profile_valid = False
  counter_overflow = False
  validation_output = None

  logging.info('profdata command: %r', ' '.join(subprocess_cmd))

  # 1. Determine if the profile is valid.
  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    logging.info('Converting %r to %r', profraw_file, output_profdata_file)
    validation_output = subprocess.check_output(
        subprocess_cmd, stderr=subprocess.STDOUT)
    logging.info('Validating and converting %r to %r succeeded with output: %r',
                 profraw_file, output_profdata_file, validation_output)
    # check_output() returns bytes on Python 3, so compare against a bytes
    # literal (which is a plain str on Python 2).
    if b'Counter overflow' in validation_output:
      counter_overflow = True
    else:
      profile_valid = True
  except subprocess.CalledProcessError as error:
    logging.warning('Validating and converting %r to %r failed with output: %r',
                    profraw_file, output_profdata_file, error.output)
    validation_output = error.output

  # 2. Add the profile to the appropriate list(s).
  if profile_valid:
    output_profdata_files.append(output_profdata_file)
  else:
    invalid_profraw_files.append(profraw_file)
    if counter_overflow:
      counter_overflows.append(profraw_file)

  # 3. Log appropriate message
  if not profile_valid:
    template = 'Bad profile: %r, output: %r'
    if counter_overflow:
      template = 'Counter overflow: %r, output: %r'
    logging.warning(template, profraw_file, validation_output)

    # 4. Delete profdata for invalid profiles if present.
    if os.path.exists(output_profdata_file):
      # The output file may be created before llvm-profdata determines the
      # input is invalid. Delete it so that it does not leak and affect other
      # merge scripts.
      os.remove(output_profdata_file)


def merge_java_exec_files(input_dir, output_path, jacococli_path):
  """Merges generated .exec files to output_path.

  Does nothing (besides logging) when no .exec file is found under input_dir.

  Args:
    input_dir (str): The path to traverse to find input files.
    output_path (str): Where to write the merged .exec file.
    jacococli_path: The path to jacococli.jar.

  Raises:
    CalledProcessError: merge command failed.
  """
  exec_input_file_paths = _get_profile_paths(input_dir, '.exec')
  if not exec_input_file_paths:
    logging.info('No exec file found under %s', input_dir)
    return

  cmd = [_JAVA_PATH, '-jar', jacococli_path, 'merge']
  cmd.extend(exec_input_file_paths)
  cmd.extend(['--destfile', output_path])
  output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
  logging.info('Merge succeeded with output: %r', output)


def merge_profiles(input_dir,
                   output_file,
                   input_extension,
                   profdata_tool_path,
                   input_filename_pattern='.*',
                   sparse=True,
                   skip_validation=False):
  """Merges the profiles produced by the shards using llvm-profdata.

  Args:
    input_dir (str): The path to traverse to find input profiles.
    output_file (str): Where to write the merged profile.
    input_extension (str): File extension to look for in the input_dir.
        e.g. '.profdata' or '.profraw'
    profdata_tool_path: The path to the llvm-profdata executable.
    input_filename_pattern (str): The regex pattern of input filename. Should be
        a valid regex pattern if present.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    skip_validation (bool): flag to skip the _validate_and_convert_profraws
        invocation. only applicable when input_extension is .profraw.

  Returns:
    The list of profiles that had to be excluded to get the merge to
    succeed and a list of profiles that had a counter overflow.

  Raises:
    CalledProcessError: The final llvm-profdata merge failed.
  """
  profile_input_file_paths = _get_profile_paths(input_dir,
                                                input_extension,
                                                input_filename_pattern)
  invalid_profraw_files = []
  counter_overflows = []

  if skip_validation:
    logging.warning('--skip-validation has been enabled. Skipping conversion '
                    'to ensure that profiles are valid.')

  if input_extension == '.profraw' and not skip_validation:
    profile_input_file_paths, invalid_profraw_files, counter_overflows = (
        _validate_and_convert_profraws(profile_input_file_paths,
                                       profdata_tool_path,
                                       sparse=sparse))
    logging.info('List of converted .profdata files: %r',
                 profile_input_file_paths)
    logging.info((
        'List of invalid .profraw files that failed to validate and convert: %r'
    ), invalid_profraw_files)

    if counter_overflows:
      logging.warning('There were %d profiles with counter overflows',
                      len(counter_overflows))

  # The list of input files could be empty in the following scenarios:
  # 1. The test target is pure Python scripts test which doesn't execute any
  #    C/C++ binaries, such as devtools_type_check.
  # 2. The test target executes binary and does dump coverage profile data
  #    files, however, all of them turned out to be invalid.
  if not profile_input_file_paths:
    logging.info('There is no valid profraw/profdata files to merge, skip '
                 'invoking profdata tools.')
    return invalid_profraw_files, counter_overflows

  invalid_profdata_files = _call_profdata_tool(
      profile_input_file_paths=profile_input_file_paths,
      profile_output_file_path=output_file,
      profdata_tool_path=profdata_tool_path,
      sparse=sparse)

  # Remove inputs when merging profraws as they won't be needed and they can be
  # pretty large. If the inputs are profdata files, do not remove them as they
  # might be used again for multiple test types coverage.
  if input_extension == '.profraw':
    for input_file in profile_input_file_paths:
      os.remove(input_file)

  return invalid_profraw_files + invalid_profdata_files, counter_overflows


# We want to retry shards that contain one or more profiles that cannot be
# merged (typically due to corruption described in crbug.com/937521).
def get_shards_to_retry(bad_profiles):
  """Extracts the swarming task ids of the shards that produced bad profiles.

  Each path is expected to end with <task_id>/<dir>/<filename>, e.g.
  /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw

  Args:
    bad_profiles: An iterable of profile paths.

  Returns:
    A set of task id strings.

  Raises:
    AssertionError: The path component in the task id position does not look
        like a task id.
    ValueError: A path has fewer than three separators.
  """
  bad_shard_ids = set()

  def is_task_id(s):
    # Swarming task ids are 16 hex chars. The pythonic way to validate the hex
    # part is to cast to int and catch a value error. The length is checked
    # explicitly (not with assert) so that it still holds under `python -O`.
    if len(s) != 16:
      return False
    try:
      int(s, 16)
    except ValueError:
      return False
    return True

  for profile in bad_profiles:
    # E.g. /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw
    _base_path, task_id, _profraw, _filename = os.path.normpath(profile).rsplit(
        os.path.sep, 3)
    # Since we are getting a task_id from a file path, which is less than ideal,
    # do some checking to at least verify that the snippet looks like a valid
    # task id. Raised explicitly so the check survives `python -O`.
    if not is_task_id(task_id):
      raise AssertionError(
          '%r (from %r) does not look like a swarming task id' %
          (task_id, profile))
    bad_shard_ids.add(task_id)
  return bad_shard_ids