# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for interacting with llvm-profdata"""

import logging
import multiprocessing
import os
import re
import subprocess
import sys

_DIR_SOURCE_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(__file__), '..', '..', '..'))

_JAVA_PATH = os.path.join(_DIR_SOURCE_ROOT, 'third_party', 'jdk', 'current',
                          'bin', 'java')

# Swarming task ids are exactly 16 hexadecimal characters.
_TASK_ID_PATTERN = re.compile(r'[0-9a-fA-F]{16}')

logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s',
                    level=logging.DEBUG)


def _call_profdata_tool(profile_input_file_paths,
                        profile_output_file_path,
                        profdata_tool_path,
                        sparse=False,
                        timeout=3600,
                        show_profdata=True,
                        weights=None):
  """Calls the llvm-profdata tool.

  The input paths are written to 'input-profdata-files.txt' next to the output
  file and handed to the tool with '-f', one entry per line. An entry is
  either '<path>' or '<weight>,<path>'.

  Args:
    profile_input_file_paths: A list of relative paths to the files that
        are to be merged.
    profile_output_file_path: The path to the merged file to write.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    timeout (int): timeout (sec) for the call to merge profiles. This should
      not take > 1 hr, and so defaults to 3600 seconds.
    show_profdata (bool): flag on whether the merged output information should
      be shown for debugging purposes.
    weights (dict): maps from benchmark name to weight. An input file gets the
      weight of the first benchmark name that its path ends with; files that
      match no entry keep the default weight of 1.

  Raises:
    CalledProcessError: An error occurred merging profiles.
    TimeoutExpired: The merge did not finish within `timeout` seconds.
  """
  # There might be too many files in input and argument limit might be
  # violated, so make the tool read a list of paths from a file.
  output_dir = os.path.dirname(profile_output_file_path)
  # Normalize to POSIX style paths for consistent results.
  input_file = os.path.join(output_dir,
                            'input-profdata-files.txt').replace('\\', '/')
  input_files_with_weights = []
  for file_path in profile_input_file_paths:
    weight = 1
    if weights:
      for benchmark, w in weights.items():
        if file_path.endswith(benchmark):
          weight = w
          break
    input_file_with_weight = file_path
    if weight != 1:
      # Format instead of concatenating so that numeric weights work as well
      # as string weights.
      input_file_with_weight = '%s,%s' % (weight, file_path)
    input_files_with_weights.append(input_file_with_weight)

  with open(input_file, 'w') as fd:
    for f in input_files_with_weights:
      fd.write('%s\n' % f)

  logging.info('Contents of input-profdata-files.txt %s',
               input_files_with_weights)

  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      profile_output_file_path,
  ]
  if sparse:
    subprocess_cmd += [
        '-sparse=true',
    ]
  subprocess_cmd.extend(['-f', input_file])
  logging.info('profdata command: %r', subprocess_cmd)

  try:
    # Both stdout and stderr are captured: when an error happens,
    # llvm-profdata writes the error output to stderr and our error handling
    # logic relies on that output. The captured stdout is logged below.
    p = subprocess.run(subprocess_cmd,
                       capture_output=True,
                       text=True,
                       timeout=timeout,
                       check=True)
    logging.info(p.stdout)
  except subprocess.CalledProcessError as error:
    logging.info('stdout: %s', error.output)
    logging.error('Failed to merge profiles, return code (%d), error: %r',
                  error.returncode, error.stderr)
    raise
  except subprocess.TimeoutExpired as e:
    logging.info('stdout: %s', e.output)
    raise

  if show_profdata:
    _call_profdata_show(profile_output_file_path, profdata_tool_path)

  logging.info('Profile data is created as: "%r".', profile_output_file_path)


def _call_profdata_show(profile_path,
                        profdata_tool_path,
                        topn=1000,
                        timeout=60):
  """Calls the llvm-profdata show command.

  This is a best-effort debugging aid: failures and timeouts are logged and
  swallowed rather than raised.

  Args:
    profile_path: The path to the profdata file to show.
    profdata_tool_path: The path to the llvm-profdata executable.
    topn: Only show functions with the topn hottest basic blocks.
    timeout (int): timeout (sec) for the call to show profiles.
  """
  subprocess_cmd = [
      profdata_tool_path,
      'show',
      '-topn',
      str(topn),
      profile_path,
  ]
  logging.info('profdata command: %r', subprocess_cmd)

  try:
    p = subprocess.run(subprocess_cmd,
                       capture_output=True,
                       text=True,
                       timeout=timeout,
                       check=True)
    logging.info(p.stdout)
  except subprocess.CalledProcessError as error:
    logging.info('stdout: %s', error.output)
    logging.error('Failed to show profile, return code (%d), error: %r',
                  error.returncode, error.stderr)
  except subprocess.TimeoutExpired as e:
    logging.info('stdout: %s', e.output)


def _get_profile_paths(input_dir, input_extension, input_filename_pattern='.*'):
  """Finds all the profiles in the given directory (recursively).

  Args:
    input_dir (str): The directory to walk.
    input_extension (str): Only file names ending with this are returned.
    input_filename_pattern (str): Regex that must match somewhere in the file
      name (not the full path).

  Returns:
    A list of matching paths, normalized to use '/' as separator.
  """
  paths = []
  for dir_path, _sub_dirs, file_names in os.walk(input_dir):
    paths.extend([
        # Normalize to POSIX style paths for consistent results.
        os.path.join(dir_path, fn).replace('\\', '/') for fn in file_names if
        fn.endswith(input_extension) and re.search(input_filename_pattern, fn)
    ])
  return paths


def _validate_and_convert_profraws(profraw_files,
                                   profdata_tool_path,
                                   sparse=False):
  """Validates and converts profraws to profdatas.

  For each given .profraw file in the input, this method first validates it by
  trying to convert it to an indexed .profdata file, and if the validation and
  conversion succeeds, the generated .profdata file will be included in the
  output, otherwise, won't.

  This method is mainly used to filter out invalid profraw files. All input
  .profraw files are deleted once every conversion has finished.

  Args:
    profraw_files: A list of .profraw paths.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A tuple:
      A list of converted .profdata files of *valid* profraw files.
      A list of *invalid* profraw files.
      A list of profraw files that have counter overflows.

  Raises:
    RuntimeError: One of the inputs does not end with '.profraw'.
  """
  for profraw_file in profraw_files:
    if not profraw_file.endswith('.profraw'):
      raise RuntimeError('%r is expected to be a .profraw file.' % profraw_file)

  if not profraw_files:
    # Nothing to convert; avoid spawning worker and manager processes.
    return [], [], []

  cpu_count = multiprocessing.cpu_count()
  counts = max(10, cpu_count - 5)  # Use 10+ processes, but leave 5 cpu cores.
  if sys.platform == 'win32':
    # TODO(crbug.com/40755900) - we can't use more than 56 child processes on
    # Windows or Python3 may hang.
    counts = min(counts, 56)

  # A single manager process backs all three shared lists; it is shut down
  # when the `with` block exits, so the results are copied out before that.
  with multiprocessing.Manager() as manager:
    shared_profdata_files = manager.list()
    shared_invalid_files = manager.list()
    shared_overflows = manager.list()

    # The pool context manager guarantees the workers are released even if
    # scheduling or joining fails.
    with multiprocessing.Pool(counts) as pool:
      results = [
          pool.apply_async(
              _validate_and_convert_profraw,
              (profraw_file, shared_profdata_files, shared_invalid_files,
               shared_overflows, profdata_tool_path, sparse))
          for profraw_file in profraw_files
      ]
      pool.close()
      pool.join()

    # Surface any exception raised inside a worker.
    for result in results:
      result.get()

    output_profdata_files = list(shared_profdata_files)
    invalid_profraw_files = list(shared_invalid_files)
    counter_overflows = list(shared_overflows)

  # Remove inputs, as they won't be needed and they can be pretty large.
  for input_file in profraw_files:
    os.remove(input_file)

  return output_profdata_files, invalid_profraw_files, counter_overflows


def _validate_and_convert_profraw(profraw_file,
                                  output_profdata_files,
                                  invalid_profraw_files,
                                  counter_overflows,
                                  profdata_tool_path,
                                  sparse=False,
                                  show_profdata=True):
  """Validates one .profraw file by converting it to a .profdata file.

  The result is reported by appending to the list arguments instead of being
  returned.

  Args:
    profraw_file: Path of the .profraw file to convert.
    output_profdata_files: List that receives the generated .profdata path
      when the conversion succeeds.
    invalid_profraw_files: List that receives `profraw_file` when the
      conversion fails or reports a counter overflow.
    counter_overflows: List that receives `profraw_file` when the tool output
      contains 'Counter overflow'.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
    show_profdata (bool): whether to run `llvm-profdata show` on the
      generated file for debugging purposes.
  """
  # Only swap the trailing extension so that a '.profraw' occurring elsewhere
  # in the path (e.g. in a directory name) is left untouched.
  profraw_suffix = '.profraw'
  if profraw_file.endswith(profraw_suffix):
    output_profdata_file = (profraw_file[:-len(profraw_suffix)] + '.profdata')
  else:
    output_profdata_file = profraw_file.replace(profraw_suffix, '.profdata')

  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      output_profdata_file,
  ]
  if sparse:
    subprocess_cmd.append('--sparse')

  subprocess_cmd.append(profraw_file)
  logging.info('profdata command: %r', subprocess_cmd)

  profile_valid = False
  counter_overflow = False
  validation_output = None

  # 1. Determine if the profile is valid.
  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    validation_output = subprocess.check_output(subprocess_cmd,
                                                stderr=subprocess.STDOUT,
                                                encoding='UTF-8')
    if 'Counter overflow' in validation_output:
      counter_overflow = True
    else:
      profile_valid = True
  except subprocess.CalledProcessError as error:
    logging.warning('Validating and converting %r to %r failed with output: %r',
                    profraw_file, output_profdata_file, error.output)
    validation_output = error.output

  # 2. Add the profile to the appropriate list(s).
  if profile_valid:
    output_profdata_files.append(output_profdata_file)
  else:
    invalid_profraw_files.append(profraw_file)
    if counter_overflow:
      counter_overflows.append(profraw_file)

  # 3. Log appropriate message
  if not profile_valid:
    template = 'Bad profile: %r, output: %r'
    if counter_overflow:
      template = 'Counter overflow: %r, output: %r'
    logging.warning(template, profraw_file, validation_output)

    # 4. Delete profdata for invalid profiles if present.
    if os.path.exists(output_profdata_file):
      # The output file may be created before llvm-profdata determines the
      # input is invalid. Delete it so that it does not leak and affect other
      # merge scripts.
      os.remove(output_profdata_file)

  # 5. Show profdata information. Skipped for invalid profiles because their
  # output file has just been removed and there is nothing left to show.
  if show_profdata and profile_valid:
    _call_profdata_show(output_profdata_file, profdata_tool_path)


def merge_java_exec_files(input_dir, output_path, jacococli_path):
  """Merges generated .exec files to output_path.

  Does nothing (besides logging) when no .exec file is found.

  Args:
    input_dir (str): The path to traverse to find input files.
    output_path (str): Where to write the merged .exec file.
    jacococli_path: The path to jacococli.jar.

  Raises:
    CalledProcessError: merge command failed.
  """
  exec_input_file_paths = _get_profile_paths(input_dir, '.exec')
  if not exec_input_file_paths:
    logging.info('No exec file found under %s', input_dir)
    return

  cmd = [_JAVA_PATH, '-jar', jacococli_path, 'merge']
  cmd.extend(exec_input_file_paths)
  cmd.extend(['--destfile', output_path])
  subprocess.check_call(cmd, stderr=subprocess.STDOUT)


def merge_profiles(input_dir,
                   output_file,
                   input_extension,
                   profdata_tool_path,
                   input_filename_pattern='.*',
                   sparse=False,
                   skip_validation=False,
                   merge_timeout=3600,
                   show_profdata=True,
                   weights=None):
  """Merges the profiles produced by the shards using llvm-profdata.

  Args:
    input_dir (str): The path to traverse to find input profiles.
    output_file (str): Where to write the merged profile.
    input_extension (str): File extension to look for in the input_dir.
        e.g. '.profdata' or '.profraw'
    profdata_tool_path: The path to the llvm-profdata executable.
    input_filename_pattern (str): The regex pattern of input filename. Should be
        a valid regex pattern if present.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    skip_validation (bool): flag to skip the _validate_and_convert_profraws
        invocation. only applicable when input_extension is .profraw.
    merge_timeout (int): timeout (sec) for the call to merge profiles. This
        should not take > 1 hr, and so defaults to 3600 seconds.
    show_profdata (bool): flag on whether the merged output information should
        be shown for debugging purposes.
    weights (dict): maps from a suffix of the input file path (e.g. the
        benchmark or profdata file name) to the weight of that file.

  Returns:
    A tuple of two lists: the profiles that had to be excluded to get the
    merge to succeed, and the profiles that had a counter overflow.

  Raises:
    CalledProcessError: An error occurred merging profiles.
  """
  profile_input_file_paths = _get_profile_paths(input_dir, input_extension,
                                                input_filename_pattern)
  invalid_profraw_files = []
  counter_overflows = []

  if skip_validation:
    logging.warning('--skip-validation has been enabled. Skipping conversion '
                    'to ensure that profiles are valid.')

  if input_extension == '.profraw' and not skip_validation:
    # From here on the inputs are the converted .profdata files; the original
    # .profraw files have been removed by the validation step.
    profile_input_file_paths, invalid_profraw_files, counter_overflows = (
        _validate_and_convert_profraws(profile_input_file_paths,
                                       profdata_tool_path,
                                       sparse=sparse))
    logging.info((
        'List of invalid .profraw files that failed to validate and convert: %r'
    ), invalid_profraw_files)

    if counter_overflows:
      logging.warning('There were %d profiles with counter overflows',
                      len(counter_overflows))

  # The list of input files could be empty in the following scenarios:
  # 1. The test target is pure Python scripts test which doesn't execute any
  #    C/C++ binaries, such as devtools_type_check.
  # 2. The test target executes binary and does dumps coverage profile data
  #    files, however, all of them turned out to be invalid.
  if not profile_input_file_paths:
    logging.info('There is no valid profraw/profdata files to merge, skip '
                 'invoking profdata tools.')
    return invalid_profraw_files, counter_overflows

  _call_profdata_tool(profile_input_file_paths=profile_input_file_paths,
                      profile_output_file_path=output_file,
                      profdata_tool_path=profdata_tool_path,
                      sparse=sparse,
                      timeout=merge_timeout,
                      show_profdata=show_profdata,
                      weights=weights)

  # Remove inputs when merging profraws as they won't be needed and they can be
  # pretty large. If the inputs are profdata files, do not remove them as they
  # might be used again for multiple test types coverage.
  if input_extension == '.profraw':
    for input_file in profile_input_file_paths:
      os.remove(input_file)

  return invalid_profraw_files, counter_overflows


# We want to retry shards that contain one or more profiles that cannot be
# merged (typically due to corruption described in crbug.com/937521).
def get_shards_to_retry(bad_profiles):
  """Returns the set of swarming task ids that produced the given profiles.

  Args:
    bad_profiles: Paths of profiles that could not be merged. Each path is
      expected to look like '<base>/<task_id>/<dir>/<file name>'.

  Returns:
    A set with the task id extracted from each path.

  Raises:
    AssertionError: A path component in the task id position does not look
      like a swarming task id.
    ValueError: A path has fewer than four components.
  """
  bad_shard_ids = set()

  def is_task_id(s):
    # Swarming task ids are 16 hex chars. An explicit pattern is used instead
    # of `assert` + `int(s, 16)` so that the check still runs under
    # `python -O` and rejects look-alikes such as '0x'-prefixed strings.
    return _TASK_ID_PATTERN.fullmatch(s) is not None

  for profile in bad_profiles:
    # E.g. /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw
    _base_path, task_id, _profraw, _filename = os.path.normpath(profile).rsplit(
        os.path.sep, 3)
    # Since we are getting a task_id from a file path, which is less than ideal,
    # do some checking to at least verify that the snippet looks like a valid
    # task id.
    if not is_task_id(task_id):
      raise AssertionError(
          'Expected a 16 hex char swarming task id in %r, found %r' %
          (profile, task_id))
    bad_shard_ids.add(task_id)
  return bad_shard_ids