• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env python
2# Copyright 2020 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Functions for interacting with llvm-profdata
6
7This script is taken from the chromium build tools and is synced
8manually on an as-needed basis:
9https://source.chromium.org/chromium/chromium/src/+/master:testing/merge_scripts/code_coverage/merge_lib.py
10"""
11
12import logging
13import multiprocessing
14import os
15import re
16import shutil
17import subprocess
18
# Directory three levels above the one containing this file; used as the base
# for the paths below.
_DIR_SOURCE_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(__file__), *(['..'] * 3)))

# Path to the `java` executable under third_party/jdk/current/bin.
_JAVA_PATH = os.path.join(
    _DIR_SOURCE_ROOT, 'third_party', 'jdk', 'current', 'bin', 'java')

# Root logger configuration is applied as soon as this module is imported.
logging.basicConfig(
    level=logging.DEBUG, format='[%(asctime)s %(levelname)s] %(message)s')
27
28
def _call_profdata_tool(profile_input_file_paths,
                        profile_output_file_path,
                        profdata_tool_path,
                        sparse=True):
  """Calls the llvm-profdata tool to merge profiles into one output file.

  Args:
    profile_input_file_paths: A list (or other iterable) of paths to the files
        that are to be merged.
    profile_output_file_path: The path to the merged file to write.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A list of paths to profiles that had to be excluded to get the merge to
    succeed. This implementation never excludes inputs, so the list is always
    empty; a failed merge raises instead.

  Raises:
    CalledProcessError: An error occurred merging profiles.
  """
  # Hand the values to logging as arguments instead of pre-formatting with
  # `%`: a tuple of paths would otherwise be unpacked as several format
  # arguments and raise TypeError.
  logging.debug('Profile input paths: %r', profile_input_file_paths)
  logging.debug('Profile output path: %r', profile_output_file_path)

  subprocess_cmd = [
      profdata_tool_path, 'merge', '-o', profile_output_file_path,
  ]
  if sparse:
    subprocess_cmd.append('-sparse=true')
  subprocess_cmd.extend(profile_input_file_paths)
  logging.info('profdata command: %r', ' '.join(subprocess_cmd))

  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    output = subprocess.check_output(subprocess_cmd, stderr=subprocess.STDOUT)
  except subprocess.CalledProcessError as error:
    logging.error('Failed to merge profiles, return code (%d), output: %r',
                  error.returncode, error.output)
    # Bare raise keeps the original traceback intact.
    raise

  logging.info('Merge succeeded with output: %r', output)
  logging.info('Profile data is created as: "%r".', profile_output_file_path)
  return []
73
74
def _get_profile_paths(input_dir,
                       input_extension,
                       input_filename_pattern='.*'):
  """Recursively collects the profile files located under a directory.

  Args:
    input_dir: The directory to walk.
    input_extension: Suffix a file name must end with to be collected.
    input_filename_pattern: Regex that must be found (re.search) in the file
        name. The default matches every name.

  Returns:
    A list of paths (walked directory joined with the file name), in the
    order os.walk yields them.
  """
  matches = []
  for current_dir, _, names in os.walk(input_dir):
    for name in names:
      if not name.endswith(input_extension):
        continue
      if re.search(input_filename_pattern, name):
        matches.append(os.path.join(current_dir, name))
  return matches
87
88
def _validate_and_convert_profraws(profraw_files,
                                   profdata_tool_path,
                                   sparse=True):
  """Validates and converts profraws to profdatas.

  For each given .profraw file in the input, this method first validates it by
  trying to convert it to an indexed .profdata file, and if the validation and
  conversion succeeds, the generated .profdata file will be included in the
  output, otherwise, won't.

  This method is mainly used to filter out invalid profraw files. The
  conversions run in a process pool. Every input .profraw file is deleted
  before returning, whether or not it converted successfully.

  Args:
    profraw_files: A list of .profraw paths.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A tuple:
      A list of converted .profdata files of *valid* profraw files.
      A list of *invalid* profraw files. Files whose conversion worker failed
        with an unexpected exception are reported here as well.
      A list of profraw files that have counter overflows.

  Raises:
    RuntimeError: One of the inputs does not end with '.profraw'.
  """
  profraw_files = list(profraw_files)
  for profraw_file in profraw_files:
    if not profraw_file.endswith('.profraw'):
      raise RuntimeError('%r is expected to be a .profraw file.' % profraw_file)

  # Nothing to convert: avoid starting a pool and a manager process for no
  # work.
  if not profraw_files:
    return [], [], []

  cpu_count = multiprocessing.cpu_count()
  counts = max(10, cpu_count - 5)  # Use 10+ processes, but leave 5 cpu cores.
  pool = multiprocessing.Pool(counts)
  try:
    # A single manager serves all three shared lists; each Manager() call
    # starts its own server process.
    manager = multiprocessing.Manager()
    try:
      output_profdata_files = manager.list()
      invalid_profraw_files = manager.list()
      counter_overflows = manager.list()

      pending = []
      for profraw_file in profraw_files:
        logging.info('Converting profraw file: %r', profraw_file)
        async_result = pool.apply_async(
            _validate_and_convert_profraw,
            (profraw_file, output_profdata_files, invalid_profraw_files,
             counter_overflows, profdata_tool_path, sparse))
        pending.append((profraw_file, async_result))

      pool.close()
      pool.join()

      # apply_async only surfaces a worker exception when the result is
      # fetched. Fetch each one so that a crashed conversion is logged and
      # counted as invalid instead of silently vanishing from every list.
      crashed = []
      for profraw_file, async_result in pending:
        try:
          async_result.get()
        except Exception:  # pylint: disable=broad-except
          logging.exception('Unexpected error while converting %r',
                            profraw_file)
          crashed.append(profraw_file)

      # Copy out of the proxies before the manager process goes away.
      converted = list(output_profdata_files)
      invalid = list(invalid_profraw_files)
      overflows = list(counter_overflows)
    finally:
      manager.shutdown()
  finally:
    pool.terminate()

  invalid.extend(path for path in crashed if path not in invalid)

  # Remove inputs, as they won't be needed and they can be pretty large.
  for input_file in profraw_files:
    os.remove(input_file)

  return converted, invalid, overflows
140
141
def _validate_and_convert_profraw(profraw_file, output_profdata_files,
                                  invalid_profraw_files, counter_overflows,
                                  profdata_tool_path, sparse=True):
  """Validates one .profraw file by converting it to a .profdata file.

  Runs `<profdata_tool_path> merge -o <output> [--sparse] <profraw_file>` and
  reports the outcome by appending to the caller-supplied lists.

  Args:
    profraw_file: Path of the .profraw file to convert.
    output_profdata_files: List that receives the generated .profdata path
        when the conversion succeeds without a counter overflow.
    invalid_profraw_files: List that receives profraw_file when the tool
        fails or reports a counter overflow.
    counter_overflows: List that receives profraw_file when the tool output
        mentions 'Counter overflow'.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): Whether to pass --sparse to the tool.
  """
  # Swap only the trailing extension so that a '.profraw' appearing elsewhere
  # in the path (e.g. in a directory name) is left alone.
  raw_suffix = '.profraw'
  if profraw_file.endswith(raw_suffix):
    output_profdata_file = profraw_file[:-len(raw_suffix)] + '.profdata'
  else:
    output_profdata_file = profraw_file.replace('.profraw', '.profdata')

  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      output_profdata_file,
  ]
  if sparse:
    subprocess_cmd.append('--sparse')

  subprocess_cmd.append(profraw_file)

  profile_valid = False
  counter_overflow = False
  validation_output = None

  logging.info('profdata command: %r', ' '.join(subprocess_cmd))

  # 1. Determine if the profile is valid.
  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    logging.info('Converting %r to %r', profraw_file, output_profdata_file)
    validation_output = subprocess.check_output(
        subprocess_cmd, stderr=subprocess.STDOUT)
    logging.info('Validating and converting %r to %r succeeded with output: %r',
                 profraw_file, output_profdata_file, validation_output)
    # check_output returns bytes on Python 3; decode before the substring
    # test, which would otherwise raise TypeError for a str needle.
    output_text = validation_output
    if isinstance(output_text, bytes):
      output_text = output_text.decode('utf-8', 'replace')
    if 'Counter overflow' in output_text:
      counter_overflow = True
    else:
      profile_valid = True
  except subprocess.CalledProcessError as error:
    logging.warning('Validating and converting %r to %r failed with output: %r',
                    profraw_file, output_profdata_file, error.output)
    validation_output = error.output

  # 2. Add the profile to the appropriate list(s).
  if profile_valid:
    output_profdata_files.append(output_profdata_file)
  else:
    invalid_profraw_files.append(profraw_file)
    if counter_overflow:
      counter_overflows.append(profraw_file)

  # 3. Log appropriate message
  if not profile_valid:
    template = 'Bad profile: %r, output: %r'
    if counter_overflow:
      template = 'Counter overflow: %r, output: %r'
    logging.warning(template, profraw_file, validation_output)

    # 4. Delete profdata for invalid profiles if present.
    if os.path.exists(output_profdata_file):
      # The output file may be created before llvm-profdata determines the
      # input is invalid. Delete it so that it does not leak and affect other
      # merge scripts.
      os.remove(output_profdata_file)
203
def merge_java_exec_files(input_dir, output_path, jacococli_path):
  """Combines every .exec file found under input_dir into a single file.

  The merge is performed by running jacococli's `merge` command with the java
  executable at _JAVA_PATH. When no .exec file is found, nothing is run.

  Args:
    input_dir (str): The path to traverse to find input files.
    output_path (str): Where to write the merged .exec file.
    jacococli_path: The path to jacococli.jar.

  Raises:
    CalledProcessError: merge command failed.
  """
  exec_files = _get_profile_paths(input_dir, '.exec')
  if exec_files:
    merge_cmd = ([_JAVA_PATH, '-jar', jacococli_path, 'merge'] +
                 list(exec_files) + ['--destfile', output_path])
    # stderr is folded into stdout so the captured output holds both streams.
    merge_output = subprocess.check_output(merge_cmd, stderr=subprocess.STDOUT)
    logging.info('Merge succeeded with output: %r', merge_output)
  else:
    logging.info('No exec file found under %s', input_dir)
225
226
def merge_profiles(input_dir,
                   output_file,
                   input_extension,
                   profdata_tool_path,
                   input_filename_pattern='.*',
                   sparse=True,
                   skip_validation=False):
  """Merges the per-shard profiles under input_dir with llvm-profdata.

  When the inputs are .profraw files and validation is not skipped, each one
  is first validated and converted to .profdata, and only the successfully
  converted files take part in the merge.

  Args:
    input_dir (str): The path to traverse to find input profiles.
    output_file (str): Where to write the merged profile.
    input_extension (str): File extension to look for in the input_dir.
        e.g. '.profdata' or '.profraw'
    profdata_tool_path: The path to the llvm-profdata executable.
    input_filename_pattern (str): The regex pattern of input filename. Should be
        a valid regex pattern if present.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    skip_validation (bool): flag to skip the _validate_and_convert_profraws
        invocation. only applicable when input_extension is .profraw.

  Returns:
    A pair: the list of profiles that had to be excluded to get the merge to
    succeed, and the list of profiles that had a counter overflow.
  """
  inputs_are_profraw = input_extension == '.profraw'
  merge_inputs = _get_profile_paths(input_dir, input_extension,
                                    input_filename_pattern)
  bad_profraws = []
  overflowed = []

  if skip_validation:
    logging.warning('--skip-validation has been enabled. Skipping conversion '
                    'to ensure that profiles are valid.')
  elif inputs_are_profraw:
    # From here on the merge inputs are the converted .profdata files.
    merge_inputs, bad_profraws, overflowed = _validate_and_convert_profraws(
        merge_inputs, profdata_tool_path, sparse=sparse)
    logging.info('List of converted .profdata files: %r', merge_inputs)
    logging.info(
        'List of invalid .profraw files that failed to validate and '
        'convert: %r', bad_profraws)

    if overflowed:
      logging.warning('There were %d profiles with counter overflows',
                      len(overflowed))

  # There may be nothing to merge, for example:
  # 1. The test target is a pure Python script test which doesn't execute any
  #    C/C++ binaries, such as devtools_type_check.
  # 2. The test target executes binaries and dumps coverage profile data
  #    files, but all of them turned out to be invalid.
  if not merge_inputs:
    logging.info('There is no valid profraw/profdata files to merge, skip '
                 'invoking profdata tools.')
    return bad_profraws, overflowed

  bad_profdatas = _call_profdata_tool(
      profile_input_file_paths=merge_inputs,
      profile_output_file_path=output_file,
      profdata_tool_path=profdata_tool_path,
      sparse=sparse)

  # Inputs of a profraw merge are deleted once merged, as they won't be needed
  # and they can be pretty large. Profdata inputs are kept because they might
  # be used again for multiple test types coverage.
  if inputs_are_profraw:
    for merged_input in merge_inputs:
      os.remove(merged_input)

  return bad_profraws + bad_profdatas, overflowed
302
# We want to retry shards that contain one or more profiles that cannot be
# merged (typically due to corruption described in crbug.com/937521).
def get_shards_to_retry(bad_profiles):
  """Extracts the swarming task ids of the shards that produced bad profiles.

  The task id is taken from the third-from-last component of each path, e.g.
  /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw yields
  '44b643576cf39f10'.

  Args:
    bad_profiles: An iterable of profile file paths.

  Returns:
    A set of task id strings, one per distinct shard.

  Raises:
    AssertionError: The path component expected to be a task id is not made of
        exactly 16 hex digits.
    ValueError: A path has too few components to be split into
        <base>/<task_id>/<dir>/<filename>.
  """

  def is_task_id(s):
    # Swarming task ids are 16 hex chars. Match the digits explicitly:
    # int(s, 16) would also accept a '0x' prefix, a sign, or underscores, and
    # an `assert` on the length would be stripped when running with -O.
    return re.match(r'[0-9a-fA-F]{16}\Z', s) is not None

  bad_shard_ids = set()
  for profile in bad_profiles:
    # E.g. /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw
    _base_path, task_id, _profraw, _filename = os.path.normpath(profile).rsplit(
        os.path.sep, 3)
    # Since we are getting a task_id from a file path, which is less than ideal,
    # do some checking to at least verify that the snippet looks like a valid
    # task id. Raised explicitly (rather than via `assert`) so the check also
    # runs under -O while callers still see an AssertionError.
    if not is_task_id(task_id):
      raise AssertionError(
          '%r (from %r) does not look like a swarming task id' %
          (task_id, profile))
    bad_shard_ids.add(task_id)
  return bad_shard_ids
328
329