• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Functions for interacting with llvm-profdata"""
5
6import logging
7import multiprocessing
8import os
9import re
10import shutil
11import subprocess
12import sys
13
# Directory three levels above the directory that contains this file.
_DIR_SOURCE_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(__file__), '..', '..', '..'))

# Path to the `java` binary under third_party/jdk/current/bin in the source
# root; used to run jacococli when merging .exec files.
_JAVA_PATH = os.path.join(_DIR_SOURCE_ROOT, 'third_party', 'jdk', 'current',
                          'bin', 'java')

# Configures the root logger when this module is imported: DEBUG level, with
# every message prefixed by its timestamp and level name.
logging.basicConfig(
    format='[%(asctime)s %(levelname)s] %(message)s', level=logging.DEBUG)
22
23
def _call_profdata_tool(profile_input_file_paths,
                        profile_output_file_path,
                        profdata_tool_path,
                        sparse=False,
                        timeout=3600):
  """Calls the llvm-profdata tool to merge profiles into a single file.

  Args:
    profile_input_file_paths: A list of relative paths to the files that
        are to be merged.
    profile_output_file_path: The path to the merged file to write.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    timeout (int): timeout (sec) for the call to merge profiles. This should
      not take > 1 hr, and so defaults to 3600 seconds.

  Raises:
    subprocess.CalledProcessError: llvm-profdata exited with a non-zero code.
    subprocess.TimeoutExpired: The merge did not finish within `timeout`
      seconds.
  """
  subprocess_cmd = [
      profdata_tool_path, 'merge', '-o', profile_output_file_path,
  ]
  if sparse:
    subprocess_cmd += ['-sparse=true',]
  subprocess_cmd.extend(profile_input_file_paths)
  logging.info('profdata command: %r', subprocess_cmd)

  try:
    # stdout and stderr are both captured (as text) so that they can be logged
    # below; on failure llvm-profdata writes its diagnostics to stderr and the
    # error handling relies on that output.
    p = subprocess.run(subprocess_cmd,
                       capture_output=True,
                       text=True,
                       timeout=timeout,
                       check=True)
  except subprocess.CalledProcessError as error:
    logging.info('stdout: %s', error.output)
    logging.error('Failed to merge profiles, return code (%d), error: %r',
                  error.returncode, error.stderr)
    # Bare raise re-raises the active exception with its original traceback.
    raise
  except subprocess.TimeoutExpired as error:
    logging.info('stdout: %s', error.output)
    logging.error('Timed out merging profiles after %s seconds', timeout)
    raise

  logging.info(p.stdout)
  logging.info('Profile data is created as: "%r".', profile_output_file_path)
73
74
def _get_profile_paths(input_dir,
                       input_extension,
                       input_filename_pattern='.*'):
  """Recursively collects the profile files located under input_dir.

  A file is selected when its name ends with input_extension and
  input_filename_pattern is found in the name by re.search.

  Returns:
    A list of the selected paths, with backslashes replaced by '/'.
  """
  matching_paths = []
  for parent_dir, _children, names in os.walk(input_dir):
    for name in names:
      if not name.endswith(input_extension):
        continue
      if not re.search(input_filename_pattern, name):
        continue
      # Normalize to POSIX style paths for consistent results.
      full_path = os.path.join(parent_dir, name)
      matching_paths.append(full_path.replace('\\', '/'))
  return matching_paths
88
89
def _validate_and_convert_profraws(profraw_files,
                                   profdata_tool_path,
                                   sparse=False):
  """Validates and converts profraws to profdatas.

  For each given .profraw file in the input, this method first validates it by
  trying to convert it to an indexed .profdata file, and if the validation and
  conversion succeeds, the generated .profdata file will be included in the
  output, otherwise, won't.

  This method is mainly used to filter out invalid profraw files. The
  conversions run in parallel in a multiprocessing pool, and every input
  .profraw file is deleted once all conversions have finished.

  Args:
    profraw_files: A list of .profraw paths.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A tuple:
      A list of converted .profdata files of *valid* profraw files.
      A list of *invalid* profraw files.
      A list of profraw files that have counter overflows.

  Raises:
    RuntimeError: An input path does not end with '.profraw'.
  """
  for profraw_file in profraw_files:
    if not profraw_file.endswith('.profraw'):
      raise RuntimeError('%r is expected to be a .profraw file.' % profraw_file)

  if not profraw_files:
    # Nothing to convert: skip starting the worker pool and manager process.
    return [], [], []

  cpu_count = multiprocessing.cpu_count()
  counts = max(10, cpu_count - 5)  # Use 10+ processes, but leave 5 cpu cores.
  if sys.platform == 'win32':
    # TODO(crbug.com/1190269) - we can't use more than 56 child processes on
    # Windows or Python3 may hang.
    counts = min(counts, 56)

  # A single manager process backs all three shared lists, and the `with`
  # block shuts it down as soon as the results have been copied out.
  with multiprocessing.Manager() as manager:
    output_profdata_files = manager.list()
    invalid_profraw_files = manager.list()
    counter_overflows = manager.list()

    pool = multiprocessing.Pool(counts)
    results = [
        pool.apply_async(
            _validate_and_convert_profraw,
            (profraw_file, output_profdata_files, invalid_profraw_files,
             counter_overflows, profdata_tool_path, sparse))
        for profraw_file in profraw_files
    ]
    pool.close()
    pool.join()

    # Re-raise any exception that occurred inside a worker.
    for result in results:
      result.get()

    # Copy the proxies into plain lists while the manager is still running.
    converted = list(output_profdata_files)
    invalid = list(invalid_profraw_files)
    overflowed = list(counter_overflows)

  # Remove inputs, as they won't be needed and they can be pretty large.
  for input_file in profraw_files:
    os.remove(input_file)

  return converted, invalid, overflowed
148
149
def _validate_and_convert_profraw(profraw_file, output_profdata_files,
                                  invalid_profraw_files, counter_overflows,
                                  profdata_tool_path, sparse=False):
  """Validates one .profraw file by converting it to a .profdata file.

  Runs `<profdata_tool_path> merge -o <output> [--sparse] <profraw_file>` and
  records the outcome by appending to the list arguments.

  Args:
    profraw_file: Path of the .profraw file to convert.
    output_profdata_files: List that receives the generated .profdata path when
      the conversion succeeds without a counter overflow.
    invalid_profraw_files: List that receives profraw_file when the conversion
      fails or reports a counter overflow.
    counter_overflows: List that additionally receives profraw_file when the
      tool output contains 'Counter overflow'.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
  """
  profraw_suffix = '.profraw'
  if profraw_file.endswith(profraw_suffix):
    # Swap only the trailing extension; a plain str.replace would also rewrite
    # a '.profraw' that appears in a directory component of the path.
    output_profdata_file = profraw_file[:-len(profraw_suffix)] + '.profdata'
  else:
    output_profdata_file = profraw_file.replace('.profraw', '.profdata')

  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      output_profdata_file,
  ]
  if sparse:
    subprocess_cmd.append('--sparse')

  subprocess_cmd.append(profraw_file)
  logging.info('profdata command: %r', subprocess_cmd)

  profile_valid = False
  counter_overflow = False
  validation_output = None

  # 1. Determine if the profile is valid.
  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    validation_output = subprocess.check_output(
        subprocess_cmd, stderr=subprocess.STDOUT, encoding='UTF-8')
    if 'Counter overflow' in validation_output:
      counter_overflow = True
    else:
      profile_valid = True
  except subprocess.CalledProcessError as error:
    logging.warning('Validating and converting %r to %r failed with output: %r',
                    profraw_file, output_profdata_file, error.output)
    validation_output = error.output

  # 2. Add the profile to the appropriate list(s).
  if profile_valid:
    output_profdata_files.append(output_profdata_file)
  else:
    invalid_profraw_files.append(profraw_file)
    if counter_overflow:
      counter_overflows.append(profraw_file)

  # 3. Log appropriate message
  if not profile_valid:
    template = 'Bad profile: %r, output: %r'
    if counter_overflow:
      template = 'Counter overflow: %r, output: %r'
    logging.warning(template, profraw_file, validation_output)

    # 4. Delete profdata for invalid profiles if present.
    if os.path.exists(output_profdata_file):
      # The output file may be created before llvm-profdata determines the
      # input is invalid. Delete it so that it does not leak and affect other
      # merge scripts.
      os.remove(output_profdata_file)
207
def merge_java_exec_files(input_dir, output_path, jacococli_path):
  """Combines every .exec file found under input_dir into output_path.

  The merge is done by running jacococli's `merge` command with the java
  binary at _JAVA_PATH. Nothing is run when no .exec file is found.

  Args:
    input_dir (str): The path to traverse to find input files.
    output_path (str): Where to write the merged .exec file.
    jacococli_path: The path to jacococli.jar.

  Raises:
    CalledProcessError: merge command failed.
  """
  exec_files = _get_profile_paths(input_dir, '.exec')
  if exec_files:
    merge_cmd = [
        _JAVA_PATH, '-jar', jacococli_path, 'merge', *exec_files,
        '--destfile', output_path
    ]
    subprocess.check_call(merge_cmd, stderr=subprocess.STDOUT)
  else:
    logging.info('No exec file found under %s', input_dir)
228
229
def merge_profiles(input_dir,
                   output_file,
                   input_extension,
                   profdata_tool_path,
                   input_filename_pattern='.*',
                   sparse=False,
                   skip_validation=False,
                   merge_timeout=3600):
  """Merges the per-shard profiles into one file using llvm-profdata.

  Args:
    input_dir (str): The path to traverse to find input profiles.
    output_file (str): Where to write the merged profile.
    input_extension (str): File extension to look for in the input_dir.
        e.g. '.profdata' or '.profraw'
    profdata_tool_path: The path to the llvm-profdata executable.
    input_filename_pattern (str): The regex pattern of input filename. Should be
        a valid regex pattern if present.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    skip_validation (bool): flag to skip the _validate_and_convert_profraws
        invocation. only applicable when input_extension is .profraw.
    merge_timeout (int): timeout (sec) for the call to merge profiles. This
      should not take > 1 hr, and so defaults to 3600 seconds.

  Returns:
    The list of profiles that had to be excluded to get the merge to
    succeed and a list of profiles that had a counter overflow.
  """
  merging_profraws = input_extension == '.profraw'
  excluded_profiles = []
  overflowed_profiles = []
  merge_inputs = _get_profile_paths(input_dir, input_extension,
                                    input_filename_pattern)

  if skip_validation:
    logging.warning('--skip-validation has been enabled. Skipping conversion '
                    'to ensure that profiles are valid.')
  elif merging_profraws:
    # Validation replaces the raw inputs with the converted .profdata files.
    merge_inputs, excluded_profiles, overflowed_profiles = (
        _validate_and_convert_profraws(merge_inputs,
                                       profdata_tool_path,
                                       sparse=sparse))
    logging.info(
        'List of invalid .profraw files that failed to validate and convert: %r',
        excluded_profiles)

    if overflowed_profiles:
      logging.warning('There were %d profiles with counter overflows',
                      len(overflowed_profiles))

  # The list of input files could be empty in the following scenarios:
  # 1. The test target is a pure Python script test which doesn't execute any
  #    C/C++ binaries, such as devtools_type_check.
  # 2. The test target executes a binary and does dump coverage profile data
  #    files, however, all of them turned out to be invalid.
  if not merge_inputs:
    logging.info('There is no valid profraw/profdata files to merge, skip '
                 'invoking profdata tools.')
    return excluded_profiles, overflowed_profiles

  _call_profdata_tool(
      profile_input_file_paths=merge_inputs,
      profile_output_file_path=output_file,
      profdata_tool_path=profdata_tool_path,
      sparse=sparse,
      timeout=merge_timeout)

  # Remove inputs when merging profraws as they won't be needed and they can be
  # pretty large. If the inputs are profdata files, do not remove them as they
  # might be used again for multiple test types coverage.
  if merging_profraws:
    for merged_input in merge_inputs:
      os.remove(merged_input)

  return excluded_profiles, overflowed_profiles
307
# We want to retry shards that contain one or more profiles that cannot be
# merged (typically due to corruption described in crbug.com/937521).
def get_shards_to_retry(bad_profiles):
  """Returns the swarming task ids of the shards that produced bad profiles.

  The task id is taken from each profile path: it is the third path component
  from the end, e.g. '44b643576cf39f10' in
  /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw

  Args:
    bad_profiles: An iterable of paths to profiles that could not be merged.

  Returns:
    A set of task id strings.

  Raises:
    AssertionError: A path component in the task id position does not look
      like a swarming task id.
    ValueError: A path has fewer than four components.
  """

  def is_task_id(s):
    # Swarming task ids are 16 hex chars. The pythonic way to validate the hex
    # part is to cast to int and catch a value error. The length is checked
    # explicitly rather than with `assert`, which is stripped under -O.
    if len(s) != 16:
      return False
    try:
      int(s, 16)
    except ValueError:
      return False
    return True

  bad_shard_ids = set()
  for profile in bad_profiles:
    # E.g. /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw
    _base_path, task_id, _profraw, _filename = os.path.normpath(profile).rsplit(
        os.path.sep, 3)
    # Since we are getting a task_id from a file path, which is less than ideal,
    # do some checking to at least verify that the snippet looks like a valid
    # task id. AssertionError is raised explicitly so that the check still runs
    # under -O while callers see the same exception type as before.
    if not is_task_id(task_id):
      raise AssertionError(
          '%r (from %r) does not look like a swarming task id' %
          (task_id, profile))
    bad_shard_ids.add(task_id)
  return bad_shard_ids
333