• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Functions for interacting with llvm-profdata"""
5
6import logging
7import multiprocessing
8import os
9import re
10import subprocess
11import sys
12
# Source root: three directory levels above the directory holding this file.
_DIR_SOURCE_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(__file__), *(['..'] * 3)))

# Location of the java binary under third_party/jdk/current/bin of the root.
_JAVA_PATH = os.path.join(_DIR_SOURCE_ROOT, 'third_party', 'jdk', 'current',
                          'bin', 'java')

# Configure root logging at import time: DEBUG level, timestamped records.
logging.basicConfig(level=logging.DEBUG,
                    format='[%(asctime)s %(levelname)s] %(message)s')
21
22
def _call_profdata_tool(profile_input_file_paths,
                        profile_output_file_path,
                        profdata_tool_path,
                        sparse=False,
                        timeout=3600,
                        show_profdata=True,
                        weights=None):
  """Calls the llvm-profdata tool to merge profiles into one file.

  The input paths are written, one per line, to 'input-profdata-files.txt' in
  the directory of the output file, and that list file is handed to
  `llvm-profdata merge` via -f.

  Args:
    profile_input_file_paths: A list of relative paths to the files that
        are to be merged.
    profile_output_file_path: The path to the merged file to write.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    timeout (int): timeout (sec) for the call to merge profiles. This should
      not take > 1 hr, and so defaults to 3600 seconds.
    show_profdata (bool): flag on whether the merged output information should
      be shown for debugging purposes.
    weights (dict): maps from benchmark name to weight. An input file whose
      path ends with a key gets that key's weight (the first matching key in
      dict iteration order wins); files without a match get weight 1. Weights
      may be strings or numbers.

  Raises:
    subprocess.CalledProcessError: An error occurred merging profiles.
    subprocess.TimeoutExpired: The merge did not finish within `timeout`.
  """
  # There might be too many files in input and argument limit might be
  # violated, so make the tool read a list of paths from a file.
  output_dir = os.path.dirname(profile_output_file_path)
  # Normalize to POSIX style paths for consistent results.
  input_file = os.path.join(output_dir,
                            'input-profdata-files.txt').replace('\\', '/')

  input_files_with_weights = []
  for file_path in profile_input_file_paths:
    weight = 1
    if weights:
      for benchmark, w in weights.items():
        if file_path.endswith(benchmark):
          weight = w
          break
    if weight != 1:
      # Weighted entries are written as "<weight>,<path>". Format with %s so
      # numeric weights work as well as string ones (str + int would raise).
      input_files_with_weights.append('%s,%s' % (weight, file_path))
    else:
      input_files_with_weights.append(file_path)

  with open(input_file, 'w') as fd:
    fd.writelines('%s\n' % entry for entry in input_files_with_weights)

  logging.info('Contents of input-profdata-files.txt %s',
               input_files_with_weights)

  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      profile_output_file_path,
  ]
  if sparse:
    subprocess_cmd.append('-sparse=true')
  subprocess_cmd.extend(['-f', input_file])
  logging.info('profdata command: %r', subprocess_cmd)

  try:
    # Both stdout and stderr are captured: llvm-profdata writes its error
    # output to stderr and the error handling below logs that output.
    p = subprocess.run(subprocess_cmd,
                       capture_output=True,
                       text=True,
                       timeout=timeout,
                       check=True)
    logging.info(p.stdout)
  except subprocess.CalledProcessError as error:
    logging.info('stdout: %s', error.output)
    logging.error('Failed to merge profiles, return code (%d), error: %r',
                  error.returncode, error.stderr)
    raise
  except subprocess.TimeoutExpired as e:
    logging.info('stdout: %s', e.output)
    raise

  if show_profdata:
    _call_profdata_show(profile_output_file_path, profdata_tool_path)

  logging.info('Profile data is created as: "%r".', profile_output_file_path)
111
112
def _call_profdata_show(profile_path,
                        profdata_tool_path,
                        topn=1000,
                        timeout=60):
  """Runs `llvm-profdata show` on a profile and logs what it prints.

  This is best effort: a non-zero exit status or a timeout is logged and
  swallowed rather than propagated to the caller.

  Args:
    profile_path: The path to the profdata file to show.
    profdata_tool_path: The path to the llvm-profdata executable.
    topn: Value passed to the tool's -topn option.
    timeout (int): timeout (sec) for the call to show profiles.
  """
  show_cmd = [profdata_tool_path, 'show', '-topn', str(topn), profile_path]
  logging.info('profdata command: %r', show_cmd)

  try:
    completed = subprocess.run(show_cmd,
                               capture_output=True,
                               text=True,
                               timeout=timeout,
                               check=True)
  except subprocess.CalledProcessError as failure:
    logging.info('stdout: %s', failure.output)
    logging.error('Failed to show profile, return code (%d), error: %r',
                  failure.returncode, failure.stderr)
  except subprocess.TimeoutExpired as expired:
    logging.info('stdout: %s', expired.output)
  else:
    logging.info(completed.stdout)
148
149
def _get_profile_paths(input_dir, input_extension, input_filename_pattern='.*'):
  """Recursively collects matching profile files under input_dir.

  A file is included when its name ends with input_extension and
  input_filename_pattern is found in the name by re.search.

  Returns:
    A list of paths with backslashes replaced by forward slashes.
  """
  matches = []
  for root, _dirs, names in os.walk(input_dir):
    for name in names:
      if not name.endswith(input_extension):
        continue
      if not re.search(input_filename_pattern, name):
        continue
      # Normalize to POSIX style paths for consistent results.
      matches.append(os.path.join(root, name).replace('\\', '/'))
  return matches
160
161
def _validate_and_convert_profraws(profraw_files,
                                   profdata_tool_path,
                                   sparse=False):
  """Validates and converts profraws to profdatas.

  For each given .profraw file in the input, this method first validates it by
  trying to convert it to an indexed .profdata file, and if the validation and
  conversion succeeds, the generated .profdata file will be included in the
  output, otherwise, won't.

  This method is mainly used to filter out invalid profraw files. The
  conversions run in a multiprocessing pool, and every input .profraw file is
  deleted once all conversions have finished.

  Args:
    profraw_files: A list of .profraw paths.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A tuple:
      A list of converted .profdata files of *valid* profraw files.
      A list of *invalid* profraw files.
      A list of profraw files that have counter overflows.

  Raises:
    RuntimeError: An input path does not end with '.profraw'.
  """
  for profraw_file in profraw_files:
    if not profraw_file.endswith('.profraw'):
      raise RuntimeError('%r is expected to be a .profraw file.' % profraw_file)

  if not profraw_files:
    # Nothing to convert: avoid starting a process pool and a manager.
    return [], [], []

  cpu_count = multiprocessing.cpu_count()
  counts = max(10, cpu_count - 5)  # Use 10+ processes, but leave 5 cpu cores.
  if sys.platform == 'win32':
    # TODO(crbug.com/40755900) - we can't use more than 56 child processes on
    # Windows or Python3 may hang.
    counts = min(counts, 56)

  # A single manager serves all three shared lists; the context manager shuts
  # its server process down instead of leaving it running.
  with multiprocessing.Manager() as manager:
    output_profdata_files = manager.list()
    invalid_profraw_files = manager.list()
    counter_overflows = manager.list()

    pool = multiprocessing.Pool(counts)
    try:
      results = [
          pool.apply_async(
              _validate_and_convert_profraw,
              (profraw_file, output_profdata_files, invalid_profraw_files,
               counter_overflows, profdata_tool_path, sparse))
          for profraw_file in profraw_files
      ]
      pool.close()
      pool.join()
    finally:
      # No-op after a clean close()/join(); reaps the workers on error.
      pool.terminate()

    # Re-raise any exception that occurred inside a worker.
    for result in results:
      result.get()

    # Copy out of the proxies while the manager is still alive.
    converted = list(output_profdata_files)
    invalid = list(invalid_profraw_files)
    overflows = list(counter_overflows)

  # Remove inputs, as they won't be needed and they can be pretty large.
  for input_file in profraw_files:
    os.remove(input_file)

  return converted, invalid, overflows
221
222
def _validate_and_convert_profraw(profraw_file,
                                  output_profdata_files,
                                  invalid_profraw_files,
                                  counter_overflows,
                                  profdata_tool_path,
                                  sparse=False,
                                  show_profdata=True):
  """Validates a single .profraw file by converting it to .profdata.

  Runs `llvm-profdata merge` on the file. The profile counts as valid when the
  tool exits successfully and its output does not contain 'Counter overflow'.
  Results are reported by appending to the list arguments.

  Args:
    profraw_file: Path of the .profraw file to convert.
    output_profdata_files: List to which the generated .profdata path is
      appended when the profile is valid.
    invalid_profraw_files: List to which profraw_file is appended when the
      profile is invalid (including the counter overflow case).
    counter_overflows: List to which profraw_file is appended when the tool
      reported a counter overflow.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
    show_profdata (bool): whether to log `llvm-profdata show` output for a
      successfully converted profile.
  """
  raw_suffix = '.profraw'
  if profraw_file.endswith(raw_suffix):
    # Swap only the extension; a blanket str.replace would also rewrite any
    # directory component that happens to contain '.profraw'.
    output_profdata_file = profraw_file[:-len(raw_suffix)] + '.profdata'
  else:
    output_profdata_file = profraw_file.replace(raw_suffix, '.profdata')

  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      output_profdata_file,
  ]
  if sparse:
    subprocess_cmd.append('--sparse')

  subprocess_cmd.append(profraw_file)
  logging.info('profdata command: %r', subprocess_cmd)

  profile_valid = False
  counter_overflow = False
  validation_output = None

  # 1. Determine if the profile is valid.
  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    validation_output = subprocess.check_output(subprocess_cmd,
                                                stderr=subprocess.STDOUT,
                                                encoding='UTF-8')
    if 'Counter overflow' in validation_output:
      counter_overflow = True
    else:
      profile_valid = True
  except subprocess.CalledProcessError as error:
    logging.warning('Validating and converting %r to %r failed with output: %r',
                    profraw_file, output_profdata_file, error.output)
    validation_output = error.output

  # 2. Add the profile to the appropriate list(s).
  if profile_valid:
    output_profdata_files.append(output_profdata_file)
  else:
    invalid_profraw_files.append(profraw_file)
    if counter_overflow:
      counter_overflows.append(profraw_file)

  # 3. Log appropriate message
  if not profile_valid:
    template = 'Bad profile: %r, output: %r'
    if counter_overflow:
      template = 'Counter overflow: %r, output: %r'
    logging.warning(template, profraw_file, validation_output)

    # 4. Delete profdata for invalid profiles if present.
    if os.path.exists(output_profdata_file):
      # The output file may be created before llvm-profdata determines the
      # input is invalid. Delete it so that it does not leak and affect other
      # merge scripts.
      os.remove(output_profdata_file)

  # 5. Show profdata information. Only for valid profiles: for an invalid one
  # the output file has just been removed (or was never written), so `show`
  # could only fail and log a misleading error.
  if show_profdata and profile_valid:
    _call_profdata_show(output_profdata_file, profdata_tool_path)
289
290
def merge_java_exec_files(input_dir, output_path, jacococli_path):
  """Merges the .exec files found under input_dir into output_path.

  The merge is done by running jacococli.jar's `merge` command with the java
  binary at _JAVA_PATH. If no .exec file is found, this only logs a message.

  Args:
    input_dir (str): The path to traverse to find input files.
    output_path (str): Where to write the merged .exec file.
    jacococli_path: The path to jacococli.jar.

  Raises:
    CalledProcessError: merge command failed.
  """
  exec_files = _get_profile_paths(input_dir, '.exec')
  if exec_files:
    merge_cmd = [
        _JAVA_PATH, '-jar', jacococli_path, 'merge', *exec_files, '--destfile',
        output_path
    ]
    subprocess.check_call(merge_cmd, stderr=subprocess.STDOUT)
  else:
    logging.info('No exec file found under %s', input_dir)
311
312
def merge_profiles(input_dir,
                   output_file,
                   input_extension,
                   profdata_tool_path,
                   input_filename_pattern='.*',
                   sparse=False,
                   skip_validation=False,
                   merge_timeout=3600,
                   show_profdata=True,
                   weights=None):
  """Merges the profiles found under input_dir using llvm-profdata.

  When input_extension is '.profraw' and validation is not skipped, each
  profraw is first validated and converted to .profdata, and only the
  successfully converted files are merged.

  Args:
    input_dir (str): The path to traverse to find input profiles.
    output_file (str): Where to write the merged profile.
    input_extension (str): File extension to look for in the input_dir.
        e.g. '.profdata' or '.profraw'
    profdata_tool_path: The path to the llvm-profdata executable.
    input_filename_pattern (str): The regex pattern of input filename. Should be
        a valid regex pattern if present.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    skip_validation (bool): flag to skip the _validate_and_convert_profraws
        invocation. only applicable when input_extension is .profraw.
    merge_timeout (int): timeout (sec) for the call to merge profiles. This
      should not take > 1 hr, and so defaults to 3600 seconds.
    show_profdata (bool): forwarded to _call_profdata_tool; whether information
      about the merged output is shown.
    weights (dict): forwarded to _call_profdata_tool; maps a name to the weight
      of the input files matching it.

  Returns:
    The list of profiles that had to be excluded to get the merge to
    succeed and a list of profiles that had a counter overflow.
  """
  merging_profraws = input_extension == '.profraw'
  inputs = _get_profile_paths(input_dir, input_extension,
                              input_filename_pattern)
  invalid_profraw_files = []
  counter_overflows = []

  if skip_validation:
    logging.warning('--skip-validation has been enabled. Skipping conversion '
                    'to ensure that profiles are valid.')
  elif merging_profraws:
    inputs, invalid_profraw_files, counter_overflows = (
        _validate_and_convert_profraws(inputs,
                                       profdata_tool_path,
                                       sparse=sparse))
    logging.info(
        'List of invalid .profraw files that failed to validate and convert: %r',
        invalid_profraw_files)

    if counter_overflows:
      logging.warning('There were %d profiles with counter overflows',
                      len(counter_overflows))

  if inputs:
    _call_profdata_tool(profile_input_file_paths=inputs,
                        profile_output_file_path=output_file,
                        profdata_tool_path=profdata_tool_path,
                        sparse=sparse,
                        timeout=merge_timeout,
                        show_profdata=show_profdata,
                        weights=weights)

    # Inputs of a profraw merge are not needed afterwards and can be pretty
    # large, so delete them. Profdata inputs are kept because they might be
    # used again for multiple test types coverage.
    if merging_profraws:
      for merged_input in inputs:
        os.remove(merged_input)
  else:
    # The list of input files could be empty in the following scenarios:
    # 1. The test target is a pure Python scripts test which doesn't execute
    #    any C/C++ binaries, such as devtools_type_check.
    # 2. The test target executes binaries and dumps coverage profile data
    #    files, however, all of them turned out to be invalid.
    logging.info('There is no valid profraw/profdata files to merge, skip '
                 'invoking profdata tools.')

  return invalid_profraw_files, counter_overflows
393
394
# We want to retry shards that contain one or more profiles that cannot be
# merged (typically due to corruption described in crbug.com/937521).
def get_shards_to_retry(bad_profiles):
  """Returns the set of swarming task ids that produced the given profiles.

  The task id is read from each path, which is expected to look like
  <base>/<task_id>/<dir>/<filename>, i.e. the task id is the third path
  component from the end.

  Args:
    bad_profiles: An iterable of profile paths.

  Returns:
    A set of task id strings.

  Raises:
    AssertionError: A path component in the task id position is not 16 hex
      characters.
    ValueError: A path has fewer than four components.
  """

  def is_task_id(s):
    # Swarming task ids are 16 hex chars. Match them explicitly: int(s, 16)
    # would also accept forms such as a '0x' prefix, a sign or underscores,
    # and an assert-based length check disappears under `python -O`.
    return re.fullmatch(r'[0-9a-fA-F]{16}', s) is not None

  bad_shard_ids = set()
  for profile in bad_profiles:
    # E.g. /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw
    _base_path, task_id, _profraw, _filename = os.path.normpath(profile).rsplit(
        os.path.sep, 3)
    # Since we are getting a task_id from a file path, which is less than ideal,
    # do some checking to at least verify that the snippet looks like a valid
    # task id. Raised explicitly (rather than via `assert`) so the check is
    # still enforced when assertions are disabled.
    if not is_task_id(task_id):
      raise AssertionError('%r (from %r) does not look like a swarming task id' %
                           (task_id, profile))
    bad_shard_ids.add(task_id)
  return bad_shard_ids
420