• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Copyright 2019 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Download profdata from different arches, merge them and upload to gs
8
9The script is used for updating the PGO profiles for LLVM. The workflow
10is that the script will download profdata from different PGO builds, merge
11them and then upload it to a gs location that LLVM can access.
12
13The simplest way of using this script, is to run:
14    ./merge_profdata_and_upload.py --all_latest_profiles
15which will automatically grab profdata from latest PGO generate builders
16for three different architectures and merge them. LLVM hash is also
17detected automatically from the artifacts.
18
19If you want to specify certain llvm hash, run it with:
20    ./merge_profdata_and_upload.py --all_latest_profiles --llvm_hash LLVM_HASH
21Note that hash checking will fail if the llvm hash you provided is not the
22same as those in artifacts, or llvm hash in different artifacts are not the
23same.
24
To only use profiles from PGO generate tryjobs, run it with:
    ./merge_profdata_and_upload.py -t TRYJOB1 -t TRYJOB2 ...
When neither --all_latest_profiles nor --latest is given, the script does not
use any results from builders and merges only the tryjobs you specified.
29
30There is a chance that builders only succeeded partially, in this case, you
31can run this script to merge both profdata from builder and tryjob:
32    ./merge_profdata_and_upload.py -l arm -l amd64 -t TRYJOB_FOR_ARM64
33In this example, the script will merge profdata from arm and amd64 builder, and
34profdata from an arm64 tryjob.
35"""
36
37from __future__ import print_function
38
39import argparse
40import collections
41import distutils.spawn
42import json
43import os
44import os.path
45import shutil
46import subprocess
47import sys
48import tempfile
49
# Absolute path of the llvm-profdata binary used to merge profiles; main()
# refuses to run when it cannot be found.
_LLVM_PROFDATA = '/usr/bin/llvm-profdata'
# Scheme prefix that every gs location handled by this script starts with.
_GS_PREFIX = 'gs://'

# Subset of an artifact's llvm_metadata.json that this script consumes:
# head_sha is the value stored under the JSON key of the same name.
_LLVMMetadata = collections.namedtuple('_LLVMMetadata', ['head_sha'])
54
55
def _get_gs_latest(remote_lastest):
  """Returns the contents of a "latest version" marker file stored on gs.

  Args:
    remote_lastest: gs:// URL of the marker file to read.

  Returns:
    The text printed by `gsutil cat`, with surrounding whitespace removed so
    that the value can be embedded directly into a gs path.

  Raises:
    RuntimeError: if `gsutil cat` exits with a non-zero status.
  """
  assert remote_lastest.startswith(_GS_PREFIX)
  try:
    output = subprocess.check_output(['gsutil', 'cat', remote_lastest],
                                     encoding='utf-8')
  except subprocess.CalledProcessError as e:
    raise RuntimeError(
        'Lastest artifacts not found: %s' % remote_lastest) from e
  # The result is spliced into an artifact path by the caller; a trailing
  # newline in the marker file must not end up inside that path.
  return output.strip()
63
64
def _fetch_gs_artifact(remote_name, local_name):
  """Copies one object from gs to a local path with `gsutil cp`.

  Args:
    remote_name: gs:// URL of the object to download.
    local_name: local destination path handed to gsutil.

  Raises:
    subprocess.CalledProcessError: if gsutil exits with a non-zero status.
  """
  assert remote_name.startswith(_GS_PREFIX)

  copy_cmd = ['gsutil', 'cp', remote_name, local_name]
  print('Fetching %r to %r' % (remote_name, local_name))
  subprocess.check_call(copy_cmd)
70
71
def _find_latest_artifacts(arch):
  """Returns the artifact directory of the latest PGO generate build.

  The version is read from the builder's LATEST-master marker file on gs.

  Args:
    arch: architecture component of the builder name, e.g. 'arm64'.

  Returns:
    '<arch>-pgo-generate-llvm-next-toolchain/<version>', i.e. a directory
    relative to the chromeos-image-archive bucket.
  """
  latest_marker_url = (
      '%schromeos-image-archive/'
      '%s-pgo-generate-llvm-next-toolchain/LATEST-master' % (_GS_PREFIX, arch))
  latest_version = _get_gs_latest(latest_marker_url)
  artifact_dir = '%s-pgo-generate-llvm-next-toolchain/%s' % (
      arch, latest_version)
  return artifact_dir
78
79
def _get_gs_profdata(remote_base, base_dir):
  """Downloads and unpacks the profdata tarball of one build.

  The tarball is fetched into, and extracted in, the current working
  directory.

  Args:
    remote_base: gs:// URL of the build's artifact directory.
    base_dir: the same artifact directory, relative to the archive bucket.

  Returns:
    Path (relative to the current working directory) at which the extracted
    llvm.profdata is expected.
  """
  tarball_name = 'llvm_profdata.tar.xz'
  _fetch_gs_artifact(os.path.join(remote_base, tarball_name), tarball_name)

  untar_cmd = ['tar', '-xf', tarball_name]
  print('Extracting profdata tarball.\nCMD: %s\n' % untar_cmd)
  subprocess.check_call(untar_cmd)

  # Directory of the extracted llvm.profdata: tryjob artifacts sit under a
  # different archive directory than regular builder artifacts.
  archive_root = ('b/s/w/ir/cache/cbuild/repository/trybot_archive/'
                  if '-tryjob/' in base_dir else
                  'b/s/w/ir/cache/cbuild/repository/buildbot_archive/')
  return os.path.join(archive_root, base_dir, 'llvm.profdata')
96
97
def _get_gs_metadata(remote_base):
  """Downloads llvm_metadata.json of one build and extracts the LLVM HEAD.

  The file is fetched into the current working directory.

  Args:
    remote_base: gs:// URL of the build's artifact directory.

  Returns:
    An _LLVMMetadata whose head_sha is the 'head_sha' entry of the JSON file.

  Raises:
    KeyError: if the metadata has no 'head_sha' entry.
  """
  metadata_basename = 'llvm_metadata.json'
  _fetch_gs_artifact(
      os.path.join(remote_base, metadata_basename), metadata_basename)

  # Decode explicitly as UTF-8 instead of relying on the locale's preferred
  # encoding, which open() would otherwise use.
  with open(metadata_basename, encoding='utf-8') as f:
    result = json.load(f)

  return _LLVMMetadata(head_sha=result['head_sha'])
107
108
def _get_gs_artifacts(base_dir):
  """Fetches both the profile and the LLVM metadata of one build.

  Args:
    base_dir: artifact directory relative to the chromeos-image-archive
      bucket.

  Returns:
    A (metadata, profile_path) tuple: the build's _LLVMMetadata and the local
    path of its extracted llvm.profdata.
  """
  artifact_url = '%schromeos-image-archive/%s' % (_GS_PREFIX, base_dir)
  # The profile is downloaded first, then the metadata.
  extracted_profile = _get_gs_profdata(artifact_url, base_dir)
  return _get_gs_metadata(artifact_url), extracted_profile
114
115
def _merge_profdata(profdata_list, output_name):
  """Merges several profiles into one using `llvm-profdata merge`.

  Args:
    profdata_list: list of paths of the profiles to merge.
    output_name: path the merged profile is written to.

  Raises:
    subprocess.CalledProcessError: if llvm-profdata exits with a non-zero
      status.
  """
  fixed_args = [_LLVM_PROFDATA, 'merge', '-output', output_name]
  merge_cmd = fixed_args + profdata_list
  print('Merging PGO profiles.\nCMD: %s\n' % merge_cmd)
  subprocess.check_call(merge_cmd)
120
121
def _tar_and_upload_profdata(profdata, name_suffix):
  """Packs a profile into an xz tarball and uploads it to the local mirror.

  The tarball is created in the current working directory and named
  'llvm-profdata-<name_suffix>.tar.xz'.

  Args:
    profdata: path of the profile to pack.
    name_suffix: suffix of the tarball name; main() passes the LLVM hash.

  Raises:
    subprocess.CalledProcessError: if tar or gsutil exits with a non-zero
      status.
    ValueError: if gsutil reports that it skipped an already existing item.
  """
  tarball_name = 'llvm-profdata-%s.tar.xz' % name_suffix
  print('Making profdata tarball: %s' % tarball_name)
  pack_cmd = ['tar', '--sparse', '-I', 'xz', '-cf', tarball_name, profdata]
  subprocess.check_call(pack_cmd)

  # TODO: it's better to create a subdir: distfiles/llvm_pgo_profile, but
  # now llvm could only recognize distfiles.
  destination = '%schromeos-localmirror/distfiles/%s' % (_GS_PREFIX,
                                                         tarball_name)
  gsutil_cmd = ['gsutil', '-m', 'cp', '-n', '-a', 'public-read']
  gsutil_cmd.extend([tarball_name, destination])
  print('Uploading tarball to gs.\nCMD: %s\n' % gsutil_cmd)

  # gsutil reports its status on stderr, so fold stderr into the captured
  # output.
  gsutil_log = subprocess.check_output(
      gsutil_cmd, stderr=subprocess.STDOUT, encoding='utf-8')
  print(gsutil_log)

  # A zero exit status does not prove that anything was uploaded: when the
  # destination already exists gsutil merely notes that it skipped the item.
  if 'Skipping existing item' not in gsutil_log:
    return
  raise ValueError('Profile upload failed: would overwrite an existing '
                   'profile at %s' % destination)
156
157
def main():
  """Parses the flags, then fetches, merges and uploads the chosen profiles.

  All work happens inside a fresh temporary directory, which is removed on
  success and kept (with its path printed) on failure.

  Raises:
    RuntimeError: if a fetched profile is suspiciously small, or the LLVM
      HEADs reported by the artifacts disagree with each other or with
      --llvm_hash.
    SystemExit: on an invalid flag combination or a missing llvm-profdata.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      '-a',
      '--all_latest_profiles',
      action='store_true',
      help='Merge and upload profiles from the latest builders.')
  parser.add_argument(
      '-l',
      '--latest',
      default=[],
      action='append',
      help='User can specify the profdata from which builder with specific '
      'architecture to download. By default, we merge profdata from arm, '
      'arm64, amd64.')
  parser.add_argument(
      '-t',
      '--tryjob',
      default=[],
      action='append',
      help='Extra pgo-generate-llvm-next-toolchain/tryjob results to be used. '
      'Format should be '
      '{arch}-pgo-generate-llvm-next-toolchain(-tryjob)/{VERSION}.')
  parser.add_argument(
      '-o',
      '--output',
      default='llvm.profdata',
      help='Where to put merged PGO profile. The default is to not save it '
      'anywhere.')
  parser.add_argument(
      '--llvm_hash',
      help='The LLVM hash to select for the profiles. Generally autodetected.')
  args = parser.parse_args()

  if not args.all_latest_profiles and not (args.latest or args.tryjob):
    sys.exit('Please specify whether to use latest profiles or profiles from '
             'tryjobs')

  if args.all_latest_profiles and (args.latest or args.tryjob):
    sys.exit('--all_latest_profiles cannot be specified together with '
             '--latest or --tryjob.')

  latest = (['arm', 'arm64', 'amd64']
            if args.all_latest_profiles else args.latest)

  # shutil.which is used rather than distutils.spawn.find_executable because
  # distutils is deprecated (PEP 632) and is not shipped with Python 3.12+.
  if not shutil.which(_LLVM_PROFDATA):
    sys.exit(_LLVM_PROFDATA + ' not found; are you in the chroot?')

  initial_dir = os.getcwd()
  temp_dir = tempfile.mkdtemp(prefix='merge_pgo')
  # Flipped to True only after every step has completed, so any exception
  # (including KeyboardInterrupt) leaves the temp directory behind for
  # inspection.
  success = False
  try:
    os.chdir(temp_dir)
    profdata_list = []
    heads = set()

    def fetch_and_append_artifacts(gs_url):
      """Downloads one build's artifacts and records its profile and HEAD."""
      llvm_metadata, profdata_loc = _get_gs_artifacts(gs_url)
      if os.path.getsize(profdata_loc) < 512 * 1024:
        raise RuntimeError('The PGO profile in %s (local path: %s) is '
                           'suspiciously small. Something might have gone '
                           'wrong.' % (gs_url, profdata_loc))

      heads.add(llvm_metadata.head_sha)
      profdata_list.append(profdata_loc)

    for arch in latest:
      fetch_and_append_artifacts(_find_latest_artifacts(arch))

    for tryjob in args.tryjob:
      fetch_and_append_artifacts(tryjob)

    assert heads, "Didn't fetch anything?"

    def die_with_head_complaint(complaint):
      """Raises RuntimeError with the complaint plus the HEADs seen."""
      extra = ' (HEADs found: %s)' % sorted(heads)
      raise RuntimeError(complaint.rstrip() + extra)

    llvm_hash = args.llvm_hash
    if not llvm_hash:
      # Without an explicit hash, all artifacts must agree on a single HEAD.
      if len(heads) != 1:
        die_with_head_complaint(
            '%d LLVM HEADs were found, which is more than one. You probably '
            'want a consistent set of HEADs for a profile. If you know you '
            "don't, please specify --llvm_hash, and note that *all* profiles "
            'will be merged into this final profile, regardless of their '
            'reported HEAD.' % len(heads))
      llvm_hash, = heads

    if llvm_hash not in heads:
      assert llvm_hash == args.llvm_hash
      die_with_head_complaint(
          "HEAD %s wasn't found in any fetched artifacts." % llvm_hash)

    print('Using LLVM hash: %s' % llvm_hash)

    # NOTE(review): the working directory is temp_dir at this point, so a
    # relative --output lands inside temp_dir and is deleted together with it
    # on success.
    _merge_profdata(profdata_list, args.output)
    print('Merged profdata locates at %s\n' % os.path.abspath(args.output))
    _tar_and_upload_profdata(args.output, name_suffix=llvm_hash)
    print('Merged profdata uploaded successfully.')
    success = True
  finally:
    os.chdir(initial_dir)
    if success:
      print('Clearing temp directory.')
      shutil.rmtree(temp_dir, ignore_errors=True)
    else:
      print('Script fails, temp directory is at: %s' % temp_dir)
270
271
# Run main() only when executed as a script, and hand its return value to the
# interpreter as the exit status.
if __name__ == '__main__':
  exit_status = main()
  sys.exit(exit_status)
274