1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3# Copyright 2019 The Chromium OS Authors. All rights reserved. 4# Use of this source code is governed by a BSD-style license that can be 5# found in the LICENSE file. 6 7"""Download profdata from different arches, merge them and upload to gs 8 9The script is used for updating the PGO profiles for LLVM. The workflow 10is that the script will download profdata from different PGO builds, merge 11them and then upload it to a gs location that LLVM can access. 12 13The simplest way of using this script, is to run: 14 ./merge_profdata_and_upload.py --all_latest_profiles 15which will automatically grab profdata from latest PGO generate builders 16for three different architectures and merge them. LLVM hash is also 17detected automatically from the artifacts. 18 19If you want to specify certain llvm hash, run it with: 20 ./merge_profdata_and_upload.py --all_latest_profiles --llvm_hash LLVM_HASH 21Note that hash checking will fail if the llvm hash you provided is not the 22same as those in artifacts, or llvm hash in different artifacts are not the 23same. 24 25To only use profiles from PGO generate tryjob, run it with: 26 ./merge_profdata_and_upload.py --nolatest -t TRYJOB1 -t TRYJOB2 ... 27Using of --nolatest will tell the script not to use any results from builders, 28and merge only the profdata from the tryjobs you specified. 29 30There is a chance that builders only succeeded partially, in this case, you 31can run this script to merge both profdata from builder and tryjob: 32 ./merge_profdata_and_upload.py -l arm -l amd64 -t TRYJOB_FOR_ARM64 33In this example, the script will merge profdata from arm and amd64 builder, and 34profdata from an arm64 tryjob. 35""" 36 37from __future__ import print_function 38 39import argparse 40import collections 41import distutils.spawn 42import json 43import os 44import os.path 45import shutil 46import subprocess 47import sys 48import tempfile 49 50_LLVM_PROFDATA = '/usr/bin/llvm-profdata' 51_GS_PREFIX = 'gs://' 52 53_LLVMMetadata = collections.namedtuple('_LLVMMetadata', ['head_sha']) 54 55 56def _get_gs_latest(remote_lastest): 57 assert remote_lastest.startswith(_GS_PREFIX) 58 try: 59 return subprocess.check_output(['gsutil', 'cat', remote_lastest], 60 encoding='utf-8') 61 except subprocess.CalledProcessError: 62 raise RuntimeError('Lastest artifacts not found: %s' % remote_lastest) 63 64 65def _fetch_gs_artifact(remote_name, local_name): 66 assert remote_name.startswith(_GS_PREFIX) 67 68 print('Fetching %r to %r' % (remote_name, local_name)) 69 subprocess.check_call(['gsutil', 'cp', remote_name, local_name]) 70 71 72def _find_latest_artifacts(arch): 73 remote_latest = ( 74 '%schromeos-image-archive/' 75 '%s-pgo-generate-llvm-next-toolchain/LATEST-master' % (_GS_PREFIX, arch)) 76 version = _get_gs_latest(remote_latest) 77 return '%s-pgo-generate-llvm-next-toolchain/%s' % (arch, version) 78 79 80def _get_gs_profdata(remote_base, base_dir): 81 remote_profdata_basename = 'llvm_profdata.tar.xz' 82 83 remote_profdata = os.path.join(remote_base, remote_profdata_basename) 84 tar = 'llvm_profdata.tar.xz' 85 _fetch_gs_artifact(remote_profdata, tar) 86 extract_cmd = ['tar', '-xf', tar] 87 88 print('Extracting profdata tarball.\nCMD: %s\n' % extract_cmd) 89 subprocess.check_call(extract_cmd) 90 # Return directory to the llvm.profdata extracted. 91 if '-tryjob/' in base_dir: 92 prefix = 'b/s/w/ir/cache/cbuild/repository/trybot_archive/' 93 else: 94 prefix = 'b/s/w/ir/cache/cbuild/repository/buildbot_archive/' 95 return os.path.join(prefix, base_dir, 'llvm.profdata') 96 97 98def _get_gs_metadata(remote_base): 99 metadata_basename = 'llvm_metadata.json' 100 _fetch_gs_artifact( 101 os.path.join(remote_base, metadata_basename), metadata_basename) 102 103 with open(metadata_basename) as f: 104 result = json.load(f) 105 106 return _LLVMMetadata(head_sha=result['head_sha']) 107 108 109def _get_gs_artifacts(base_dir): 110 remote_base = '%schromeos-image-archive/%s' % (_GS_PREFIX, base_dir) 111 profile_path = _get_gs_profdata(remote_base, base_dir) 112 metadata = _get_gs_metadata(remote_base) 113 return metadata, profile_path 114 115 116def _merge_profdata(profdata_list, output_name): 117 merge_cmd = [_LLVM_PROFDATA, 'merge', '-output', output_name] + profdata_list 118 print('Merging PGO profiles.\nCMD: %s\n' % merge_cmd) 119 subprocess.check_call(merge_cmd) 120 121 122def _tar_and_upload_profdata(profdata, name_suffix): 123 tarball = 'llvm-profdata-%s.tar.xz' % name_suffix 124 print('Making profdata tarball: %s' % tarball) 125 subprocess.check_call( 126 ['tar', '--sparse', '-I', 'xz', '-cf', tarball, profdata]) 127 128 upload_location = '%schromeos-localmirror/distfiles/%s' % (_GS_PREFIX, 129 tarball) 130 131 # TODO: it's better to create a subdir: distfiles/llvm_pgo_profile, but 132 # now llvm could only recognize distfiles. 133 upload_cmd = [ 134 'gsutil', 135 '-m', 136 'cp', 137 '-n', 138 '-a', 139 'public-read', 140 tarball, 141 upload_location, 142 ] 143 print('Uploading tarball to gs.\nCMD: %s\n' % upload_cmd) 144 145 # gsutil prints all status to stderr, oddly enough. 146 gs_output = subprocess.check_output( 147 upload_cmd, stderr=subprocess.STDOUT, encoding='utf-8') 148 print(gs_output) 149 150 # gsutil exits successfully even if it uploaded nothing. It prints a summary 151 # of what all it did, though. Successful uploads are just a progress bar, 152 # unsuccessful ones note that items were skipped. 153 if 'Skipping existing item' in gs_output: 154 raise ValueError('Profile upload failed: would overwrite an existing ' 155 'profile at %s' % upload_location) 156 157 158def main(): 159 parser = argparse.ArgumentParser( 160 description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) 161 parser.add_argument( 162 '-a', 163 '--all_latest_profiles', 164 action='store_true', 165 help='Merge and upload profiles from the latest builders.') 166 parser.add_argument( 167 '-l', 168 '--latest', 169 default=[], 170 action='append', 171 help='User can specify the profdata from which builder with specific ' 172 'architecture to download. By default, we merge profdata from arm, ' 173 'arm64, amd64.') 174 parser.add_argument( 175 '-t', 176 '--tryjob', 177 default=[], 178 action='append', 179 help='Extra pgo-generate-llvm-next-toolchain/tryjob results to be used. ' 180 'Format should be ' 181 '{arch}-pgo-generate-llvm-next-toolchain(-tryjob)/{VERSION}.') 182 parser.add_argument( 183 '-o', 184 '--output', 185 default='llvm.profdata', 186 help='Where to put merged PGO profile. The default is to not save it ' 187 'anywhere.') 188 parser.add_argument( 189 '--llvm_hash', 190 help='The LLVM hash to select for the profiles. Generally autodetected.') 191 args = parser.parse_args() 192 193 if not args.all_latest_profiles and not (args.latest or args.tryjob): 194 sys.exit('Please specify whether to use latest profiles or profiles from ' 195 'tryjobs') 196 197 if args.all_latest_profiles and (args.latest or args.tryjob): 198 sys.exit('--all_latest_profiles cannot be specified together with ' 199 '--latest or --tryjob.') 200 201 latest = ['arm', 'arm64', 'amd64'] \ 202 if args.all_latest_profiles else args.latest 203 204 if not distutils.spawn.find_executable(_LLVM_PROFDATA): 205 sys.exit(_LLVM_PROFDATA + ' not found; are you in the chroot?') 206 207 initial_dir = os.getcwd() 208 temp_dir = tempfile.mkdtemp(prefix='merge_pgo') 209 success = True 210 try: 211 os.chdir(temp_dir) 212 profdata_list = [] 213 heads = set() 214 215 def fetch_and_append_artifacts(gs_url): 216 llvm_metadata, profdata_loc = _get_gs_artifacts(gs_url) 217 if os.path.getsize(profdata_loc) < 512 * 1024: 218 raise RuntimeError('The PGO profile in %s (local path: %s) is ' 219 'suspiciously small. Something might have gone ' 220 'wrong.' % (gs_url, profdata_loc)) 221 222 heads.add(llvm_metadata.head_sha) 223 profdata_list.append(profdata_loc) 224 225 for arch in latest: 226 fetch_and_append_artifacts(_find_latest_artifacts(arch)) 227 228 if args.tryjob: 229 for tryjob in args.tryjob: 230 fetch_and_append_artifacts(tryjob) 231 232 assert heads, "Didn't fetch anything?" 233 234 def die_with_head_complaint(complaint): 235 extra = ' (HEADs found: %s)' % sorted(heads) 236 raise RuntimeError(complaint.rstrip() + extra) 237 238 llvm_hash = args.llvm_hash 239 if not llvm_hash: 240 if len(heads) != 1: 241 die_with_head_complaint( 242 '%d LLVM HEADs were found, which is more than one. You probably ' 243 'want a consistent set of HEADs for a profile. If you know you ' 244 "don't, please specify --llvm_hash, and note that *all* profiles " 245 'will be merged into this final profile, regardless of their ' 246 'reported HEAD.' % len(heads)) 247 llvm_hash, = heads 248 249 if llvm_hash not in heads: 250 assert llvm_hash == args.llvm_hash 251 die_with_head_complaint( 252 "HEAD %s wasn't found in any fetched artifacts." % llvm_hash) 253 254 print('Using LLVM hash: %s' % llvm_hash) 255 256 _merge_profdata(profdata_list, args.output) 257 print('Merged profdata locates at %s\n' % os.path.abspath(args.output)) 258 _tar_and_upload_profdata(args.output, name_suffix=llvm_hash) 259 print('Merged profdata uploaded successfully.') 260 except: 261 success = False 262 raise 263 finally: 264 os.chdir(initial_dir) 265 if success: 266 print('Clearing temp directory.') 267 shutil.rmtree(temp_dir, ignore_errors=True) 268 else: 269 print('Script fails, temp directory is at: %s' % temp_dir) 270 271 272if __name__ == '__main__': 273 sys.exit(main()) 274