1#!/usr/bin/env python3 2""" 3This script: 4- Builds clang with user-defined flags 5- Uses that clang to build an instrumented clang, which can be used to collect 6 PGO samples 7- Builds a user-defined set of sources (default: clang) to act as a 8 "benchmark" to generate a PGO profile 9- Builds clang once more with the PGO profile generated above 10 11This is a total of four clean builds of clang (by default). This may take a 12while. :) 13 14This scripts duplicates https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo 15Eventually, it will be updated to instead call the cmake cache mentioned there. 16""" 17 18import argparse 19import collections 20import multiprocessing 21import os 22import shlex 23import shutil 24import subprocess 25import sys 26 27### User configuration 28 29 30# If you want to use a different 'benchmark' than building clang, make this 31# function do what you want. out_dir is the build directory for clang, so all 32# of the clang binaries will live under "${out_dir}/bin/". Using clang in 33# ${out_dir} will magically have the profiles go to the right place. 34# 35# You may assume that out_dir is a freshly-built directory that you can reach 36# in to build more things, if you'd like. 37def _run_benchmark(env, out_dir, include_debug_info): 38 """The 'benchmark' we run to generate profile data.""" 39 target_dir = env.output_subdir('instrumentation_run') 40 41 # `check-llvm` and `check-clang` are cheap ways to increase coverage. The 42 # former lets us touch on the non-x86 backends a bit if configured, and the 43 # latter gives us more C to chew on (and will send us through diagnostic 44 # paths a fair amount, though the `if (stuff_is_broken) { diag() ... }` 45 # branches should still heavily be weighted in the not-taken direction, 46 # since we built all of LLVM/etc). 47 _build_things_in(env, out_dir, what=['check-llvm', 'check-clang']) 48 49 # Building tblgen gets us coverage; don't skip it. (out_dir may also not 50 # have them anyway, but that's less of an issue) 51 cmake = _get_cmake_invocation_for_bootstrap_from( 52 env, out_dir, skip_tablegens=False) 53 54 if include_debug_info: 55 cmake.add_flag('CMAKE_BUILD_TYPE', 'RelWithDebInfo') 56 57 _run_fresh_cmake(env, cmake, target_dir) 58 59 # Just build all the things. The more data we have, the better. 60 _build_things_in(env, target_dir, what=['all']) 61 62### Script 63 64 65class CmakeInvocation: 66 _cflags = ['CMAKE_C_FLAGS', 'CMAKE_CXX_FLAGS'] 67 _ldflags = [ 68 'CMAKE_EXE_LINKER_FLAGS', 69 'CMAKE_MODULE_LINKER_FLAGS', 70 'CMAKE_SHARED_LINKER_FLAGS', 71 ] 72 73 def __init__(self, cmake, maker, cmake_dir): 74 self._prefix = [cmake, '-G', maker, cmake_dir] 75 76 # Map of str -> (list|str). 77 self._flags = {} 78 for flag in CmakeInvocation._cflags + CmakeInvocation._ldflags: 79 self._flags[flag] = [] 80 81 def add_new_flag(self, key, value): 82 self.add_flag(key, value, allow_overwrites=False) 83 84 def add_flag(self, key, value, allow_overwrites=True): 85 if key not in self._flags: 86 self._flags[key] = value 87 return 88 89 existing_value = self._flags[key] 90 if isinstance(existing_value, list): 91 existing_value.append(value) 92 return 93 94 if not allow_overwrites: 95 raise ValueError('Invalid overwrite of %s requested' % key) 96 97 self._flags[key] = value 98 99 def add_cflags(self, flags): 100 # No, I didn't intend to append ['-', 'O', '2'] to my flags, thanks :) 101 assert not isinstance(flags, str) 102 for f in CmakeInvocation._cflags: 103 self._flags[f].extend(flags) 104 105 def add_ldflags(self, flags): 106 assert not isinstance(flags, str) 107 for f in CmakeInvocation._ldflags: 108 self._flags[f].extend(flags) 109 110 def to_args(self): 111 args = self._prefix.copy() 112 for key, value in sorted(self._flags.items()): 113 if isinstance(value, list): 114 # We preload all of the list-y values (cflags, ...). If we've 115 # nothing to add, don't. 116 if not value: 117 continue 118 value = ' '.join(value) 119 120 arg = '-D' + key 121 if value != '': 122 arg += '=' + value 123 args.append(arg) 124 return args 125 126 127class Env: 128 def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args, 129 dry_run): 130 self.llvm_dir = llvm_dir 131 self.use_make = use_make 132 self.output_dir = output_dir 133 self.default_cmake_args = default_cmake_args.copy() 134 self.dry_run = dry_run 135 136 def get_default_cmake_args_kv(self): 137 return self.default_cmake_args.items() 138 139 def get_cmake_maker(self): 140 return 'Ninja' if not self.use_make else 'Unix Makefiles' 141 142 def get_make_command(self): 143 if self.use_make: 144 return ['make', '-j{}'.format(multiprocessing.cpu_count())] 145 return ['ninja'] 146 147 def output_subdir(self, name): 148 return os.path.join(self.output_dir, name) 149 150 def has_llvm_subproject(self, name): 151 if name == 'compiler-rt': 152 subdir = '../compiler-rt' 153 elif name == 'clang': 154 subdir = '../clang' 155 else: 156 raise ValueError('Unknown subproject: %s' % name) 157 158 return os.path.isdir(os.path.join(self.llvm_dir, subdir)) 159 160 # Note that we don't allow capturing stdout/stderr. This works quite nicely 161 # with dry_run. 162 def run_command(self, 163 cmd, 164 cwd=None, 165 check=False, 166 silent_unless_error=False): 167 print( 168 'Running `%s` in %s' % (cmd, shlex.quote(cwd or os.getcwd()))) 169 170 if self.dry_run: 171 return 172 173 if silent_unless_error: 174 stdout, stderr = subprocess.PIPE, subprocess.STDOUT 175 else: 176 stdout, stderr = None, None 177 178 # Don't use subprocess.run because it's >= py3.5 only, and it's not too 179 # much extra effort to get what it gives us anyway. 180 popen = subprocess.Popen( 181 cmd, 182 stdin=subprocess.DEVNULL, 183 stdout=stdout, 184 stderr=stderr, 185 cwd=cwd) 186 stdout, _ = popen.communicate() 187 return_code = popen.wait(timeout=0) 188 189 if not return_code: 190 return 191 192 if silent_unless_error: 193 print(stdout.decode('utf-8', 'ignore')) 194 195 if check: 196 raise subprocess.CalledProcessError( 197 returncode=return_code, cmd=cmd, output=stdout, stderr=None) 198 199 200def _get_default_cmake_invocation(env): 201 inv = CmakeInvocation( 202 cmake='cmake', maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir) 203 for key, value in env.get_default_cmake_args_kv(): 204 inv.add_new_flag(key, value) 205 return inv 206 207 208def _get_cmake_invocation_for_bootstrap_from(env, out_dir, 209 skip_tablegens=True): 210 clang = os.path.join(out_dir, 'bin', 'clang') 211 cmake = _get_default_cmake_invocation(env) 212 cmake.add_new_flag('CMAKE_C_COMPILER', clang) 213 cmake.add_new_flag('CMAKE_CXX_COMPILER', clang + '++') 214 215 # We often get no value out of building new tblgens; the previous build 216 # should have them. It's still correct to build them, just slower. 217 def add_tablegen(key, binary): 218 path = os.path.join(out_dir, 'bin', binary) 219 220 # Check that this exists, since the user's allowed to specify their own 221 # stage1 directory (which is generally where we'll source everything 222 # from). Dry runs should hope for the best from our user, as well. 223 if env.dry_run or os.path.exists(path): 224 cmake.add_new_flag(key, path) 225 226 if skip_tablegens: 227 add_tablegen('LLVM_TABLEGEN', 'llvm-tblgen') 228 add_tablegen('CLANG_TABLEGEN', 'clang-tblgen') 229 230 return cmake 231 232 233def _build_things_in(env, target_dir, what): 234 cmd = env.get_make_command() + what 235 env.run_command(cmd, cwd=target_dir, check=True) 236 237 238def _run_fresh_cmake(env, cmake, target_dir): 239 if not env.dry_run: 240 try: 241 shutil.rmtree(target_dir) 242 except FileNotFoundError: 243 pass 244 245 os.makedirs(target_dir, mode=0o755) 246 247 cmake_args = cmake.to_args() 248 env.run_command( 249 cmake_args, cwd=target_dir, check=True, silent_unless_error=True) 250 251 252def _build_stage1_clang(env): 253 target_dir = env.output_subdir('stage1') 254 cmake = _get_default_cmake_invocation(env) 255 _run_fresh_cmake(env, cmake, target_dir) 256 _build_things_in(env, target_dir, what=['clang', 'llvm-profdata', 'profile']) 257 return target_dir 258 259 260def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir, 261 output_file): 262 llvm_profdata = os.path.join(stage1_dir, 'bin', 'llvm-profdata') 263 if env.dry_run: 264 profiles = [os.path.join(profile_dir, '*.profraw')] 265 else: 266 profiles = [ 267 os.path.join(profile_dir, f) for f in os.listdir(profile_dir) 268 if f.endswith('.profraw') 269 ] 270 cmd = [llvm_profdata, 'merge', '-output=' + output_file] + profiles 271 env.run_command(cmd, check=True) 272 273 274def _build_instrumented_clang(env, stage1_dir): 275 assert os.path.isabs(stage1_dir) 276 277 target_dir = os.path.join(env.output_dir, 'instrumented') 278 cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir) 279 cmake.add_new_flag('LLVM_BUILD_INSTRUMENTED', 'IR') 280 281 # libcxx's configure step messes with our link order: we'll link 282 # libclang_rt.profile after libgcc, and the former requires atexit from the 283 # latter. So, configure checks fail. 284 # 285 # Since we don't need libcxx or compiler-rt anyway, just disable them. 286 cmake.add_new_flag('LLVM_BUILD_RUNTIME', 'No') 287 288 _run_fresh_cmake(env, cmake, target_dir) 289 _build_things_in(env, target_dir, what=['clang', 'lld']) 290 291 profiles_dir = os.path.join(target_dir, 'profiles') 292 return target_dir, profiles_dir 293 294 295def _build_optimized_clang(env, stage1_dir, profdata_file): 296 if not env.dry_run and not os.path.exists(profdata_file): 297 raise ValueError('Looks like the profdata file at %s doesn\'t exist' % 298 profdata_file) 299 300 target_dir = os.path.join(env.output_dir, 'optimized') 301 cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir) 302 cmake.add_new_flag('LLVM_PROFDATA_FILE', os.path.abspath(profdata_file)) 303 304 # We'll get complaints about hash mismatches in `main` in tools/etc. Ignore 305 # it. 306 cmake.add_cflags(['-Wno-backend-plugin']) 307 _run_fresh_cmake(env, cmake, target_dir) 308 _build_things_in(env, target_dir, what=['clang']) 309 return target_dir 310 311 312Args = collections.namedtuple('Args', [ 313 'do_optimized_build', 314 'include_debug_info', 315 'profile_location', 316 'stage1_dir', 317]) 318 319 320def _parse_args(): 321 parser = argparse.ArgumentParser( 322 description='Builds LLVM and Clang with instrumentation, collects ' 323 'instrumentation profiles for them, and (optionally) builds things' 324 'with these PGO profiles. By default, it\'s assumed that you\'re ' 325 'running this from your LLVM root, and all build artifacts will be ' 326 'saved to $PWD/out.') 327 parser.add_argument( 328 '--cmake-extra-arg', 329 action='append', 330 default=[], 331 help='an extra arg to pass to all cmake invocations. Note that this ' 332 'is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will ' 333 'be passed as -DFOO=BAR. This may be specified multiple times.') 334 parser.add_argument( 335 '--dry-run', 336 action='store_true', 337 help='print commands instead of running them') 338 parser.add_argument( 339 '--llvm-dir', 340 default='.', 341 help='directory containing an LLVM checkout (default: $PWD)') 342 parser.add_argument( 343 '--no-optimized-build', 344 action='store_true', 345 help='disable the final, PGO-optimized build') 346 parser.add_argument( 347 '--out-dir', 348 help='directory to write artifacts to (default: $llvm_dir/out)') 349 parser.add_argument( 350 '--profile-output', 351 help='where to output the profile (default is $out/pgo_profile.prof)') 352 parser.add_argument( 353 '--stage1-dir', 354 help='instead of having an initial build of everything, use the given ' 355 'directory. It is expected that this directory will have clang, ' 356 'llvm-profdata, and the appropriate libclang_rt.profile already built') 357 parser.add_argument( 358 '--use-debug-info-in-benchmark', 359 action='store_true', 360 help='use a regular build instead of RelWithDebInfo in the benchmark. ' 361 'This increases benchmark execution time and disk space requirements, ' 362 'but gives more coverage over debuginfo bits in LLVM and clang.') 363 parser.add_argument( 364 '--use-make', 365 action='store_true', 366 default=shutil.which('ninja') is None, 367 help='use Makefiles instead of ninja') 368 369 args = parser.parse_args() 370 371 llvm_dir = os.path.abspath(args.llvm_dir) 372 if args.out_dir is None: 373 output_dir = os.path.join(llvm_dir, 'out') 374 else: 375 output_dir = os.path.abspath(args.out_dir) 376 377 extra_args = {'CMAKE_BUILD_TYPE': 'Release', 378 'LLVM_ENABLE_PROJECTS': 'clang;compiler-rt;lld'} 379 for arg in args.cmake_extra_arg: 380 if arg.startswith('-D'): 381 arg = arg[2:] 382 elif arg.startswith('-'): 383 raise ValueError('Unknown not- -D arg encountered; you may need ' 384 'to tweak the source...') 385 split = arg.split('=', 1) 386 if len(split) == 1: 387 key, val = split[0], '' 388 else: 389 key, val = split 390 extra_args[key] = val 391 392 env = Env( 393 default_cmake_args=extra_args, 394 dry_run=args.dry_run, 395 llvm_dir=llvm_dir, 396 output_dir=output_dir, 397 use_make=args.use_make, 398 ) 399 400 if args.profile_output is not None: 401 profile_location = args.profile_output 402 else: 403 profile_location = os.path.join(env.output_dir, 'pgo_profile.prof') 404 405 result_args = Args( 406 do_optimized_build=not args.no_optimized_build, 407 include_debug_info=args.use_debug_info_in_benchmark, 408 profile_location=profile_location, 409 stage1_dir=args.stage1_dir, 410 ) 411 412 return env, result_args 413 414 415def _looks_like_llvm_dir(directory): 416 """Arbitrary set of heuristics to determine if `directory` is an llvm dir. 417 418 Errs on the side of false-positives.""" 419 420 contents = set(os.listdir(directory)) 421 expected_contents = [ 422 'CODE_OWNERS.TXT', 423 'cmake', 424 'docs', 425 'include', 426 'utils', 427 ] 428 429 if not all(c in contents for c in expected_contents): 430 return False 431 432 try: 433 include_listing = os.listdir(os.path.join(directory, 'include')) 434 except NotADirectoryError: 435 return False 436 437 return 'llvm' in include_listing 438 439 440def _die(*args, **kwargs): 441 kwargs['file'] = sys.stderr 442 print(*args, **kwargs) 443 sys.exit(1) 444 445 446def _main(): 447 env, args = _parse_args() 448 449 if not _looks_like_llvm_dir(env.llvm_dir): 450 _die('Looks like %s isn\'t an LLVM directory; please see --help' % 451 env.llvm_dir) 452 if not env.has_llvm_subproject('clang'): 453 _die('Need a clang checkout at tools/clang') 454 if not env.has_llvm_subproject('compiler-rt'): 455 _die('Need a compiler-rt checkout at projects/compiler-rt') 456 457 def status(*args): 458 print(*args, file=sys.stderr) 459 460 if args.stage1_dir is None: 461 status('*** Building stage1 clang...') 462 stage1_out = _build_stage1_clang(env) 463 else: 464 stage1_out = args.stage1_dir 465 466 status('*** Building instrumented clang...') 467 instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out) 468 status('*** Running profdata benchmarks...') 469 _run_benchmark(env, instrumented_out, args.include_debug_info) 470 status('*** Generating profile...') 471 _generate_instrumented_clang_profile(env, stage1_out, profile_dir, 472 args.profile_location) 473 474 print('Final profile:', args.profile_location) 475 if args.do_optimized_build: 476 status('*** Building PGO-optimized binaries...') 477 optimized_out = _build_optimized_clang(env, stage1_out, 478 args.profile_location) 479 print('Final build directory:', optimized_out) 480 481 482if __name__ == '__main__': 483 _main() 484