#!/usr/bin/env python2

import argparse
import os
import pipes
import re
import sys

from utils import FindBaseNaCl, GetObjcopyCmd, get_sfi_string, shellcmd

def NewerThanOrNotThere(old_path, new_path):
    """Returns whether old_path is newer than new_path.

    Also returns true if either path doesn't exist.
    """
    if not (os.path.exists(old_path) and os.path.exists(new_path)):
        return True
    return os.path.getmtime(old_path) > os.path.getmtime(new_path)

def BuildRegex(patterns, syms):
    """Build a regular expression string for inclusion or exclusion.

    Creates a regex string from an array of patterns and an array
    of symbol names.  Each element in the patterns array is either a
    regex, or a range of entries in the symbol name array, e.g. '2:9'.

    Args:
      patterns: list of strings; each is a regex or a 'n', 'a:b', 'a:' or
        ':b' range into syms.
      syms: list of symbol names that ranges are resolved against.

    Returns:
      A regex string in which every alternative is anchored with '^...$'.
      If patterns is empty the result is '^$', which only matches the
      empty string.

    Prints 'Invalid range syntax' and exits with status 1 if a range
    pattern has more than one ':' or a bound that is not an integer.
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so that an empty pattern is handled as
        # an (empty) regex rather than raising IndexError.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' separators and non-integer
                # bounds such as '3x:9'.
                print('Invalid range syntax: {p}'.format(p=pattern))
                sys.exit(1)
            # Joining with '$|^' plus the outer '^'/'$' added below anchors
            # every selected symbol name individually.
            pattern = '$|^'.join([re.escape(p) for p in syms[lower:upper]])
        pattern_list.append('^' + pattern + '$')
    return '|'.join(pattern_list) if pattern_list else '^$'

def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Match a symbol name against inclusion/exclusion rules.

    Returns True or False depending on whether the given symbol
    matches the compiled include or exclude regexes.  The default is
    returned if neither the include nor the exclude regex matches.
    """
    if re_exclude.match(sym):
        # Always honor an explicit exclude before considering
        # includes.
        return False
    if re_include.match(sym):
        return True
    return default_match

def AddOptionalArgs(argparser):
    """Register this script's optional command-line flags on argparser.

    Only optional flags are added here; the positional 'pexe' argument is
    added by main().
    """
    argparser.add_argument('--force', dest='force', type=int, choices=[0, 1],
                           default=1,
                           help='Force all re-translations of the pexe.' +
                                ' Default %(default)s.')
    argparser.add_argument('--include', '-i', default=[], dest='include',
                           action='append',
                           help='Subzero symbols to include ' +
                                '(regex or line range)')
    argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
                           action='append',
                           help='Subzero symbols to exclude ' +
                                '(regex or line range)')
    argparser.add_argument('--output', '-o', default='a.out', dest='output',
                           action='store',
                           help='Output executable. Default %(default)s.')
    argparser.add_argument('-O', default='2', dest='optlevel',
                           choices=['m1', '-1', '0', '1', '2'],
                           help='Optimization level ' +
                                '(m1 and -1 are equivalent).' +
                                ' Default %(default)s.')
    argparser.add_argument('--filetype', default='iasm', dest='filetype',
                           choices=['obj', 'asm', 'iasm'],
                           help='Output file type. Default %(default)s.')
    argparser.add_argument('--sandbox', dest='sandbox', action='store_true',
                           help='Enable sandboxing in the translator')
    argparser.add_argument('--nonsfi', dest='nonsfi', action='store_true',
                           help='Enable Non-SFI in the translator')
    argparser.add_argument('--enable-block-profile',
                           dest='enable_block_profile', action='store_true',
                           help='Enable basic block profiling.')
    argparser.add_argument('--target', default='x8632', dest='target',
                           choices=['arm32', 'x8632', 'x8664'],
                           help='Generate code for specified target.')
    argparser.add_argument('--verbose', '-v', dest='verbose',
                           action='store_true',
                           help='Display some extra debugging output')
    argparser.add_argument('--sz', dest='sz_args', action='append', default=[],
                           help='Extra arguments for Subzero')
    argparser.add_argument('--llc', dest='llc_args', action='append',
                           default=[], help='Extra arguments for llc')
    argparser.add_argument('--no-sz', dest='nosz', action='store_true',
                           help='Run only post-Subzero build steps')
    argparser.add_argument('--fsanitize-address', dest='asan',
                           action='store_true',
                           help='Instrument with AddressSanitizer')

def LinkSandbox(objs, exe, target, verbose=True):
    """Link objs into the sandboxed executable exe.

    Runs le32-nacl-ld.gold from the toolchain under FindBaseNaCl(), linking
    statically against the per-target translator libraries and the
    szrt_sb_<target>.o runtime object.

    Args:
      objs: list of object file paths placed on the link line.
      exe: output path passed to the linker's -o.
      target: one of 'x8632', 'x8664', 'arm32' (asserted).
      verbose: forwarded to shellcmd() as echo.
    """
    assert target in ('x8632', 'x8664', 'arm32'), \
        '-sandbox is not available for %s' % target
    nacl_root = FindBaseNaCl()
    gold = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/bin/' +
            'le32-nacl-ld.gold').format(root=nacl_root)
    target_lib_dir = {
        'arm32': 'arm',
        'x8632': 'x86-32',
        'x8664': 'x86-64',
    }[target]
    linklib = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
               '{target_dir}/lib').format(root=nacl_root,
                                          target_dir=target_lib_dir)
    shellcmd([gold,
              '-nostdlib',
              '--no-fix-cortex-a8',
              '--eh-frame-hdr',
              '-z', 'text',
              #'-z', 'noexecstack',
              '--build-id',
              '--entry=__pnacl_start',
              '-static', #'-pie',
              '{linklib}/crtbegin.o'.format(linklib=linklib)] +
             objs +
             [('{root}/toolchain_build/src/subzero/build/runtime/' +
               'szrt_sb_{target}.o').format(root=nacl_root, target=target),
              '{linklib}/libpnacl_irt_shim_dummy.a'.format(linklib=linklib),
              '--start-group',
              '{linklib}/libgcc.a'.format(linklib=linklib),
              '{linklib}/libcrt_platform.a'.format(linklib=linklib),
              '--end-group',
              '{linklib}/crtend.o'.format(linklib=linklib),
              '--undefined=_start',
              '--defsym=__Sz_AbsoluteZero=0',
              #'--defsym=_begin=0',
              '-o', exe
             ], echo=verbose)

def LinkNonsfi(objs, exe, target, verbose=True):
    """Link objs into the Non-SFI executable exe.

    Same shape as LinkSandbox(), but links position-independent ('-pie')
    with '-z noexecstack', uses the '*-nonsfi' translator library
    directories and the szrt_nonsfi_<target>.o runtime object.

    Args:
      objs: list of object file paths placed on the link line.
      exe: output path passed to the linker's -o.
      target: 'arm32' or 'x8632'; any other value raises KeyError from the
        library-directory lookup.
      verbose: forwarded to shellcmd() as echo.
    """
    nacl_root = FindBaseNaCl()
    gold = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/bin/' +
            'le32-nacl-ld.gold').format(root=nacl_root)
    target_lib_dir = {
        'arm32': 'arm-nonsfi',
        'x8632': 'x86-32-nonsfi',
    }[target]
    linklib = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
               '{target_dir}/lib').format(root=nacl_root,
                                          target_dir=target_lib_dir)
    shellcmd([gold,
              '-nostdlib',
              '--no-fix-cortex-a8',
              '--eh-frame-hdr',
              '-z', 'text',
              '-z', 'noexecstack',
              '--build-id',
              '--entry=__pnacl_start',
              '-pie',
              '{linklib}/crtbegin.o'.format(linklib=linklib)] +
             objs +
             [('{root}/toolchain_build/src/subzero/build/runtime/' +
               'szrt_nonsfi_{target}.o').format(root=nacl_root, target=target),
              '{linklib}/libpnacl_irt_shim_dummy.a'.format(linklib=linklib),
              '--start-group',
              '{linklib}/libgcc.a'.format(linklib=linklib),
              '{linklib}/libcrt_platform.a'.format(linklib=linklib),
              '--end-group',
              '{linklib}/crtend.o'.format(linklib=linklib),
              '--undefined=_start',
              '--defsym=__Sz_AbsoluteZero=0',
              '--defsym=_begin=0',
              '-o', exe
             ], echo=verbose)

def LinkNative(objs, exe, target, verbose=True):
    """Link objs into the native (unsandboxed) executable exe.

    Uses a compiler driver (a cross g++ or the in-tree clang) as the linker
    and adds the unsandboxed IRT objects plus szrt_native_<target>.o.

    Args:
      objs: list of object file paths placed on the link line.
      exe: output path passed to the driver's -o.
      target: 'arm32', 'x8632' or 'x8664'.
      verbose: forwarded to shellcmd() as echo.
    """
    nacl_root = FindBaseNaCl()
    # NOTE(review): 'mips32' has a linker entry but no entry in
    # extra_linker_args or lib_dir below, so target='mips32' would raise
    # KeyError at the next lookup.
    linker = {
        'arm32': '/usr/bin/arm-linux-gnueabihf-g++',
        'mips32': '/usr/bin/mipsel-linux-gnu-g++',
        'x8632': ('{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
                 ).format(root=nacl_root),
        'x8664': ('{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
                 ).format(root=nacl_root)
    }[target]

    extra_linker_args = {
        'arm32': ['-mcpu=cortex-a9'],
        'x8632': ['-m32'],
        'x8664': ['-mx32']
    }[target]

    lib_dir = {
        'arm32': 'arm-linux',
        'x8632': 'x86-32-linux',
        'x8664': 'x86-64-linux',
    }[target]

    # The doubled braces format to a literal '{a,b,c}.o' word; presumably
    # shellcmd() runs the command through a shell that brace-expands it
    # into the three IRT object files.
    shellcmd([linker] +
             extra_linker_args +
             objs +
             ['-o', exe,
              ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
               '{lib_dir}/lib/' +
               '{{unsandboxed_irt,irt_random,irt_query_list}}.o').format(
                   root=nacl_root, lib_dir=lib_dir),
              ('{root}/toolchain_build/src/subzero/build/runtime/' +
               'szrt_native_{target}.o').format(root=nacl_root, target=target),
              '-lm', '-lpthread', '-lrt',
              '-Wl,--defsym=__Sz_AbsoluteZero=0'
             ], echo=verbose)

def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a hybrid of Subzero
    and llc translated bitcode. Linker tricks are used to determine whether
    Subzero or llc generated symbols are used, on a per-symbol basis.

    By default, for every symbol, its Subzero version is used. Subzero and llc
    symbols can be selectively enabled/disabled via regular expressions on the
    symbol name, or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the symbol is
    'rejected' on a match), followed by the --include arguments (the symbol is
    'accepted' on a match), followed by unconditional 'rejection'. The Subzero
    version is used for an 'accepted' symbol, and the llc version is used for a
    'rejected' symbol.

    Each --include and --exclude argument can be a regular expression or a range
    of lines in the symbol file. Each regular expression is wrapped inside
    '^$', so if you want a substring match on 'foo', use '.*foo.*' instead.
    Ranges use python-style 'first:last' notation, so e.g. use '0:10' or ':10'
    for the first 10 lines of the file, or '1' for the second line of the file.

    If no --include or --exclude arguments are given, the executable is produced
    entirely using Subzero, without using llc or linker tricks.

    When using the --force=0 option, this script uses file modification
    timestamps to determine whether llc and Subzero re-translation are needed.
    It checks timestamps of llc, pnacl-sz, and the pexe against the translated
    object files to determine the minimal work necessary. The --force=1 option
    (default) suppresses those checks and re-translates everything.

    This script expects various PNaCl and LLVM tools to be found within the
    native_client tree. When changes are made to these tools, copy them this
    way:
        cd native_client
        toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\
        --install=toolchain/linux_x86/pnacl_newlib_raw
    """
    # The docstring above doubles as the --help description and is shown
    # verbatim by RawTextHelpFormatter.
    # NOTE(review): the leading ' ' presumably aligns the first docstring
    # line with the indented remainder -- confirm the intended width.
    argparser = argparse.ArgumentParser(
        description=' ' + main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    AddOptionalArgs(argparser)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    args = argparser.parse_args()
    pexe = args.pexe
    exe = args.output
    ProcessPexe(args, pexe, exe)

def ProcessPexe(args, pexe, exe):
    """Translate pexe and link the result into the executable exe.

    Steps, in order:
      1. In hybrid mode (any --include/--exclude), run pnacl-translate to
         produce the llc object and its symbol list.
      2. Run pnacl-sz (unless --no-sz), assemble its output with llvm-mc
         when the file type is not 'obj', and in hybrid mode dump the
         Subzero symbol list.
      3. In hybrid mode, weaken symbols in both objects according to the
         include/exclude rules and combine them with 'ld -r'.
      4. Link with LinkSandbox(), LinkNonsfi() or LinkNative().

    With --force=0, steps 1 and 2 are skipped when the object file is
    newer than both the pexe and the translator binary.

    Args:
      args: parsed argparse namespace as set up by AddOptionalArgs().
      pexe: path of the finalized pexe.
      exe: path of the executable to produce.
    """
    pexe_base, ext = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # Two spellings of each path are kept: a pipes.quote()d one for
    # interpolation into shellcmd() command lines, and an unquoted one for
    # filesystem calls made directly from this process (open(), os.path).
    pexe_unescaped = pexe
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)

    nacl_root = FindBaseNaCl()
    path_addition = (
        '{root}/toolchain/linux_x86/pnacl_newlib_raw/bin'
        ).format(root=nacl_root)
    obj_llc = pexe_base + '.llc.o'
    obj_llc_unescaped = pexe_base_unescaped + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    obj_sz_unescaped = pexe_base_unescaped + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = obj_sz # overridden for hybrid mode
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'
    pnacl_sz = (
        '{root}/toolchain_build/src/subzero/pnacl-sz'
        ).format(root=nacl_root)
    llcbin = '{base}/pnacl-llc'.format(base=path_addition)
    objcopy = '{base}/{objcopy}'.format(base=path_addition,
                                        objcopy=GetObjcopyCmd(args.target))
    opt_level = args.optlevel
    # pnacl-translate is never given a level below 0: 'm1'/'-1' map to '0'.
    opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
    hybrid = args.include or args.exclude
    native = not args.sandbox and not args.nonsfi
    if args.asan:
        if args.sandbox or args.nonsfi:
            print('Can only use AddressSanitizer with a native build')
            sys.exit(1)
        if '-fsanitize-address' not in args.sz_args:
            args.sz_args.append('-fsanitize-address')

    # The timestamp checks must use the unquoted paths; a quoted path that
    # actually needed quoting never exists on disk, which would make
    # --force=0 re-translate every time.
    if hybrid and (args.force or
                   NewerThanOrNotThere(pexe_unescaped, obj_llc_unescaped) or
                   NewerThanOrNotThere(llcbin, obj_llc_unescaped)):
        # NOTE(review): get_sfi_string() presumably picks its 2nd/3rd/4th
        # argument for sandbox/nonsfi/native builds -- confirm in utils.
        arch = {
            'arm32': 'arm' + get_sfi_string(args, 'v7', '-nonsfi', '-nonsfi'),
            'x8632': 'x86-32' + get_sfi_string(args, '', '-nonsfi', '-linux'),
            'x8664': 'x86-64' + get_sfi_string(args, '', '', '-linux')
        }[args.target]

        # Only run pnacl-translate in hybrid mode.
        shellcmd(['{base}/pnacl-translate'.format(base=path_addition),
                  '-split-module=1',
                  '-ffunction-sections',
                  '-fdata-sections',
                  '-c',
                  '-arch', arch,
                  '-O' + opt_level_map[opt_level],
                  '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize',
                  '-o', obj_llc] +
                 (['--pnacl-driver-verbose'] if args.verbose else []) +
                 args.llc_args +
                 [pexe],
                 echo=args.verbose)
        if native:
            shellcmd((
                '{objcopy} --redefine-sym _start=_user_start {obj}'
                ).format(objcopy=objcopy, obj=obj_llc), echo=args.verbose)
        # Generate llc syms file for consistency, even though it's not used.
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)

    if (args.force or
        NewerThanOrNotThere(pexe_unescaped, obj_sz_unescaped) or
        NewerThanOrNotThere(pnacl_sz, obj_sz_unescaped)):
        if not args.nosz:
            # Run pnacl-sz regardless of hybrid mode.
            shellcmd([pnacl_sz,
                      '-O' + opt_level,
                      '-bitcode-format=pnacl',
                      '-filetype=' + args.filetype,
                      '-o', obj_sz if args.filetype == 'obj' else asm_sz,
                      '-target=' + args.target] +
                     (['-externalize',
                       '-ffunction-sections',
                       '-fdata-sections'] if hybrid else []) +
                     (['-sandbox'] if args.sandbox else []) +
                     (['-nonsfi'] if args.nonsfi else []) +
                     (['-enable-block-profile'] if
                          args.enable_block_profile and not args.sandbox
                      else []) +
                     args.sz_args +
                     [pexe],
                     echo=args.verbose)
        if args.filetype != 'obj':
            # pnacl-sz emitted assembly; turn it into an object file.
            triple = {
                'arm32': 'arm' + get_sfi_string(args, '-nacl', '', ''),
                'x8632': 'i686' + get_sfi_string(args, '-nacl', '', ''),
                'x8664': 'x86_64' +
                         get_sfi_string(args, '-nacl', '-linux-gnux32',
                                        '-linux-gnux32'),
            }[args.target]

            shellcmd((
                '{base}/llvm-mc -triple={triple} -filetype=obj -o {obj} {asm}'
                ).format(base=path_addition, asm=asm_sz, obj=obj_sz,
                         triple=triple),
                echo=args.verbose)
        if native:
            shellcmd((
                '{objcopy} --redefine-sym _start=_user_start {obj}'
                ).format(objcopy=objcopy, obj=obj_sz), echo=args.verbose)
        if hybrid:
            shellcmd((
                'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
                ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)

    if hybrid:
        with open(sym_sz_unescaped) as f:
            sz_syms = f.read().splitlines()
        re_include_str = BuildRegex(args.include, sz_syms)
        re_exclude_str = BuildRegex(args.exclude, sz_syms)
        re_include = re.compile(re_include_str)
        re_exclude = re.compile(re_exclude_str)
        # If a symbol doesn't explicitly match re_include or re_exclude,
        # the default MatchSymbol() result is True, unless some --include
        # args are provided.
        default_match = not args.include

        # The whitelist holds the symbols whose Subzero version is kept;
        # those names get weakened in the llc object below.
        whitelist_has_items = False
        with open(whitelist_sz_unescaped, 'w') as f:
            for sym in sz_syms:
                if MatchSymbol(sym, re_include, re_exclude, default_match):
                    f.write(sym + '\n')
                    whitelist_has_items = True
        shellcmd((
            '{objcopy} --weaken {obj} {weak}'
            ).format(objcopy=objcopy, obj=obj_sz, weak=obj_sz_weak),
            echo=args.verbose)
        if whitelist_has_items:
            # objcopy returns an error if the --weaken-symbols file is empty.
            shellcmd((
                '{objcopy} --weaken-symbols={whitelist} {obj} {weak}'
                ).format(objcopy=objcopy,
                         whitelist=whitelist_sz, obj=obj_llc,
                         weak=obj_llc_weak),
                echo=args.verbose)
        else:
            shellcmd((
                '{objcopy} {obj} {weak}'
                ).format(objcopy=objcopy, obj=obj_llc, weak=obj_llc_weak),
                echo=args.verbose)
        obj_partial = pexe_base + '.o'
        ld = {
            'arm32': 'arm-linux-gnueabihf-ld',
            'x8632': 'ld',
            'x8664': 'ld',
        }[args.target]
        emulation = {
            'arm32': 'armelf_linux_eabi',
            'x8632': 'elf_i386',
            'x8664': 'elf32_x86_64' if not args.sandbox else 'elf_x86_64',
        }[args.target]
        shellcmd((
            '{ld} -r -m {emulation} -o {partial} {sz} {llc}'
            ).format(ld=ld, emulation=emulation, partial=obj_partial,
                     sz=obj_sz_weak, llc=obj_llc_weak),
            echo=args.verbose)
        # Hide every symbol of the combined object, then re-export only the
        # entry point and the block-profile table.
        shellcmd((
            '{objcopy} -w --localize-symbol="*" {partial}'
            ).format(objcopy=objcopy, partial=obj_partial),
            echo=args.verbose)
        shellcmd((
            '{objcopy} --globalize-symbol={start} ' +
            '--globalize-symbol=__Sz_block_profile_info {partial}'
            ).format(objcopy=objcopy, partial=obj_partial,
                     start=get_sfi_string(args, '_start', '_start',
                                          '_user_start')),
            echo=args.verbose)

    # Run the linker regardless of hybrid mode.
    if args.sandbox:
        LinkSandbox([obj_partial], exe, args.target, args.verbose)
    elif args.nonsfi:
        LinkNonsfi([obj_partial], exe, args.target, args.verbose)
    else:
        objs = [obj_partial]
        if args.asan:
            objs.append(
                ('{root}/toolchain_build/src/subzero/build/runtime/' +
                 'szrt_asan_{target}.o').format(root=nacl_root,
                                                target=args.target))
        LinkNative(objs, exe, args.target, args.verbose)

    # Put the extra verbose printing at the end.
    if args.verbose and hybrid:
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))

if __name__ == '__main__':
    main()