#!/usr/bin/env python2 import argparse import os import pipes import re import sys from utils import FindBaseNaCl, GetObjcopyCmd, get_sfi_string, shellcmd def NewerThanOrNotThere(old_path, new_path): """Returns whether old_path is newer than new_path. Also returns true if either path doesn't exist. """ if not (os.path.exists(old_path) and os.path.exists(new_path)): return True return os.path.getmtime(old_path) > os.path.getmtime(new_path) def BuildRegex(patterns, syms): """Build a regular expression string for inclusion or exclusion. Creates a regex string from an array of patterns and an array of symbol names. Each element in the patterns array is either a regex, or a range of entries in the symbol name array, e.g. '2:9'. """ pattern_list = [] for pattern in patterns: if pattern[0].isdigit() or pattern[0] == ':': # Legitimate symbols or regexes shouldn't start with a # digit or a ':', so interpret the pattern as a range. interval = pattern.split(':') if len(interval) == 1: # Treat singleton 'n' as 'n:n+1'. lower = int(interval[0]) upper = lower + 1 elif len(interval) == 2: # Handle 'a:b', 'a:', and ':b' with suitable defaults. lower = int(interval[0]) if len(interval[0]) else 0 upper = int(interval[1]) if len(interval[1]) else len(syms) else: print 'Invalid range syntax: {p}'.format(p=pattern) exit(1) pattern = '$|^'.join([re.escape(p) for p in syms[lower:upper]]) pattern_list.append('^' + pattern + '$') return '|'.join(pattern_list) if len(pattern_list) else '^$' def MatchSymbol(sym, re_include, re_exclude, default_match): """Match a symbol name against inclusion/exclusion rules. Returns True or False depending on whether the given symbol matches the compiled include or exclude regexes. The default is returned if neither the include nor the exclude regex matches. """ if re_exclude.match(sym): # Always honor an explicit exclude before considering # includes. return False if re_include.match(sym): return True return default_match def AddOptionalArgs(argparser): argparser.add_argument('--force', dest='force', type=int, choices=[0, 1], default=1, help='Force all re-translations of the pexe.' + ' Default %(default)s.') argparser.add_argument('--include', '-i', default=[], dest='include', action='append', help='Subzero symbols to include ' + '(regex or line range)') argparser.add_argument('--exclude', '-e', default=[], dest='exclude', action='append', help='Subzero symbols to exclude ' + '(regex or line range)') argparser.add_argument('--output', '-o', default='a.out', dest='output', action='store', help='Output executable. Default %(default)s.') argparser.add_argument('-O', default='2', dest='optlevel', choices=['m1', '-1', '0', '1', '2'], help='Optimization level ' + '(m1 and -1 are equivalent).' + ' Default %(default)s.') argparser.add_argument('--filetype', default='iasm', dest='filetype', choices=['obj', 'asm', 'iasm'], help='Output file type. Default %(default)s.') argparser.add_argument('--sandbox', dest='sandbox', action='store_true', help='Enable sandboxing in the translator') argparser.add_argument('--nonsfi', dest='nonsfi', action='store_true', help='Enable Non-SFI in the translator') argparser.add_argument('--enable-block-profile', dest='enable_block_profile', action='store_true', help='Enable basic block profiling.') argparser.add_argument('--target', default='x8632', dest='target', choices=['arm32', 'x8632', 'x8664'], help='Generate code for specified target.') argparser.add_argument('--verbose', '-v', dest='verbose', action='store_true', help='Display some extra debugging output') argparser.add_argument('--sz', dest='sz_args', action='append', default=[], help='Extra arguments for Subzero') argparser.add_argument('--llc', dest='llc_args', action='append', default=[], help='Extra arguments for llc') argparser.add_argument('--no-sz', dest='nosz', action='store_true', help='Run only post-Subzero build steps') argparser.add_argument('--fsanitize-address', dest='asan', action='store_true', help='Instrument with AddressSanitizer') def LinkSandbox(objs, exe, target, verbose=True): assert target in ('x8632', 'x8664', 'arm32'), \ '-sandbox is not available for %s' % target nacl_root = FindBaseNaCl() gold = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/bin/' + 'le32-nacl-ld.gold').format(root=nacl_root) target_lib_dir = { 'arm32': 'arm', 'x8632': 'x86-32', 'x8664': 'x86-64', }[target] linklib = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' + '{target_dir}/lib').format(root=nacl_root, target_dir=target_lib_dir) shellcmd([gold, '-nostdlib', '--no-fix-cortex-a8', '--eh-frame-hdr', '-z', 'text', #'-z', 'noexecstack', '--build-id', '--entry=__pnacl_start', '-static', #'-pie', '{linklib}/crtbegin.o'.format(linklib=linklib)] + objs + [('{root}/toolchain_build/src/subzero/build/runtime/' + 'szrt_sb_{target}.o').format(root=nacl_root, target=target), '{linklib}/libpnacl_irt_shim_dummy.a'.format(linklib=linklib), '--start-group', '{linklib}/libgcc.a'.format(linklib=linklib), '{linklib}/libcrt_platform.a'.format(linklib=linklib), '--end-group', '{linklib}/crtend.o'.format(linklib=linklib), '--undefined=_start', '--defsym=__Sz_AbsoluteZero=0', #'--defsym=_begin=0', '-o', exe ], echo=verbose) def LinkNonsfi(objs, exe, target, verbose=True): nacl_root = FindBaseNaCl() gold = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/bin/' + 'le32-nacl-ld.gold').format(root=nacl_root) target_lib_dir = { 'arm32': 'arm-nonsfi', 'x8632': 'x86-32-nonsfi', }[target] linklib = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' + '{target_dir}/lib').format(root=nacl_root, target_dir=target_lib_dir) shellcmd([gold, '-nostdlib', '--no-fix-cortex-a8', '--eh-frame-hdr', '-z', 'text', '-z', 'noexecstack', '--build-id', '--entry=__pnacl_start', '-pie', '{linklib}/crtbegin.o'.format(linklib=linklib)] + objs + [('{root}/toolchain_build/src/subzero/build/runtime/' + 'szrt_nonsfi_{target}.o').format(root=nacl_root, target=target), '{linklib}/libpnacl_irt_shim_dummy.a'.format(linklib=linklib), '--start-group', '{linklib}/libgcc.a'.format(linklib=linklib), '{linklib}/libcrt_platform.a'.format(linklib=linklib), '--end-group', '{linklib}/crtend.o'.format(linklib=linklib), '--undefined=_start', '--defsym=__Sz_AbsoluteZero=0', '--defsym=_begin=0', '-o', exe ], echo=verbose) def LinkNative(objs, exe, target, verbose=True): nacl_root = FindBaseNaCl() linker = { 'arm32': '/usr/bin/arm-linux-gnueabihf-g++', 'mips32': '/usr/bin/mipsel-linux-gnu-g++', 'x8632': ('{root}/../third_party/llvm-build/Release+Asserts/bin/clang' ).format(root=nacl_root), 'x8664': ('{root}/../third_party/llvm-build/Release+Asserts/bin/clang' ).format(root=nacl_root) }[target] extra_linker_args = { 'arm32': ['-mcpu=cortex-a9'], 'x8632': ['-m32'], 'x8664': ['-mx32'] }[target] lib_dir = { 'arm32': 'arm-linux', 'x8632': 'x86-32-linux', 'x8664': 'x86-64-linux', }[target] shellcmd([linker] + extra_linker_args + objs + ['-o', exe, ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' + '{lib_dir}/lib/' + '{{unsandboxed_irt,irt_random,irt_query_list}}.o').format( root=nacl_root, lib_dir=lib_dir), ('{root}/toolchain_build/src/subzero/build/runtime/' + 'szrt_native_{target}.o').format(root=nacl_root, target=target), '-lm', '-lpthread', '-lrt', '-Wl,--defsym=__Sz_AbsoluteZero=0' ], echo=verbose) def main(): """Create a hybrid translation from Subzero and llc. Takes a finalized pexe and builds a native executable as a hybrid of Subzero and llc translated bitcode. Linker tricks are used to determine whether Subzero or llc generated symbols are used, on a per-symbol basis. By default, for every symbol, its Subzero version is used. Subzero and llc symbols can be selectively enabled/disabled via regular expressions on the symbol name, or by ranges of lines in this program's auto-generated symbol file. For each symbol, the --exclude arguments are first checked (the symbol is 'rejected' on a match), followed by the --include arguments (the symbol is 'accepted' on a match), followed by unconditional 'rejection'. The Subzero version is used for an 'accepted' symbol, and the llc version is used for a 'rejected' symbol. Each --include and --exclude argument can be a regular expression or a range of lines in the symbol file. Each regular expression is wrapped inside '^$', so if you want a substring match on 'foo', use '.*foo.*' instead. Ranges use python-style 'first:last' notation, so e.g. use '0:10' or ':10' for the first 10 lines of the file, or '1' for the second line of the file. If no --include or --exclude arguments are given, the executable is produced entirely using Subzero, without using llc or linker tricks. When using the --force=0 option, this script uses file modification timestamps to determine whether llc and Subzero re-translation are needed. It checks timestamps of llc, pnacl-sz, and the pexe against the translated object files to determine the minimal work necessary. The --force=1 option (default) suppresses those checks and re-translates everything. This script expects various PNaCl and LLVM tools to be found within the native_client tree. When changes are made to these tools, copy them this way: cd native_client toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\ --install=toolchain/linux_x86/pnacl_newlib_raw """ argparser = argparse.ArgumentParser( description=' ' + main.__doc__, formatter_class=argparse.RawTextHelpFormatter) AddOptionalArgs(argparser) argparser.add_argument('pexe', help='Finalized pexe to translate') args = argparser.parse_args() pexe = args.pexe exe = args.output ProcessPexe(args, pexe, exe) def ProcessPexe(args, pexe, exe): [pexe_base, ext] = os.path.splitext(pexe) if ext != '.pexe': pexe_base = pexe pexe_base_unescaped = pexe_base pexe_base = pipes.quote(pexe_base) pexe = pipes.quote(pexe) nacl_root = FindBaseNaCl() path_addition = ( '{root}/toolchain/linux_x86/pnacl_newlib_raw/bin' ).format(root=nacl_root) obj_llc = pexe_base + '.llc.o' obj_sz = pexe_base + '.sz.o' asm_sz = pexe_base + '.sz.s' obj_llc_weak = pexe_base + '.weak.llc.o' obj_sz_weak = pexe_base + '.weak.sz.o' obj_partial = obj_sz # overridden for hybrid mode sym_llc = pexe_base + '.sym.llc.txt' sym_sz = pexe_base + '.sym.sz.txt' sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt' whitelist_sz = pexe_base + '.wl.sz.txt' whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt' pnacl_sz = ( '{root}/toolchain_build/src/subzero/pnacl-sz' ).format(root=nacl_root) llcbin = '{base}/pnacl-llc'.format(base=path_addition) gold = '{base}/le32-nacl-ld.gold'.format(base=path_addition) objcopy = '{base}/{objcopy}'.format(base=path_addition, objcopy=GetObjcopyCmd(args.target)) opt_level = args.optlevel opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' } hybrid = args.include or args.exclude native = not args.sandbox and not args.nonsfi if args.asan: if args.sandbox or args.nonsfi: print 'Can only use AddressSanitizer with a native build' exit(1) if '-fsanitize-address' not in args.sz_args: args.sz_args.append('-fsanitize-address') if hybrid and (args.force or NewerThanOrNotThere(pexe, obj_llc) or NewerThanOrNotThere(llcbin, obj_llc)): arch = { 'arm32': 'arm' + get_sfi_string(args, 'v7', '-nonsfi', '-nonsfi'), 'x8632': 'x86-32' + get_sfi_string(args, '', '-nonsfi', '-linux'), 'x8664': 'x86-64' + get_sfi_string(args, '', '', '-linux') }[args.target] # Only run pnacl-translate in hybrid mode. shellcmd(['{base}/pnacl-translate'.format(base=path_addition), '-split-module=1', '-ffunction-sections', '-fdata-sections', '-c', '-arch', arch, '-O' + opt_level_map[opt_level], '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize', '-o', obj_llc] + (['--pnacl-driver-verbose'] if args.verbose else []) + args.llc_args + [pexe], echo=args.verbose) if native: shellcmd(( '{objcopy} --redefine-sym _start=_user_start {obj}' ).format(objcopy=objcopy, obj=obj_llc), echo=args.verbose) # Generate llc syms file for consistency, even though it's not used. shellcmd(( 'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}' ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose) if (args.force or NewerThanOrNotThere(pexe, obj_sz) or NewerThanOrNotThere(pnacl_sz, obj_sz)): if not args.nosz: # Run pnacl-sz regardless of hybrid mode. shellcmd([pnacl_sz, '-O' + opt_level, '-bitcode-format=pnacl', '-filetype=' + args.filetype, '-o', obj_sz if args.filetype == 'obj' else asm_sz, '-target=' + args.target] + (['-externalize', '-ffunction-sections', '-fdata-sections'] if hybrid else []) + (['-sandbox'] if args.sandbox else []) + (['-nonsfi'] if args.nonsfi else []) + (['-enable-block-profile'] if args.enable_block_profile and not args.sandbox else []) + args.sz_args + [pexe], echo=args.verbose) if args.filetype != 'obj': triple = { 'arm32': 'arm' + get_sfi_string(args, '-nacl', '', ''), 'x8632': 'i686' + get_sfi_string(args, '-nacl', '', ''), 'x8664': 'x86_64' + get_sfi_string(args, '-nacl', '-linux-gnux32', '-linux-gnux32'), }[args.target] shellcmd(( '{base}/llvm-mc -triple={triple} -filetype=obj -o {obj} {asm}' ).format(base=path_addition, asm=asm_sz, obj=obj_sz, triple=triple), echo=args.verbose) if native: shellcmd(( '{objcopy} --redefine-sym _start=_user_start {obj}' ).format(objcopy=objcopy, obj=obj_sz), echo=args.verbose) if hybrid: shellcmd(( 'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}' ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose) if hybrid: with open(sym_sz_unescaped) as f: sz_syms = f.read().splitlines() re_include_str = BuildRegex(args.include, sz_syms) re_exclude_str = BuildRegex(args.exclude, sz_syms) re_include = re.compile(re_include_str) re_exclude = re.compile(re_exclude_str) # If a symbol doesn't explicitly match re_include or re_exclude, # the default MatchSymbol() result is True, unless some --include # args are provided. default_match = not args.include whitelist_has_items = False with open(whitelist_sz_unescaped, 'w') as f: for sym in sz_syms: if MatchSymbol(sym, re_include, re_exclude, default_match): f.write(sym + '\n') whitelist_has_items = True shellcmd(( '{objcopy} --weaken {obj} {weak}' ).format(objcopy=objcopy, obj=obj_sz, weak=obj_sz_weak), echo=args.verbose) if whitelist_has_items: # objcopy returns an error if the --weaken-symbols file is empty. shellcmd(( '{objcopy} --weaken-symbols={whitelist} {obj} {weak}' ).format(objcopy=objcopy, whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak), echo=args.verbose) else: shellcmd(( '{objcopy} {obj} {weak}' ).format(objcopy=objcopy, obj=obj_llc, weak=obj_llc_weak), echo=args.verbose) obj_partial = pexe_base + '.o' ld = { 'arm32': 'arm-linux-gnueabihf-ld', 'x8632': 'ld', 'x8664': 'ld', }[args.target] emulation = { 'arm32': 'armelf_linux_eabi', 'x8632': 'elf_i386', 'x8664': 'elf32_x86_64' if not args.sandbox else 'elf_x86_64', }[args.target] shellcmd(( '{ld} -r -m {emulation} -o {partial} {sz} {llc}' ).format(ld=ld, emulation=emulation, partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak), echo=args.verbose) shellcmd(( '{objcopy} -w --localize-symbol="*" {partial}' ).format(objcopy=objcopy, partial=obj_partial), echo=args.verbose) shellcmd(( '{objcopy} --globalize-symbol={start} ' + '--globalize-symbol=__Sz_block_profile_info {partial}' ).format(objcopy=objcopy, partial=obj_partial, start=get_sfi_string(args, '_start', '_start', '_user_start')), echo=args.verbose) # Run the linker regardless of hybrid mode. if args.sandbox: LinkSandbox([obj_partial], exe, args.target, args.verbose) elif args.nonsfi: LinkNonsfi([obj_partial], exe, args.target, args.verbose) else: objs = [obj_partial] if args.asan: objs.append( ('{root}/toolchain_build/src/subzero/build/runtime/' + 'szrt_asan_{target}.o').format(root=nacl_root, target=args.target)) LinkNative(objs, exe, args.target, args.verbose) # Put the extra verbose printing at the end. if args.verbose and hybrid: print 'include={regex}'.format(regex=re_include_str) print 'exclude={regex}'.format(regex=re_exclude_str) print 'default_match={dm}'.format(dm=default_match) print 'Number of Subzero syms = {num}'.format(num=len(sz_syms)) if __name__ == '__main__': main()