1#!/usr/bin/env python2.7 2# 3# Copyright 2017 Google Inc. 4# 5# Use of this source code is governed by a BSD-style license that can be 6# found in the LICENSE file. 7 8import re 9import subprocess 10import sys 11 12clang = 'clang-4.0' 13objdump = 'gobjdump' 14ccache = 'ccache' 15stages = 'src/jumper/SkJumper_stages.cpp' 16stages_lowp = 'src/jumper/SkJumper_stages_lowp.cpp' 17generated = 'src/jumper/SkJumper_generated.S' 18generated_win = 'src/jumper/SkJumper_generated_win.S' 19 20clang = sys.argv[1] if len(sys.argv) > 1 else clang 21objdump = sys.argv[2] if len(sys.argv) > 2 else objdump 22ccache = sys.argv[3] if len(sys.argv) > 3 else ccache 23stages = sys.argv[4] if len(sys.argv) > 4 else stages 24stages_lowp = sys.argv[5] if len(sys.argv) > 5 else stages_lowp 25generated = sys.argv[6] if len(sys.argv) > 6 else generated 26generated_win = sys.argv[7] if len(sys.argv) > 7 else generated_win 27 28clang = [ccache, clang, '-x', 'c++'] 29 30 31cflags = ['-std=c++11', '-Os', '-DJUMPER', 32 '-momit-leaf-frame-pointer', '-ffp-contract=fast', 33 '-fno-exceptions', '-fno-rtti', '-fno-unwind-tables'] 34 35x86 = [ '-m32' ] 36win = ['-DWIN', '-mno-red-zone'] 37sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1'] 38subprocess.check_call(clang + cflags + sse2 + 39 ['-c', stages] + 40 ['-o', 'sse2.o']) 41subprocess.check_call(clang + cflags + sse2 + win + 42 ['-c', stages] + 43 ['-o', 'win_sse2.o']) 44subprocess.check_call(clang + cflags + sse2 + x86 + 45 ['-c', stages] + 46 ['-o', 'x86_sse2.o']) 47subprocess.check_call(clang + cflags + sse2 + win + x86 + 48 ['-c', stages] + 49 ['-o', 'win_x86_sse2.o']) 50 51ssse3 = ['-mssse3', '-mno-sse4.1'] 52subprocess.check_call(clang + cflags + ssse3 + 53 ['-c', stages_lowp] + 54 ['-o', 'lowp_ssse3.o']) 55subprocess.check_call(clang + cflags + ssse3 + win + 56 ['-c', stages_lowp] + 57 ['-o', 'win_lowp_ssse3.o']) 58 59sse41 = ['-msse4.1'] 60subprocess.check_call(clang + cflags + sse41 + 61 ['-c', stages] + 62 ['-o', 'sse41.o']) 63subprocess.check_call(clang + cflags + sse41 + win + 64 ['-c', stages] + 65 ['-o', 'win_sse41.o']) 66 67avx = ['-mavx'] 68subprocess.check_call(clang + cflags + avx + 69 ['-c', stages] + 70 ['-o', 'avx.o']) 71subprocess.check_call(clang + cflags + avx + win + 72 ['-c', stages] + 73 ['-o', 'win_avx.o']) 74 75hsw = ['-mavx2', '-mfma', '-mf16c'] 76subprocess.check_call(clang + cflags + hsw + 77 ['-c', stages] + 78 ['-o', 'hsw.o']) 79subprocess.check_call(clang + cflags + hsw + win + 80 ['-c', stages] + 81 ['-o', 'win_hsw.o']) 82subprocess.check_call(clang + cflags + hsw + 83 ['-c', stages_lowp] + 84 ['-o', 'lowp_hsw.o']) 85subprocess.check_call(clang + cflags + hsw + win + 86 ['-c', stages_lowp] + 87 ['-o', 'win_lowp_hsw.o']) 88 89aarch64 = [ '--target=aarch64' ] 90subprocess.check_call(clang + cflags + aarch64 + 91 ['-c', stages] + 92 ['-o', 'aarch64.o']) 93 94vfp4 = [ 95 '--target=armv7a-linux-gnueabihf', 96 '-mfpu=neon-vfpv4', 97] 98subprocess.check_call(clang + cflags + vfp4 + 99 ['-c', stages] + 100 ['-o', 'vfp4.o']) 101 102def parse_object_file(dot_o, directive, target=None): 103 globl, hidden, label, comment, align = \ 104 '.globl', 'HIDDEN', ':', '// ', 'BALIGN' 105 if 'win' in dot_o: 106 globl, hidden, label, comment, align = \ 107 'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN ' 108 109 cmd = [objdump] 110 if target: 111 cmd += ['--target', target] 112 113 # Look for sections we know we can't handle. 114 section_headers = subprocess.check_output(cmd + ['-h', dot_o]) 115 for snippet in ['.rodata']: 116 if snippet in section_headers: 117 print >>sys.stderr, 'Found %s in section.' % snippet 118 assert snippet not in section_headers 119 120 if directive == '.long': 121 disassemble = ['-d', dot_o] 122 dehex = lambda h: '0x'+h 123 else: 124 # x86-64... as long as we're using %rip-relative addressing, 125 # literal sections should be fine to just dump in with .text. 126 disassemble = ['-d', # DO NOT USE -D. 127 '-z', # Print zero bytes instead of ... 128 '--insn-width=11', 129 '-j', '.text', 130 '-j', '.literal4', 131 '-j', '.literal16', 132 '-j', '.const', 133 dot_o] 134 dehex = lambda h: str(int(h,16)) 135 136 # Ok. Let's disassemble. 137 for line in subprocess.check_output(cmd + disassemble).split('\n'): 138 line = line.strip() 139 140 if not line or line.startswith(dot_o) or line.startswith('Disassembly'): 141 continue 142 143 # E.g. 00000000000003a4 <_load_f16>: 144 m = re.match('''[0-9a-f]+ <_?(.*)>:''', line) 145 if m: 146 print 147 sym = m.group(1) 148 if sym.startswith('.literal'): # .literal4, .literal16, etc 149 print sym.replace('.literal', align) 150 elif sym.startswith('.const'): # 32-byte constants 151 print align + '32' 152 elif not sym.startswith('sk_'): 153 print >>sys.stderr, "build_stages.py can't handle '%s' (yet?)." % sym 154 assert sym.startswith('sk_') 155 else: # a stage function 156 if hidden: 157 print hidden + ' _' + sym 158 print globl + ' _' + sym 159 if 'win' not in dot_o: 160 print 'FUNCTION(_' + sym + ')' 161 print '_' + sym + label 162 continue 163 164 columns = line.split('\t') 165 #print >>sys.stderr, columns 166 code = columns[1] 167 if len(columns) >= 4: 168 inst = columns[2] 169 args = columns[3] 170 else: 171 inst, args = columns[2], '' 172 if ' ' in columns[2]: 173 inst, args = columns[2].split(' ', 1) 174 code, inst, args = code.strip(), inst.strip(), args.strip() 175 176 hexed = ','.join(dehex(x) for x in code.split(' ')) 177 print ' ' + directive + ' ' + hexed + ' '*(36-len(hexed)) + \ 178 comment + inst + (' '*(14-len(inst)) + args if args else '') 179 180sys.stdout = open(generated, 'w') 181 182print '''# Copyright 2017 Google Inc. 183# 184# Use of this source code is governed by a BSD-style license that can be 185# found in the LICENSE file. 186 187# This file is generated semi-automatically with this command: 188# $ src/jumper/build_stages.py 189''' 190print '#if defined(__MACH__)' 191print ' #define HIDDEN .private_extern' 192print ' #define FUNCTION(name)' 193print ' #define BALIGN4 .align 2' 194print ' #define BALIGN16 .align 4' 195print ' #define BALIGN32 .align 5' 196print '#else' 197print ' .section .note.GNU-stack,"",%progbits' 198print ' #define HIDDEN .hidden' 199print ' #define FUNCTION(name) .type name,%function' 200print ' #define BALIGN4 .balign 4' 201print ' #define BALIGN16 .balign 16' 202print ' #define BALIGN32 .balign 32' 203print '#endif' 204 205print '.text' 206print '#if defined(__aarch64__)' 207print 'BALIGN4' 208parse_object_file('aarch64.o', '.long') 209 210print '#elif defined(__arm__)' 211print 'BALIGN4' 212parse_object_file('vfp4.o', '.long', target='elf32-littlearm') 213 214print '#elif defined(__x86_64__)' 215print 'BALIGN32' 216parse_object_file('hsw.o', '.byte') 217print 'BALIGN32' 218parse_object_file('avx.o', '.byte') 219print 'BALIGN32' 220parse_object_file('sse41.o', '.byte') 221print 'BALIGN32' 222parse_object_file('sse2.o', '.byte') 223print 'BALIGN32' 224parse_object_file('lowp_hsw.o', '.byte') 225print 'BALIGN32' 226parse_object_file('lowp_ssse3.o', '.byte') 227 228print '#elif defined(__i386__)' 229print 'BALIGN32' 230parse_object_file('x86_sse2.o', '.byte') 231 232print '#endif' 233 234sys.stdout = open(generated_win, 'w') 235print '''; Copyright 2017 Google Inc. 236; 237; Use of this source code is governed by a BSD-style license that can be 238; found in the LICENSE file. 239 240; This file is generated semi-automatically with this command: 241; $ src/jumper/build_stages.py 242''' 243print 'IFDEF RAX' 244print "_text32 SEGMENT ALIGN(32) 'CODE'" 245print 'ALIGN 32' 246parse_object_file('win_hsw.o', 'DB') 247print 'ALIGN 32' 248parse_object_file('win_avx.o', 'DB') 249print 'ALIGN 32' 250parse_object_file('win_sse41.o', 'DB') 251print 'ALIGN 32' 252parse_object_file('win_sse2.o', 'DB') 253print 'ALIGN 32' 254parse_object_file('win_lowp_hsw.o', 'DB') 255print 'ALIGN 32' 256parse_object_file('win_lowp_ssse3.o', 'DB') 257 258print 'ELSE' 259print '.MODEL FLAT,C' 260print "_text32 SEGMENT ALIGN(32) 'CODE'" 261print 'ALIGN 32' 262parse_object_file('win_x86_sse2.o', 'DB') 263 264print 'ENDIF' 265print 'END' 266