#!/usr/bin/env python2.7
#
# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
7
import re
import subprocess
import sys
11
# Built-in tool names and file paths, listed in command-line order.  Each one
# may be overridden positionally by sys.argv[1] .. sys.argv[7].
_defaults = [
    'clang-4.0',
    'gobjdump',
    'ccache',
    'src/jumper/SkJumper_stages.cpp',
    'src/jumper/SkJumper_stages_lowp.cpp',
    'src/jumper/SkJumper_generated.S',
    'src/jumper/SkJumper_generated_win.S',
]
_given = sys.argv[1:1 + len(_defaults)]
(clang, objdump, ccache, stages, stages_lowp,
 generated, generated_win) = _given + _defaults[len(_given):]

# From here on `clang` is the argv prefix shared by every compile command.
clang = [ccache, clang, '-x', 'c++']

cflags = ['-std=c++11', '-Os', '-DJUMPER',
          '-momit-leaf-frame-pointer', '-ffp-contract=fast',
          '-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']

# Flag groups that get combined below.
x86     = ['-m32']
win     = ['-DWIN', '-mno-red-zone']
sse2    = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
ssse3   = ['-mssse3', '-mno-sse4.1']
sse41   = ['-msse4.1']
avx     = ['-mavx']
hsw     = ['-mavx2', '-mfma', '-mf16c']
aarch64 = ['--target=aarch64']
vfp4    = ['--target=armv7a-linux-gnueabihf', '-mfpu=neon-vfpv4']

# (extra flags, source file, object file) for every object to build, in the
# order the compiles are run.
_jobs = [
    (sse2,             stages,      'sse2.o'),
    (sse2 + win,       stages,      'win_sse2.o'),
    (sse2 + x86,       stages,      'x86_sse2.o'),
    (sse2 + win + x86, stages,      'win_x86_sse2.o'),

    (ssse3,            stages_lowp, 'lowp_ssse3.o'),
    (ssse3 + win,      stages_lowp, 'win_lowp_ssse3.o'),

    (sse41,            stages,      'sse41.o'),
    (sse41 + win,      stages,      'win_sse41.o'),

    (avx,              stages,      'avx.o'),
    (avx + win,        stages,      'win_avx.o'),

    (hsw,              stages,      'hsw.o'),
    (hsw + win,        stages,      'win_hsw.o'),
    (hsw,              stages_lowp, 'lowp_hsw.o'),
    (hsw + win,        stages_lowp, 'win_lowp_hsw.o'),

    (aarch64,          stages,      'aarch64.o'),

    (vfp4,             stages,      'vfp4.o'),
]

# check_call raises CalledProcessError on the first failing compile, which
# stops the script before any later object is built.
for _flags, _source, _dot_o in _jobs:
  subprocess.check_call(clang + cflags + _flags +
                        ['-c', _source] +
                        ['-o', _dot_o])
101
def parse_object_file(dot_o, directive, target=None):
  """Disassemble dot_o with objdump and print it as assembler data lines.

  Each disassembled instruction is printed to sys.stdout as one `directive`
  line carrying its encoded bytes, with the mnemonic and operands appended as
  a comment.  Each symbol line is turned into an alignment directive (for
  .literal*/.const) or into the visibility/label lines of an sk_ stage.

  Args:
    dot_o:     Path of the object file.  If the name contains 'win', the
               alternate spellings ('PUBLIC', ' LABEL PROC', '; ', 'ALIGN ')
               are used and no HIDDEN/FUNCTION lines are printed.
    directive: Data directive to print.  '.long' runs a plain `objdump -d`
               and prints each code word as 0x-prefixed hex; any other value
               limits objdump to .text/.literal4/.literal16/.const and prints
               each byte in decimal.
    target:    Optional value passed to objdump via --target.

  Raises:
    AssertionError: the object contains a .rodata section, or defines a
        symbol that is neither .literal*, .const nor an sk_ stage.
    subprocess.CalledProcessError: objdump exited with a non-zero status.
  """
  globl, hidden, label, comment, align = \
      '.globl', 'HIDDEN', ':', '// ', 'BALIGN'
  if 'win' in dot_o:
    globl, hidden, label, comment, align = \
        'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN '

  cmd = [objdump]
  if target:
    cmd += ['--target', target]

  def fail(message):
    # Report on stderr and raise explicitly.  A bare `assert` is compiled out
    # under `python -O`, which would let unsupported input through silently.
    sys.stderr.write(message + '\n')
    raise AssertionError(message)

  def run(args):
    # universal_newlines yields text (not bytes) on Python 2 and 3 alike.
    return subprocess.check_output(cmd + args, universal_newlines=True)

  # Look for sections we know we can't handle.
  section_headers = run(['-h', dot_o])
  for snippet in ['.rodata']:
    if snippet in section_headers:
      fail('Found %s in section.' % snippet)

  if directive == '.long':
    disassemble = ['-d', dot_o]
    dehex = lambda h: '0x'+h
  else:
    # x86-64... as long as we're using %rip-relative addressing,
    # literal sections should be fine to just dump in with .text.
    disassemble = ['-d',               # DO NOT USE -D.
                   '-z',               # Print zero bytes instead of ...
                   '--insn-width=11',
                   '-j', '.text',
                   '-j', '.literal4',
                   '-j', '.literal16',
                   '-j', '.const',
                   dot_o]
    dehex = lambda h: str(int(h,16))

  # E.g. 00000000000003a4 <_load_f16>:
  symbol_line = re.compile(r'''[0-9a-f]+ <_?(.*)>:''')

  # Ok.  Let's disassemble.
  for line in run(disassemble).split('\n'):
    line = line.strip()

    # Skip blanks, the "<file>: file format ..." banner and section banners.
    if not line or line.startswith(dot_o) or line.startswith('Disassembly'):
      continue

    m = symbol_line.match(line)
    if m:
      print('')
      sym = m.group(1)
      if sym.startswith('.literal'):  # .literal4, .literal16, etc
        print(sym.replace('.literal', align))
      elif sym.startswith('.const'):  # 32-byte constants
        print(align + '32')
      elif not sym.startswith('sk_'):
        fail("build_stages.py can't handle '%s' (yet?)." % sym)
      else:  # a stage function
        if hidden:
          print(hidden + ' _' + sym)
        print(globl + ' _' + sym)
        if 'win' not in dot_o:
          print('FUNCTION(_' + sym + ')')
        print('_' + sym + label)
      continue

    # Instruction lines are tab separated: address, bytes, then either
    # mnemonic and operands in separate columns or both in one column.
    columns = line.split('\t')
    code = columns[1].strip()
    hexed = ','.join(dehex(x) for x in code.split(' '))

    if len(columns) < 3:
      # Address and bytes only: objdump wraps an instruction longer than
      # --insn-width onto a follow-up line that has no mnemonic.  Emit the
      # bytes as data instead of failing on the missing column.
      print('  ' + directive + '  ' + hexed)
      continue

    if len(columns) >= 4:
      inst, args = columns[2], columns[3]
    else:
      inst, args = columns[2], ''
      if ' ' in inst:
        inst, args = inst.split(' ', 1)
    inst, args = inst.strip(), args.strip()

    print('  ' + directive + '  ' + hexed + ' '*(36-len(hexed)) +
          comment + inst + (' '*(14-len(inst)) + args if args else ''))
179
def _print_to_file(path, emit):
  """Run emit() with sys.stdout pointed at a freshly truncated file at path.

  The file is closed and the previous sys.stdout is restored when emit()
  returns or raises, so no handle is left open and later output does not
  land in the generated file.
  """
  saved_stdout = sys.stdout
  with open(path, 'w') as out:
    sys.stdout = out
    try:
      emit()
    finally:
      sys.stdout = saved_stdout


def _emit_generated():
  """Print the GNU-style assembly: preamble, then one #if arm per target."""
  print('''# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# This file is generated semi-automatically with this command:
#   $ src/jumper/build_stages.py
''')
  # Macros that parse_object_file's output relies on (HIDDEN, FUNCTION,
  # BALIGN4/16/32), defined once for __MACH__ and once for everything else.
  for text in ('#if defined(__MACH__)',
               '    #define HIDDEN .private_extern',
               '    #define FUNCTION(name)',
               '    #define BALIGN4  .align 2',
               '    #define BALIGN16 .align 4',
               '    #define BALIGN32 .align 5',
               '#else',
               '    .section .note.GNU-stack,"",%progbits',
               '    #define HIDDEN .hidden',
               '    #define FUNCTION(name) .type name,%function',
               '    #define BALIGN4  .balign 4',
               '    #define BALIGN16 .balign 16',
               '    #define BALIGN32 .balign 32',
               '#endif'):
    print(text)

  print('.text')
  print('#if defined(__aarch64__)')
  print('BALIGN4')
  parse_object_file('aarch64.o', '.long')

  print('#elif defined(__arm__)')
  print('BALIGN4')
  parse_object_file('vfp4.o', '.long', target='elf32-littlearm')

  print('#elif defined(__x86_64__)')
  for dot_o in ('hsw.o', 'avx.o', 'sse41.o', 'sse2.o',
                'lowp_hsw.o', 'lowp_ssse3.o'):
    print('BALIGN32')
    parse_object_file(dot_o, '.byte')

  print('#elif defined(__i386__)')
  print('BALIGN32')
  parse_object_file('x86_sse2.o', '.byte')

  print('#endif')


def _emit_generated_win():
  """Print the Windows assembly: an IFDEF RAX arm and an ELSE arm."""
  print('''; Copyright 2017 Google Inc.
;
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

; This file is generated semi-automatically with this command:
;   $ src/jumper/build_stages.py
''')
  print('IFDEF RAX')
  print("_text32 SEGMENT ALIGN(32) 'CODE'")
  for dot_o in ('win_hsw.o', 'win_avx.o', 'win_sse41.o', 'win_sse2.o',
                'win_lowp_hsw.o', 'win_lowp_ssse3.o'):
    print('ALIGN 32')
    parse_object_file(dot_o, 'DB')

  print('ELSE')
  print('.MODEL FLAT,C')
  print("_text32 SEGMENT ALIGN(32) 'CODE'")
  print('ALIGN 32')
  parse_object_file('win_x86_sse2.o', 'DB')

  print('ENDIF')
  print('END')


_print_to_file(generated, _emit_generated)
_print_to_file(generated_win, _emit_generated_win)
266