• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;
2; jsimdext.inc - common declarations
3;
4; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5; Copyright 2010 D. R. Commander
6;
7; Based on
8; x86 SIMD extension for IJG JPEG library - version 1.02
9;
10; Copyright (C) 1999-2006, MIYASAKA Masaru.
11;
12; This software is provided 'as-is', without any express or implied
13; warranty.  In no event will the authors be held liable for any damages
14; arising from the use of this software.
15;
16; Permission is granted to anyone to use this software for any purpose,
17; including commercial applications, and to alter it and redistribute it
18; freely, subject to the following restrictions:
19;
20; 1. The origin of this software must not be misrepresented; you must not
21;    claim that you wrote the original software. If you use this software
22;    in a product, an acknowledgment in the product documentation would be
23;    appreciated but is not required.
24; 2. Altered source versions must be plainly marked as such, and must not be
25;    misrepresented as being the original software.
26; 3. This notice may not be removed or altered from any source distribution.
27;
28; [TAB8]
29
30; ==========================================================================
31;  System-dependent configurations
32
33%ifdef WIN32	; ----(nasm -fwin32 -DWIN32 ...)--------
34; * Microsoft Visual C++
35; * MinGW (Minimalist GNU for Windows)
36; * CygWin
37; * LCC-Win32
; Neither EXTN nor GOT_SYMBOL is defined in this branch, so the "_name"
; fallback for EXTN and the empty (non-PIC) macro set further down apply.
38
39; -- segment definition --
40;
41%ifdef __YASM_VER__
42%define SEG_TEXT    .text  align=16
43%define SEG_CONST   .rdata align=16
44%else
45%define SEG_TEXT    .text  align=16 public use32 class=CODE
46%define SEG_CONST   .rdata align=16 public use32 class=CONST
47%endif
48
49%elifdef WIN64	; ----(nasm -fwin64 -DWIN64 ...)--------
50; * Microsoft Visual C++
; EXTN is defined without a leading underscore; GOT_SYMBOL is not defined,
; so the empty (non-PIC) macro set further down applies.
51
52; -- segment definition --
53;
54%ifdef __YASM_VER__
55%define SEG_TEXT    .text  align=16
56%define SEG_CONST   .rdata align=16
57%else
58%define SEG_TEXT    .text  align=16 public use64 class=CODE
59%define SEG_CONST   .rdata align=16 public use64 class=CONST
60%endif
61%define EXTN(name)  name			; foo() -> foo
62
63%elifdef OBJ32	; ----(nasm -fobj -DOBJ32 ...)----------
64; * Borland C++ (Win32)
; Constants go to .data here (class=DATA) rather than to a read-only section.
65
66; -- segment definition --
67;
68%define SEG_TEXT    .text  align=16 public use32 class=CODE
69%define SEG_CONST   .data  align=16 public use32 class=DATA
70
71%elifdef ELF	; ----(nasm -felf[64] -DELF ...)------------
72; * Linux
73; * *BSD family Unix using elf format
74; * Unix System V, including Solaris x86, UnixWare and SCO Unix
75
76; PIC is the default on Linux: it is defined unconditionally for ELF here,
; so -DPIC does not need to be passed on the command line.
77%define PIC
78
79; mark stack as non-executable
80section .note.GNU-stack noalloc noexec nowrite progbits
81
82; -- segment definition --
83;
84%ifdef __x86_64__
85%define SEG_TEXT    .text   progbits align=16
86%define SEG_CONST   .rodata progbits align=16
87%else
88%define SEG_TEXT    .text   progbits alloc exec   nowrite align=16
89%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
90%endif
91
92; GOT symbol consumed by the PIC macros below (PIC itself is already
93; defined above for every ELF build).
94%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_	; ELF supports PIC
95%define EXTN(name)  name			; foo() -> foo
96
97%elifdef AOUT	; ----(nasm -faoutb/aout -DAOUT ...)----
98; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
99; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)
100
101; -- segment definition --
102;
103%define SEG_TEXT    .text
104%define SEG_CONST   .data
105
106; To make the code position-independent, append -DPIC to the command line
107; (PIC is not defined automatically in this branch).
108%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_	; BSD-style a.out supports PIC
109
110%elifdef MACHO	; ----(nasm -fmacho -DMACHO ...)--------
111; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
112
113; -- segment definition --
114;
115%define SEG_TEXT    .text  ;align=16	; nasm doesn't accept align=16. why?
116%define SEG_CONST   .rodata align=16
117
118; The generation of position-independent code (PIC) is the default on Darwin.
119;
; _MACHO_PIC_ is a marker value: the PIC section below compares GOT_SYMBOL
; against it (%ifidn) to select the Mach-O specific get_GOT/GOTOFF variants.
120%define PIC
121%define GOT_SYMBOL  _MACHO_PIC_		; Mach-O style code-relative addressing
122
123%else		; ----(Other case)----------------------
; No object format macro was given: plain section names, no GOT_SYMBOL
; (so PIC is forced off below) and the "_name" fallback for EXTN.
124
125; -- segment definition --
126;
127%define SEG_TEXT    .text
128%define SEG_CONST   .data
129
130%endif	; ----------------------------------------------
131
132; ==========================================================================
133
134; --------------------------------------------------------------------------
135;  Common types
136;
137%ifdef __x86_64__
; Pointer-sized operand keyword and its size constants follow the target
; word size: qword when __x86_64__ is defined, dword otherwise.
138%define POINTER                 qword           ; general pointer type
139%define SIZEOF_POINTER          SIZEOF_QWORD    ; sizeof(POINTER)
140%define POINTER_BIT             QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
141%else
142%define POINTER                 dword           ; general pointer type
143%define SIZEOF_POINTER          SIZEOF_DWORD    ; sizeof(POINTER)
144%define POINTER_BIT             DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
145%endif
146
147%define INT                     dword           ; signed integer type
148%define SIZEOF_INT              SIZEOF_DWORD    ; sizeof(INT)
149%define INT_BIT                 DWORD_BIT       ; sizeof(INT)*BYTE_BIT
150
151%define FP32                    dword           ; IEEE754 single
152%define SIZEOF_FP32             SIZEOF_DWORD    ; sizeof(FP32)
153%define FP32_BIT                DWORD_BIT       ; sizeof(FP32)*BYTE_BIT
154
155%define MMWORD                  qword           ; int64  (MMX register)
156%define SIZEOF_MMWORD           SIZEOF_QWORD    ; sizeof(MMWORD)
157%define MMWORD_BIT              QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT
158
159; NASM is buggy and doesn't properly handle operand sizes for SSE
160; instructions, so for now we have to define XMMWORD as blank.
; Consequence: "XMMWORD [mem]" expands to a plain "[mem]" operand with no
; size override; only SIZEOF_XMMWORD / XMMWORD_BIT carry the 128-bit size.
161%define XMMWORD                                 ; int128 (SSE register)
162%define SIZEOF_XMMWORD          SIZEOF_OWORD    ; sizeof(XMMWORD)
163%define XMMWORD_BIT             OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT
164
165; Similar hacks for when we load a dword or MMWORD into an xmm# register
; (both expand to nothing, so the memory operand carries no size keyword)
166%define XMM_DWORD
167%define XMM_MMWORD
168
; The SIZEOF_* and *_BIT names referenced above are only defined here.
; That is fine because a %define body is expanded when the macro is used,
; not when it is defined.
169%define SIZEOF_BYTE             1               ; sizeof(BYTE)
170%define SIZEOF_WORD             2               ; sizeof(WORD)
171%define SIZEOF_DWORD            4               ; sizeof(DWORD)
172%define SIZEOF_QWORD            8               ; sizeof(QWORD)
173%define SIZEOF_OWORD            16              ; sizeof(OWORD)
174
175%define BYTE_BIT                8               ; CHAR_BIT in C
176%define WORD_BIT                16              ; sizeof(WORD)*BYTE_BIT
177%define DWORD_BIT               32              ; sizeof(DWORD)*BYTE_BIT
178%define QWORD_BIT               64              ; sizeof(QWORD)*BYTE_BIT
179%define OWORD_BIT               128             ; sizeof(OWORD)*BYTE_BIT
180
181; --------------------------------------------------------------------------
182;  External Symbol Name
183;
184%ifndef EXTN
; Fallback used when the platform section above did not define EXTN:
; external symbol names get a leading underscore pasted on with %+.
185%define EXTN(name)   _ %+ name		; foo() -> _foo
186%endif
187
188; --------------------------------------------------------------------------
189;  Macros for position-independent code (PIC) support
190;
191%ifndef GOT_SYMBOL	; no GOT symbol for this target => PIC cannot be used, force it off
192%undef PIC
193%endif
194
195%ifdef PIC ; -------------------------------------------
196
197%ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------
198
199; At present, nasm doesn't seem to support PIC generation for Mach-O.
200; The PIC support code below is a little tricky.
;
; const_base labels the current position in SEG_CONST at the point where
; this file is included.  get_GOT loads the run-time address of const_base
; into the requested register, and GOTOFF(got,sym) then addresses a symbol
; as  got + (sym - const_base).
201
202	SECTION	SEG_CONST
203const_base:
204
205%define GOTOFF(got,sym) (got) + (sym) - const_base
206
; get_GOT reg
;   Loads the run-time address of const_base into reg (32-bit code only:
;   it uses esp/ecx/ebp).  ecx is always clobbered; ebp is saved and
;   restored around its temporary use as a zero index register.
207%imacro get_GOT	1
208	; NOTE: this macro destroys ecx register.
209	call	%%geteip
	; On return from %%geteip, ecx holds the address of this add
	; instruction; adding (%%ref - $) makes ecx = run-time address of %%ref.
210	add	ecx, byte (%%ref - $)
211	jmp	short %%adjust
212%%geteip:
213	mov	ecx, POINTER [esp]
214	ret
215%%adjust:
216	push	ebp
217	xor	ebp,ebp		; ebp = 0
	; The two db bytes plus the E9 opcode and rel32 displacement that the
	; assembler emits for "jmp near const_base" are laid out so that the
	; CPU decodes them as one lea with displacement (const_base - %%ref).
	; With ebp = 0 the result is ecx + (const_base - %%ref), i.e. the
	; run-time address of const_base.  The jmp itself is never executed
	; as a jump.
218%ifidni %1,ebx	; (%1 == ebx)
219	; db 0x8D,0x9C + jmp near const_base =
220	;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
221	db	0x8D,0x9C		; 8D,9C
222	jmp	near const_base		; E9,(const_base-%%ref)
223%%ref:
224%else  ; (%1 != ebx)
225	; db 0x8D,0x8C + jmp near const_base =
226	;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
227	db	0x8D,0x8C		; 8D,8C
228	jmp	near const_base		; E9,(const_base-%%ref)
229%%ref:	mov	%1, ecx
230%endif ; (%1 == ebx)
231	pop	ebp
232%endmacro
233
234%else	; GOT_SYMBOL != _MACHO_PIC_ ----------------
235
236%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff
237
; get_GOT reg
;   Loads the address of GOT_SYMBOL into reg: the call/ret pair fetches
;   the address of the add instruction from the stack into reg, and the
;   add applies the ..gotpc-relative displacement to it.
;   Uses [esp], so this is 32-bit code.
238%imacro get_GOT	1
239	extern	GOT_SYMBOL
240	call	%%geteip
241	add	%1, GOT_SYMBOL + $$ - $ wrt ..gotpc
242	jmp	short %%done
243%%geteip:
244	mov	%1, POINTER [esp]
245	ret
246%%done:
247%endmacro
248
249%endif	; GOT_SYMBOL == _MACHO_PIC_ ----------------
250
; pushpic/poppic/movpic emit a real push/pop/mov only in PIC builds; the
; !PIC versions below expand to nothing, so callers can save, restore and
; copy the GOT register unconditionally.
251%imacro pushpic	1.nolist
252	push	%1
253%endmacro
254%imacro poppic	1.nolist
255	pop	%1
256%endmacro
257%imacro movpic	2.nolist
258	mov	%1,%2
259%endmacro
260
261%else	; !PIC -----------------------------------------
262
; Non-PIC build: GOTOFF ignores the GOT register and yields the plain
; symbol, and all PIC helper macros expand to no code at all.
263%define GOTOFF(got,sym) (sym)
264
265%imacro get_GOT	1.nolist
266%endmacro
267%imacro pushpic	1.nolist
268%endmacro
269%imacro poppic	1.nolist
270%endmacro
271%imacro movpic	2.nolist
272%endmacro
273
274%endif	;  PIC -----------------------------------------
275
276; --------------------------------------------------------------------------
277;  Align the next instruction on {2,4,8,16,..}-byte boundary.
278;  ".balign n,,m" in GNU as
279;
280%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
; MSKLE(x,y): branch-free "x <= y" test on the low 16 bits of x and y.
;   Evaluates to a non-zero mask with all low bits set when x <= y, and
;   to 0 otherwise, so it can be ANDed with a repeat count to enable or
;   disable a "times" line.
281%define FILLB(b,n)  (($$-(b)) & ((n)-1))
; FILLB(b,n): number of pad bytes needed at position b to reach the next
;   n-byte boundary, measured from the section start ($$); n must be a
;   power of two.
282
; alignx n [,max]
;   Pads the code up to an n-byte boundary with instructions that leave
;   registers unchanged in 32-bit code.  Nothing is emitted when more than
;   max pad bytes would be required (max defaults to 0xFFFF).
;   If 16 or more pad bytes are needed they are all single-byte nops;
;   otherwise each following "times" line emits its multi-byte filler
;   only when at least that many pad bytes (the divisor) still remain,
;   and a final nop covers the last byte.
;   NOTE(review): the filler bytes are documented as 32-bit lea/mov on
;   ebx/ebp.  In 64-bit mode a write to a 32-bit register clears the upper
;   half of the 64-bit register, so these may not be no-ops there --
;   confirm before using alignx in x86-64 code.
283%imacro alignx 1-2.nolist 0xFFFF
284%%bs:	times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
285	       db 0x90                               ; nop
286	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
287	       db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
288	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
289	       db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
290	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
291	       db 0x8D,0xAD,0x00,0x00,0x00,0x00      ; lea ebp,[ebp+0x00000000]
292	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
293	       db 0x8D,0x6C,0x25,0x00                ; lea ebp,[ebp+0x00]
294	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
295	       db 0x8D,0x6D,0x00                     ; lea ebp,[ebp+0x00]
296	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
297	       db 0x8B,0xED                          ; mov ebp,ebp
298	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
299	       db 0x90                               ; nop
300%endmacro
301
302; Align the next data on {2,4,8,16,..}-byte boundary.
303;
304%imacro alignz 1.nolist
	; alignz n: pad data up to an n-byte boundary using zero bytes
	; (alignx above is the counterpart for code).
305	align %1, db 0		; filling zeros
306%endmacro
307
308%ifdef __x86_64__
309
310%ifdef WIN64
311
312%imacro collect_args 0
	; WIN64 variant: gathers six arguments into r10-r15 and saves the
	; registers this macro overwrites or that the function body is
	; expected to use: r12-r15, rsi, rdi, xmm6 and xmm7
	; (uncollect_args restores them).
	;
	; PRECONDITION: rax must already point into the caller's frame; this
	; macro reads [rax+48] and [rax+56] but never sets rax itself.
	; NOTE(review): presumably the function prologue does
	; "push rbp / mov rax,rsp" first, which would make these the 5th and
	; 6th stack arguments -- confirm against the callers.
	;
	; rsp must be 16-byte aligned at the movaps stores below; this macro
	; moves rsp by 6*8 + 2*16 = 80 bytes in total, so alignment on entry
	; is preserved.  NOTE(review): confirm callers enter with rsp aligned.
313	push r12
314	push r13
315	push r14
316	push r15
317	mov r10, rcx
318	mov r11, rdx
319	mov r12, r8
320	mov r13, r9
321	mov r14, [rax+48]
322	mov r15, [rax+56]
323	push rsi
324	push rdi
	; XMMWORD expands to nothing, so these are plain "movaps [rsp], xmmN".
325	sub     rsp, SIZEOF_XMMWORD
326	movaps  XMMWORD [rsp], xmm6
327	sub     rsp, SIZEOF_XMMWORD
328	movaps  XMMWORD [rsp], xmm7
329%endmacro
330
331%imacro uncollect_args 0
	; WIN64 variant: undoes collect_args by restoring xmm7, xmm6, rdi,
	; rsi and r15-r12 in the exact reverse order of the saves.
	; rsp must have the same value it had when collect_args finished.
	; r10 and r11 are not restored (collect_args did not save them).
332	movaps  xmm7, XMMWORD [rsp]
333	add     rsp, SIZEOF_XMMWORD
334	movaps  xmm6, XMMWORD [rsp]
335	add     rsp, SIZEOF_XMMWORD
336	pop rdi
337	pop rsi
338	pop r15
339	pop r14
340	pop r13
341	pop r12
342%endmacro
343
344%else
345
346%imacro collect_args 0
	; Non-WIN64 x86-64 variant: saves r10-r15 on the stack (48 bytes),
	; then copies the six register arguments rdi, rsi, rdx, rcx, r8, r9
	; into r10-r15, giving function bodies the same r10-r15 argument
	; layout as the WIN64 variant.
347	push r10
348	push r11
349	push r12
350	push r13
351	push r14
352	push r15
353	mov r10, rdi
354	mov r11, rsi
355	mov r12, rdx
356	mov r13, rcx
357	mov r14, r8
358	mov r15, r9
359%endmacro
360
361%imacro uncollect_args 0
	; Non-WIN64 x86-64 variant: pops r15-r10 in the reverse order of the
	; pushes in collect_args.  rsp must have the same value it had when
	; collect_args finished.
362	pop r15
363	pop r14
364	pop r13
365	pop r12
366	pop r11
367	pop r10
368%endmacro
369
370%endif
371
372%endif
373
374; --------------------------------------------------------------------------
375;  Defines picked up from the C headers
376;
377%include "jsimdcfg.inc"
378
379; Begin chromium edits
380%ifdef MACHO ; ----(nasm -fmacho -DMACHO ...)--------
; PRIVATE is a per-object-format symbol visibility suffix; it expands to
; nothing for formats other than Mach-O and ELF.
; NOTE(review): presumably appended to "global" declarations in the files
; that include this one -- no use is visible here, confirm at the call sites.
381%define PRIVATE :private_extern
382%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------
383%define PRIVATE :hidden
384%else
385%define PRIVATE
386%endif
387; End chromium edits
388
389; --------------------------------------------------------------------------
390