;
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2010, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
;
; Copyright (C) 1999-2006, MIYASAKA Masaru.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
;    claim that you wrote the original software. If you use this software
;    in a product, an acknowledgment in the product documentation would be
;    appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
;    misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; [TAB8]

; ==========================================================================
;  System-dependent configurations
;
;  The object format is selected by a symbol given on the assembler command
;  line (-DWIN32, -DWIN64, -DOBJ32, -DELF, -DAOUT or -DMACHO).  When none of
;  them is defined, the generic fallback at the end of the chain is used.
;  Every branch defines:
;    SEG_TEXT    section name and attributes for code
;    SEG_CONST   section name and attributes for constant tables
;  and, where the format needs it:
;    GOT_SYMBOL  symbol used as the base for position-independent addressing
;    EXTN(name)  decoration of external symbols (a branch that leaves it
;                undefined gets the leading-underscore default further below)

%ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
; * Microsoft Visual C++
; * MinGW (Minimalist GNU for Windows)
; * CygWin
; * LCC-Win32

; -- segment definition --
;
%ifdef __YASM_VER__
%define SEG_TEXT    .text  align=16
%define SEG_CONST   .rdata align=16
%else
%define SEG_TEXT    .text  align=16 public use32 class=CODE
%define SEG_CONST   .rdata align=16 public use32 class=CONST
%endif

%elifdef WIN64  ; ----(nasm -fwin64 -DWIN64 ...)--------
; * Microsoft Visual C++

; -- segment definition --
;
%ifdef __YASM_VER__
%define SEG_TEXT    .text  align=16
%define SEG_CONST   .rdata align=16
%else
%define SEG_TEXT    .text  align=16 public use64 class=CODE
%define SEG_CONST   .rdata align=16 public use64 class=CONST
%endif
%define EXTN(name)  name                ; foo() -> foo

%elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
; * Borland C++ (Win32)

; -- segment definition --
;
%define SEG_TEXT    _text align=16 public use32 class=CODE
%define SEG_CONST   _data align=16 public use32 class=DATA

%elifdef ELF    ; ----(nasm -felf[64] -DELF ...)------------
; * Linux
; * *BSD family Unix using elf format
; * Unix System V, including Solaris x86, UnixWare and SCO Unix

; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits

; -- segment definition --
;
%ifdef __x86_64__
%define SEG_TEXT    .text   progbits align=16
%define SEG_CONST   .rodata progbits align=16
%else
%define SEG_TEXT    .text   progbits alloc exec   nowrite align=16
%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
%endif

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_       ; ELF supports PIC
%define EXTN(name)  name                        ; foo() -> foo

%elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_      ; BSD-style a.out supports PIC

%elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)

; -- segment definition --
;
%define SEG_TEXT    .text  ;align=16    ; nasm doesn't accept align=16. why?
%define SEG_CONST   .rodata align=16

; The generation of position-independent code (PIC) is the default on Darwin.
;
%define PIC
%define GOT_SYMBOL  _MACHO_PIC_         ; Mach-O style code-relative addressing

%else           ; ----(Other case)----------------------

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

%endif  ; ----------------------------------------------

; ==========================================================================

; --------------------------------------------------------------------------
;  Common types
;
;  Each type comes as a triple: the NASM size keyword, its size in bytes
;  (SIZEOF_xxx) and its size in bits (xxx_BIT).  The SIZEOF_*/*_BIT base
;  values are defined at the end of this section; %define bodies are expanded
;  at the point of use, so referring to them before their definition is fine.
;
%ifdef __x86_64__
%define POINTER         qword           ; general pointer type
%define SIZEOF_POINTER  SIZEOF_QWORD    ; sizeof(POINTER)
%define POINTER_BIT     QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%else
%define POINTER         dword           ; general pointer type
%define SIZEOF_POINTER  SIZEOF_DWORD    ; sizeof(POINTER)
%define POINTER_BIT     DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%endif

%define INT             dword           ; signed integer type
%define SIZEOF_INT      SIZEOF_DWORD    ; sizeof(INT)
%define INT_BIT         DWORD_BIT       ; sizeof(INT)*BYTE_BIT

%define FP32            dword           ; IEEE754 single
%define SIZEOF_FP32     SIZEOF_DWORD    ; sizeof(FP32)
%define FP32_BIT        DWORD_BIT       ; sizeof(FP32)*BYTE_BIT

%define MMWORD          qword           ; int64  (MMX register)
%define SIZEOF_MMWORD   SIZEOF_QWORD    ; sizeof(MMWORD)
%define MMWORD_BIT      QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT

; NASM is buggy and doesn't properly handle operand sizes for SSE
; instructions, so for now we have to define XMMWORD as blank.
%define XMMWORD                         ; int128 (SSE register)
%define SIZEOF_XMMWORD  SIZEOF_OWORD    ; sizeof(XMMWORD)
%define XMMWORD_BIT     OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT

; Similar hacks for when we load a dword or MMWORD into an xmm# register
%define XMM_DWORD
%define XMM_MMWORD

%define SIZEOF_BYTE     1               ; sizeof(BYTE)
%define SIZEOF_WORD     2               ; sizeof(WORD)
%define SIZEOF_DWORD    4               ; sizeof(DWORD)
%define SIZEOF_QWORD    8               ; sizeof(QWORD)
%define SIZEOF_OWORD    16              ; sizeof(OWORD)

%define BYTE_BIT        8               ; CHAR_BIT in C
%define WORD_BIT        16              ; sizeof(WORD)*BYTE_BIT
%define DWORD_BIT       32              ; sizeof(DWORD)*BYTE_BIT
%define QWORD_BIT       64              ; sizeof(QWORD)*BYTE_BIT
%define OWORD_BIT       128             ; sizeof(OWORD)*BYTE_BIT

; --------------------------------------------------------------------------
;  External Symbol Name
;
;  Default decoration for platforms that did not define EXTN above:
;  prepend one underscore to the C name.
;
%ifndef EXTN
%define EXTN(name)   _ %+ name          ; foo() -> _foo
%endif

; --------------------------------------------------------------------------
;  Macros for position-independent code (PIC) support
;
;  Interface (identical in all variants):
;    get_GOT reg         load the PIC base address into reg
;    GOTOFF(got,sym)     address expression for sym relative to that base
;    pushpic reg         push reg     (expands to nothing without PIC)
;    poppic  reg         pop reg      (expands to nothing without PIC)
;    movpic  dst,src     mov dst,src  (expands to nothing without PIC)
;
;  PIC is silently disabled when the object format provides no GOT_SYMBOL.
;
;  NOTE(review): both get_GOT variants are written with 32-bit registers
;  and [esp]; they look intended for 32-bit code only -- confirm before
;  using them in a 64-bit build.
;
%ifndef GOT_SYMBOL
%undef PIC
%endif

%ifdef PIC ; -------------------------------------------

%ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------

; At present, nasm doesn't seem to support PIC generation for Mach-O.
; The PIC support code below is a little tricky.
;
; const_base marks the current position in SEG_CONST at the point where this
; file is included; GOTOFF() addresses constants relative to it.  Note that
; including this file therefore switches the current section to SEG_CONST.

        SECTION SEG_CONST
const_base:

%define GOTOFF(got,sym) (got) + (sym) - const_base

; get_GOT reg  (Mach-O variant)
;   Out:   reg = run-time address of const_base
;   Clobb: ecx, flags (ebp is saved and restored)
;
;   How it works: the call/ret pair fetches the address of the "add"
;   instruction into ecx, and the add advances it to %%ref.  The two "db"
;   bytes followed by the E9 opcode and 32-bit displacement of
;   "jmp near const_base" together form a single lea instruction whose
;   displacement is (const_base - %%ref); with ebp zeroed the result is the
;   address of const_base.  The jmp is never executed as a jump.
;
%imacro get_GOT 1
        ; NOTE: this macro destroys ecx register.
        call    %%geteip
        add     ecx, byte (%%ref - $)   ; ecx = address of %%ref
        jmp     short %%adjust
%%geteip:
        mov     ecx, POINTER [esp]      ; ecx = return address (the add above)
        ret
%%adjust:
        push    ebp
        xor     ebp,ebp                 ; ebp = 0
%ifidni %1,ebx  ; (%1 == ebx)
        ; db 0x8D,0x9C + jmp near const_base =
        ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
        db      0x8D,0x9C               ; 8D,9C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:
%else   ; (%1 != ebx)
        ; db 0x8D,0x8C + jmp near const_base =
        ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
        db      0x8D,0x8C               ; 8D,8C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:  mov     %1, ecx
%endif  ; (%1 == ebx)
        pop     ebp
%endmacro

%else   ; GOT_SYMBOL != _MACHO_PIC_ ----------------

%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff

; get_GOT reg  (ELF / a.out variant)
;   Out:   reg = address of GOT_SYMBOL
;   Clobb: flags
;
;   The call/ret pair loads the address of the "add" instruction into reg;
;   the "wrt ..gotpc" expression then converts it into the GOT address.
;
%imacro get_GOT 1
        extern  GOT_SYMBOL
        call    %%geteip
        add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
        jmp     short %%done
%%geteip:
        mov     %1, POINTER [esp]       ; reg = return address (the add above)
        ret
%%done:
%endmacro

%endif  ; GOT_SYMBOL == _MACHO_PIC_ ----------------

%imacro pushpic 1.nolist
        push    %1
%endmacro
%imacro poppic  1.nolist
        pop     %1
%endmacro
%imacro movpic  2.nolist
        mov     %1,%2
%endmacro

%else   ; !PIC -----------------------------------------

; Without PIC, symbols are addressed directly and the helper macros expand
; to nothing, so callers can use them unconditionally.

%define GOTOFF(got,sym) (sym)

%imacro get_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%endmacro
%imacro poppic  1.nolist
%endmacro
%imacro movpic  2.nolist
%endmacro

%endif  ;  PIC -----------------------------------------

; --------------------------------------------------------------------------
;  Align the next instruction on {2,4,8,16,..}-byte boundary.
;  ".balign n,,m" in GNU as
;
;  Usage:  alignx n [, m]
;    n   alignment in bytes (a power of two)
;    m   optional limit: when more than m padding bytes would be needed,
;        no padding is emitted at all (default 0xFFFF)
;
;  The padding is executable filler, so the macro may be placed where
;  execution falls through into the aligned label.
;
;  Helpers:
;    MSKLE(x,y)  non-zero mask when x <= y, 0 otherwise (x and y are taken
;                modulo 0x10000); ANDed with a repeat count to switch a
;                "times" line on or off
;    FILLB(b,n)  number of bytes from position b up to the next n-byte
;                boundary, measured from the start of the section ($$)
;
%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b,n)  (($$-(b)) & ((n)-1))

%ifdef __x86_64__

; 64-bit code: pad with single-byte nops only.  The multi-byte fillers used
; by the 32-bit variant below are 32-bit register writes (lea ebx/ebp,
; mov ebp,ebp); executed in 64-bit mode they would zero the upper halves of
; rbx and rbp, so they must not be emitted here.
%imacro alignx 1-2.nolist 0xFFFF
%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1) \
               db 0x90                                  ; nop
%endmacro

%else

; 32-bit code: a gap of 16 bytes or more is filled with single-byte nops;
; a shorter gap is filled with as few instructions as possible, each of
; which leaves its register unchanged.  Every "times" line recomputes the
; remaining gap from the current position ($), so the lines consume the gap
; from the largest filler down to the smallest.
%imacro alignx 1-2.nolist 0xFFFF
%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
               db 0x90                                  ; nop
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
               db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00    ; lea ebx,[ebx+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
               db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00    ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
               db 0x8D,0xAD,0x00,0x00,0x00,0x00         ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
               db 0x8D,0x6C,0x25,0x00                   ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
               db 0x8D,0x6D,0x00                        ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
               db 0x8B,0xED                             ; mov ebp,ebp
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
               db 0x90                                  ; nop
%endmacro

%endif

; Align the next data on {2,4,8,16,..}-byte boundary.
;
;  Usage:  alignz n
;    Pads with zero bytes up to the next n-byte boundary.
;
%imacro alignz 1.nolist
        align   %1, db 0                ; filling zeros
%endmacro

; --------------------------------------------------------------------------
;  Argument collection for 64-bit code
;
;  collect_args copies the first six integer/pointer arguments into the same
;  registers on both 64-bit ABIs, so that function bodies can be shared:
;
;      arg1 -> r10   arg2 -> r11   arg3 -> r12
;      arg4 -> r13   arg5 -> r14   arg6 -> r15
;
;  uncollect_args undoes the register saves made by collect_args.  The two
;  must be used as a pair, with rsp at uncollect_args equal to its value
;  right after collect_args.
;
%ifdef __x86_64__

%ifdef WIN64

; collect_args  (Microsoft x64 ABI)
;   In:    rcx, rdx, r8, r9 = arguments 1-4
;          rax = base for reading arguments 5 and 6, at [rax+48] and
;                [rax+56].  The macro does not set rax; the invoking code
;                must load it beforehand.  The offsets presumably match rsp
;                as it is after a single 8-byte push at function entry
;                (saved register + return address + 32-byte shadow space)
;                -- TODO confirm against the callers.
;          rsp must be 16-byte aligned when the macro is invoked: six
;                8-byte pushes keep that alignment, and the movaps stores
;                below require it.
;   Saves: r12-r15, rsi, rdi, xmm6, xmm7
;   Stack: 80 bytes (6 pushes + 2 * SIZEOF_XMMWORD)
;
%imacro collect_args 0
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rcx                ; arg1
        mov     r11, rdx                ; arg2
        mov     r12, r8                 ; arg3
        mov     r13, r9                 ; arg4
        mov     r14, [rax+48]           ; arg5 (on the stack)
        mov     r15, [rax+56]           ; arg6 (on the stack)
        push    rsi
        push    rdi
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm6
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm7
%endmacro

; uncollect_args  (Microsoft x64 ABI)
;   Restores xmm7, xmm6, rdi, rsi and r15-r12, in the reverse of the order
;   in which collect_args saved them, and releases the 80 bytes of stack.
;
%imacro uncollect_args 0
        movaps  xmm7, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        movaps  xmm6, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        pop     rdi
        pop     rsi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
%endmacro

%else

; collect_args  (System V AMD64 ABI)
;   In:    rdi, rsi, rdx, rcx, r8, r9 = arguments 1-6
;   Saves: r10-r15
;   Stack: 48 bytes (6 pushes)
;
%imacro collect_args 0
        push    r10
        push    r11
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rdi                ; arg1
        mov     r11, rsi                ; arg2
        mov     r12, rdx                ; arg3
        mov     r13, rcx                ; arg4
        mov     r14, r8                 ; arg5
        mov     r15, r9                 ; arg6
%endmacro

; uncollect_args  (System V AMD64 ABI)
;   Restores r15-r10, in the reverse of the order in which collect_args
;   saved them.
;
%imacro uncollect_args 0
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     r11
        pop     r10
%endmacro

%endif

%endif

; --------------------------------------------------------------------------
;  Defines picked up from the C headers
;
%include "jsimdcfg.inc"

; --------------------------------------------------------------------------