;
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2010 D. R. Commander
;
; Based on
; x86 SIMD extension for IJG JPEG library - version 1.02
;
; Copyright (C) 1999-2006, MIYASAKA Masaru.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
;    claim that you wrote the original software. If you use this software
;    in a product, an acknowledgment in the product documentation would be
;    appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
;    misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; [TAB8]

; ==========================================================================
;  System-dependent configurations
;
;  Exactly one of WIN32 / WIN64 / OBJ32 / ELF / AOUT / MACHO is expected to be
;  defined on the assembler command line (see the per-branch notes below).
;  Each branch defines:
;    SEG_TEXT    - section name + attributes for code
;    SEG_CONST   - section name + attributes for read-only constants
;  and, where applicable:
;    GOT_SYMBOL  - symbol used for position-independent addressing
;    EXTN(name)  - mapping from a C symbol name to its assembler name
;    PIC         - forces position-independent code generation

%ifdef WIN32	; ----(nasm -fwin32 -DWIN32 ...)--------
; * Microsoft Visual C++
; * MinGW (Minimalist GNU for Windows)
; * CygWin
; * LCC-Win32

; -- segment definition --
;
%ifdef __YASM_VER__
%define SEG_TEXT    .text  align=16
%define SEG_CONST   .rdata align=16
%else
%define SEG_TEXT    .text  align=16 public use32 class=CODE
%define SEG_CONST   .rdata align=16 public use32 class=CONST
%endif

%elifdef WIN64	; ----(nasm -fwin64 -DWIN64 ...)--------
; * Microsoft Visual C++

; -- segment definition --
;
%ifdef __YASM_VER__
%define SEG_TEXT    .text  align=16
%define SEG_CONST   .rdata align=16
%else
%define SEG_TEXT    .text  align=16 public use64 class=CODE
%define SEG_CONST   .rdata align=16 public use64 class=CONST
%endif
%define EXTN(name)  name		; foo() -> foo

%elifdef OBJ32	; ----(nasm -fobj -DOBJ32 ...)----------
; * Borland C++ (Win32)

; -- segment definition --
;
%define SEG_TEXT    .text align=16 public use32 class=CODE
%define SEG_CONST   .data align=16 public use32 class=DATA

%elifdef ELF	; ----(nasm -felf[64] -DELF ...)------------
; * Linux
; * *BSD family Unix using elf format
; * Unix System V, including Solaris x86, UnixWare and SCO Unix

; PIC is the default on Linux
%define PIC

; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits

; -- segment definition --
;
%ifdef __x86_64__
%define SEG_TEXT    .text   progbits align=16
%define SEG_CONST   .rodata progbits align=16
%else
%define SEG_TEXT    .text   progbits alloc exec   nowrite align=16
%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
%endif

; To make the code position-independent, append -DPIC to the commandline
; (NOTE: PIC is already defined unconditionally at the top of this ELF
;  branch, so -DPIC is not required here.)
;
%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_	; ELF supports PIC
%define EXTN(name)  name			; foo() -> foo

%elifdef AOUT	; ----(nasm -faoutb/aout -DAOUT ...)----
; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_	; BSD-style a.out supports PIC

%elifdef MACHO	; ----(nasm -fmacho -DMACHO ...)--------
; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)

; -- segment definition --
;
%define SEG_TEXT    .text  ;align=16	; nasm doesn't accept align=16. why?
%define SEG_CONST   .rodata align=16

; The generation of position-independent code (PIC) is the default on Darwin.
;
%define PIC
%define GOT_SYMBOL  _MACHO_PIC_		; Mach-O style code-relative addressing

%else		; ----(Other case)----------------------

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

%endif	; ----------------------------------------------

; ==========================================================================

; --------------------------------------------------------------------------
;  Common types
;
;  Each type comes as a triple: the NASM size keyword, its size in bytes
;  (SIZEOF_*) and its size in bits (*_BIT).  The SIZEOF_* / *_BIT values they
;  refer to are plain %define aliases that are resolved at expansion time.
;
%ifdef __x86_64__
%define POINTER                 qword		; general pointer type
%define SIZEOF_POINTER          SIZEOF_QWORD	; sizeof(POINTER)
%define POINTER_BIT             QWORD_BIT	; sizeof(POINTER)*BYTE_BIT
%else
%define POINTER                 dword		; general pointer type
%define SIZEOF_POINTER          SIZEOF_DWORD	; sizeof(POINTER)
%define POINTER_BIT             DWORD_BIT	; sizeof(POINTER)*BYTE_BIT
%endif

%define INT                     dword		; signed integer type
%define SIZEOF_INT              SIZEOF_DWORD	; sizeof(INT)
%define INT_BIT                 DWORD_BIT	; sizeof(INT)*BYTE_BIT

%define FP32                    dword		; IEEE754 single
%define SIZEOF_FP32             SIZEOF_DWORD	; sizeof(FP32)
%define FP32_BIT                DWORD_BIT	; sizeof(FP32)*BYTE_BIT

%define MMWORD                  qword		; int64  (MMX register)
%define SIZEOF_MMWORD           SIZEOF_QWORD	; sizeof(MMWORD)
%define MMWORD_BIT              QWORD_BIT	; sizeof(MMWORD)*BYTE_BIT

; NASM is buggy and doesn't properly handle operand sizes for SSE
; instructions, so for now we have to define XMMWORD as blank.
%define XMMWORD					; int128 (SSE register)
%define SIZEOF_XMMWORD          SIZEOF_OWORD	; sizeof(XMMWORD)
%define XMMWORD_BIT             OWORD_BIT	; sizeof(XMMWORD)*BYTE_BIT

; Similar hacks for when we load a dword or MMWORD into an xmm# register
%define XMM_DWORD
%define XMM_MMWORD

%define SIZEOF_BYTE             1		; sizeof(BYTE)
%define SIZEOF_WORD             2		; sizeof(WORD)
%define SIZEOF_DWORD            4		; sizeof(DWORD)
%define SIZEOF_QWORD            8		; sizeof(QWORD)
%define SIZEOF_OWORD            16		; sizeof(OWORD)

%define BYTE_BIT                8		; CHAR_BIT in C
%define WORD_BIT                16		; sizeof(WORD)*BYTE_BIT
%define DWORD_BIT               32		; sizeof(DWORD)*BYTE_BIT
%define QWORD_BIT               64		; sizeof(QWORD)*BYTE_BIT
%define OWORD_BIT               128		; sizeof(OWORD)*BYTE_BIT

; --------------------------------------------------------------------------
;  External Symbol Name
;
;  Default mapping, used only when the configuration section did not already
;  define EXTN: prepend an underscore to the C symbol name.
;
%ifndef EXTN
%define EXTN(name)   _ %+ name		; foo() -> _foo
%endif

; --------------------------------------------------------------------------
;  Macros for position-independent code (PIC) support
;
;  Interface (identical in all three variants below):
;    GOTOFF(got,sym)  - address expression for 'sym' given base register 'got'
;    get_GOT reg      - load the PIC base into 'reg'
;    pushpic reg      - push 'reg'   (expands to nothing when !PIC)
;    poppic  reg      - pop  'reg'   (expands to nothing when !PIC)
;    movpic  dst,src  - mov dst,src  (expands to nothing when !PIC)
;
;  PIC is silently disabled when the target format provides no GOT_SYMBOL.
;
%ifndef GOT_SYMBOL
%undef PIC
%endif

%ifdef PIC ; -------------------------------------------

%ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------

; At present, nasm doesn't seem to support PIC generation for Mach-O.
; The PIC support code below is a little tricky.

	SECTION	SEG_CONST
const_base:

%define GOTOFF(got,sym) (got) + (sym) - const_base

; get_GOT reg  (Mach-O variant)
;   Loads the address of const_base into 'reg'.
;   The displacement (const_base - %%ref) is obtained by emitting a
;   'jmp near const_base' whose opcode byte (E9) doubles as the SIB byte of a
;   hand-encoded 'lea', and whose rel32 operand doubles as the lea's disp32.
;   ebp is zeroed first so the ebp*8 index term contributes nothing.
%imacro get_GOT 1
	; NOTE: this macro destroys ecx register.
	call	%%geteip
	add	ecx, byte (%%ref - $)	; ecx = address of %%ref
	jmp	short %%adjust
%%geteip:
	mov	ecx, POINTER [esp]	; ecx = return address of the call above
	ret
%%adjust:
	push	ebp
	xor	ebp,ebp		; ebp = 0
%ifidni %1,ebx	; (%1 == ebx)
	; db 0x8D,0x9C + jmp near const_base =
	;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
	db	0x8D,0x9C		; 8D,9C
	jmp	near const_base		; E9,(const_base-%%ref)
%%ref:
%else	; (%1 != ebx)
	; db 0x8D,0x8C + jmp near const_base =
	;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
	db	0x8D,0x8C		; 8D,8C
	jmp	near const_base		; E9,(const_base-%%ref)
%%ref:	mov	%1, ecx
%endif	; (%1 == ebx)
	pop	ebp
%endmacro

%else	; GOT_SYMBOL != _MACHO_PIC_ ----------------

%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff

; get_GOT reg  (GOT-based variant)
;   Fetches the return address of a local call into 'reg', then adds the
;   ..gotpc-relative expression for GOT_SYMBOL to it.
%imacro get_GOT 1
	extern	GOT_SYMBOL
	call	%%geteip
	add	%1, GOT_SYMBOL + $$ - $ wrt ..gotpc
	jmp	short %%done
%%geteip:
	mov	%1, POINTER [esp]	; %1 = return address of the call above
	ret
%%done:
%endmacro

%endif	; GOT_SYMBOL == _MACHO_PIC_ ----------------

%imacro pushpic 1.nolist
	push	%1
%endmacro
%imacro poppic  1.nolist
	pop	%1
%endmacro
%imacro movpic  2.nolist
	mov	%1,%2
%endmacro

%else	; !PIC -----------------------------------------

; Without PIC a symbol is addressed directly and the helper macros expand to
; nothing, so callers can use them unconditionally.
%define GOTOFF(got,sym) (sym)

%imacro get_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%endmacro
%imacro poppic  1.nolist
%endmacro
%imacro movpic  2.nolist
%endmacro

%endif	;  PIC -----------------------------------------

; --------------------------------------------------------------------------
;  Align the next instruction on {2,4,8,16,..}-byte boundary.
;  ".balign n,,m" in GNU as
;
;  alignx n [,m]
;    n - alignment in bytes (power of two)
;    m - maximum number of padding bytes to emit (default 0xFFFF); when more
;        than m bytes would be needed, every 'times' count below is masked
;        to zero and nothing is emitted.
;
;  Helpers:
;    MSKLE(x,y)  - mask that is non-zero (all low bits set) when x <= y and
;                  zero otherwise; intended for 16-bit x and y.
;    FILLB(b,n)  - number of bytes from position b up to the next n-byte
;                  boundary, measured from the section start ($$).
;
;  Padding of 16 bytes or more is filled entirely with single-byte nops.
;  Shorter padding is built from the longest-fitting filler sequences listed
;  below; each 'times' count re-evaluates the bytes still missing at '$'.
;  Note that the filler sequences are 32-bit encodings.
;
%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b,n)  (($$-(b)) & ((n)-1))

%imacro alignx 1-2.nolist 0xFFFF
%%bs:	times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
	       db 0x90				; nop
	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
	       db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00	; lea ebx,[ebx+0x00000000]
	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
	       db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00	; lea ebp,[ebp+0x00000000]
	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
	       db 0x8D,0xAD,0x00,0x00,0x00,0x00		; lea ebp,[ebp+0x00000000]
	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
	       db 0x8D,0x6C,0x25,0x00			; lea ebp,[ebp+0x00]
	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
	       db 0x8D,0x6D,0x00			; lea ebp,[ebp+0x00]
	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
	       db 0x8B,0xED				; mov ebp,ebp
	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
	       db 0x90					; nop
%endmacro

; Align the next data on {2,4,8,16,..}-byte boundary.
;
; alignz n
;   Pads with zero bytes up to the next n-byte boundary.
;
%imacro alignz 1.nolist
	align	%1, db 0		; filling zeros
%endmacro

; --------------------------------------------------------------------------
;  64-bit argument collection
;
;  collect_args copies the first six integer arguments into r10..r15 so that
;  function bodies can use the same registers on both 64-bit ABIs;
;  uncollect_args undoes the register saves in reverse order and must be
;  paired with collect_args.
;
%ifdef __x86_64__

%ifdef WIN64

; collect_args (WIN64)
;   r10..r13 <- rcx, rdx, r8, r9
;   r14, r15 <- [rax+48], [rax+56]
;   NOTE(review): this relies on rax having been set by the caller's prologue
;   (presumably to the stack pointer after 'push rbp', so that +48/+56 address
;   the 5th/6th stack arguments) - confirm against the call sites.
;   Saves r12-r15, rsi, rdi, xmm6 and xmm7 on the stack.  movaps requires
;   rsp to be 16-byte aligned at the point of the stores.
%imacro collect_args 0
	push	r12
	push	r13
	push	r14
	push	r15
	mov	r10, rcx
	mov	r11, rdx
	mov	r12, r8
	mov	r13, r9
	mov	r14, [rax+48]
	mov	r15, [rax+56]
	push	rsi
	push	rdi
	sub	rsp, SIZEOF_XMMWORD
	movaps	XMMWORD [rsp], xmm6
	sub	rsp, SIZEOF_XMMWORD
	movaps	XMMWORD [rsp], xmm7
%endmacro

; uncollect_args (WIN64)
;   Restores xmm7, xmm6, rdi, rsi, r15..r12 saved by collect_args.
%imacro uncollect_args 0
	movaps	xmm7, XMMWORD [rsp]
	add	rsp, SIZEOF_XMMWORD
	movaps	xmm6, XMMWORD [rsp]
	add	rsp, SIZEOF_XMMWORD
	pop	rdi
	pop	rsi
	pop	r15
	pop	r14
	pop	r13
	pop	r12
%endmacro

%else

; collect_args (non-WIN64)
;   Saves r10..r15, then r10..r15 <- rdi, rsi, rdx, rcx, r8, r9.
%imacro collect_args 0
	push	r10
	push	r11
	push	r12
	push	r13
	push	r14
	push	r15
	mov	r10, rdi
	mov	r11, rsi
	mov	r12, rdx
	mov	r13, rcx
	mov	r14, r8
	mov	r15, r9
%endmacro

; uncollect_args (non-WIN64)
;   Restores r15..r10 saved by collect_args.
%imacro uncollect_args 0
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	r11
	pop	r10
%endmacro

%endif

%endif

; --------------------------------------------------------------------------
;  Defines picked up from the C headers
;
%include "jsimdcfg.inc"

; Begin chromium edits
; PRIVATE is a symbol-visibility suffix; it expands to nothing on formats
; other than Mach-O and ELF.
%ifdef MACHO	; ----(nasm -fmacho -DMACHO ...)--------
%define PRIVATE :private_extern
%elifdef ELF	; ----(nasm -felf[64] -DELF ...)------------
%define PRIVATE :hidden
%else
%define PRIVATE
%endif
; End chromium edits

; --------------------------------------------------------------------------
