;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_config.asm"

; 32/64 bit compatibility macros
;
; In general, we make the source use 64 bit syntax, then twiddle with it using
; the preprocessor to get the 32 bit syntax on 32 bit platforms.
;
; ABI_IS_32BIT
; 1 when the output format is one of the 32 bit object formats listed below,
; 0 for every other output format.
%ifidn __OUTPUT_FORMAT__,elf32
%define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,macho32
%define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,win32
%define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,aout
%define ABI_IS_32BIT 1
%else
%define ABI_IS_32BIT 0
%endif

%if ABI_IS_32BIT
; Map the 64 bit register names used throughout the sources onto their
; 32 bit counterparts.
%define rax eax
%define rbx ebx
%define rcx ecx
%define rdx edx
%define rsi esi
%define rdi edi
%define rsp esp
%define rbp ebp
; On 32 bit targets a plain mov is substituted for movsxd.
%define movsxd mov

; movq <dst>, <src>
; 32 bit replacement for movq: when either operand is one of the 32 bit
; general purpose registers listed below, emit movd instead; any other
; operand combination is passed through as a real movq.
%macro movq 2
  %ifidn %1,eax
    movd %1,%2
  %elifidn %2,eax
    movd %1,%2
  %elifidn %1,ebx
    movd %1,%2
  %elifidn %2,ebx
    movd %1,%2
  %elifidn %1,ecx
    movd %1,%2
  %elifidn %2,ecx
    movd %1,%2
  %elifidn %1,edx
    movd %1,%2
  %elifidn %2,edx
    movd %1,%2
  %elifidn %1,esi
    movd %1,%2
  %elifidn %2,esi
    movd %1,%2
  %elifidn %1,edi
    movd %1,%2
  %elifidn %2,edi
    movd %1,%2
  %elifidn %1,esp
    movd %1,%2
  %elifidn %2,esp
    movd %1,%2
  %elifidn %1,ebp
    movd %1,%2
  %elifidn %2,ebp
    movd %1,%2
  %else
    movq %1,%2
  %endif
%endmacro
%endif


; LIBVPX_YASM_WIN64
; Set LIBVPX_YASM_WIN64 if output is Windows 64bit so the code will work if x64
; or win64 is defined on the Yasm command line.
%ifidn __OUTPUT_FORMAT__,win64
%define LIBVPX_YASM_WIN64 1
%elifidn __OUTPUT_FORMAT__,x64
%define LIBVPX_YASM_WIN64 1
%else
%define LIBVPX_YASM_WIN64 0
%endif

; Declare groups of platforms
; LIBVPX_ELF is 1 for the elf32, elfx32 and elf64 output formats.
%ifidn __OUTPUT_FORMAT__,elf32
  %define LIBVPX_ELF 1
%elifidn __OUTPUT_FORMAT__,elfx32
  %define LIBVPX_ELF 1
%elifidn __OUTPUT_FORMAT__,elf64
  %define LIBVPX_ELF 1
%else
  %define LIBVPX_ELF 0
%endif

; LIBVPX_MACHO is 1 for the macho32 and macho64 output formats.
%ifidn __OUTPUT_FORMAT__,macho32
  %define LIBVPX_MACHO 1
%elifidn __OUTPUT_FORMAT__,macho64
  %define LIBVPX_MACHO 1
%else
  %define LIBVPX_MACHO 0
%endif

; sym()
; Return the proper symbol name for the target ABI.
;
; Certain ABIs, notably MS COFF and Darwin MACH-O, require that symbols
; with C linkage be prefixed with an underscore.
;
%if LIBVPX_ELF || LIBVPX_YASM_WIN64
  ; ELF and Win64: the name is used unchanged.
  %define sym(x) x
%else
  ; Mach-O / COFF
  %define sym(x) _ %+ x
%endif

; globalsym()
; Return a global declaration with the proper decoration for the target ABI.
;
; When CHROMIUM is defined, include attributes to hide the symbol from the
; global namespace.
;
; Chromium doesn't like exported global symbols due to symbol clashing with
; plugins among other things.
;
; Requires Chromium's patched copy of yasm:
;   http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
;   http://www.tortall.net/projects/yasm/ticket/236
; or nasm >= 2.14.
;
%ifdef CHROMIUM
  %ifdef __NASM_VER__
    %if __NASM_VERSION_ID__ < 0x020e0000 ; 2.14
      ; nasm < 2.14 does not support :private_extern directive
      %fatal Must use nasm 2.14 or newer
    %endif
  %endif

  %if LIBVPX_ELF
    %define globalsym(x) global sym(x) %+ :function hidden
  %elif LIBVPX_MACHO
    %define globalsym(x) global sym(x) %+ :private_extern
  %else
    ; COFF / PE32+
    %define globalsym(x) global sym(x)
  %endif
%else
  %define globalsym(x) global sym(x)
%endif

; arg()
; Return the address specification of the given argument
;
; The index is parenthesized so that an expression argument (for example
; arg(n+1)) is scaled as a whole instead of only its first term.
;
%if ABI_IS_32BIT
  %define arg(x) [ebp+8+4*(x)]
%else
  ; 64 bit ABI passes arguments in registers.  This is a workaround to get up
  ; and running quickly.  Relies on SHADOW_ARGS_TO_STACK
  %if LIBVPX_YASM_WIN64
    %define arg(x) [rbp+16+8*(x)]
  %else
    %define arg(x) [rbp-8-8*(x)]
  %endif
%endif

; REG_SZ_BYTES, REG_SZ_BITS
; Size of a register
%if ABI_IS_32BIT
%define REG_SZ_BYTES 4
%define REG_SZ_BITS  32
%else
%define REG_SZ_BYTES 8
%define REG_SZ_BITS  64
%endif


; ALIGN_STACK <alignment> <register>
; This macro aligns the stack to the given alignment (in bytes). The stack
; is left such that the previous value of the stack pointer is the first
; argument on the stack (ie, the inverse of this macro is 'pop rsp.')
; This macro uses one temporary register, which is not preserved, and thus
; must be specified as an argument.
; The alignment operand is parenthesized so that an expression can be passed.
%macro ALIGN_STACK 2
    mov         %2, rsp                               ; remember the incoming stack pointer
    and         rsp, -(%1)                            ; round rsp down to the alignment
    lea         rsp, [rsp - ((%1) - REG_SZ_BYTES)]    ; leave room so the push below lands aligned
    push        %2                                    ; saved rsp is now on top of the stack
%endmacro


;
; The Microsoft assembler tries to impose a certain amount of type safety in
; its register usage. YASM doesn't recognize these directives, so we just
; %define them away to maintain as much compatibility as possible with the
; original inline assembler we're porting from.
;
%idefine PTR
%idefine XMMWORD
%idefine MMWORD

; PIC macros
;
; GET_GOT <register>
;   32 bit PIC builds (elf32 / macho32): saves <register> on the stack, loads
;   a base address into it, and (re)defines GLOBAL(x) and RESTORE_GOT in
;   terms of that register. RESTORE_GOT pops the saved register back.
;   Everywhere else GET_GOT and RESTORE_GOT expand to nothing.
; GLOBAL(x)
;   Address expression to use when referencing the data symbol x.
; WRT_PLT
;   Suffix for calls to external functions; 'wrt ..plt' on the ELF PIC
;   configurations below, empty otherwise.
; HIDDEN_DATA(x)
;   Decorates a data symbol name with a hidden-visibility attribute on the
;   configurations below that request it.
;
%if ABI_IS_32BIT
  %if CONFIG_PIC=1
    %ifidn __OUTPUT_FORMAT__,elf32
      %define WRT_PLT wrt ..plt
      %macro GET_GOT 1
        extern _GLOBAL_OFFSET_TABLE_
        push %1
        call %%get_got
        %%sub_offset:
        jmp %%exitGG
        %%get_got:
        ; [esp] holds the return address, i.e. the address of %%sub_offset.
        mov %1, [esp]
        add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
        ret
        %%exitGG:
        %undef GLOBAL
        %define GLOBAL(x) x + %1 wrt ..gotoff
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %elifidn __OUTPUT_FORMAT__,macho32
      %macro GET_GOT 1
        push %1
        ; call/pop pair: leaves the address of %%get_got in the register.
        call %%get_got
        %%get_got:
        pop  %1
        %undef GLOBAL
        %define GLOBAL(x) x + %1 - %%get_got
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %endif
  %endif

  %ifdef CHROMIUM
    %ifidn __OUTPUT_FORMAT__,elf32
      ; Hide ELF data symbols as well, matching the elf64/elfx32 branches
      ; below and the hidden function symbols produced by globalsym().
      %define HIDDEN_DATA(x) x:data hidden
    %elifidn __OUTPUT_FORMAT__,macho32
      %define HIDDEN_DATA(x) x:private_extern
    %else
      %define HIDDEN_DATA(x) x
    %endif
  %else
    %define HIDDEN_DATA(x) x
  %endif
%else
  %macro GET_GOT 1
  %endmacro
  %define GLOBAL(x) rel x
  %ifidn __OUTPUT_FORMAT__,elf64
    %define WRT_PLT wrt ..plt
    %define HIDDEN_DATA(x) x:data hidden
  %elifidn __OUTPUT_FORMAT__,elfx32
    %define WRT_PLT wrt ..plt
    %define HIDDEN_DATA(x) x:data hidden
  %elifidn __OUTPUT_FORMAT__,macho64
    %ifdef CHROMIUM
      %define HIDDEN_DATA(x) x:private_extern
    %else
      %define HIDDEN_DATA(x) x
    %endif
  %else
    %define HIDDEN_DATA(x) x
  %endif
%endif
; Fallbacks for every configuration that did not define these above.
%ifnmacro GET_GOT
  %macro GET_GOT 1
  %endmacro
  %define GLOBAL(x) x
%endif
%ifndef RESTORE_GOT
%define RESTORE_GOT
%endif
%ifndef WRT_PLT
%define WRT_PLT
%endif

; SHADOW_ARGS_TO_STACK <argc>
; Makes the first <argc> arguments addressable through arg(n).
;   32 bit:  nothing to do, the macro is empty.
;   Win64:   stores rcx, rdx, r8, r9 (as many as <argc> covers) into
;            arg(0)..arg(3).
;   other 64 bit: pushes rdi, rsi, rdx, rcx, r8, r9 (as many as <argc>
;            covers); for <argc> > 6 the remaining arguments are copied from
;            [rbp+16], [rbp+24], ... through rax, which is clobbered.
; UNSHADOW_ARGS
;   Empty on 32 bit; 'mov rsp, rbp' on 64 bit.
%if ABI_IS_32BIT
  %macro SHADOW_ARGS_TO_STACK 1
  %endmacro
  %define UNSHADOW_ARGS
%else
%if LIBVPX_YASM_WIN64
  %macro SHADOW_ARGS_TO_STACK 1 ; argc
    %if %1 > 0
        mov arg(0),rcx
    %endif
    %if %1 > 1
        mov arg(1),rdx
    %endif
    %if %1 > 2
        mov arg(2),r8
    %endif
    %if %1 > 3
        mov arg(3),r9
    %endif
  %endmacro
%else
  %macro SHADOW_ARGS_TO_STACK 1 ; argc
    %if %1 > 0
        push rdi
    %endif
    %if %1 > 1
        push rsi
    %endif
    %if %1 > 2
        push rdx
    %endif
    %if %1 > 3
        push rcx
    %endif
    %if %1 > 4
        push r8
    %endif
    %if %1 > 5
        push r9
    %endif
    %if %1 > 6
      ; Copy the arguments beyond the sixth, starting at [rbp+16].
      %assign i %1-6
      %assign off 16
      %rep i
        mov rax,[rbp+off]
        push rax
        %assign off off+8
      %endrep
    %endif
  %endmacro
%endif
  %define UNSHADOW_ARGS mov rsp, rbp
%endif

; Win64 ABI requires that XMM6:XMM15 are callee saved
; SAVE_XMM n, [u]
; store registers 6-n on the stack
; if u is specified, use unaligned movs.
; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return
; address. Typically we follow this up with 'push rbp' - re-aligning the
; stack - but in some cases this is not done and unaligned movs must be used.
;
; SAVE_XMM records n, the mov flavour and the reserved stack size in the
; preprocessor symbols last_xmm, movxmm and xmm_stack_space; RESTORE_XMM
; reads them back. On every other target both macros expand to nothing.
%if LIBVPX_YASM_WIN64
%macro SAVE_XMM 1-2 a
  ; Only xmm6..xmm15 are valid here: below 6 nothing needs saving, and above
  ; 15 the loop would name registers outside the xmm6:xmm15 callee-saved set.
  %if %1 < 6 || %1 > 15
    %error Only xmm registers 6-15 must be preserved
  %else
    %assign last_xmm %1
    %define movxmm movdq %+ %2
    %assign xmm_stack_space ((last_xmm - 5) * 16)
    sub rsp, xmm_stack_space
    %assign i 6
    %rep (last_xmm - 5)
      movxmm [rsp + ((i - 6) * 16)], xmm %+ i
      %assign i i+1
    %endrep
  %endif
%endmacro
%macro RESTORE_XMM 0
  %ifndef last_xmm
    %error RESTORE_XMM must be paired with SAVE_XMM n
  %else
    %assign i last_xmm
    %rep (last_xmm - 5)
      movxmm xmm %+ i, [rsp +((i - 6) * 16)]
      %assign i i-1
    %endrep
    add rsp, xmm_stack_space
    ; there are a couple functions which return from multiple places.
    ; otherwise, we could uncomment these:
    ; %undef last_xmm
    ; %undef xmm_stack_space
    ; %undef movxmm
  %endif
%endmacro
%else
%macro SAVE_XMM 1-2
%endmacro
%macro RESTORE_XMM 0
%endmacro
%endif

; Name of the rodata section
;
; .rodata seems to be an elf-ism, as it doesn't work on OSX.
;
%ifidn __OUTPUT_FORMAT__,macho64
%define SECTION_RODATA section .text
%elifidn __OUTPUT_FORMAT__,macho32
%macro SECTION_RODATA 0
section .text
%endmacro
%elifidn __OUTPUT_FORMAT__,aout
%define SECTION_RODATA section .data
%else
%define SECTION_RODATA section .rodata
%endif


; Tell GNU ld that we don't require an executable stack.
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
section .text
%elifidn __OUTPUT_FORMAT__,elf64
section .note.GNU-stack noalloc noexec nowrite progbits
section .text
%elifidn __OUTPUT_FORMAT__,elfx32
section .note.GNU-stack noalloc noexec nowrite progbits
section .text
%endif

; On Android platforms use lrand48 when building postproc routines. Prior to L
; rand() was not available.
; LIBVPX_RAND names the function that is declared extern here.
%if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1
%ifdef __ANDROID__
extern sym(lrand48)
%define LIBVPX_RAND lrand48
%else
extern sym(rand)
%define LIBVPX_RAND rand
%endif
%endif ; CONFIG_POSTPROC || CONFIG_VP9_POSTPROC