;*****************************************************************************
;* Assembly testing and benchmarking tool
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2012 Henrik Gramner
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
;*****************************************************************************
22
23%define private_prefix checkasm
24%include "libavutil/x86/x86inc.asm"
25
SECTION_RODATA

; NUL-terminated messages handed to fail_func() by the report_fail macro.
error_message:      db "failed to preserve register", 0
error_message_emms: db "failed to issue emms", 0

%if ARCH_X86_64
; Sentinel patterns. The values are arbitrary; random-looking numbers keep the
; odds of an accidental match low.

; x6..x15: 128-bit sentinels, loaded into m6..m15 by the WIN64 path of
; CHECKED_CALL (16-byte aligned because they are read with mova).
ALIGN 16
x6:  dq 0x1a1b2550a612b48c, 0x79445c159ce79064
x7:  dq 0x2eed899d5a28ddcd, 0x86b2536fcd8cf636
x8:  dq 0xb0856806085e7943, 0x3f2bf84fc0fcca4e
x9:  dq 0xacbd382dcf5b8de2, 0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9, 0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a, 0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5, 0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c, 0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf, 0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10, 0x6de8f4c914c334d5

; n7..n14: 64-bit sentinels, loaded into the general-purpose registers
; r7..r14 (x86inc numbering) by CHECKED_CALL.
n7:  dq 0x21f86d66c8ca00ce
n8:  dq 0x75b6ba21077c48ad
n9:  dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif
53
SECTION .text

; Failure reporter, defined outside this file; called by report_fail.
cextern fail_func

; Upper bound on the number of arguments taken by any asm function under test.
; Invariant: (max_args % 4) == 3, which is required for stack alignment.
%define max_args 15
61
62%if ARCH_X86_64
63
;-----------------------------------------------------------------------------
; int checkasm_stack_clobber(uint64_t clobber, ...)
;
; Overwrites (max_args+6) qword slots directly below the caller's stack
; pointer with the value of the first argument. The region is reserved only
; for the duration of the fill and released again before returning.
; NOTE(review): the body never writes a return register explicitly, so the
; "int" in the prototype above should be confirmed against the C declaration.
;-----------------------------------------------------------------------------
cglobal stack_clobber, 1,2
    %define argsize (max_args+6)*8
    SUB         rsp, argsize        ; reserve the region that gets filled
    mov          r1, argsize-8      ; r1 = byte offset of the topmost slot
.fill:
    mov    [rsp+r1], r0             ; write the junk value into this slot
    sub          r1, 8              ; step down one qword
    jge .fill                       ; offset 0 is still written; stop below it
    ADD         rsp, argsize        ; the filled area is now below rsp again
    RET
78
; free_regs is the lowest x86inc register index that CHECKED_CALL fills with a
; sentinel: the check loops run 15-free_regs times, from r14 downwards.
; DECLARE_REG_TMP picks the register behind t0, which CHECKED_CALL uses to
; hold the pointer of the function under test.
%if UNIX64
    %assign free_regs 9
    DECLARE_REG_TMP 7
%else ; WIN64
    %assign free_regs 7
    DECLARE_REG_TMP 4
%endif
86
; report_fail <message>
; Calls fail_func() with the address of <message> as its first argument.
; The tested function's return value in rdx:rax is parked in r9/r10 for the
; duration of the call and put back afterwards, so the caller of
; checked_call still sees it.
%macro report_fail 1
    mov      r9, rax                ; stash low half of the return value
    mov     r10, rdx                ; stash high half of the return value
    lea      r0, [%1]               ; first argument = address of the message
    xor     eax, eax                ; presumably the vector-register count for a
                                    ; variadic callee - confirm against fail_func
    call fail_func
    mov     rdx, r10                ; restore the return value
    mov     rax, r9
%endmacro
96
;-----------------------------------------------------------------------------
; void checkasm_checked_call(void *func, ...)
;
; Calls func with the arguments that follow it, then verifies that func left
; the sentinel-filled registers untouched. A mismatch is reported through
; fail_func() with error_message.
;
; The optional macro parameter selects the variant that gets emitted:
;   (none)  - additionally checks the x87 environment after the call and
;             reports error_message_emms (followed by emms) on a mismatch
;   _emms   - unconditionally executes emms after the call
;   _float  - performs no x87/emms handling at all
;-----------------------------------------------------------------------------
INIT_XMM
%macro CHECKED_CALL 0-1
cglobal checked_call%1, 2,15,16,max_args*8+8
    mov     t0, r0                  ; keep func; r0 is reloaded just below

    ; The caller pushed every argument onto the stack rather than passing any
    ; in registers. That way a tested function which wrongly assumes that
    ; 32-bit ints arrive zero-extended to 64-bit gets caught. Load the
    ; register arguments from their stack locations.
    mov     r0, r6mp
    mov     r1, r7mp
    mov     r2, r8mp
    mov     r3, r9mp
%if UNIX64
    mov     r4, r10mp
    mov     r5, r11mp
    ; Arguments 6 .. max_args-1 go into our own stack area, starting at [rsp].
    %assign idx 6
    %rep max_args-6
        mov     r9, [rsp+stack_offset+(idx+1)*8]
        mov     [rsp+(idx-6)*8], r9
        %assign idx idx+1
    %endrep
%else ; WIN64
    ; Arguments 4 .. max_args-1 go into our own stack area at [rsp+idx*8],
    ; which leaves the first four qword slots unused by the copy.
    %assign idx 4
    %rep max_args-4
        mov     r9, [rsp+stack_offset+(idx+7)*8]
        mov     [rsp+idx*8], r9
        %assign idx idx+1
    %endrep

    ; The first four arguments might be floating-point values, so mirror them
    ; into the matching vector registers as well.
    movq    m0, r0
    movq    m1, r1
    movq    m2, r2
    movq    m3, r3

    ; Fill m6..m15 with their sentinel patterns.
    %assign idx 6
    %rep 16-6
        mova    m %+ idx, [x %+ idx]
        %assign idx idx+1
    %endrep
%endif

    ; Fill r14 down to r<free_regs> with their sentinel values.
    %assign idx 14
    %rep 15-free_regs
        mov     r %+ idx, [n %+ idx]
        %assign idx idx-1
    %endrep

    call    t0

    ; XOR every checked register with its sentinel and accumulate all the
    ; differences in r14; r14 ends up zero only if every register matched.
    %assign idx 14
    %rep 15-free_regs
        xor     r %+ idx, [n %+ idx]
        or      r14, r %+ idx
        %assign idx idx-1
    %endrep

%if WIN64
    ; Same comparison for m6..m15, accumulated in m6. The 128-bit result is
    ; then narrowed to 64 bits and merged into r14.
    %assign idx 6
    %rep 16-6
        pxor    m %+ idx, [x %+ idx]
        por     m6, m %+ idx
        %assign idx idx+1
    %endrep
    packsswb m6, m6
    movq    r5, m6
    or      r14, r5
%endif

    ; ZF still reflects the final "or r14, ..." above. If any register was
    ; modified, flag the failure with a descriptive message; report_fail
    ; takes care of keeping the rdx:rax return value intact.
    jz .regs_preserved
    report_fail error_message
.regs_preserved:
%ifidn %1, _emms
    emms
%elifnidn %1, _float
    ; Dump the x87 environment into our stack area and look at the word at
    ; offset 8 (the tag word in the 32-bit protected-mode FSTENV layout).
    ; Any value other than 0xffff is reported as a missing emms, after which
    ; emms is executed here.
    fstenv  [rsp]
    cmp     word [rsp + 8], 0xffff
    je      .x87_clean
    report_fail error_message_emms
    emms
.x87_clean:
%endif
    RET
%endmacro
184
185%else
186
; Sentinel immediates for r3..r6. The values are arbitrary; random-looking
; numbers keep the odds of an accidental match low.
%define n3 dword 0x6549315c
%define n4 dword 0xe02f3e23
%define n5 dword 0xb78d0d1d
%define n6 dword 0x33627ba7
192
; report_fail <message>
; Calls fail_func() with the address of <message> stored at [esp] as its
; stack argument. The tested function's return value in edx:eax is parked in
; r3/r4 across the call and restored afterwards.
%macro report_fail 1
    mov      r3, eax                ; stash low half of the return value
    mov      r4, edx                ; stash high half of the return value
    lea      r0, [%1]               ; r0 = address of the message
    mov   [esp], r0                 ; place it where fail_func reads its argument
    call fail_func
    mov     edx, r4                 ; restore the return value
    mov     eax, r3
%endmacro
202
%macro CHECKED_CALL 0-1
;-----------------------------------------------------------------------------
; void checkasm_checked_call(void *func, ...)
;
; Calls func with a fresh copy of max_args stack arguments and verifies that
; r3..r6 still hold their sentinel values afterwards. A mismatch is reported
; through fail_func() with error_message.
;
; The optional macro parameter selects the variant that gets emitted:
;   (none)  - additionally checks the x87 environment after the call and
;             reports error_message_emms (followed by emms) on a mismatch
;   _emms   - unconditionally executes emms after the call
;   _float  - performs no x87/emms handling at all
;-----------------------------------------------------------------------------
cglobal checked_call%1, 1,7
    ; Fill the checked registers with their sentinels.
    mov     r3, n3
    mov     r4, n4
    mov     r5, n5
    mov     r6, n6

    ; Re-push max_args dword arguments. Each PUSH lowers esp by 4, so the
    ; constant offset addresses the next lower caller argument every time.
%rep max_args
    PUSH    dword [esp+20+max_args*4]
%endrep

    call    r0

    ; XOR each register with its sentinel and fold the differences into r3;
    ; the final OR leaves ZF set only if all four registers were preserved.
    xor     r3, n3
    xor     r4, n4
    xor     r5, n5
    xor     r6, n6
    or      r3, r4
    or      r5, r6
    or      r3, r5
    jz .regs_preserved
    report_fail error_message
.regs_preserved:
%ifidn %1, _emms
    emms
%elifnidn %1, _float
    ; Dump the x87 environment over the pushed argument copies and look at
    ; the word at offset 8 (the tag word in the 32-bit protected-mode FSTENV
    ; layout). Any value other than 0xffff is reported as a missing emms,
    ; after which emms is executed here.
    fstenv  [esp]
    cmp     word [esp + 8], 0xffff
    je      .x87_clean
    report_fail error_message_emms
    emms
.x87_clean:
%endif
    add     esp, max_args*4         ; drop the pushed argument copies
    REP_RET
%endmacro
239
240%endif ; ARCH_X86_64
241
; Emit the three variants of the wrapper: checked_call, checked_call_emms and
; checked_call_float (each name is passed to cglobal as checked_call%1).
CHECKED_CALL
CHECKED_CALL _emms
CHECKED_CALL _float
245