• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12%include "vpx_ports/x86_abi_support.asm"
13
;unsigned int vp8_sad16x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences between a 16x16 block at src_ptr and a
; 16x16 block at ref_ptr. Two rows are processed per loop iteration.
;
; Out:   rax = SAD
; Uses:  rcx, rdx, xmm0-xmm5 (rsi/rdi are saved and restored).
;        Only xmm0-xmm5 are touched: xmm6 and above are callee-saved
;        under the Win64 calling convention and are not saved here.
global sym(vp8_sad16x16_wmt)
sym(vp8_sad16x16_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        ; rcx = src_ptr + 16*src_stride (loop end marker)
        lea             rcx,        [rsi+rax*8]
        lea             rcx,        [rcx+rax*8]

        ; xmm5 accumulates one partial sum per 64-bit half
        pxor            xmm5,       xmm5

x16x16sad_wmt_loop:

        ; ---- first row of the pair ----
        movq            xmm0,       QWORD PTR [rsi]
        movq            xmm2,       QWORD PTR [rsi+8]

        movq            xmm1,       QWORD PTR [rdi]
        movq            xmm3,       QWORD PTR [rdi+8]

        ; Merge the two 8-byte halves into one 16-byte vector. src and ref
        ; are interleaved the same way, so the byte pairing seen by psadbw
        ; is unchanged.
        punpcklbw       xmm0,       xmm2
        punpcklbw       xmm1,       xmm3

        psadbw          xmm0,       xmm1
        paddw           xmm5,       xmm0

        ; ---- second row of the pair ----
        movq            xmm0,       QWORD PTR [rsi+rax]
        movq            xmm2,       QWORD PTR [rsi+rax+8]

        movq            xmm1,       QWORD PTR [rdi+rdx]
        movq            xmm3,       QWORD PTR [rdi+rdx+8]

        punpcklbw       xmm0,       xmm2
        punpcklbw       xmm1,       xmm3

        psadbw          xmm0,       xmm1
        paddw           xmm5,       xmm0

        ; advance both pointers by two rows
        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        cmp             rsi,        rcx
        jne             x16x16sad_wmt_loop

        ; Fold the high-half partial sum into the low half.
        ; Worst case total is 16*16*255 = 65280, which still fits the
        ; 16-bit lane used by paddw.
        movq            xmm0,       xmm5
        psrldq          xmm5,       8

        paddw           xmm0,       xmm5
        movq            rax,        xmm0

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
84
;unsigned int vp8_sad8x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_err)
;
; Sum of absolute differences between an 8-wide, 16-row block at src_ptr
; and one at ref_ptr. Two rows are processed per loop iteration. Before
; each iteration the running sum is compared with max_err; once it is
; greater, the routine returns the running sum without finishing.
;
; Out:   rax = SAD (or the partial sum on early exit)
; Uses:  rcx, rdx, mm0-mm3, mm7 (rbx/rsi/rdi are saved and restored).
;        MMX state is left dirty: no emms is executed here.
global sym(vp8_sad8x16_wmt)
sym(vp8_sad8x16_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        ; rcx = src_ptr + 16*src_stride (loop end marker)
        lea             rcx,        [rsi+rbx*8]
        lea             rcx,        [rcx+rbx*8]

        pxor            mm7,        mm7     ; running SAD

x8x16sad_wmt_loop:

        ; Early-out check. max_err is a 32-bit int, so compare only eax
        ; against the low dword of its slot; a 64-bit compare would also
        ; read the upper 4 bytes of the slot, which the 64-bit calling
        ; conventions leave undefined for an int argument.
        movq            rax,        mm7
        cmp             eax,        arg(4)
        jg              x8x16sad_wmt_early_exit

        movq            mm0,        QWORD PTR [rsi]
        movq            mm1,        QWORD PTR [rdi]

        movq            mm2,        QWORD PTR [rsi+rbx]
        movq            mm3,        QWORD PTR [rdi+rdx]

        psadbw          mm0,        mm1
        psadbw          mm2,        mm3

        ; advance both pointers by two rows
        lea             rsi,        [rsi+rbx*2]
        lea             rdi,        [rdi+rdx*2]

        paddw           mm7,        mm0
        paddw           mm7,        mm2

        cmp             rsi,        rcx
        jne             x8x16sad_wmt_loop

        movq            rax,        mm7

x8x16sad_wmt_early_exit:
        ; rax already holds the running sum when arriving via the early exit

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret
147
148
;unsigned int vp8_sad8x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_err)
;
; Sum of absolute differences between an 8x8 block at src_ptr and an 8x8
; block at ref_ptr, one row per loop iteration. Before each row the
; running sum is compared with max_err (fifth argument, read via arg(4));
; once it is greater, the routine returns the running sum without
; finishing.
;
; Out:   rax = SAD (or the partial sum on early exit)
; Uses:  rcx, rdx, mm0, mm1, mm7 (rbx/rsi/rdi are saved and restored).
;        MMX state is left dirty: no emms is executed here.
global sym(vp8_sad8x8_wmt)
sym(vp8_sad8x8_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        lea             rcx,        [rsi+rbx*8]     ; rcx = src_ptr + 8*src_stride
        pxor            mm7,        mm7             ; running SAD

x8x8sad_wmt_loop:

        ; Early-out check. max_err is a 32-bit int, so compare only eax
        ; against the low dword of its slot; a 64-bit compare would also
        ; read the upper 4 bytes of the slot, which the 64-bit calling
        ; conventions leave undefined for an int argument.
        movq            rax,        mm7
        cmp             eax,        arg(4)
        jg              x8x8sad_wmt_early_exit

        movq            mm0,        QWORD PTR [rsi]
        movq            mm1,        QWORD PTR [rdi]

        psadbw          mm0,        mm1

        ; advance both pointers by one row
        lea             rsi,        [rsi+rbx]
        add             rdi,        rdx

        paddw           mm7,        mm0

        cmp             rsi,        rcx
        jne             x8x8sad_wmt_loop

        movq            rax,        mm7

x8x8sad_wmt_early_exit:
        ; rax already holds the running sum when arriving via the early exit

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret
201
;unsigned int vp8_sad4x4_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences between a 4x4 block at src_ptr and a 4x4
; block at ref_ptr. Straight-line code: rows are handled in two pairs,
; each pair packed into one 8-byte MMX register so a single psadbw
; covers two rows.
;
; Out:   rax = SAD
; Uses:  rdx, mm0-mm7 (rsi/rdi are saved and restored).
;        MMX state is left dirty: no emms is executed here.
global sym(vp8_sad4x4_wmt)
sym(vp8_sad4x4_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

        mov             rsi,        arg(0) ;src_ptr
        movsxd          rax,        dword ptr arg(1) ;src_stride

        mov             rdi,        arg(2) ;ref_ptr
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        ; ---- rows 0 and 1 ----
        movd            mm0,        DWORD PTR [rsi]
        movd            mm2,        DWORD PTR [rsi+rax]
        punpcklbw       mm0,        mm2             ; src rows 0/1 interleaved

        movd            mm1,        DWORD PTR [rdi]
        movd            mm3,        DWORD PTR [rdi+rdx]
        punpcklbw       mm1,        mm3             ; ref rows 0/1, same layout

        ; step both pointers to row 2
        lea             rsi,        [rsi+rax*2]
        lea             rdi,        [rdi+rdx*2]

        psadbw          mm0,        mm1

        ; ---- rows 2 and 3 ----
        movd            mm4,        DWORD PTR [rsi]
        movd            mm6,        DWORD PTR [rsi+rax]
        punpcklbw       mm4,        mm6             ; src rows 2/3 interleaved

        movd            mm5,        DWORD PTR [rdi]
        movd            mm7,        DWORD PTR [rdi+rdx]
        punpcklbw       mm5,        mm7             ; ref rows 2/3, same layout

        psadbw          mm4,        mm5

        ; combine the two pair sums and return
        paddw           mm0,        mm4
        movq            rax,        mm0

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
255
256
;unsigned int vp8_sad16x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_err)
;
; Sum of absolute differences between a 16-wide, 8-row block at src_ptr
; and one at ref_ptr. Two rows are processed per loop iteration, each row
; as two 8-byte halves. Before each iteration the running sum is compared
; with max_err (fifth argument, read via arg(4)); once it is greater, the
; routine returns the running sum without finishing.
;
; Out:   rax = SAD (or the partial sum on early exit)
; Uses:  rcx, rdx, mm0-mm5, mm7 (rbx/rsi/rdi are saved and restored).
;        MMX state is left dirty: no emms is executed here.
global sym(vp8_sad16x8_wmt)
sym(vp8_sad16x8_wmt):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog


        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride

        lea             rcx,        [rsi+rbx*8]     ; rcx = src_ptr + 8*src_stride
        pxor            mm7,        mm7             ; running SAD

x16x8sad_wmt_loop:

        ; Early-out check. max_err is a 32-bit int, so compare only eax
        ; against the low dword of its slot; a 64-bit compare would also
        ; read the upper 4 bytes of the slot, which the 64-bit calling
        ; conventions leave undefined for an int argument.
        movq            rax,        mm7
        cmp             eax,        arg(4)
        jg              x16x8sad_wmt_early_exit

        ; first row: left and right 8-byte halves
        movq            mm0,        QWORD PTR [rsi]
        movq            mm2,        QWORD PTR [rsi+8]

        movq            mm1,        QWORD PTR [rdi]
        movq            mm3,        QWORD PTR [rdi+8]

        ; second row, left half
        movq            mm4,        QWORD PTR [rsi+rbx]
        movq            mm5,        QWORD PTR [rdi+rdx]

        psadbw          mm0,        mm1
        psadbw          mm2,        mm3

        ; second row, right half (mm1/mm3 are free again after the psadbw above)
        movq            mm1,        QWORD PTR [rsi+rbx+8]
        movq            mm3,        QWORD PTR [rdi+rdx+8]

        psadbw          mm4,        mm5
        psadbw          mm1,        mm3

        ; advance both pointers by two rows
        lea             rsi,        [rsi+rbx*2]
        lea             rdi,        [rdi+rdx*2]

        paddw           mm0,        mm2             ; first-row total
        paddw           mm4,        mm1             ; second-row total

        paddw           mm7,        mm0
        paddw           mm7,        mm4

        cmp             rsi,        rcx
        jne             x16x8sad_wmt_loop

        movq            rax,        mm7

x16x8sad_wmt_early_exit:
        ; rax already holds the running sum when arriving via the early exit

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret
329