;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

%include "vpx_ports/x86_abi_support.asm"
13
;-----------------------------------------------------------------------
; int vp8_block_error_sse2(short *coeff_ptr, short *dcoef_ptr)
;
; Sum of squared differences between the 16 shorts at coeff_ptr and the
; 16 shorts at dcoef_ptr.
;
;   In:    arg(0) = coeff_ptr, arg(1) = dcoef_ptr
;          Both are accessed with aligned 128-bit operations, so each
;          must be 16-byte aligned.
;   Out:   rax = sum (accumulated with 32-bit lane adds, so it wraps
;          modulo 2^32; the upper half of the returned qword is zero)
;   Uses:  xmm0, xmm1, xmm2, xmm5, flags untouched by the SIMD code;
;          rsi and rdi are saved and restored.
;-----------------------------------------------------------------------
global sym(vp8_block_error_sse2) PRIVATE
sym(vp8_block_error_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

        mov         rsi,        arg(0)          ; coeff_ptr
        mov         rdi,        arg(1)          ; dcoef_ptr

        pxor        xmm5,       xmm5            ; zero, used to widen dwords below

        ; coefficients 0..7: diff, then square and pair-sum into 4 dwords
        movdqa      xmm0,       [rsi]
        psubw       xmm0,       [rdi]
        pmaddwd     xmm0,       xmm0

        ; coefficients 8..15: same treatment
        movdqa      xmm2,       [rsi+16]
        psubw       xmm2,       [rdi+16]
        pmaddwd     xmm2,       xmm2

        paddd       xmm0,       xmm2            ; 4 partial sums (dwords)

        ; horizontal add: interleave with zero so each partial sum sits
        ; in the low half of a qword, then fold 4 -> 2 -> 1
        movdqa      xmm1,       xmm0
        punpckldq   xmm0,       xmm5            ; [p0, 0, p1, 0]
        punpckhdq   xmm1,       xmm5            ; [p2, 0, p3, 0]
        paddd       xmm0,       xmm1            ; [p0+p2, 0, p1+p3, 0]

        movdqa      xmm1,       xmm0
        psrldq      xmm1,       8               ; bring the high qword down
        paddd       xmm0,       xmm1            ; low dword = total

        movq        rax,        xmm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
; int vp8_mbblock_error_sse2_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
;
; Sum of squared differences over 16 consecutive blocks of 16 shorts
; (32 bytes per block, 512 bytes per pointer in total).
;
;   In:    arg(0) = coeff_ptr, arg(1) = dcoef_ptr
;          Both are read with movdqa, so each must be 16-byte aligned.
;          arg(2) = dc. It is turned into a per-word mask by comparing
;          its 16-bit lanes with zero: a lane of the mask is all-ones
;          where the matching word of dc is zero. With dc == 0 every
;          difference is kept; with dc == 1 the first difference of
;          every block is cleared before squaring.
;          NOTE(review): callers presumably only pass 0 or 1 - confirm.
;   Out:   rax = sum (32-bit lane adds, wraps modulo 2^32; upper half
;          of the returned qword is zero)
;   Uses:  rcx, xmm0-xmm5; xmm6 is preserved via SAVE_XMM/RESTORE_XMM;
;          rsi and rdi are saved and restored.
;-----------------------------------------------------------------------
global sym(vp8_mbblock_error_sse2_impl) PRIVATE
sym(vp8_mbblock_error_sse2_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 3
    SAVE_XMM 6
    push        rsi
    push        rdi
    ; end prolog

        mov         rsi,        arg(0)          ; coeff_ptr
        mov         rdi,        arg(1)          ; dcoef_ptr

        pxor        xmm6,       xmm6            ; constant zero
        pxor        xmm4,       xmm4            ; running sum (4 dwords)

        ; Build the mask. movd zero-fills the rest of xmm5, so the value
        ; can be compared directly without merging it with a zero register.
        movd        xmm5,       dword ptr arg(2) ; dc
        pcmpeqw     xmm5,       xmm6

        mov         rcx,        16              ; blocks remaining

.mberror_loop:
        movdqa      xmm0,       [rsi]
        movdqa      xmm1,       [rdi]

        movdqa      xmm2,       [rsi+16]
        movdqa      xmm3,       [rdi+16]

        ; first half of the block: apply the mask before squaring
        psubw       xmm0,       xmm1
        pand        xmm0,       xmm5
        pmaddwd     xmm0,       xmm0

        ; second half of the block: never masked
        psubw       xmm2,       xmm3
        pmaddwd     xmm2,       xmm2

        paddd       xmm4,       xmm0
        paddd       xmm4,       xmm2

        add         rsi,        32
        add         rdi,        32

        sub         rcx,        1               ; SIMD ops leave flags alone
        jnz         .mberror_loop

        ; horizontal add of the 4 dword partial sums
        movdqa      xmm0,       xmm4
        punpckldq   xmm0,       xmm6            ; [p0, 0, p1, 0]
        punpckhdq   xmm4,       xmm6            ; [p2, 0, p3, 0]
        paddd       xmm0,       xmm4            ; [p0+p2, 0, p1+p3, 0]

        movdqa      xmm1,       xmm0
        psrldq      xmm0,       8
        paddd       xmm0,       xmm1            ; low dword = total

        movq        rax,        xmm0

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
; int vp8_mbuverror_sse2_impl(short *s_ptr, short *d_ptr);
;
; Sum of squared differences between two runs of 128 shorts
; (16 iterations of 8 shorts, 256 bytes per pointer).
;
;   In:    arg(0) = s_ptr, arg(1) = d_ptr
;          Both are accessed with aligned 128-bit operations, so each
;          must be 16-byte aligned.
;   Out:   rax = sum (32-bit lane adds, wraps modulo 2^32; upper half
;          of the returned qword is zero)
;   Uses:  rcx, xmm0, xmm1, xmm3; rsi and rdi are saved and restored.
;-----------------------------------------------------------------------
global sym(vp8_mbuverror_sse2_impl) PRIVATE
sym(vp8_mbuverror_sse2_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

        mov         rsi,        arg(0)          ; s_ptr
        mov         rdi,        arg(1)          ; d_ptr

        pxor        xmm3,       xmm3            ; running sum (4 dwords)
        mov         rcx,        16              ; 16-byte groups remaining

.uv_next:
        movdqa      xmm1,       [rsi]
        psubw       xmm1,       [rdi]           ; 8 differences
        pmaddwd     xmm1,       xmm1            ; square and pair-sum
        paddd       xmm3,       xmm1

        add         rsi,        16
        add         rdi,        16

        sub         rcx,        1
        jnz         .uv_next

        ; horizontal add of the 4 dword partial sums
        pxor        xmm0,       xmm0
        movdqa      xmm1,       xmm3
        punpckldq   xmm3,       xmm0            ; [p0, 0, p1, 0]
        punpckhdq   xmm1,       xmm0            ; [p2, 0, p3, 0]
        paddd       xmm3,       xmm1            ; [p0+p2, 0, p1+p3, 0]

        movdqa      xmm1,       xmm3
        psrldq      xmm1,       8               ; bring the high qword down
        paddd       xmm3,       xmm1            ; low dword = total

        movq        rax,        xmm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
