;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------
;int vp8_block_error_sse2(short *coeff_ptr, short *dcoef_ptr)
;
; Sums (coeff_ptr[i] - dcoef_ptr[i])^2 over 16 words (32 bytes per
; pointer). Both pointers are read with movdqa and therefore must be
; 16-byte aligned.
;
; Out:   rax = 32-bit sum, upper half of the low qword is zero
; Clobb: rsi/rdi (saved and restored here), xmm0-xmm3, xmm5, flags
;-----------------------------------------------------------------------
global sym(vp8_block_error_sse2) PRIVATE
sym(vp8_block_error_sse2):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)              ; coeff_ptr
    mov         rdi,        arg(1)              ; dcoef_ptr

    ; words 0..7: difference, then pairwise sum of squares
    movdqa      xmm0,       [rsi]
    movdqa      xmm1,       [rdi]
    psubw       xmm0,       xmm1
    pmaddwd     xmm0,       xmm0

    ; words 8..15: same treatment
    movdqa      xmm2,       [rsi+16]
    movdqa      xmm3,       [rdi+16]
    psubw       xmm2,       xmm3
    pmaddwd     xmm2,       xmm2

    paddd       xmm0,       xmm2                ; four dword partial sums

    ; horizontal add: interleave the dwords with zero, then fold
    pxor        xmm5,       xmm5
    movdqa      xmm1,       xmm0
    punpckldq   xmm0,       xmm5                ; d0, 0, d1, 0
    punpckhdq   xmm1,       xmm5                ; d2, 0, d3, 0
    paddd       xmm0,       xmm1

    movdqa      xmm1,       xmm0
    psrldq      xmm0,       8                   ; bring the high qword down
    paddd       xmm0,       xmm1

    movq        rax,        xmm0                ; return value

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
;int vp8_mbblock_error_sse2_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
;
; Runs 16 iterations of 32 bytes each (256 words per pointer) and sums
; the squared word differences. A per-word mask built from dc is applied
; to the first 8 differences of every iteration: each 16-bit half of dc
; that is non-zero zeroes the matching lane (low half -> word 0,
; high half -> word 1). With dc == 0 nothing is masked.
;
; Both pointers are read with movdqa and must be 16-byte aligned.
;
; Out:   rax = 32-bit sum, upper half of the low qword is zero
; Clobb: rcx, rsi/rdi (saved and restored here), xmm0-xmm6, flags
;        xmm6 is bracketed by SAVE_XMM / RESTORE_XMM from the include
;        (presumably preserved where the ABI requires it - confirm).
;-----------------------------------------------------------------------
global sym(vp8_mbblock_error_sse2_impl) PRIVATE
sym(vp8_mbblock_error_sse2_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 3
    SAVE_XMM 6
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)              ; coeff_ptr
    mov         rdi,        arg(1)              ; dcoef_ptr

    pxor        xmm6,       xmm6                ; constant zero
    pxor        xmm4,       xmm4                ; running dword sums

    movd        xmm5,       dword ptr arg(2)    ; dc
    por         xmm5,       xmm4                ; xmm4 is still zero: value unchanged
    pcmpeqw     xmm5,       xmm6                ; 0xFFFF in every word that is zero

    mov         rcx,        16                  ; iterations remaining

.next_block:
    ; words 0..7 of this iteration, masked by xmm5
    movdqa      xmm0,       [rsi]
    movdqa      xmm1,       [rdi]
    psubw       xmm0,       xmm1
    pand        xmm0,       xmm5
    pmaddwd     xmm0,       xmm0

    ; words 8..15 of this iteration, unmasked
    movdqa      xmm2,       [rsi+16]
    movdqa      xmm3,       [rdi+16]
    psubw       xmm2,       xmm3
    pmaddwd     xmm2,       xmm2

    paddd       xmm4,       xmm2
    paddd       xmm4,       xmm0

    add         rsi,        32
    add         rdi,        32

    sub         rcx,        1
    jnz         .next_block

    ; horizontal add of the four dword sums in xmm4
    movdqa      xmm0,       xmm4
    punpckldq   xmm0,       xmm6                ; d0, 0, d1, 0
    punpckhdq   xmm4,       xmm6                ; d2, 0, d3, 0
    paddd       xmm0,       xmm4

    movdqa      xmm1,       xmm0
    psrldq      xmm0,       8
    paddd       xmm0,       xmm1

    movq        rax,        xmm0                ; return value

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;int vp8_mbuverror_sse2_impl(short *s_ptr, short *d_ptr);
;
; Sums (s_ptr[i] - d_ptr[i])^2 over 16 iterations of 16 bytes each
; (128 words per pointer). Both pointers are read with movdqa and must
; be 16-byte aligned.
;
; Out:   rax = 32-bit sum, upper half of the low qword is zero
; Clobb: rcx, rsi/rdi (saved and restored here), xmm0-xmm3, flags
;-----------------------------------------------------------------------
global sym(vp8_mbuverror_sse2_impl) PRIVATE
sym(vp8_mbuverror_sse2_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)              ; s_ptr
    mov         rdi,        arg(1)              ; d_ptr

    pxor        xmm3,       xmm3                ; running dword sums
    mov         rcx,        16                  ; iterations remaining

.next_row:
    movdqa      xmm1,       [rsi]
    movdqa      xmm2,       [rdi]
    psubw       xmm1,       xmm2
    pmaddwd     xmm1,       xmm1
    paddd       xmm3,       xmm1

    add         rsi,        16
    add         rdi,        16

    dec         rcx
    jnz         .next_row

    ; horizontal add of the four dword sums in xmm3
    pxor        xmm0,       xmm0
    movdqa      xmm1,       xmm3
    movdqa      xmm2,       xmm3
    punpckldq   xmm1,       xmm0                ; d0, 0, d1, 0
    punpckhdq   xmm2,       xmm0                ; d2, 0, d3, 0
    paddd       xmm1,       xmm2

    movdqa      xmm2,       xmm1
    psrldq      xmm1,       8
    paddd       xmm1,       xmm2

    movq        rax,        xmm1                ; return value

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret