;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; Sizes shared by the routines in this file.
%define ERR_XMM_BYTES   16      ; bytes fetched by one movdqa (8 words)
%define ERR_MB_TRIPS    16      ; vp8_mbblock_error_sse2_impl: 32 bytes per trip
%define ERR_UV_TRIPS    16      ; vp8_mbuverror_sse2_impl: 16 bytes per trip

SECTION .text

;-----------------------------------------------------------------------
;int vp8_block_error_sse2(short *coeff_ptr, short *dcoef_ptr)
;
; Sum of squared differences over the first 16 words of the two arrays.
;   In:    arg(0) = coeff_ptr, arg(1) = dcoef_ptr; both are read with
;          movdqa, so each must be 16-byte aligned.
;   Out:   rax = sum of (coeff[i] - dcoef[i])^2 for i = 0..15.
;          Differences wrap at 16 bits (psubw) and the running total
;          wraps at 32 bits (paddd).
;   Uses:  rsi, rdi (saved/restored here), xmm0-xmm3, xmm5.
; Argument access and frame handling go through the macros of the
; included ABI support file.
;-----------------------------------------------------------------------
global sym(vp8_block_error_sse2) PRIVATE
sym(vp8_block_error_sse2):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; frame is set up

    mov         rsi,        arg(0)                  ; rsi = coeff_ptr
    mov         rdi,        arg(1)                  ; rdi = dcoef_ptr
    pxor        xmm5,       xmm5                    ; zero, used to widen below

    ; words 0..7 and 8..15 of both inputs
    movdqa      xmm0,       [rsi]
    movdqa      xmm1,       [rdi]
    movdqa      xmm2,       [rsi + ERR_XMM_BYTES]
    movdqa      xmm3,       [rdi + ERR_XMM_BYTES]

    ; pmaddwd of a register with itself squares each word and adds
    ; neighbouring squares, giving four dword partial sums.
    psubw       xmm0,       xmm1
    pmaddwd     xmm0,       xmm0
    psubw       xmm2,       xmm3
    pmaddwd     xmm2,       xmm2
    paddd       xmm0,       xmm2                    ; xmm0 = {d0, d1, d2, d3}

    ; Horizontal add. Interleaving with zero puts every partial sum in
    ; the low dword of a qword lane, so the upper dword of the result
    ; stays clear.
    movdqa      xmm1,       xmm0
    punpckldq   xmm0,       xmm5                    ; {d0, 0, d1, 0}
    punpckhdq   xmm1,       xmm5                    ; {d2, 0, d3, 0}
    paddd       xmm0,       xmm1                    ; {d0+d2, 0, d1+d3, 0}

    movdqa      xmm1,       xmm0
    psrldq      xmm1,       8                       ; bring the high lane down
    paddd       xmm0,       xmm1                    ; low dword = d0+d1+d2+d3

    movq        rax,        xmm0

    pop         rdi
    pop         rsi
    ; tear down the frame
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------
;int vp8_mbblock_error_sse2_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
;
; Sum of squared differences over 16 consecutive 32-byte groups (16 words
; each, 256 words in total) of the two arrays, with an optional mask on
; the leading word of every group.
;   In:    arg(0) = coeff_ptr, arg(1) = dcoef_ptr (movdqa loads, so
;          16-byte aligned), arg(2) = dc.
;   Out:   rax = accumulated sum; 16-bit wrap on the differences and
;          32-bit wrap on the total, as produced by psubw / paddd.
;   Mask:  the difference of words 0..7 of each group is ANDed with a
;          mask that is 0xFFFF in every word whose counterpart in the
;          zero-extended dc is 0.  Words 2..7 are therefore always kept;
;          a non-zero low word of dc drops word 0 of every group.
;          NOTE(review): word 0 is presumably the DC coefficient of a
;          block and dc is expected to be 0 or 1 - confirm with callers.
;          A dc with non-zero bits 16..31 would also drop word 1.
;   Uses:  rcx, rsi, rdi (rsi/rdi saved here), xmm0-xmm6; xmm6 is
;          handled by SAVE_XMM / RESTORE_XMM from the ABI support file.
;-----------------------------------------------------------------------
global sym(vp8_mbblock_error_sse2_impl) PRIVATE
sym(vp8_mbblock_error_sse2_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 3
    SAVE_XMM 6
    push        rsi
    push        rdi
    ; frame is set up

    mov         rsi,        arg(0)                  ; rsi = coeff_ptr
    mov         rdi,        arg(1)                  ; rdi = dcoef_ptr
    movd        xmm5,       dword ptr arg(2)        ; dc, zero-extended to 128 bits

    pxor        xmm4,       xmm4                    ; xmm4 = running dword sums
    pxor        xmm6,       xmm6                    ; xmm6 = constant zero

    por         xmm5,       xmm4                    ; xmm4 is zero: xmm5 unchanged
    pcmpeqw     xmm5,       xmm6                    ; xmm5 = word mask (see header)

    mov         rcx,        ERR_MB_TRIPS            ; rcx = groups left, > 0 in loop

.mb_group_loop:
    movdqa      xmm0,       [rsi]                   ; words 0..7 of the group
    movdqa      xmm1,       [rdi]
    movdqa      xmm2,       [rsi + ERR_XMM_BYTES]   ; words 8..15 of the group
    movdqa      xmm3,       [rdi + ERR_XMM_BYTES]

    ; first half: masked difference, then squared and pair-summed
    psubw       xmm0,       xmm1
    pand        xmm0,       xmm5
    pmaddwd     xmm0,       xmm0

    ; second half: never masked
    psubw       xmm2,       xmm3
    pmaddwd     xmm2,       xmm2

    paddd       xmm4,       xmm0
    paddd       xmm4,       xmm2

    add         rsi,        2 * ERR_XMM_BYTES
    add         rdi,        2 * ERR_XMM_BYTES

    dec         rcx                                 ; must be the last flag writer
    jnz         .mb_group_loop

    ; Horizontal add of the four dword sums in xmm4; the zero interleave
    ; keeps the upper dword of the result clear.
    movdqa      xmm0,       xmm4
    punpckldq   xmm0,       xmm6                    ; {d0, 0, d1, 0}
    punpckhdq   xmm4,       xmm6                    ; {d2, 0, d3, 0}
    paddd       xmm0,       xmm4                    ; {d0+d2, 0, d1+d3, 0}

    movdqa      xmm1,       xmm0
    psrldq      xmm1,       8                       ; bring the high lane down
    paddd       xmm0,       xmm1                    ; low dword = d0+d1+d2+d3

    movq        rax,        xmm0

    pop         rdi
    pop         rsi
    ; tear down the frame
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;int vp8_mbuverror_sse2_impl(short *s_ptr, short *d_ptr);
;
; Sum of squared differences over 16 consecutive 16-byte vectors
; (8 words each, 128 words in total) of the two arrays.
;   In:    arg(0) = s_ptr, arg(1) = d_ptr; movdqa loads, so both must
;          be 16-byte aligned.
;   Out:   rax = accumulated sum; 16-bit wrap on the differences and
;          32-bit wrap on the total, as produced by psubw / paddd.
;   Uses:  rcx, rsi, rdi (rsi/rdi saved here), xmm0-xmm3.
;-----------------------------------------------------------------------
global sym(vp8_mbuverror_sse2_impl) PRIVATE
sym(vp8_mbuverror_sse2_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; frame is set up

    mov         rsi,        arg(0)                  ; rsi = s_ptr
    mov         rdi,        arg(1)                  ; rdi = d_ptr

    pxor        xmm0,       xmm0                    ; constant zero for the final widening
    pxor        xmm3,       xmm3                    ; xmm3 = running dword sums
    mov         rcx,        ERR_UV_TRIPS            ; rcx = vectors left, > 0 in loop

.uv_vector_loop:
    movdqa      xmm1,       [rsi]
    movdqa      xmm2,       [rdi]

    psubw       xmm1,       xmm2
    pmaddwd     xmm1,       xmm1                    ; squares, summed in pairs
    paddd       xmm3,       xmm1

    add         rsi,        ERR_XMM_BYTES
    add         rdi,        ERR_XMM_BYTES

    dec         rcx                                 ; must be the last flag writer
    jnz         .uv_vector_loop

    ; Horizontal add of the four dword sums in xmm3; the zero interleave
    ; keeps the upper dword of the result clear.
    movdqa      xmm1,       xmm3
    movdqa      xmm2,       xmm3
    punpckldq   xmm1,       xmm0                    ; {d0, 0, d1, 0}
    punpckhdq   xmm2,       xmm0                    ; {d2, 0, d3, 0}
    paddd       xmm1,       xmm2                    ; {d0+d2, 0, d1+d3, 0}

    movdqa      xmm2,       xmm1
    psrldq      xmm2,       8                       ; bring the high lane down
    paddd       xmm1,       xmm2                    ; low dword = d0+d1+d2+d3

    movq        rax,        xmm1

    pop         rdi
    pop         rsi
    ; tear down the frame
    UNSHADOW_ARGS
    pop         rbp
    ret