;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; Sum-of-absolute-differences (SAD) kernels for 16x16, 8x16, 8x8, 4x4 and
; 16x8 pixel blocks.  Every routine walks a source block and a reference
; block row by row (each with its own stride), accumulates psadbw results
; and returns the total in rax.
;
; NOTE(review): the routines that use the mm registers return without
; executing emms; presumably the callers restore the x87/MMX state
; themselves -- confirm against the call sites.

;unsigned int vp8_sad16x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Returns the SAD of a 16-byte-wide, 16-row block.  Two rows are handled
; per loop iteration; there is no early exit in this variant.
global sym(vp8_sad16x16_wmt)
sym(vp8_sad16x16_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rax,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    ; rcx = src_ptr + 16 * src_stride: the loop's end sentinel
    lea         rcx,        [rsi+rax*8]

    lea         rcx,        [rcx+rax*8]
    pxor        xmm7,       xmm7                ; xmm7 = running SAD

x16x16sad_wmt_loop:

    ; row 0: low and high 8 bytes of source (xmm0/xmm2) and reference
    ; (xmm1/xmm3)
    movq        xmm0,       QWORD PTR [rsi]
    movq        xmm2,       QWORD PTR [rsi+8]

    movq        xmm1,       QWORD PTR [rdi]
    movq        xmm3,       QWORD PTR [rdi+8]

    ; row 1: low 8 bytes
    movq        xmm4,       QWORD PTR [rsi+rax]
    movq        xmm5,       QWORD PTR [rdi+rdx]

    ; Interleave the two 8-byte halves into one 16-byte register.  Source
    ; and reference are shuffled identically, so corresponding bytes still
    ; line up and the psadbw total is unaffected by the byte order.
    punpcklbw   xmm0,       xmm2
    punpcklbw   xmm1,       xmm3

    psadbw      xmm0,       xmm1                ; SAD of row 0
    movq        xmm6,       QWORD PTR [rsi+rax+8]   ; row 1: high 8 bytes

    movq        xmm3,       QWORD PTR [rdi+rdx+8]
    lea         rsi,        [rsi+rax*2]         ; advance source two rows

    lea         rdi,        [rdi+rdx*2]         ; advance reference two rows
    punpcklbw   xmm4,       xmm6

    punpcklbw   xmm5,       xmm3
    psadbw      xmm4,       xmm5                ; SAD of row 1

    paddw       xmm7,       xmm0
    paddw       xmm7,       xmm4

    cmp         rsi,        rcx
    jne         x16x16sad_wmt_loop

    ; psadbw leaves one partial sum in each 64-bit half; fold the high
    ; half onto the low half to get the final total.
    movq        xmm0,       xmm7
    psrldq      xmm7,       8

    paddw       xmm0,       xmm7
    movq        rax,        xmm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;unsigned int vp8_sad8x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_err)
;
; Returns the SAD of an 8-byte-wide, 16-row block.  Before each pair of
; rows the running total is compared with max_err; once it is greater,
; the routine stops and returns the partial total accumulated so far.
global sym(vp8_sad8x16_wmt)
sym(vp8_sad8x16_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rbx,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    ; rcx = src_ptr + 16 * src_stride: the loop's end sentinel
    lea         rcx,        [rsi+rbx*8]

    lea         rcx,        [rcx+rbx*8]
    pxor        mm7,        mm7                 ; mm7 = running SAD

x8x16sad_wmt_loop:

    ; rax doubles as the return value if the early exit is taken.
    movq        rax,        mm7
    ; max_err is a C int: compare only the low 32 bits, because the upper
    ; half of its 64-bit argument slot is not guaranteed to be defined.
    cmp         eax,        arg(4)
    jg          x8x16sad_wmt_early_exit

    movq        mm0,        QWORD PTR [rsi]
    movq        mm1,        QWORD PTR [rdi]

    movq        mm2,        QWORD PTR [rsi+rbx]
    movq        mm3,        QWORD PTR [rdi+rdx]

    psadbw      mm0,        mm1                 ; SAD of row 0
    psadbw      mm2,        mm3                 ; SAD of row 1

    lea         rsi,        [rsi+rbx*2]         ; advance two rows
    lea         rdi,        [rdi+rdx*2]

    paddw       mm7,        mm0
    paddw       mm7,        mm2

    cmp         rsi,        rcx
    jne         x8x16sad_wmt_loop

    movq        rax,        mm7

x8x16sad_wmt_early_exit:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret


;unsigned int vp8_sad8x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_err)
;
; Returns the SAD of an 8-byte-wide, 8-row block.  The fifth argument is
; read as an early-exit threshold (named max_err here after
; vp8_sad8x16_wmt): before each row the running total is compared with
; it, and once the total is greater the partial total is returned.
global sym(vp8_sad8x8_wmt)
sym(vp8_sad8x8_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rbx,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    lea         rcx,        [rsi+rbx*8]         ; end sentinel: 8 rows
    pxor        mm7,        mm7                 ; mm7 = running SAD

x8x8sad_wmt_loop:

    ; rax doubles as the return value if the early exit is taken.
    movq        rax,        mm7
    ; 32-bit compare: the threshold is a C int, so the upper half of its
    ; 64-bit argument slot must not take part in the comparison.
    cmp         eax,        arg(4)
    jg          x8x8sad_wmt_early_exit

    movq        mm0,        QWORD PTR [rsi]
    movq        mm1,        QWORD PTR [rdi]

    psadbw      mm0,        mm1                 ; SAD of this row
    lea         rsi,        [rsi+rbx]           ; advance one row

    add         rdi,        rdx
    paddw       mm7,        mm0

    cmp         rsi,        rcx
    jne         x8x8sad_wmt_loop

    movq        rax,        mm7
x8x8sad_wmt_early_exit:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret

;unsigned int vp8_sad4x4_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Returns the SAD of a 4-byte-wide, 4-row block.  Fully unrolled: rows are
; loaded four bytes at a time and packed two rows per register so that
; two psadbw instructions cover the whole block.
global sym(vp8_sad4x4_wmt)
sym(vp8_sad4x4_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rax,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    ; rows 0 and 1
    movd        mm0,        DWORD PTR [rsi]
    movd        mm1,        DWORD PTR [rdi]

    movd        mm2,        DWORD PTR [rsi+rax]
    movd        mm3,        DWORD PTR [rdi+rdx]

    ; Interleave two 4-byte rows into one 8-byte register; source and
    ; reference use the same shuffle, so the byte pairing is preserved.
    punpcklbw   mm0,        mm2
    punpcklbw   mm1,        mm3

    psadbw      mm0,        mm1                 ; SAD of rows 0-1
    lea         rsi,        [rsi+rax*2]

    lea         rdi,        [rdi+rdx*2]
    ; rows 2 and 3
    movd        mm4,        DWORD PTR [rsi]

    movd        mm5,        DWORD PTR [rdi]
    movd        mm6,        DWORD PTR [rsi+rax]

    movd        mm7,        DWORD PTR [rdi+rdx]
    punpcklbw   mm4,        mm6

    punpcklbw   mm5,        mm7
    psadbw      mm4,        mm5                 ; SAD of rows 2-3

    paddw       mm0,        mm4
    movq        rax,        mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;unsigned int vp8_sad16x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_err)
;
; Returns the SAD of a 16-byte-wide, 8-row block.  The fifth argument is
; read as an early-exit threshold (named max_err here after
; vp8_sad8x16_wmt): before each pair of rows the running total is
; compared with it, and once the total is greater the partial total is
; returned.
global sym(vp8_sad16x8_wmt)
sym(vp8_sad16x8_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog


    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;ref_ptr

    movsxd      rbx,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;ref_stride

    lea         rcx,        [rsi+rbx*8]         ; end sentinel: 8 rows
    pxor        mm7,        mm7                 ; mm7 = running SAD

x16x8sad_wmt_loop:

    ; rax doubles as the return value if the early exit is taken.
    movq        rax,        mm7
    ; 32-bit compare: the threshold is a C int, so the upper half of its
    ; 64-bit argument slot must not take part in the comparison.
    cmp         eax,        arg(4)
    jg          x16x8sad_wmt_early_exit

    ; row 0: low/high halves of source (mm0/mm2) and reference (mm1/mm3)
    movq        mm0,        QWORD PTR [rsi]
    movq        mm2,        QWORD PTR [rsi+8]

    movq        mm1,        QWORD PTR [rdi]
    movq        mm3,        QWORD PTR [rdi+8]

    ; row 1: low halves
    movq        mm4,        QWORD PTR [rsi+rbx]
    movq        mm5,        QWORD PTR [rdi+rdx]

    psadbw      mm0,        mm1
    psadbw      mm2,        mm3

    ; row 1: high halves (mm1/mm3 are free again after the psadbw above)
    movq        mm1,        QWORD PTR [rsi+rbx+8]
    movq        mm3,        QWORD PTR [rdi+rdx+8]

    psadbw      mm4,        mm5
    psadbw      mm1,        mm3

    lea         rsi,        [rsi+rbx*2]         ; advance two rows
    lea         rdi,        [rdi+rdx*2]

    paddw       mm0,        mm2                 ; row 0 total
    paddw       mm4,        mm1                 ; row 1 total

    paddw       mm7,        mm0
    paddw       mm7,        mm4

    cmp         rsi,        rcx
    jne         x16x8sad_wmt_loop

    movq        rax,        mm7

x16x8sad_wmt_early_exit:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret