;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"


;-----------------------------------------------------------------------
;void vp8_copy32xn_sse2(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *dst_ptr,
;    int  dst_stride,
;    int height);
;
; Copies `height` rows of 32 bytes each from src_ptr to dst_ptr.
; Rows are copied four at a time while at least four remain, then one
; at a time. A height of zero or less copies nothing.
;
; Alignment: the source is read with movdqu (no alignment requirement);
; the destination is written with movdqa, so every destination row
; address (dst_ptr and dst_ptr + k*dst_stride) must be 16-byte aligned.
;
; Register roles after the prolog:
;   rsi = current source row        rax = src_stride (sign-extended)
;   rdi = current destination row   rdx = dst_stride (sign-extended)
;   rcx = rows remaining
; Clobbers: rax, rcx, rdx, xmm0-xmm7, flags. rsi/rdi are pushed and
; popped here; XMM preservation is delegated to SAVE_XMM/RESTORE_XMM
; (macros from x86_abi_support.asm -- exact behaviour is ABI dependent,
; see that file).
;-----------------------------------------------------------------------
global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src_ptr
    mov         rdi,        arg(2) ;dst_ptr

    movsxd      rax,        dword ptr arg(1) ;src_stride
    movsxd      rdx,        dword ptr arg(3) ;dst_stride
    movsxd      rcx,        dword ptr arg(4) ;height

    ; Fewer than four rows requested: skip the unrolled loop so we never
    ; touch rows beyond `height` (signed compare also catches negatives).
    cmp         rcx,        4
    jl          .block_copy_sse2_tail

    ; Invariant: rcx >= 4 on every entry to this loop.
.block_copy_sse2_loopx4:
    ; load rows 0 and 1
    movdqu      xmm0,       XMMWORD PTR [rsi]
    movdqu      xmm1,       XMMWORD PTR [rsi + 16]
    movdqu      xmm2,       XMMWORD PTR [rsi + rax]
    movdqu      xmm3,       XMMWORD PTR [rsi + rax + 16]

    lea         rsi,        [rsi+rax*2]

    ; load rows 2 and 3
    movdqu      xmm4,       XMMWORD PTR [rsi]
    movdqu      xmm5,       XMMWORD PTR [rsi + 16]
    movdqu      xmm6,       XMMWORD PTR [rsi + rax]
    movdqu      xmm7,       XMMWORD PTR [rsi + rax + 16]

    lea         rsi,        [rsi+rax*2]

    ; store rows 0 and 1 (aligned stores)
    movdqa      XMMWORD PTR [rdi], xmm0
    movdqa      XMMWORD PTR [rdi + 16], xmm1
    movdqa      XMMWORD PTR [rdi + rdx], xmm2
    movdqa      XMMWORD PTR [rdi + rdx + 16], xmm3

    lea         rdi,        [rdi+rdx*2]

    ; store rows 2 and 3 (aligned stores)
    movdqa      XMMWORD PTR [rdi], xmm4
    movdqa      XMMWORD PTR [rdi + 16], xmm5
    movdqa      XMMWORD PTR [rdi + rdx], xmm6
    movdqa      XMMWORD PTR [rdi + rdx + 16], xmm7

    lea         rdi,        [rdi+rdx*2]

    sub         rcx,        4
    cmp         rcx,        4
    jge         .block_copy_sse2_loopx4

.block_copy_sse2_tail:
    ; rcx is now < 4; nothing left to do when it is zero or negative.
    test        rcx,        rcx
    jle         .copy_is_done

    ; Invariant: rcx is in 1..3 here, so the loop below terminates.
.block_copy_sse2_loop:
    movdqu      xmm0,       XMMWORD PTR [rsi]
    movdqu      xmm1,       XMMWORD PTR [rsi + 16]
    lea         rsi,        [rsi+rax]

    movdqa      XMMWORD PTR [rdi], xmm0
    movdqa      XMMWORD PTR [rdi + 16], xmm1
    lea         rdi,        [rdi+rdx]

    sub         rcx,        1
    jne         .block_copy_sse2_loop

.copy_is_done:
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret