;*****************************************************************************
;* SIMD-optimized pixel operations
;*****************************************************************************
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

; void ff_get_pixels(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
INIT_XMM sse2
cglobal get_pixels, 3, 4, 5
    lea          r3, [r2*3]
    pxor         m4, m4
    ; rows 0-3: load 8 pixels per row, zero-extend to 16 bits, store to block
    movh         m0, [r1]
    movh         m1, [r1+r2]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    lea          r1, [r1+r2*4]
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova       [r0], m0
    mova  [r0+0x10], m1
    mova  [r0+0x20], m2
    mova  [r0+0x30], m3
    ; rows 4-7
    movh         m0, [r1]
    movh         m1, [r1+r2*1]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova  [r0+0x40], m0
    mova  [r0+0x50], m1
    mova  [r0+0x60], m2
    mova  [r0+0x70], m3
    RET

; void ff_diff_pixels(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                     ptrdiff_t stride);
INIT_XMM sse2
cglobal diff_pixels, 4,5,5
    pxor         m4, m4
    add          r0, 128
    mov          r4, -128       ; negative offset counts up to 0 (4 iterations, 8 rows)
.loop:
    ; two rows per iteration: widen s1 and s2 to 16 bits, store s1 - s2
    movq         m0, [r1]
    movq         m2, [r2]
    movq         m1, [r1+r3]
    movq         m3, [r2+r3]
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    psubw        m0, m2
    psubw        m1, m3
    mova  [r0+r4+0], m0
    mova [r0+r4+mmsize], m1
    lea          r1, [r1+r3*2]
    lea          r2, [r2+r3*2]
    add          r4, 2 * mmsize
    jne          .loop
    RET
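
; For reference, a scalar C sketch of the operation the two routines above
; vectorize, kept in comment form so this file still assembles. This is an
; illustration only: the names get_pixels_ref/diff_pixels_ref are hypothetical
; stand-ins, not FFmpeg's actual C fallbacks.
;
;     static void get_pixels_ref(int16_t *block, const uint8_t *pixels,
;                                ptrdiff_t stride)
;     {
;         for (int i = 0; i < 8; i++) {
;             for (int j = 0; j < 8; j++)
;                 block[8 * i + j] = pixels[j];      /* widen u8 -> s16 */
;             pixels += stride;
;         }
;     }
;
;     static void diff_pixels_ref(int16_t *block, const uint8_t *s1,
;                                 const uint8_t *s2, ptrdiff_t stride)
;     {
;         for (int i = 0; i < 8; i++) {
;             for (int j = 0; j < 8; j++)
;                 block[8 * i + j] = s1[j] - s2[j];  /* per-pixel difference */
;             s1 += stride;
;             s2 += stride;
;         }
;     }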