;******************************************************************************
;* SIMD-optimized fullpel functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

; Emulate pavgb (rounded-up per-byte average) on plain MMX, which lacks the
; instruction (pavgb is MMXEXT+).
; Uses the identity:  avg(a, b) = (a | b) - (((a ^ b) & 0xfe) >> 1)
; The 0xfe mask clears each byte's LSB before the qword shift so that no bit
; leaks between adjacent bytes when psrlq shifts across byte boundaries.
;   %1: memory operand (destination pixels, "a")
;   %2: register holding the source pixels ("b"); receives the result
;   %3: scratch register (clobbered)
;   %4: register pre-loaded with 0xfe in every byte (see OP_PIXELS m6 setup)
%macro PAVGB_MMX 4
    LOAD   %3, %1        ; %3 = a
    por    %3, %2        ; %3 = a | b
    pxor   %2, %1        ; %2 = a ^ b
    pand   %2, %4        ; clear per-byte LSBs so the qword shift is safe
    psrlq  %2, 1         ; %2 = ((a ^ b) & 0xfe) >> 1  (per-byte (a^b)>>1)
    psubb  %3, %2        ; %3 = (a | b) - ((a ^ b) >> 1) = avg(a, b)
    SWAP   %2, %3        ; return result in %2
%endmacro

; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
;                        ptrdiff_t line_size, int h)
; %1: operation, "put" (copy) or "avg" (average with destination)
; %2: block width in pixels/bytes (4, 8 or 16)
;
; Processes 4 rows per loop iteration, using r4 = 3*line_size to address the
; 4th row without an extra pointer update.  The sub/jne loop structure means
; h (r3d) must be a positive multiple of 4.
%macro OP_PIXELS 2
%if %2 == mmsize/2
; width is half a register: load/store via movh (low half of the register)
%define LOAD movh
%define SAVE movh
%define LEN  mmsize
%else
; width is one or more full registers: unaligned loads from the source,
; aligned stores to the destination block; %rep below iterates LEN/mmsize
; times per row (e.g. 16-byte width with 8-byte MMX registers = 2 passes)
%define LOAD movu
%define SAVE mova
%define LEN  %2
%endif
cglobal %1_pixels%2, 4,5,4
    lea          r4, [r2*3]        ; r4 = 3 * line_size
%ifidn %1, avg
%if notcpuflag(mmxext)
    ; build the 0xfe-per-byte mask needed by PAVGB_MMX, once, outside the loop:
    ; pcmpeqd gives all-ones (0xff per byte), paddb doubles each byte to 0xfe
    pcmpeqd      m6, m6
    paddb        m6, m6
%endif
%endif
.loop:
%assign %%i 0
%rep LEN/mmsize
    LOAD         m0, [r1 + %%i]          ; row 0
    LOAD         m1, [r1+r2 + %%i]       ; row 1
    LOAD         m2, [r1+r2*2 + %%i]     ; row 2
    LOAD         m3, [r1+r4 + %%i]       ; row 3 (3 * line_size)
%ifidn %1, avg
%if notcpuflag(mmxext)
    ; no pavgb on plain MMX: emulate it; m4/m5 are scratch, m6 is the mask
    PAVGB_MMX    [r0 + %%i], m0, m4, m6
    PAVGB_MMX    [r0+r2 + %%i], m1, m5, m6
    PAVGB_MMX    [r0+r2*2 + %%i], m2, m4, m6
    PAVGB_MMX    [r0+r4 + %%i], m3, m5, m6
%else
    pavgb        m0, [r0 + %%i]
    pavgb        m1, [r0+r2 + %%i]
    pavgb        m2, [r0+r2*2 + %%i]
    pavgb        m3, [r0+r4 + %%i]
%endif
%endif
    SAVE       [r0 + %%i], m0
    SAVE       [r0+r2 + %%i], m1
    SAVE       [r0+r2*2 + %%i], m2
    SAVE       [r0+r4 + %%i], m3
%assign %%i %%i+mmsize
%endrep
    sub         r3d, 4               ; 4 rows done; sets ZF when h reaches 0
    lea          r1, [r1+r2*4]
    lea          r0, [r0+r2*4]
    jne .loop
    RET
%endmacro

INIT_MMX mmx
OP_PIXELS put, 4
OP_PIXELS avg, 4
OP_PIXELS put, 8
OP_PIXELS avg, 8
OP_PIXELS put, 16
OP_PIXELS avg, 16

INIT_MMX mmxext
OP_PIXELS avg, 4
OP_PIXELS avg, 8
OP_PIXELS avg, 16

INIT_XMM sse2
OP_PIXELS put, 16
OP_PIXELS avg, 16