;******************************************************************************
;* SIMD-optimized clear block functions
;* Copyright (c) 2002 Michael Niedermayer
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2009 Fiona Glaser
;*
;* AVX version by Jokyo Images
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

;----------------------------------------
; void ff_clear_block(int16_t *blocks);
;----------------------------------------
; Fills the start of `blocks` with zeros using a fully unrolled run of
; full-register stores: each expansion emits 4 * %2 stores of mmsize bytes.
; NOTE(review): assuming mmsize is 8/16/32 under INIT_MMX/INIT_XMM/INIT_YMM
; (set by the included macro layer, not visible here), every instantiation
; below clears exactly 128 bytes.
; NOTE(review): mova is presumably the aligned move, in which case `blocks`
; must be mmsize-aligned -- confirm against the callers.
;
; %1 = number of xmm registers used
; %2 = number of inline store loops (each loop is a group of four stores)
%macro CLEAR_BLOCK 2
cglobal clear_block, 1, 1, %1, blocks
    ZERO  m0, m0, m0                        ; m0 = all-zero register
    ; One store per mmsize-sized slot, ascending from offset 0.
%assign %%slot 0
%rep 4*%2
    mova  [blocksq+mmsize*(%%slot)], m0
%assign %%slot %%slot+1
%endrep
    RET
%endmacro

; MMX build: zero with pxor, 4 groups of 4 stores.
INIT_MMX mmx
%define ZERO pxor
CLEAR_BLOCK 0, 4

; SSE build: zero with xorps, 2 groups of 4 stores.
INIT_XMM sse
%define ZERO xorps
CLEAR_BLOCK 1, 2

; AVX build: ZERO is not redefined, so it is still xorps; 1 group of 4 stores.
INIT_YMM avx
CLEAR_BLOCK 1, 1

;-----------------------------------------
; void ff_clear_blocks(int16_t *blocks);
;-----------------------------------------
; %1 = number of xmm registers used
;
; Zero-fills the 768 bytes starting at `blocks`.
; The pointer is first advanced to the end of the region and a negative
; byte offset (len) is walked up towards zero, so the add that advances
; the offset also produces the loop condition (sign flag) with no
; separate compare. Each iteration clears 8 * mmsize bytes.
; NOTE(review): 768 bytes presumably corresponds to 6 blocks of 64 int16_t
; coefficients -- confirm against the C-side caller.
%macro CLEAR_BLOCKS 1
cglobal clear_blocks, 1, 2, %1, blocks, len
    add   blocksq, 768                      ; blocksq -> one past the end
    mov      lenq, -768                     ; offset of the region start
    ZERO       m0, m0, m0                   ; m0 = all-zero register
.loop:
    ; Eight consecutive full-register stores, unrolled at assembly time.
%assign %%slot 0
%rep 8
    mova  [blocksq+lenq+mmsize*(%%slot)], m0
%assign %%slot %%slot+1
%endrep
    add      lenq, mmsize*8                 ; advance; sets SF while len < 0
    js .loop                                ; still negative -> more to clear
    RET
%endmacro

; MMX build: zero with pxor.
INIT_MMX mmx
%define ZERO pxor
CLEAR_BLOCKS 0

; SSE build: zero with xorps.
INIT_XMM sse
%define ZERO xorps
CLEAR_BLOCKS 1

; AVX build: ZERO is not redefined, so it is still xorps.
INIT_YMM avx
CLEAR_BLOCKS 1