;******************************************************************************
;* SIMD-optimized HuffYUV encoding functions
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

%include "libavcodec/x86/huffyuvdsp_template.asm"

;------------------------------------------------------------------------------
; void ff_diff_int16(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
;                    unsigned mask, int w);
;------------------------------------------------------------------------------

%macro DIFF_INT16 0
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
%if mmsize > 8
    test    src1q, mmsize-1
    jnz .unaligned                       ; any misaligned pointer -> unaligned path
    test    src2q, mmsize-1
    jnz .unaligned
    test    dstq, mmsize-1
    jnz .unaligned
%endif
    INT16_LOOP a, sub                    ; aligned loads/stores
%if mmsize > 8
.unaligned:
    INT16_LOOP u, sub                    ; unaligned loads/stores
%endif
%endmacro

%if ARCH_X86_32
INIT_MMX mmx
DIFF_INT16
%endif

INIT_XMM sse2
DIFF_INT16

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
DIFF_INT16
%endif

INIT_MMX mmxext
cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
    add     wd, wd                       ; width in samples -> width in bytes
    movd    mm7, maskd
    SPLATW  mm7, mm7                     ; broadcast the bit-depth mask to all words
    movq    mm0, [src1q]                 ; first four samples of the top row
    movq    mm2, [src2q]                 ; first four samples of the current row
    psllq   mm0, 16                      ; make room for the carried-in samples
    psllq   mm2, 16
    movd    mm6, [left_topq]
    por     mm0, mm6                     ; lane 0 of mm0 = *left_top
    movd    mm6, [leftq]
    por     mm2, mm6                     ; lane 0 of mm2 = *left
    xor     maskq, maskq                 ; reuse maskq as the byte offset
.loop:
    movq    mm1, [src1q + maskq]         ; t:  top row
    movq    mm3, [src2q + maskq]         ; cur: current row
    movq    mm4, mm2                     ; l:  current row, one sample to the left
    psubw   mm2, mm0                     ; l - lt
    paddw   mm2, mm1                     ; grad = l - lt + t
    pand    mm2, mm7                     ; wrap the gradient to the bit depth
    movq    mm5, mm4
    pmaxsw  mm4, mm1                     ; max(l, t)
    pminsw  mm1, mm5                     ; min(l, t)
    pminsw  mm4, mm2                     ; min(max(l, t), grad)
    pmaxsw  mm4, mm1                     ; median(l, t, grad)
    psubw   mm3, mm4                     ; residual = cur - pred
    pand    mm3, mm7
    movq    [dstq + maskq], mm3
    add     maskq, 8
    movq    mm0, [src1q + maskq - 2]     ; next lt values: top row, one sample back
    movq    mm2, [src2q + maskq - 2]     ; next l values: current row, one sample back
    cmp     maskq, wq
    jb .loop
    movzx   maskd, word [src1q + wq - 2]
    mov     [left_topq], maskd           ; carry the last top sample to the next call
    movzx   maskd, word [src2q + wq - 2]
    mov     [leftq], maskd               ; carry the last current sample to the next call
    RET
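
;------------------------------------------------------------------------------
; Scalar reference for diff_int16 (illustrative only, kept in comments so this
; file still assembles). A minimal sketch of what the SIMD loop above computes,
; assuming uint16_t elements as implied by the _int16 name and the word-sized
; instructions; the helper name is hypothetical and not part of the FFmpeg C
; sources.
;
;   #include <stdint.h>
;
;   static void diff_int16_ref(uint16_t *dst, const uint16_t *src1,
;                              const uint16_t *src2, unsigned mask, int w)
;   {
;       /* Per-element difference, wrapped to the active bit depth by 'mask'
;        * (e.g. 0x03FF for 10-bit samples). */
;       for (int i = 0; i < w; i++)
;           dst[i] = (src1[i] - src2[i]) & mask;
;   }
;------------------------------------------------------------------------------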
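
;------------------------------------------------------------------------------
; Scalar reference for sub_hfyu_median_pred_int16 (illustrative only). A sketch
; of the median-prediction residual the MMX loop above computes, assuming
; uint16_t samples and int-sized left/left_top carries; the helper names are
; hypothetical and this block is not part of the FFmpeg C sources.
;
;   #include <stdint.h>
;
;   static int median3(int a, int b, int c)
;   {
;       /* median of three = max(min(max(a, b), c), min(a, b)),
;        * mirroring the pmaxsw/pminsw sequence in the loop above */
;       int mx = a > b ? a : b;
;       int mn = a < b ? a : b;
;       int t  = mx < c ? mx : c;
;       return t > mn ? t : mn;
;   }
;
;   static void sub_hfyu_median_pred_int16_ref(uint16_t *dst,
;                                              const uint16_t *src1,
;                                              const uint16_t *src2,
;                                              unsigned mask, int w,
;                                              int *left, int *left_top)
;   {
;       int l = *left, lt = *left_top;
;       for (int i = 0; i < w; i++) {
;           const int pred = median3(l, src1[i], (l + src1[i] - lt) & mask);
;           lt     = src1[i];            /* top-left for the next column */
;           l      = src2[i];            /* left neighbour for the next column */
;           dst[i] = (l - pred) & mask;  /* store the prediction residual */
;       }
;       *left     = l;
;       *left_top = lt;
;   }
;------------------------------------------------------------------------------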