;******************************************************************************
;* SIMD-optimized HuffYUV functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Christophe Gisquet
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

%include "libavcodec/x86/huffyuvdsp_template.asm"

;------------------------------------------------------------------------------
; void (*add_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
;
; dst[i] = (dst[i] + src[i]) & mask, with aligned and unaligned code paths
;------------------------------------------------------------------------------

%macro ADD_INT16 0
cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
%if mmsize > 8
    test srcq, mmsize-1
    jnz .unaligned
    test dstq, mmsize-1
    jnz .unaligned
%endif
    INT16_LOOP a, add
%if mmsize > 8
.unaligned:
    INT16_LOOP u, add
%endif
%endmacro

%if ARCH_X86_32
INIT_MMX mmx
ADD_INT16
%endif

INIT_XMM sse2
ADD_INT16

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
ADD_INT16
%endif

; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
;                               intptr_t w, uint8_t *left)
;
; Running left prediction on 4-byte (BGRA) pixels: each output pixel is the
; byte-wise sum of the source pixel and the previous output pixel, seeded
; from *left; the last output pixel is written back to *left.
%macro LEFT_BGR32 0
cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
    shl           wq, 2
    movd          m0, [leftq]
    lea         dstq, [dstq + wq]
    lea         srcq, [srcq + wq]
    LSHIFT        m0, mmsize-4
    neg           wq
.loop:
    movu          m1, [srcq+wq]
    mova          m2, m1
%if mmsize == 8
    punpckhdq     m0, m0
%endif
    LSHIFT        m1, 4
    paddb         m1, m2
%if mmsize == 16
    pshufd        m0, m0, q3333
    mova          m2, m1
    LSHIFT        m1, 8
    paddb         m1, m2
%endif
    paddb         m0, m1
    movu  [dstq+wq], m0
    add           wq, mmsize
    jl .loop
    movd          m0, [dstq-4]
    movd     [leftq], m0
    REP_RET
%endmacro

%if ARCH_X86_32
INIT_MMX mmx
LEFT_BGR32
%endif
INIT_XMM sse2
LEFT_BGR32

; void add_hfyu_median_pred_int16(uint16_t *dst, const uint16_t *top,
;                                 const uint16_t *diff, unsigned mask,
;                                 int w, int *left, int *left_top)
;
; Per sample: l = (median(l, t, l + t - tl) + diff[i]) & mask; dst[i] = l,
; processing four 16-bit samples per loop iteration.
INIT_MMX mmxext
cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top
    add      wd, wd
    movd    mm6, maskd
    SPLATW  mm6, mm6
    movq    mm0, [topq]
    movq    mm2, mm0
    movd    mm4, [left_topq]
    psllq   mm2, 16
    movq    mm1, mm0
    por     mm4, mm2
    movd    mm3, [leftq]
    psubw   mm0, mm4 ; t-tl
    add    dstq, wq
    add    topq, wq
    add   diffq, wq
    neg      wq
    jmp .skip
.loop:
    movq    mm4, [topq+wq]
    movq    mm0, mm4
    psllq   mm4, 16
    por     mm4, mm1
    movq    mm1, mm0 ; t
    psubw   mm0, mm4 ; t-tl
.skip:
    movq    mm2, [diffq+wq]
%assign i 0
%rep 4
    movq    mm4, mm0
    paddw   mm4, mm3 ; t-tl+l
    pand    mm4, mm6
    movq    mm5, mm3
    pmaxsw  mm3, mm1
    pminsw  mm5, mm1
    pminsw  mm3, mm4
    pmaxsw  mm3, mm5 ; median
    paddw   mm3, mm2 ; +residual
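    ; wrap the sum with the caller's mask (mm6) so each 16-bit result stays
    ; within the sample range before it is packed into the output qword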
    pand    mm3, mm6
%if i==0
    movq    mm7, mm3
    psllq   mm7, 48
%else
    movq    mm4, mm3
    psrlq   mm7, 16
    psllq   mm4, 48
    por     mm7, mm4
%endif
%if i<3
    psrlq   mm0, 16
    psrlq   mm1, 16
    psrlq   mm2, 16
%endif
%assign i i+1
%endrep
    movq [dstq+wq], mm7
    add      wq, 8
    jl .loop
    movzx   r2d, word [dstq-2]
    mov [leftq], r2d
    movzx   r2d, word [topq-2]
    mov [left_topq], r2d
    RET
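
; For reference, a rough scalar sketch of what add_hfyu_median_pred_int16
; above computes, kept in a comment so this file still assembles. It is
; illustrative only, not FFmpeg's actual C fallback: the function and helper
; names below are invented, and it is not claimed to be bit-exact (the SIMD
; code uses signed 16-bit min/max for the median).
;
;   static int median3(int a, int b, int c)
;   {
;       int mn = a < b ? a : b;
;       int mx = a < b ? b : a;
;       return c < mn ? mn : c > mx ? mx : c;
;   }
;
;   static void median_pred_int16_ref(uint16_t *dst, const uint16_t *top,
;                                     const uint16_t *diff, unsigned mask,
;                                     int w, int *left, int *left_top)
;   {
;       int l = *left, tl = *left_top;
;       for (int i = 0; i < w; i++) {
;           int t  = top[i];
;           l      = (median3(l, t, (l + t - tl) & mask) + diff[i]) & mask;
;           dst[i] = l;
;           tl     = t;
;       }
;       *left     = l;
;       *left_top = tl;
;   }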