;******************************************************************************
;* x86 optimized dithering format conversion
;* Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA 32

; Scale factor that the dither_int_to_float_* routines in this file multiply
; the converted int32 values by. Replicated 8 times (32 bytes); it is loaded
; with a full-register mova by both the XMM and the YMM builds.
; 1.0f / (2.0f * INT32_MAX)
pf_dither_scale: times 8 dd 2.32830643762e-10

; Factor that quantize multiplies each float input sample by before the dither
; is added and the result is converted to int16 (one XMM register's worth).
pf_s16_scale:    times 4 dd 32753.0

SECTION .text

;------------------------------------------------------------------------------
; void ff_quantize(int16_t *dst, float *src, float *dither, int len);
;
; For every sample i: dst[i] = saturate_int16(src[i] * 32753.0 + dither[i]).
;
;   dst    - output, len int16 samples
;   src    - input, len floats
;   dither - noise that is added after scaling, len floats
;   len    - number of samples
;
; Each loop pass consumes 2*mmsize bytes of src/dither and writes mmsize bytes
; of dst (8 samples with 16-byte XMM registers), and the length is only tested
; after the first pass has already run.
; NOTE(review): callers presumably guarantee that len is a positive multiple
; of 8 and that all three buffers are 16-byte aligned (mova and the SSE memory
; operands would require it) - confirm against the C callers.
;------------------------------------------------------------------------------

INIT_XMM sse2
cglobal quantize, 4,4,3, dst, src, dither, len
    lea         lenq, [2*lend]              ; lenq = size of dst in bytes (2 per int16)
    add         dstq, lenq                  ; dst -> one past its last sample
    lea         srcq, [srcq+2*lenq]         ; src -> one past its end (4 bytes per float)
    lea      ditherq, [ditherq+2*lenq]      ; dither -> one past its end
    neg         lenq                        ; negative byte offset that counts up to 0
    mova          m2, [pf_s16_scale]        ; m2 = 4 x 32753.0
.loop:
    ; The float arrays are indexed with 2*lenq because a float is twice as
    ; wide as the int16 that lenq counts in bytes.
    mulps         m0, m2, [srcq+2*lenq]             ; m0 = scale * src[0..3]
    mulps         m1, m2, [srcq+2*lenq+mmsize]      ; m1 = scale * src[4..7]
    addps         m0, [ditherq+2*lenq]              ; add the dither noise
    addps         m1, [ditherq+2*lenq+mmsize]
    cvtps2dq      m0, m0                    ; float -> int32, using the current MXCSR rounding mode
    cvtps2dq      m1, m1
    packssdw      m0, m1                    ; 8 x int32 -> 8 x int16 with signed saturation
    mova     [dstq+lenq], m0                ; store 8 output samples
    add         lenq, mmsize                ; advance by the bytes of dst just written
    jl .loop                                ; flags come from the add: loop while offset < 0
    REP_RET

;------------------------------------------------------------------------------
; void ff_dither_int_to_float_rectangular(float *dst, int *src, int len)
;
; For every element i: dst[i] = (float)src[i] * pf_dither_scale.
;
;   dst - output, len floats
;   src - input, len 32-bit integers
;   len - number of elements
;
; Two vectors are handled per loop pass (2*mmsize bytes of src and of dst), and
; the length is only tested after the first pass has already run.
; NOTE(review): callers presumably guarantee that len is a positive multiple
; of 2*mmsize/4 elements and that both buffers are mmsize-aligned - confirm.
;------------------------------------------------------------------------------

%macro DITHER_INT_TO_FLOAT_RECTANGULAR 0
cglobal dither_int_to_float_rectangular, 3,3,3, dst, src, len
    lea         lenq, [4*lend]              ; lenq = array size in bytes (4 per element)
    add         srcq, lenq                  ; src -> one past its end
    add         dstq, lenq                  ; dst -> one past its end
    neg         lenq                        ; negative byte offset that counts up to 0
    mova          m0, [pf_dither_scale]     ; m0 = scale factor in every lane
.loop:
    cvtdq2ps      m1, [srcq+lenq]           ; int32 -> float, first vector
    cvtdq2ps      m2, [srcq+lenq+mmsize]    ; int32 -> float, second vector
    mulps         m1, m1, m0                ; apply the scale
    mulps         m2, m2, m0
    mova  [dstq+lenq], m1
    mova  [dstq+lenq+mmsize], m2
    add         lenq, 2*mmsize              ; two vectors consumed per pass
    jl .loop                                ; flags come from the add: loop while offset < 0
    REP_RET
%endmacro

; Emit the routine once with XMM registers (SSE2) and once with YMM registers (AVX).
INIT_XMM sse2
DITHER_INT_TO_FLOAT_RECTANGULAR
INIT_YMM avx
DITHER_INT_TO_FLOAT_RECTANGULAR

;------------------------------------------------------------------------------
; void ff_dither_int_to_float_triangular(float *dst, int *src0, int len)
;
; For every element i:
;     dst[i] = ((float)src0[i] + (float)src0[len + i]) * pf_dither_scale
;
;   dst  - output, len floats
;   src0 - input; the code reads 2*len 32-bit integers from it, the second
;          half (src0 + len) being addressed through the scratch register src1
;   len  - number of output elements
;
; Two vectors are handled per loop pass (2*mmsize bytes of dst and of each
; input half), and the length is only tested after the first pass has run.
; NOTE(review): callers presumably guarantee that len is a positive multiple
; of 2*mmsize/4 elements and that dst, src0 and src0+len are mmsize-aligned -
; confirm.
;------------------------------------------------------------------------------

%macro DITHER_INT_TO_FLOAT_TRIANGULAR 0
cglobal dither_int_to_float_triangular, 3,4,5, dst, src0, len, src1
    lea         lenq, [4*lend]              ; lenq = size of one input half in bytes
    lea        src1q, [src0q+2*lenq]        ; src1 -> end of the second half (src0 + 2*len elements)
    add        src0q, lenq                  ; src0 -> end of the first half
    add         dstq, lenq                  ; dst -> one past its end
    neg         lenq                        ; negative byte offset that counts up to 0
    mova          m0, [pf_dither_scale]     ; m0 = scale factor in every lane
.loop:
    cvtdq2ps      m1, [src0q+lenq]          ; first half, vector 0
    cvtdq2ps      m2, [src0q+lenq+mmsize]   ; first half, vector 1
    cvtdq2ps      m3, [src1q+lenq]          ; second half, vector 0
    cvtdq2ps      m4, [src1q+lenq+mmsize]   ; second half, vector 1
    addps         m1, m1, m3                ; sum the two values for each output element
    addps         m2, m2, m4
    mulps         m1, m1, m0                ; apply the scale
    mulps         m2, m2, m0
    mova  [dstq+lenq], m1
    mova  [dstq+lenq+mmsize], m2
    add         lenq, 2*mmsize              ; two vectors consumed per pass
    jl .loop                                ; flags come from the add: loop while offset < 0
    REP_RET
%endmacro

; Emit the routine once with XMM registers (SSE2) and once with YMM registers (AVX).
INIT_XMM sse2
DITHER_INT_TO_FLOAT_TRIANGULAR
INIT_YMM avx
DITHER_INT_TO_FLOAT_TRIANGULAR
