;******************************************************************************
;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol
;* Copyright (c) 2017 Jokyo Images
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

; Syntax: NASM/Yasm (Intel operand order) on top of the x86inc/x86util macro
; layer; calling convention details are handled by cglobal.

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Vector constants defined in another object file.
; NOTE(review): going by their names these are presumably packed bytes of
; 0x80, packed words of 512 and packed words of 1023 - confirm against the
; definitions.
cextern pb_80
cextern pw_512
cextern pw_1023

SECTION .text

;-------------------------------------------------------------------------------------------
; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                         int width, int height)
;
; In place, for every row and every vector of mmsize bytes:
;     t     = src_g - [pb_80]      (packed bytes, wrapping)
;     src_r = src_r + t
;     src_b = src_b + t
; src_g is only read, never written.
;
; Behaviour a caller has to be aware of (all visible in the code below):
;   * Both loops test their condition at the bottom, so one vector per row and
;     one row are always processed, even if width or height is 0 or negative.
;   * Each row is processed in whole vectors, so up to mmsize - 1 bytes past
;     width are read and written.
;   * All loads and stores use mova (x86inc's aligned move).
;     NOTE(review): this assumes row starts are mmsize-aligned and rows are
;     padded to a multiple of mmsize - confirm against the callers.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES 0
cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    movsxdifnidn wq, wd                  ; width as a pointer-sized signed value
    add          src_rq, wq              ; point every plane at the end of its row ...
    add          src_gq, wq
    add          src_bq, wq
    neg          wq                      ; ... so that x can run from -width up to 0
%if ARCH_X86_64 == 0
    ; 32-bit build: only seven GPRs are requested, so the negated width is
    ; written back to its stack argument slot and its register is reused as x.
    ; From here on wq and hd name the stack slots of arguments 7 and 8.
    mov          wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif
    mova         m3, [pb_80]             ; bias removed from green before it is added
.nextrow:
    mov          xq, wq                  ; x = -width

    .loop:
        mova     m0, [src_rq + xq]
        mova     m1, [src_gq + xq]
        mova     m2, [src_bq + xq]
        psubb    m1, m3                  ; t = g - bias
        paddb    m0, m1                  ; r += t
        paddb    m2, m1                  ; b += t
        mova     [src_rq+xq], m0
        mova     [src_bq+xq], m2
        add      xq, mmsize
        jl       .loop                   ; continue while x is still negative

    add          src_rq, linesize_rq     ; step all three planes to the next row
    add          src_gq, linesize_gq
    add          src_bq, linesize_bq
    sub          hd, 1
    jg           .nextrow                ; continue while rows remain
    REP_RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES
%endif

;-------------------------------------------------------------------------------------------
; void restore_rgb_planes10(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
;                           ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                           int width, int height)
;
; 16-bit-sample variant. In place, for every row and every vector of mmsize
; bytes:
;     t     = src_g - [pw_512]                 (packed words, wrapping)
;     src_r = (src_r + t) AND [pw_1023]
;     src_b = (src_b + t) AND [pw_1023]
; src_g is only read, never written.
;
; width and the three linesizes are doubled on entry before being used as byte
; offsets, i.e. they are taken in units of 16-bit samples.
;
; The same caveats as for restore_rgb_planes apply: at least one vector per
; row and one row are always processed, rows are handled in whole vectors, and
; every access uses mova.
; NOTE(review): this assumes mmsize-aligned, padded rows - confirm against the
; callers.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES10 0
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    ; samples -> bytes; on x86-64 the 32-bit write also clears the upper half
    ; of wq, which is why wq can be used as a 64-bit offset right below
    shl          wd, 1
    shl          linesize_rq, 1          ; sample strides -> byte strides
    shl          linesize_gq, 1
    shl          linesize_bq, 1
    add          src_rq, wq              ; point every plane at the end of its row ...
    add          src_gq, wq
    add          src_bq, wq
    mova         m3, [pw_512]            ; bias removed from green before it is added
    mova         m4, [pw_1023]           ; mask applied to the red and blue results
    neg          wq                      ; ... so that x can run from -2 * width up to 0
%if ARCH_X86_64 == 0
    ; 32-bit build: only seven GPRs are requested, so the negated byte width is
    ; written back to its stack argument slot and its register is reused as x.
    ; From here on wq and hd name the stack slots of arguments 7 and 8.
    mov          wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif
.nextrow:
    mov          xq, wq                  ; x = -(row width in bytes)

    .loop:
        mova     m0, [src_rq + xq]
        mova     m1, [src_gq + xq]
        mova     m2, [src_bq + xq]
        psubw    m1, m3                  ; t = g - bias
        paddw    m0, m1                  ; r += t
        paddw    m2, m1                  ; b += t
        pand     m0, m4                  ; mask the red result
        pand     m2, m4                  ; mask the blue result
        mova     [src_rq+xq], m0
        mova     [src_bq+xq], m2
        add      xq, mmsize
        jl       .loop                   ; continue while x is still negative

    add          src_rq, linesize_rq     ; step all three planes to the next row
    add          src_gq, linesize_gq
    add          src_bq, linesize_bq
    sub          hd, 1
    jg           .nextrow                ; continue while rows remain
    REP_RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES10

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES10
%endif