• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;******************************************************************************
2;* SIMD-optimized UTVideo functions
3;* Copyright (c) 2017 Paul B Mahol
4;* Copyright (c) 2017 Jokyo Images
5;*
6;* This file is part of FFmpeg.
7;*
8;* FFmpeg is free software; you can redistribute it and/or
9;* modify it under the terms of the GNU Lesser General Public
10;* License as published by the Free Software Foundation; either
11;* version 2.1 of the License, or (at your option) any later version.
12;*
13;* FFmpeg is distributed in the hope that it will be useful,
14;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16;* Lesser General Public License for more details.
17;*
18;* You should have received a copy of the GNU Lesser General Public
19;* License along with FFmpeg; if not, write to the Free Software
20;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21;******************************************************************************
22
23%include "libavutil/x86/x86util.asm"
24
25SECTION_RODATA
26
27cextern pb_80
28cextern pw_512
29cextern pw_1023
30
31SECTION .text
32
;-------------------------------------------------------------------------------------------
; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                         int width, int height)
;
; In place, for every byte of every row:
;     r += g - bias
;     b += g - bias
; where bias is the vector loaded from pb_80 (declared elsewhere; presumably
; 0x80 in every byte). The G plane is only read, never written.
;
; Rows are walked in steps of mmsize bytes with mova loads/stores. The inner
; loop stops once the running offset is no longer negative, so a width that is
; not a multiple of mmsize is processed up to the next multiple. Both loops test
; their condition at the bottom: at least one vector and one row are processed.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES 0
cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    mova             m3, [pb_80]            ; m3 = bias, constant for the whole call
    movsxdifnidn     wq, wd
    ; Move each plane pointer to the end of its first row; a row is then
    ; addressed with a negative offset that counts up towards zero.
    add          src_bq, wq
    add          src_gq, wq
    add          src_rq, wq
    neg              wq                     ; wq = -width
%if ARCH_X86_64 == 0
    ; Only seven registers are requested on x86-32: write -width back to the
    ; w argument slot, reuse that register as x, and from here on read w and
    ; h from their memory slots.
    mov              wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif

.next_row:
    mov              xq, wq                 ; restart the offset at -width

.next_vec:
    mova             m1, [src_gq + xq]
    mova             m0, [src_rq + xq]
    mova             m2, [src_bq + xq]
    psubb            m1, m3                 ; m1 = g - bias
    paddb            m2, m1                 ; m2 = b + (g - bias)
    paddb            m0, m1                 ; m0 = r + (g - bias)
    mova  [src_rq + xq], m0
    mova  [src_bq + xq], m2
    add              xq, mmsize
    jl .next_vec                            ; offset still negative: more of this row

    ; Step all three planes down one row.
    add          src_bq, linesize_bq
    add          src_gq, linesize_gq
    add          src_rq, linesize_rq
    sub              hd, 1
    jg .next_row                            ; rows remaining
    REP_RET
%endmacro

; SSE2 build of the routine (XMM registers).
INIT_XMM sse2
RESTORE_RGB_PLANES

; AVX2 build of the routine (YMM registers), only when the toolchain supports it.
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES
%endif
82
;-------------------------------------------------------------------------------------------
; void restore_rgb_planes10(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
;                           ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                           int width, int height)
;
; In place, for every 16-bit sample of every row:
;     r = (r + g - bias) & mask
;     b = (b + g - bias) & mask
; where bias is loaded from pw_512 and mask from pw_1023 (both declared
; elsewhere; presumably 512 and 1023 in every word, i.e. a 10-bit mask).
; The G plane is only read, never written.
;
; width and the three line sizes are doubled on entry, converting them from
; 16-bit samples to bytes. Rows are walked in steps of mmsize bytes with mova
; loads/stores; the inner loop stops once the running offset is no longer
; negative, so a row whose byte width is not a multiple of mmsize is processed
; up to the next multiple. Both loops test their condition at the bottom: at
; least one vector and one row are processed.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES10 0
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    mova             m4, [pw_1023]          ; m4 = mask applied to every result
    mova             m3, [pw_512]           ; m3 = bias subtracted from g
    ; Samples -> bytes for the strides and the row width.
    shl     linesize_bq, 1
    shl     linesize_gq, 1
    shl     linesize_rq, 1
    shl              wd, 1
    ; Move each plane pointer to the end of its first row; a row is then
    ; addressed with a negative offset that counts up towards zero.
    add          src_bq, wq
    add          src_gq, wq
    add          src_rq, wq
    neg              wq                     ; wq = -(row width in bytes)
%if ARCH_X86_64 == 0
    ; Only seven registers are requested on x86-32: write the negated width
    ; back to the w argument slot, reuse that register as x, and from here on
    ; read w and h from their memory slots.
    mov              wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif

.next_row:
    mov              xq, wq                 ; restart the offset at the row start

.next_vec:
    mova             m1, [src_gq + xq]
    mova             m0, [src_rq + xq]
    mova             m2, [src_bq + xq]
    psubw            m1, m3                 ; m1 = g - bias
    paddw            m2, m1                 ; m2 = b + (g - bias)
    paddw            m0, m1                 ; m0 = r + (g - bias)
    pand             m2, m4
    pand             m0, m4
    mova  [src_rq + xq], m0
    mova  [src_bq + xq], m2
    add              xq, mmsize
    jl .next_vec                            ; offset still negative: more of this row

    ; Step all three planes down one row (strides are in bytes by now).
    add          src_bq, linesize_bq
    add          src_gq, linesize_gq
    add          src_rq, linesize_rq
    sub              hd, 1
    jg .next_row                            ; rows remaining
    REP_RET
%endmacro

; SSE2 build of the routine (XMM registers).
INIT_XMM sse2
RESTORE_RGB_PLANES10

; AVX2 build of the routine (YMM registers), only when the toolchain supports it.
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES10
%endif
138