• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
20
21%include "libavutil/x86/x86util.asm"
22
23
SECTION_RODATA 32

; Constant vectors of ones, 32 bytes each.
; dw1 is shifted into a dword rounding bias by the two-input int16 mixer.
dw1: times 8  dd 1
; w1 supplies the word-sized "1" lanes (and the word rounding bias) for the
; one-input int16 mixer.
w1:  times 16 dw 1

SECTION .text
29
;------------------------------------------------------------------------------
; MIX2_FLT variant                 (variant: a = aligned entry, u = unaligned)
;
; Emits mix_2_1_<variant>_float(out, in1, in2, coeffp, index1, index2, len):
;
;     out[i] = in1[i] * coeffp[index1] + in2[i] * coeffp[index2]
;
; on 32-bit floats. index1/index2 select 4-byte entries of coeffp and len is
; scaled by 4 to obtain a byte count. Each loop iteration handles 2*mmsize
; bytes; a byte count that is not a multiple of that is still processed in
; whole iterations, so callers are presumably expected to pass suitable
; lengths -- TODO confirm against the callers.
;
; The 'a' entry point checks that in1, in2 and out are mmsize-aligned and
; otherwise jumps to the mix_2_1_float_u_int label inside the 'u' function,
; so the 'u' variant must be instantiated for the same instruction set.
;
; A zero len returns without touching memory.
;------------------------------------------------------------------------------
%macro MIX2_FLT 1
cglobal mix_2_1_%1_float, 7, 7, 6, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    ; any misaligned pointer -> continue in the unaligned implementation
    test in1q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
%else
mix_2_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m4, [coeffpq + 4*index1q]  ; m4 = coefficient for in1
    VBROADCASTSS m5, [coeffpq + 4*index2q]  ; m5 = coefficient for in2
    shl lend    , 2                         ; samples -> bytes (32-bit op, upper half of lenq cleared)
    ; point all three buffers at their end and walk a negative offset up to 0
    add in1q    , lenq
    add in2q    , lenq
    add outq    , lenq
    neg lenq
        jge .end                            ; nothing to do: skip the do-while style loop
.next:
%ifidn %1, a
    mulps        m0, m4, [in1q + lenq         ]
    mulps        m1, m5, [in2q + lenq         ]
    mulps        m2, m4, [in1q + lenq + mmsize]
    mulps        m3, m5, [in2q + lenq + mmsize]
%else
    movu         m0, [in1q + lenq         ]
    movu         m1, [in2q + lenq         ]
    movu         m2, [in1q + lenq + mmsize]
    movu         m3, [in2q + lenq + mmsize]
    mulps        m0, m0, m4
    mulps        m1, m1, m5
    mulps        m2, m2, m4
    mulps        m3, m3, m5
%endif
    addps        m0, m0, m1                 ; in1*c1 + in2*c2, first vector
    addps        m2, m2, m3                 ; second vector
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m2
    add        lenq, mmsize*2
        jl .next                            ; loop while the offset is still negative
.end:
    REP_RET
%endmacro
73
;------------------------------------------------------------------------------
; MIX1_FLT variant                 (variant: a = aligned entry, u = unaligned)
;
; Emits mix_1_1_<variant>_float(out, in, coeffp, index, len):
;
;     out[i] = in[i] * coeffp[index]
;
; on 32-bit floats. index selects a 4-byte entry of coeffp and len is scaled
; by 4 to obtain a byte count. Each loop iteration handles 2*mmsize bytes; a
; byte count that is not a multiple of that is still processed in whole
; iterations, so callers are presumably expected to pass suitable lengths
; -- TODO confirm against the callers.
;
; The 'a' entry point checks that in and out are mmsize-aligned and otherwise
; jumps to the mix_1_1_float_u_int label inside the 'u' function, so the 'u'
; variant must be instantiated for the same instruction set.
;
; len <= 0 returns without touching memory.
;------------------------------------------------------------------------------
%macro MIX1_FLT 1
cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
%ifidn %1, a
    ; any misaligned pointer -> continue in the unaligned implementation
    test inq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
%else
mix_1_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m2, [coeffpq + 4*indexq]   ; m2 = coefficient in every lane
    shl lenq    , 2                         ; samples -> bytes
    ; point both buffers at their end and walk a negative offset up to 0
    add inq     , lenq
    add outq    , lenq
    neg lenq
        jge .end                            ; nothing to do: skip the do-while style loop
.next:
%ifidn %1, a
    mulps        m0, m2, [inq + lenq         ]
    mulps        m1, m2, [inq + lenq + mmsize]
%else
    movu         m0, [inq + lenq         ]
    movu         m1, [inq + lenq + mmsize]
    mulps        m0, m0, m2
    mulps        m1, m1, m2
%endif
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m1
    add        lenq, mmsize*2
        jl .next                            ; loop while the offset is still negative
.end:
    REP_RET
%endmacro
105
;------------------------------------------------------------------------------
; MIX1_INT16 variant               (variant: a = aligned entry, u = unaligned)
;
; Emits mix_1_1_<variant>_int16(out, in, coeffp, index, len):
;
;     out[i] = saturate_int16((in[i] * coeff + round) >> shift)
;
; The 32-bit coeffp entry selected by index is read as two 16-bit halves: the
; low half is the multiplier (coeff) and the high half the right-shift count
; (shift). round is (1 << shift) >> 1, computed in 16-bit lanes.
; len is doubled to obtain a byte count. Each loop iteration handles 2*mmsize
; bytes; a byte count that is not a multiple of that is still processed in
; whole iterations, so callers are presumably expected to pass suitable
; lengths -- TODO confirm against the callers.
;
; The 'a' entry point checks that in and out are mmsize-aligned and otherwise
; jumps to the mix_1_1_int16_u_int label inside the 'u' function, so the 'u'
; variant must be instantiated for the same instruction set.
;
; len <= 0 returns without touching memory.
;------------------------------------------------------------------------------
%macro MIX1_INT16 1
cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
%ifidn %1, a
    ; any misaligned pointer -> continue in the unaligned implementation
    test inq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
%else
mix_1_1_int16_u_int %+ SUFFIX:
%endif
    movd   m4, [coeffpq + 4*indexq]         ; low word = coeff, high word = shift
    SPLATW m5, m4                           ; x86util helper; presumably replicates the low word (coeff)
    psllq  m4, 32
    psrlq  m4, 48                           ; m4 = shift count (former bits 16..31)
    mova   m0, [w1]
    psllw  m0, m4
    psrlw  m0, 1                            ; m0 = (1 << shift) >> 1 per word: rounding bias
    punpcklwd m5, m0                        ; m5 = coeff, round, coeff, round, ...
    add lenq    , lenq                      ; samples -> bytes
    ; point both buffers at their end and walk a negative offset up to 0
    add inq     , lenq
    add outq    , lenq
    neg lenq
        jge .end                            ; nothing to do: skip the do-while style loop
.next:
    mov%1        m0, [inq + lenq         ]
    mov%1        m2, [inq + lenq + mmsize]
    mova         m1, m0
    mova         m3, m2
    ; pair every sample with a constant 1 so pmaddwd yields in*coeff + 1*round
    punpcklwd    m0, [w1]
    punpckhwd    m1, [w1]
    punpcklwd    m2, [w1]
    punpckhwd    m3, [w1]
    pmaddwd      m0, m5
    pmaddwd      m1, m5
    pmaddwd      m2, m5
    pmaddwd      m3, m5
    psrad        m0, m4
    psrad        m1, m4
    psrad        m2, m4
    psrad        m3, m4
    packssdw     m0, m1                     ; back to int16 with signed saturation
    packssdw     m2, m3
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m2
    add        lenq, mmsize*2
        jl .next                            ; loop while the offset is still negative
.end:
%if mmsize == 8
    emms                                    ; MMX registers were used above, also on the early exit
    RET
%else
    REP_RET
%endif
%endmacro
158
;------------------------------------------------------------------------------
; MIX2_INT16 variant               (variant: a = aligned entry, u = unaligned)
;
; Emits mix_2_1_<variant>_int16(out, in1, in2, coeffp, index1, index2, len):
;
;     out[i] = saturate_int16((in1[i] * c1 + in2[i] * c2 + round) >> shift)
;
; c1 and c2 are the low 16-bit halves of the 32-bit coeffp entries selected by
; index1 and index2. shift is the high 16-bit half of the index1 entry only
; (the high half of the index2 entry is not used), and round is
; (1 << shift) >> 1, computed in 32-bit lanes.
; len is doubled to obtain a byte count. Each loop iteration handles 2*mmsize
; bytes; a byte count that is not a multiple of that is still processed in
; whole iterations, so callers are presumably expected to pass suitable
; lengths -- TODO confirm against the callers.
;
; The 'a' entry point checks that in1, in2 and out are mmsize-aligned and
; otherwise jumps to the mix_2_1_int16_u_int label inside the 'u' function,
; so the 'u' variant must be instantiated for the same instruction set.
;
; A zero len returns without touching memory.
;------------------------------------------------------------------------------
%macro MIX2_INT16 1
cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    ; any misaligned pointer -> continue in the unaligned implementation
    test in1q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
%else
mix_2_1_int16_u_int %+ SUFFIX:
%endif
    movd   m4, [coeffpq + 4*index1q]        ; low word = c1, high word = shift
    movd   m6, [coeffpq + 4*index2q]        ; low word = c2
    SPLATW m5, m4                           ; x86util helper; presumably replicates the low word (c1)
    SPLATW m6, m6                           ; likewise for c2
    psllq  m4, 32
    psrlq  m4, 48                           ; m4 = shift count (former bits 16..31)
    mova   m7, [dw1]
    pslld  m7, m4
    psrld  m7, 1                            ; m7 = (1 << shift) >> 1 per dword: rounding bias
    punpcklwd m5, m6                        ; m5 = c1, c2, c1, c2, ... (m6 is free for reuse below)
    add lend    , lend                      ; samples -> bytes (32-bit op, upper half of lenq cleared)
    ; point all three buffers at their end and walk a negative offset up to 0
    add in1q    , lenq
    add in2q    , lenq
    add outq    , lenq
    neg lenq
        jge .end                            ; nothing to do: skip the do-while style loop
.next:
    ; interleave in1/in2 samples so pmaddwd yields in1*c1 + in2*c2 per dword
    mov%1        m0, [in1q + lenq         ]
    mov%1        m2, [in2q + lenq         ]
    mova         m1, m0
    punpcklwd    m0, m2
    punpckhwd    m1, m2

    mov%1        m2, [in1q + lenq + mmsize]
    mov%1        m6, [in2q + lenq + mmsize]
    mova         m3, m2
    punpcklwd    m2, m6
    punpckhwd    m3, m6

    pmaddwd      m0, m5
    pmaddwd      m1, m5
    pmaddwd      m2, m5
    pmaddwd      m3, m5
    paddd        m0, m7
    paddd        m1, m7
    paddd        m2, m7
    paddd        m3, m7
    psrad        m0, m4
    psrad        m1, m4
    psrad        m2, m4
    psrad        m3, m4
    packssdw     m0, m1                     ; back to int16 with signed saturation
    packssdw     m2, m3
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m2
    add        lenq, mmsize*2
        jl .next                            ; loop while the offset is still negative
.end:
%if mmsize == 8
    emms                                    ; MMX registers were used above, also on the early exit
    RET
%else
    REP_RET
%endif
%endmacro
224
225
; Instantiate the mixers per instruction set. Every 'a' (aligned) function
; jumps to a label defined inside the matching 'u' (unaligned) function, so
; both variants are always emitted together under the same INIT_* setting.

; 16-bit integer mixers, MMX
INIT_MMX mmx
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

; float mixers, SSE
INIT_XMM sse
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a

; 16-bit integer mixers, SSE2
INIT_XMM sse2
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

; float mixers, AVX -- only when the assembler supports AVX
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a
%endif
251