1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #include "libavcodec/aacpsdsp.h"
20 #include "libavutil/intfloat.h"
21
22 #include "checkasm.h"
23
/* Number of filter entries and the count argument used by the hybrid_analysis test. */
#define N 32
/* Stride argument passed to hybrid_analysis. */
#define STRIDE 128
/* Element count (before any trailing [2] dimension) of the generic test buffers. */
#define BUF_SIZE (N * STRIDE)

/*
 * Fill the first len elements of buf with rnd() converted to INTFLOAT and
 * divided by UINT_MAX.
 *
 * The loop index uses a name unlikely to collide with identifiers in the
 * caller (the macro is expanded inside loops that already use "i"), and
 * both arguments are parenthesized so arbitrary expressions may be passed.
 */
#define randomize(buf, len) do {                                        \
    int randomize_idx_;                                                 \
    for (randomize_idx_ = 0; randomize_idx_ < (len); randomize_idx_++)  \
        (buf)[randomize_idx_] = (INTFLOAT)rnd() / UINT_MAX;             \
} while (0)

/* Absolute tolerance used when comparing reference and tested output. */
#define EPS 0.005
37
/*
 * Zero the lowest "bits" bits of the 32-bit representation of each of the
 * first "len" elements of buf, in place.
 *
 * buf  - values to modify
 * len  - number of elements to process
 * bits - number of low-order bits to clear; values <= 0 leave the data
 *        untouched and values >= 32 clear every bit, so the shift below
 *        is never performed with an out-of-range count (which would be
 *        undefined behavior).
 */
static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
{
    /* The mask does not depend on the element, so build it once. */
    const uint32_t mask = bits >= 32 ? 0
                        : bits <= 0  ? 0xffffffffU
                                     : 0xffffffffU << bits;
    int i;

    for (i = 0; i < len; i++) {
        /* Reinterpret the value as an integer through the union. */
        union av_intfloat32 u = { .f = buf[i] };
        u.i &= mask;
        buf[i] = u.f;
    }
}
47
test_add_squares(void)48 static void test_add_squares(void)
49 {
50 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
51 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
52 LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
53
54 declare_func(void, INTFLOAT *dst,
55 const INTFLOAT (*src)[2], int n);
56
57 randomize((INTFLOAT *)src, BUF_SIZE * 2);
58 randomize(dst0, BUF_SIZE);
59 memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
60 call_ref(dst0, src, BUF_SIZE);
61 call_new(dst1, src, BUF_SIZE);
62 if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
63 fail();
64 bench_new(dst1, src, BUF_SIZE);
65 }
66
test_mul_pair_single(void)67 static void test_mul_pair_single(void)
68 {
69 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
70 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
71 LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
72 LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
73
74 declare_func(void, INTFLOAT (*dst)[2],
75 INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
76
77 randomize((INTFLOAT *)src0, BUF_SIZE * 2);
78 randomize(src1, BUF_SIZE);
79 call_ref(dst0, src0, src1, BUF_SIZE);
80 call_new(dst1, src0, src1, BUF_SIZE);
81 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
82 fail();
83 bench_new(dst1, src0, src1, BUF_SIZE);
84 }
85
test_hybrid_analysis(void)86 static void test_hybrid_analysis(void)
87 {
88 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
89 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
90 LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
91 LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
92
93 declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
94 const INTFLOAT (*filter)[8][2],
95 ptrdiff_t stride, int n);
96
97 randomize((INTFLOAT *)in, 13 * 2);
98 randomize((INTFLOAT *)filter, N * 8 * 2);
99
100 randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
101 memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
102
103 call_ref(dst0, in, filter, STRIDE, N);
104 call_new(dst1, in, filter, STRIDE, N);
105
106 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
107 fail();
108 bench_new(dst1, in, filter, STRIDE, N);
109 }
110
test_hybrid_analysis_ileave(void)111 static void test_hybrid_analysis_ileave(void)
112 {
113 LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
114 LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
115 LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
116
117 declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
118 int i, int len);
119
120 randomize((INTFLOAT *)out0, 91 * 32 * 2);
121 randomize((INTFLOAT *)in, 2 * 38 * 64);
122 memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
123
124 /* len is hardcoded to 32 as that's the only value used in
125 libavcodec. asm functions are likely to be optimized
126 hardcoding this value in their loops and could fail with
127 anything else.
128 i is hardcoded to the two values currently used by the
129 aac decoder because the arm neon implementation is
130 micro-optimized for them and will fail for almost every
131 other value. */
132 call_ref(out0, in, 3, 32);
133 call_new(out1, in, 3, 32);
134
135 /* the function just moves data around, so memcmp is enough */
136 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
137 fail();
138
139 call_ref(out0, in, 5, 32);
140 call_new(out1, in, 5, 32);
141
142 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
143 fail();
144
145 bench_new(out1, in, 3, 32);
146 }
147
test_hybrid_synthesis_deint(void)148 static void test_hybrid_synthesis_deint(void)
149 {
150 LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
151 LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
152 LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
153
154 declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
155 int i, int len);
156
157 randomize((INTFLOAT *)in, 91 * 32 * 2);
158 randomize((INTFLOAT *)out0, 2 * 38 * 64);
159 memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
160
161 /* len is hardcoded to 32 as that's the only value used in
162 libavcodec. asm functions are likely to be optimized
163 hardcoding this value in their loops and could fail with
164 anything else.
165 i is hardcoded to the two values currently used by the
166 aac decoder because the arm neon implementation is
167 micro-optimized for them and will fail for almost every
168 other value. */
169 call_ref(out0, in, 3, 32);
170 call_new(out1, in, 3, 32);
171
172 /* the function just moves data around, so memcmp is enough */
173 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
174 fail();
175
176 call_ref(out0, in, 5, 32);
177 call_new(out1, in, 5, 32);
178
179 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
180 fail();
181
182 bench_new(out1, in, 3, 32);
183 }
184
/*
 * Check both entries of psdsp->stereo_interpolate: for each one that
 * check_func selects, run the reference and the implementation under test
 * on copies of the same random left/right data and require the results to
 * agree within EPS.
 */
static void test_stereo_interpolate(PSDSPContext *psdsp)
{
    enum { PAIR_ELEMS = BUF_SIZE * 2 };
    int i;

    LOCAL_ALIGNED_16(INTFLOAT, left,      [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, right,     [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, left_ref,  [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, right_ref, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, left_new,  [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, right_new, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, coef,      [2], [4]);
    LOCAL_ALIGNED_16(INTFLOAT, coef_step, [2], [4]);

    declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
                 INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);

    randomize((INTFLOAT *)left, PAIR_ELEMS);
    randomize((INTFLOAT *)right, PAIR_ELEMS);

    for (i = 0; i < 2; i++) {
        if (!check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : ""))
            continue;

        /* Each implementation works on its own copy of the pristine data. */
        memcpy(left_ref,  left,  PAIR_ELEMS * sizeof(INTFLOAT));
        memcpy(left_new,  left,  PAIR_ELEMS * sizeof(INTFLOAT));
        memcpy(right_ref, right, PAIR_ELEMS * sizeof(INTFLOAT));
        memcpy(right_new, right, PAIR_ELEMS * sizeof(INTFLOAT));

        randomize((INTFLOAT *)coef, 2 * 4);
        randomize((INTFLOAT *)coef_step, 2 * 4);
        /* Zero the 14 least significant bits of every step value. The step
         * is accumulated BUF_SIZE times into a float variable that may or
         * may not have extra intermediate precision; dropping roughly
         * log2(BUF_SIZE) low bits gives the same result regardless of any
         * extra precision in the accumulator. */
        clear_less_significant_bits((INTFLOAT *)coef_step, 2 * 4, 14);

        call_ref(left_ref, right_ref, coef, coef_step, BUF_SIZE);
        call_new(left_new, right_new, coef, coef_step, BUF_SIZE);
        if (!(float_near_abs_eps_array((float *)left_ref, (float *)left_new, EPS, BUF_SIZE * 2) &&
              float_near_abs_eps_array((float *)right_ref, (float *)right_new, EPS, BUF_SIZE * 2)))
            fail();

        /* Restore the pristine input before benchmarking. */
        memcpy(left_new,  left,  PAIR_ELEMS * sizeof(INTFLOAT));
        memcpy(right_new, right, PAIR_ELEMS * sizeof(INTFLOAT));
        bench_new(left_new, right_new, coef, coef_step, BUF_SIZE);
    }
}
232
checkasm_check_aacpsdsp(void)233 void checkasm_check_aacpsdsp(void)
234 {
235 PSDSPContext psdsp;
236
237 ff_psdsp_init(&psdsp);
238
239 if (check_func(psdsp.add_squares, "ps_add_squares"))
240 test_add_squares();
241 report("add_squares");
242
243 if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
244 test_mul_pair_single();
245 report("mul_pair_single");
246
247 if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
248 test_hybrid_analysis();
249 report("hybrid_analysis");
250
251 if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
252 test_hybrid_analysis_ileave();
253 report("hybrid_analysis_ileave");
254
255 if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
256 test_hybrid_synthesis_deint();
257 report("hybrid_synthesis_deint");
258
259 test_stereo_interpolate(&psdsp);
260 report("stereo_interpolate");
261 }
262