1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #include "libavcodec/aacpsdsp.h"
20 #include "libavutil/intfloat.h"
21 #include "libavutil/mem_internal.h"
22
23 #include "checkasm.h"
24
25 #define N 32
26 #define STRIDE 128
27 #define BUF_SIZE (N * STRIDE)
28
/*
 * Fill the first `len` elements of `buf` with pseudo-random values.
 *
 * Each element is set to rnd() converted to INTFLOAT and divided by
 * UINT_MAX. Both macro arguments are parenthesized where they are used so
 * that compound expressions (e.g. a conditional expression passed as `len`)
 * keep their intended meaning, and the loop index has a macro-specific name
 * so it cannot capture an identifier used in the arguments.
 * Note that `len` is re-evaluated on every iteration.
 */
#define randomize(buf, len) do {                                        \
    int randomize_idx;                                                  \
    for (randomize_idx = 0; randomize_idx < (len); randomize_idx++) {   \
        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;                  \
        (buf)[randomize_idx] = f;                                       \
    }                                                                   \
} while (0)
36
37 #define EPS 0.005
38
/*
 * Zero the lowest `bits` bits of the 32-bit pattern of each of the first
 * `len` elements of `buf`.
 *
 * Every element is reinterpreted through union av_intfloat32, masked, and
 * written back. The mask is built so that the shift count is always in
 * range: a count of zero or less keeps every bit, and a count of 32 or
 * more clears every bit. Shifting a 32-bit value by a negative amount or
 * by its full width would otherwise be undefined behavior.
 */
static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
{
    uint32_t mask;
    int i;

    if (bits <= 0)
        mask = 0xffffffffu;
    else if (bits < 32)
        mask = (uint32_t)(0xffffffffu << bits);
    else
        mask = 0;

    for (i = 0; i < len; i++) {
        union av_intfloat32 u = { .f = buf[i] };
        u.i &= mask;
        buf[i] = u.f;
    }
}
48
test_add_squares(void)49 static void test_add_squares(void)
50 {
51 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
52 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
53 LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
54
55 declare_func(void, INTFLOAT *dst,
56 const INTFLOAT (*src)[2], int n);
57
58 randomize((INTFLOAT *)src, BUF_SIZE * 2);
59 randomize(dst0, BUF_SIZE);
60 memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
61 call_ref(dst0, src, BUF_SIZE);
62 call_new(dst1, src, BUF_SIZE);
63 if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
64 fail();
65 bench_new(dst1, src, BUF_SIZE);
66 }
67
test_mul_pair_single(void)68 static void test_mul_pair_single(void)
69 {
70 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
71 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
72 LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
73 LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
74
75 declare_func(void, INTFLOAT (*dst)[2],
76 INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
77
78 randomize((INTFLOAT *)src0, BUF_SIZE * 2);
79 randomize(src1, BUF_SIZE);
80 call_ref(dst0, src0, src1, BUF_SIZE);
81 call_new(dst1, src0, src1, BUF_SIZE);
82 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
83 fail();
84 bench_new(dst1, src0, src1, BUF_SIZE);
85 }
86
test_hybrid_analysis(void)87 static void test_hybrid_analysis(void)
88 {
89 LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
90 LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
91 LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
92 LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
93
94 declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
95 const INTFLOAT (*filter)[8][2],
96 ptrdiff_t stride, int n);
97
98 randomize((INTFLOAT *)in, 13 * 2);
99 randomize((INTFLOAT *)filter, N * 8 * 2);
100
101 randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
102 memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
103
104 call_ref(dst0, in, filter, STRIDE, N);
105 call_new(dst1, in, filter, STRIDE, N);
106
107 if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
108 fail();
109 bench_new(dst1, in, filter, STRIDE, N);
110 }
111
test_hybrid_analysis_ileave(void)112 static void test_hybrid_analysis_ileave(void)
113 {
114 LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
115 LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
116 LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
117
118 declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
119 int i, int len);
120
121 randomize((INTFLOAT *)out0, 91 * 32 * 2);
122 randomize((INTFLOAT *)in, 2 * 38 * 64);
123 memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
124
125 /* len is hardcoded to 32 as that's the only value used in
126 libavcodec. asm functions are likely to be optimized
127 hardcoding this value in their loops and could fail with
128 anything else.
129 i is hardcoded to the two values currently used by the
130 aac decoder because the arm neon implementation is
131 micro-optimized for them and will fail for almost every
132 other value. */
133 call_ref(out0, in, 3, 32);
134 call_new(out1, in, 3, 32);
135
136 /* the function just moves data around, so memcmp is enough */
137 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
138 fail();
139
140 call_ref(out0, in, 5, 32);
141 call_new(out1, in, 5, 32);
142
143 if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
144 fail();
145
146 bench_new(out1, in, 3, 32);
147 }
148
test_hybrid_synthesis_deint(void)149 static void test_hybrid_synthesis_deint(void)
150 {
151 LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
152 LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
153 LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
154
155 declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
156 int i, int len);
157
158 randomize((INTFLOAT *)in, 91 * 32 * 2);
159 randomize((INTFLOAT *)out0, 2 * 38 * 64);
160 memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
161
162 /* len is hardcoded to 32 as that's the only value used in
163 libavcodec. asm functions are likely to be optimized
164 hardcoding this value in their loops and could fail with
165 anything else.
166 i is hardcoded to the two values currently used by the
167 aac decoder because the arm neon implementation is
168 micro-optimized for them and will fail for almost every
169 other value. */
170 call_ref(out0, in, 3, 32);
171 call_new(out1, in, 3, 32);
172
173 /* the function just moves data around, so memcmp is enough */
174 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
175 fail();
176
177 call_ref(out0, in, 5, 32);
178 call_new(out1, in, 5, 32);
179
180 if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
181 fail();
182
183 bench_new(out1, in, 3, 32);
184 }
185
/*
 * Test both entries of psdsp->stereo_interpolate[].
 *
 * Index 0 is registered as "ps_stereo_interpolate" and index 1 as
 * "ps_stereo_interpolate_ipdopd". Entries for which check_func() yields
 * zero are skipped. For the others, fresh copies of the same random l/r
 * data are processed through call_ref() and call_new() with random h and
 * h_step tables, and both channels must match within EPS (absolute).
 * The "new" buffers are restored from the pristine input before
 * bench_new() is invoked.
 */
static void test_stereo_interpolate(PSDSPContext *psdsp)
{
    int i;
    LOCAL_ALIGNED_16(INTFLOAT, l,      [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r,      [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, l0,     [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r0,     [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, l1,     [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r1,     [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, h,      [2], [4]);
    LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);

    declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
                 INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);

    randomize((INTFLOAT *)l, BUF_SIZE * 2);
    randomize((INTFLOAT *)r, BUF_SIZE * 2);

    for (i = 0; i < 2; i++) {
        if (!check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : ""))
            continue;

        /* Give the reference and the new run their own working copies. */
        memcpy(l0, l, BUF_SIZE * sizeof(*l));
        memcpy(l1, l, BUF_SIZE * sizeof(*l));
        memcpy(r0, r, BUF_SIZE * sizeof(*r));
        memcpy(r1, r, BUF_SIZE * sizeof(*r));

        randomize((INTFLOAT *)h, 2 * 4);
        randomize((INTFLOAT *)h_step, 2 * 4);
        /* h_step gets accumulated BUF_SIZE times into a float variable
           that may or may not carry extra intermediate precision.
           Dropping its 14 least significant bits (roughly
           log2(BUF_SIZE)) keeps the result the same regardless of any
           extra precision in the accumulator. */
        clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);

        call_ref(l0, r0, h, h_step, BUF_SIZE);
        call_new(l1, r1, h, h_step, BUF_SIZE);

        if (!(float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) &&
              float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2)))
            fail();

        /* Benchmark on the original input rather than processed data. */
        memcpy(l1, l, BUF_SIZE * sizeof(*l));
        memcpy(r1, r, BUF_SIZE * sizeof(*r));
        bench_new(l1, r1, h, h_step, BUF_SIZE);
    }
}
233
/*
 * Entry point of the aacpsdsp checkasm tests.
 *
 * Initializes a PSDSPContext with ff_psdsp_init() and then, for each
 * function pointer in it, runs the matching test_*() helper when
 * check_func() yields non-zero, followed by a report() call naming the
 * group. The order of check_func(), the test call and report() is kept
 * as is, since the helpers rely on the state set up by check_func().
 */
void checkasm_check_aacpsdsp(void)
{
    PSDSPContext psdsp;

    ff_psdsp_init(&psdsp);

    if (check_func(psdsp.add_squares, "ps_add_squares"))
        test_add_squares();
    report("add_squares");

    if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
        test_mul_pair_single();
    report("mul_pair_single");

    if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
        test_hybrid_analysis();
    report("hybrid_analysis");

    if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
        test_hybrid_analysis_ileave();
    report("hybrid_analysis_ileave");

    if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
        test_hybrid_synthesis_deint();
    report("hybrid_synthesis_deint");

    /* Called unconditionally: this helper performs its own check_func()
       for each of the two stereo_interpolate[] entries. */
    test_stereo_interpolate(&psdsp);
    report("stereo_interpolate");
}
263