• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include "libavcodec/aacpsdsp.h"
20 #include "libavutil/intfloat.h"
21 #include "libavutil/mem_internal.h"
22 
23 #include "checkasm.h"
24 
/* Value passed as the count argument to hybrid_analysis; also the number of
 * filter rows allocated for that test. */
#define N 32
/* Value passed as the stride argument to hybrid_analysis. */
#define STRIDE 128
/* Number of elements (or element pairs) in most of the test buffers. */
#define BUF_SIZE (N * STRIDE)
28 
/*
 * Fill the first len elements of buf with values obtained from rnd(),
 * converted to INTFLOAT and divided by UINT_MAX.
 *
 * len is evaluated exactly once; buf is evaluated once per element, so it
 * must not have side effects. The internal names carry a randomize_ prefix
 * so they cannot shadow a caller variable (such as a loop index named i)
 * that appears in either argument.
 */
#define randomize(buf, len) do {                                          \
    const int randomize_len_ = (len);                                     \
    int randomize_idx_;                                                   \
    for (randomize_idx_ = 0; randomize_idx_ < randomize_len_;             \
         randomize_idx_++) {                                              \
        const INTFLOAT randomize_val_ = (INTFLOAT)rnd() / UINT_MAX;       \
        (buf)[randomize_idx_] = randomize_val_;                           \
    }                                                                     \
} while (0)
36 
/* Tolerance handed to float_near_abs_eps_array() when comparing outputs. */
#define EPS 0.005
38 
/*
 * Zero the `bits` lowest bits of the 32-bit representation of each of the
 * first `len` elements of `buf`, going through union av_intfloat32.
 *
 * bits <= 0 leaves the buffer untouched; bits >= 32 clears every bit.
 * Both cases are handled explicitly because shifting a 32-bit value by a
 * negative amount or by its full width is undefined behaviour in C.
 */
static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
{
    unsigned mask;
    int i;

    if (bits <= 0)
        return;

    /* The mask does not depend on the element, so compute it once. */
    mask = bits < 32 ? 0xffffffffU << bits : 0U;

    for (i = 0; i < len; i++) {
        union av_intfloat32 u = { .f = buf[i] };
        u.i &= mask;
        buf[i] = u.f;
    }
}
48 
test_add_squares(void)49 static void test_add_squares(void)
50 {
51     LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
52     LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE]);
53     LOCAL_ALIGNED_16(INTFLOAT, src, [BUF_SIZE], [2]);
54 
55     declare_func(void, INTFLOAT *dst,
56                  const INTFLOAT (*src)[2], int n);
57 
58     randomize((INTFLOAT *)src, BUF_SIZE * 2);
59     randomize(dst0, BUF_SIZE);
60     memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
61     call_ref(dst0, src, BUF_SIZE);
62     call_new(dst1, src, BUF_SIZE);
63     if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
64         fail();
65     bench_new(dst1, src, BUF_SIZE);
66 }
67 
test_mul_pair_single(void)68 static void test_mul_pair_single(void)
69 {
70     LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
71     LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
72     LOCAL_ALIGNED_16(INTFLOAT, src0, [BUF_SIZE], [2]);
73     LOCAL_ALIGNED_16(INTFLOAT, src1, [BUF_SIZE]);
74 
75     declare_func(void, INTFLOAT (*dst)[2],
76                        INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
77 
78     randomize((INTFLOAT *)src0, BUF_SIZE * 2);
79     randomize(src1, BUF_SIZE);
80     call_ref(dst0, src0, src1, BUF_SIZE);
81     call_new(dst1, src0, src1, BUF_SIZE);
82     if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
83         fail();
84     bench_new(dst1, src0, src1, BUF_SIZE);
85 }
86 
test_hybrid_analysis(void)87 static void test_hybrid_analysis(void)
88 {
89     LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
90     LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
91     LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
92     LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
93 
94     declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
95                  const INTFLOAT (*filter)[8][2],
96                  ptrdiff_t stride, int n);
97 
98     randomize((INTFLOAT *)in, 13 * 2);
99     randomize((INTFLOAT *)filter, N * 8 * 2);
100 
101     randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
102     memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
103 
104     call_ref(dst0, in, filter, STRIDE, N);
105     call_new(dst1, in, filter, STRIDE, N);
106 
107     if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
108         fail();
109     bench_new(dst1, in, filter, STRIDE, N);
110 }
111 
test_hybrid_analysis_ileave(void)112 static void test_hybrid_analysis_ileave(void)
113 {
114     LOCAL_ALIGNED_16(INTFLOAT, in,   [2], [38][64]);
115     LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
116     LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
117 
118     declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
119                        int i, int len);
120 
121     randomize((INTFLOAT *)out0, 91 * 32 * 2);
122     randomize((INTFLOAT *)in,    2 * 38 * 64);
123     memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
124 
125     /* len is hardcoded to 32 as that's the only value used in
126        libavcodec. asm functions are likely to be optimized
127        hardcoding this value in their loops and could fail with
128        anything else.
129        i is hardcoded to the two values currently used by the
130        aac decoder because the arm neon implementation is
131        micro-optimized for them and will fail for almost every
132        other value. */
133     call_ref(out0, in, 3, 32);
134     call_new(out1, in, 3, 32);
135 
136     /* the function just moves data around, so memcmp is enough */
137     if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
138         fail();
139 
140     call_ref(out0, in, 5, 32);
141     call_new(out1, in, 5, 32);
142 
143     if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
144         fail();
145 
146     bench_new(out1, in, 3, 32);
147 }
148 
test_hybrid_synthesis_deint(void)149 static void test_hybrid_synthesis_deint(void)
150 {
151     LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
152     LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
153     LOCAL_ALIGNED_16(INTFLOAT, in,  [91], [32][2]);
154 
155     declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
156                        int i, int len);
157 
158     randomize((INTFLOAT *)in,  91 * 32 * 2);
159     randomize((INTFLOAT *)out0, 2 * 38 * 64);
160     memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
161 
162     /* len is hardcoded to 32 as that's the only value used in
163        libavcodec. asm functions are likely to be optimized
164        hardcoding this value in their loops and could fail with
165        anything else.
166        i is hardcoded to the two values currently used by the
167        aac decoder because the arm neon implementation is
168        micro-optimized for them and will fail for almost every
169        other value. */
170     call_ref(out0, in, 3, 32);
171     call_new(out1, in, 3, 32);
172 
173     /* the function just moves data around, so memcmp is enough */
174     if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
175         fail();
176 
177     call_ref(out0, in, 5, 32);
178     call_new(out1, in, 5, 32);
179 
180     if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
181         fail();
182 
183     bench_new(out1, in, 3, 32);
184 }
185 
/*
 * Check both entries of psdsp->stereo_interpolate[]. For each entry that
 * check_func() selects, run call_ref and call_new on identical copies of
 * random l/r buffers with fresh random h and h_step, fail if either
 * channel differs by more than EPS, then benchmark the new implementation
 * on fresh copies of the inputs.
 */
static void test_stereo_interpolate(PSDSPContext *psdsp)
{
    int variant;
    const size_t chan_bytes = BUF_SIZE * 2 * sizeof(INTFLOAT);
    LOCAL_ALIGNED_16(INTFLOAT, l,  [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r,  [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
    LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
    LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);

    declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
                       INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);

    randomize((INTFLOAT *)l, BUF_SIZE * 2);
    randomize((INTFLOAT *)r, BUF_SIZE * 2);

    for (variant = 0; variant < 2; variant++) {
        if (!check_func(psdsp->stereo_interpolate[variant], "ps_stereo_interpolate%s", variant ? "_ipdopd" : ""))
            continue;

        /* The l/r buffers are passed non-const, so each call gets its own
         * copy of the same starting data. */
        memcpy(l0, l, chan_bytes);
        memcpy(l1, l, chan_bytes);
        memcpy(r0, r, chan_bytes);
        memcpy(r1, r, chan_bytes);

        randomize((INTFLOAT *)h, 2 * 4);
        randomize((INTFLOAT *)h_step, 2 * 4);
        // h_step gets accumulated BUF_SIZE times into a float variable
        // that may or may not carry extra intermediate precision, which
        // could make the implementations diverge. Clearing its 14 least
        // significant bits (roughly log2(BUF_SIZE)) makes the result the
        // same regardless of any extra precision in the accumulator.
        clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);

        call_ref(l0, r0, h, h_step, BUF_SIZE);
        call_new(l1, r1, h, h_step, BUF_SIZE);
        if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
            !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2)) {
            fail();
        }

        /* Restore the original inputs before benchmarking. */
        memcpy(l1, l, chan_bytes);
        memcpy(r1, r, chan_bytes);
        bench_new(l1, r1, h, h_step, BUF_SIZE);
    }
}
233 
checkasm_check_aacpsdsp(void)234 void checkasm_check_aacpsdsp(void)
235 {
236     PSDSPContext psdsp;
237 
238     ff_psdsp_init(&psdsp);
239 
240     if (check_func(psdsp.add_squares, "ps_add_squares"))
241         test_add_squares();
242     report("add_squares");
243 
244     if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
245         test_mul_pair_single();
246     report("mul_pair_single");
247 
248     if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
249         test_hybrid_analysis();
250     report("hybrid_analysis");
251 
252     if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
253         test_hybrid_analysis_ileave();
254     report("hybrid_analysis_ileave");
255 
256     if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
257         test_hybrid_synthesis_deint();
258     report("hybrid_synthesis_deint");
259 
260     test_stereo_interpolate(&psdsp);
261     report("stereo_interpolate");
262 }
263