1 /* Copyright 2016 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6 #include <math.h> /* for abs() */
7 #include <stdio.h> /* for printf() */
8 #include <string.h> /* for memset() */
9 #include <stdint.h> /* for uint64 definition */
10 #include <stdlib.h> /* for exit() definition */
11 #include <time.h> /* for clock_gettime */
12
13 #include "../drc_math.h"
14 #include "../dsp_util.h"
15
16 /* Nanoseconds per second: scales a timespec tv_sec delta to ns before the
   result is divided down to milliseconds. */
17 #define BILLION 1000000000LL
18 /* Number of back-to-back calls made in each repeat loop (e.g. the timed
   benchmark loops). */
19 #define ITERATIONS 400000
20
21 #if defined(__aarch64__)
float_to_short(float a)22 int16_t float_to_short(float a)
23 {
24 int32_t ret;
25 asm volatile("fcvtas %s[ret], %s[a]\n"
26 "sqxtn %h[ret], %s[ret]\n"
27 : [ret] "=w"(ret)
28 : [a] "w"(a)
29 :);
30 return (int16_t)(ret);
31 }
32 #else
/*
 * Portable reference float -> int16 conversion: push the value half a step
 * away from zero so the truncating cast rounds half away from zero, then
 * clamp to [-32768, 32767] before narrowing.
 *
 * Args:
 *    a - Value to convert (already scaled to the int16 range by the caller).
 *
 * Returns:
 *    The rounded, clamped value as int16_t.
 */
int16_t float_to_short(float a)
{
	float clamped;

	if (a >= 0)
		a += 0.5f;
	else
		a += -0.5f;

	clamped = max(-32768, min(32767, a));
	return (int16_t)clamped;
}
38 #endif
39
/*
 * Plain C reference deinterleave: splits interleaved int16 samples into one
 * float buffer per channel, scaling each sample by 1/32768.
 *
 * Args:
 *    input - Interleaved samples, frame by frame; frames * channels values
 *        are read.
 *    output - Array of `channels` destination buffers; `frames` floats are
 *        written to each.
 *    channels - Number of channels.  Nothing is read or written if <= 0.
 *    frames - Number of frames.  Nothing is read or written if <= 0.
 */
void dsp_util_deinterleave_reference(int16_t *input, float *const *output,
				     int channels, int frames)
{
	const int16_t *src = input;
	int frame, ch;

	/*
	 * Index the destination buffers directly instead of keeping a
	 * variable-length array of cursors on the stack.
	 */
	for (frame = 0; frame < frames; frame++)
		for (ch = 0; ch < channels; ch++)
			output[ch][frame] = *src++ / 32768.0f;
}
53
/*
 * Plain C reference interleave: scales each float sample by 32768, converts
 * it with float_to_short() and writes the results frame by frame.
 *
 * Args:
 *    input - Array of `channels` source buffers; `frames` floats are read
 *        from each.
 *    output - Destination for frames * channels interleaved int16 samples.
 *    channels - Number of channels.  Nothing is read or written if <= 0.
 *    frames - Number of frames.  Nothing is read or written if <= 0.
 */
void dsp_util_interleave_reference(float *const *input, int16_t *output,
				   int channels, int frames)
{
	int16_t *dst = output;
	int frame, ch;

	/*
	 * Index the source buffers directly instead of keeping a
	 * variable-length array of cursors on the stack.
	 */
	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++) {
			float scaled = input[ch][frame] * 32768.0f;

			*dst++ = float_to_short(scaled);
		}
	}
}
69
70 /* Use fixed size allocation to avoid performance fluctuation of allocation.
   Buffers are sized for MAXSAMPLES frames; the benchmark loops halve the
   frame count from MAXSAMPLES down to MINSAMPLES. */
71 #define MAXSAMPLES 4096
72 #define MINSAMPLES 256
73 /* Extra bytes appended to every buffer.  They are pre-filled with a known
   pattern and included in the output comparisons, so a write past the sample
   area shows up as a mismatch. */
74 #define PAD 4096
75
TestRounding(float in,int16_t expected,int samples)76 void TestRounding(float in, int16_t expected, int samples)
77 {
78 int i;
79 int max_diff;
80 int d;
81
82 short *in_shorts = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
83 float *out_floats_left_c = (float *)malloc(MAXSAMPLES * 4 + PAD);
84 float *out_floats_right_c = (float *)malloc(MAXSAMPLES * 4 + PAD);
85 float *out_floats_left_opt = (float *)malloc(MAXSAMPLES * 4 + PAD);
86 float *out_floats_right_opt = (float *)malloc(MAXSAMPLES * 4 + PAD);
87 short *out_shorts_c = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
88 short *out_shorts_opt = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
89
90 memset(in_shorts, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
91 memset(out_floats_left_c, 0xfb, MAXSAMPLES * 4 + PAD);
92 memset(out_floats_right_c, 0xfb, MAXSAMPLES * 4 + PAD);
93 memset(out_floats_left_opt, 0xfb, MAXSAMPLES * 4 + PAD);
94 memset(out_floats_right_opt, 0xfb, MAXSAMPLES * 4 + PAD);
95 memset(out_shorts_c, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
96 memset(out_shorts_opt, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
97
98 float *out_floats_ptr_c[2];
99 float *out_floats_ptr_opt[2];
100
101 out_floats_ptr_c[0] = out_floats_left_c;
102 out_floats_ptr_c[1] = out_floats_right_c;
103 out_floats_ptr_opt[0] = out_floats_left_opt;
104 out_floats_ptr_opt[1] = out_floats_right_opt;
105
106 for (i = 0; i < MAXSAMPLES; ++i) {
107 out_floats_left_c[i] = in;
108 out_floats_right_c[i] = in;
109 }
110
111 /* reference C interleave */
112 dsp_util_interleave_reference(out_floats_ptr_c, out_shorts_c, 2,
113 samples);
114
115 /* measure optimized interleave */
116 for (i = 0; i < ITERATIONS; ++i) {
117 dsp_util_interleave(out_floats_ptr_c, (uint8_t *)out_shorts_opt,
118 2, SND_PCM_FORMAT_S16_LE, samples);
119 }
120
121 max_diff = 0;
122 for (i = 0; i < (MAXSAMPLES * 2 + PAD / 2); ++i) {
123 d = abs(out_shorts_c[i] - out_shorts_opt[i]);
124 if (d > max_diff) {
125 max_diff = d;
126 }
127 }
128 printf("test interleave compare %6d, %10f %13f %6d %6d %6d %s\n",
129 max_diff, in, in * 32768.0f, out_shorts_c[0], out_shorts_opt[0],
130 expected,
131 max_diff == 0 ? "PASS" :
132 (out_shorts_opt[0] == expected ?
133 "EXPECTED DIFFERENCE" :
134 "UNEXPECTED DIFFERENCE"));
135
136 /* measure reference C deinterleave */
137 dsp_util_deinterleave_reference(in_shorts, out_floats_ptr_c, 2,
138 samples);
139
140 /* measure optimized deinterleave */
141 dsp_util_deinterleave((uint8_t *)in_shorts, out_floats_ptr_opt, 2,
142 SND_PCM_FORMAT_S16_LE, samples);
143
144 d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], samples * 4);
145 if (d)
146 printf("left compare %d, %f %f\n", d, out_floats_ptr_c[0][0],
147 out_floats_ptr_opt[0][0]);
148 d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], samples * 4);
149 if (d)
150 printf("right compare %d, %f %f\n", d, out_floats_ptr_c[1][0],
151 out_floats_ptr_opt[1][0]);
152
153 free(in_shorts);
154 free(out_floats_left_c);
155 free(out_floats_right_c);
156 free(out_floats_left_opt);
157 free(out_floats_right_opt);
158 free(out_shorts_c);
159 free(out_shorts_opt);
160 }
161
main(int argc,char ** argv)162 int main(int argc, char **argv)
163 {
164 float e = 0.000000001f;
165 int samples = 16;
166
167 dsp_enable_flush_denormal_to_zero();
168
169 // Print headings for TestRounding output.
170 printf("test interleave compare maxdif, float, float * 32k "
171 "C SIMD expect pass\n");
172
173 // test clamping
174 TestRounding(1.0f, 32767, samples);
175 TestRounding(-1.0f, -32768, samples);
176 TestRounding(1.1f, 32767, samples);
177 TestRounding(-1.1f, -32768, samples);
178 TestRounding(2000000000.f / 32768.f, 32767, samples);
179 TestRounding(-2000000000.f / 32768.f, -32768, samples);
180
181 /* Infinity produces zero on arm64. */
182 #if defined(__aarch64__)
183 #define EXPECTED_INF_RESULT 0
184 #define EXPECTED_NEGINF_RESULT 0
185 #elif defined(__i386__) || defined(__x86_64__)
186 #define EXPECTED_INF_RESULT -32768
187 #define EXPECTED_NEGINF_RESULT 0
188 #else
189 #define EXPECTED_INF_RESULT 32767
190 #define EXPECTED_NEGINF_RESULT -32768
191 #endif
192
193 TestRounding(5000000000.f / 32768.f, EXPECTED_INF_RESULT, samples);
194 TestRounding(-5000000000.f / 32768.f, EXPECTED_NEGINF_RESULT, samples);
195
196 // test infinity
197 union ieee754_float inf;
198 inf.ieee.negative = 0;
199 inf.ieee.exponent = 0xfe;
200 inf.ieee.mantissa = 0x7fffff;
201 TestRounding(inf.f, EXPECTED_INF_RESULT, samples); // expect fail
202 inf.ieee.negative = 1;
203 inf.ieee.exponent = 0xfe;
204 inf.ieee.mantissa = 0x7fffff;
205 TestRounding(inf.f, EXPECTED_NEGINF_RESULT, samples); // expect fail
206
207 // test rounding
208 TestRounding(0.25f, 8192, samples);
209 TestRounding(-0.25f, -8192, samples);
210 TestRounding(0.50f, 16384, samples);
211 TestRounding(-0.50f, -16384, samples);
212 TestRounding(1.0f / 32768.0f, 1, samples);
213 TestRounding(-1.0f / 32768.0f, -1, samples);
214 TestRounding(1.0f / 32768.0f + e, 1, samples);
215 TestRounding(-1.0f / 32768.0f - e, -1, samples);
216 TestRounding(1.0f / 32768.0f - e, 1, samples);
217 TestRounding(-1.0f / 32768.0f + e, -1, samples);
218
219 /* Rounding on 'tie' is different for Intel. */
220 #if defined(__i386__) || defined(__x86_64__)
221 TestRounding(0.5f / 32768.0f, 0, samples); /* Expect round to even */
222 TestRounding(-0.5f / 32768.0f, 0, samples);
223 #else
224 TestRounding(0.5f / 32768.0f, 1, samples); /* Expect round away */
225 TestRounding(-0.5f / 32768.0f, -1, samples);
226 #endif
227
228 TestRounding(0.5f / 32768.0f + e, 1, samples);
229 TestRounding(-0.5f / 32768.0f - e, 1, samples);
230 TestRounding(0.5f / 32768.0f - e, 0, samples);
231 TestRounding(-0.5f / 32768.0f + e, 0, samples);
232
233 TestRounding(1.5f / 32768.0f, 2, samples);
234 TestRounding(-1.5f / 32768.0f, -2, samples);
235 TestRounding(1.5f / 32768.0f + e, 2, samples);
236 TestRounding(-1.5f / 32768.0f - e, -2, samples);
237 TestRounding(1.5f / 32768.0f - e, 1, samples);
238 TestRounding(-1.5f / 32768.0f + e, -1, samples);
239
240 /* Test denormals */
241 union ieee754_float denorm;
242 denorm.ieee.negative = 0;
243 denorm.ieee.exponent = 0;
244 denorm.ieee.mantissa = 1;
245 TestRounding(denorm.f, 0, samples);
246 denorm.ieee.negative = 1;
247 denorm.ieee.exponent = 0;
248 denorm.ieee.mantissa = 1;
249 TestRounding(denorm.f, 0, samples);
250
251 /* Test NaNs. Caveat Results vary by implementation. */
252 #if defined(__i386__) || defined(__x86_64__)
253 #define EXPECTED_NAN_RESULT -32768
254 #else
255 #define EXPECTED_NAN_RESULT 0
256 #endif
257 union ieee754_float nan; /* Quiet NaN */
258 nan.ieee.negative = 0;
259 nan.ieee.exponent = 0xff;
260 nan.ieee.mantissa = 0x400001;
261 TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
262 nan.ieee.negative = 0;
263 nan.ieee.exponent = 0xff;
264 nan.ieee.mantissa = 0x000001; /* Signalling NaN */
265 TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
266
267 /* Test Performance */
268 uint64_t diff;
269 struct timespec start, end;
270 int i;
271 int d;
272
273 short *in_shorts = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
274 float *out_floats_left_c = (float *)malloc(MAXSAMPLES * 4 + PAD);
275 float *out_floats_right_c = (float *)malloc(MAXSAMPLES * 4 + PAD);
276 float *out_floats_left_opt = (float *)malloc(MAXSAMPLES * 4 + PAD);
277 float *out_floats_right_opt = (float *)malloc(MAXSAMPLES * 4 + PAD);
278 short *out_shorts_c = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
279 short *out_shorts_opt = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
280
281 memset(in_shorts, 0x11, MAXSAMPLES * 2 * 2 + PAD);
282 memset(out_floats_left_c, 0x22, MAXSAMPLES * 4 + PAD);
283 memset(out_floats_right_c, 0x33, MAXSAMPLES * 4 + PAD);
284 memset(out_floats_left_opt, 0x44, MAXSAMPLES * 4 + PAD);
285 memset(out_floats_right_opt, 0x55, MAXSAMPLES * 4 + PAD);
286 memset(out_shorts_c, 0x66, MAXSAMPLES * 2 * 2 + PAD);
287 memset(out_shorts_opt, 0x66, MAXSAMPLES * 2 * 2 + PAD);
288
289 float *out_floats_ptr_c[2];
290 float *out_floats_ptr_opt[2];
291
292 out_floats_ptr_c[0] = out_floats_left_c;
293 out_floats_ptr_c[1] = out_floats_right_c;
294 out_floats_ptr_opt[0] = out_floats_left_opt;
295 out_floats_ptr_opt[1] = out_floats_right_opt;
296
297 /* Benchmark dsp_util_interleave */
298 for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
299 /* measure original C interleave */
300 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
301 for (i = 0; i < ITERATIONS; ++i) {
302 dsp_util_interleave_reference(out_floats_ptr_c,
303 out_shorts_c, 2, samples);
304 }
305 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
306 diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec -
307 start.tv_nsec) /
308 1000000;
309 printf("interleave ORIG size = %6d, elapsed time = %llu ms\n",
310 samples, (long long unsigned int)diff);
311
312 /* measure optimized interleave */
313 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
314 for (i = 0; i < ITERATIONS; ++i) {
315 dsp_util_interleave(out_floats_ptr_c,
316 (uint8_t *)out_shorts_opt, 2,
317 SND_PCM_FORMAT_S16_LE, samples);
318 }
319 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
320 diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec -
321 start.tv_nsec) /
322 1000000;
323 printf("interleave SIMD size = %6d, elapsed time = %llu ms\n",
324 samples, (long long unsigned int)diff);
325
326 /* Test C and SIMD output match */
327 d = memcmp(out_shorts_c, out_shorts_opt,
328 MAXSAMPLES * 2 * 2 + PAD);
329 if (d)
330 printf("interleave compare %d, %d %d, %d %d\n", d,
331 out_shorts_c[0], out_shorts_c[1],
332 out_shorts_opt[0], out_shorts_opt[1]);
333 }
334
335 /* Benchmark dsp_util_deinterleave */
336 for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
337 /* Measure original C deinterleave */
338 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
339 for (i = 0; i < ITERATIONS; ++i) {
340 dsp_util_deinterleave_reference(
341 in_shorts, out_floats_ptr_c, 2, samples);
342 }
343 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
344 diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec -
345 start.tv_nsec) /
346 1000000;
347 printf("deinterleave ORIG size = %6d, "
348 "elapsed time = %llu ms\n",
349 samples, (long long unsigned int)diff);
350
351 /* Measure optimized deinterleave */
352 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
353 for (i = 0; i < ITERATIONS; ++i) {
354 dsp_util_deinterleave((uint8_t *)in_shorts,
355 out_floats_ptr_opt, 2,
356 SND_PCM_FORMAT_S16_LE, samples);
357 }
358 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
359 diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec -
360 start.tv_nsec) /
361 1000000;
362 printf("deinterleave SIMD size = %6d, elapsed time = %llu ms\n",
363 samples, (long long unsigned int)diff);
364
365 /* Test C and SIMD output match */
366 d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0],
367 samples * 4);
368 if (d)
369 printf("left compare %d, %f %f\n", d,
370 out_floats_ptr_c[0][0],
371 out_floats_ptr_opt[0][0]);
372 d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1],
373 samples * 4);
374 if (d)
375 printf("right compare %d, %f %f\n", d,
376 out_floats_ptr_c[1][0],
377 out_floats_ptr_opt[1][0]);
378 }
379
380 free(in_shorts);
381 free(out_floats_left_c);
382 free(out_floats_right_c);
383 free(out_floats_left_opt);
384 free(out_floats_right_opt);
385 free(out_shorts_c);
386 free(out_shorts_opt);
387
388 return 0;
389 }
390