1 /* Copyright 2016 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6 #include <math.h> /* for abs() */
7 #include <stdio.h> /* for printf() */
8 #include <string.h> /* for memset() */
9 #include <stdint.h> /* for uint64 definition */
10 #include <stdlib.h> /* for exit() definition */
11 #include <time.h> /* for clock_gettime */
12
13 #include "../drc_math.h"
14 #include "../dsp_util.h"
15
16
/* Nanoseconds per second. Scales the seconds field of a timespec so an
 * elapsed time can be summed in nanoseconds before it is reduced to
 * milliseconds.
 */
#define BILLION 1000000000LL
/* Number of iterations for performance testing. */
#define ITERATIONS 400000
21
22 #if defined(__aarch64__)
/*
 * Converts a float to int16_t with two AArch64 scalar SIMD instructions.
 *
 * a: value to convert; the caller in this file passes a sample that has
 *    already been multiplied by 32768.
 *
 * Per the Arm ISA, fcvtas converts to a signed 32-bit integer rounding to
 * nearest with ties away from zero, and sqxtn narrows that result to 16 bits
 * with signed saturation. The narrowed value is returned.
 */
int16_t float_to_short(float a) {
	int32_t ret;
	/* The "w" constraints place both operands in FP/SIMD registers. */
	asm volatile ("fcvtas %s[ret], %s[a]\n"
		      "sqxtn %h[ret], %s[ret]\n"
		      : [ret] "=w" (ret)
		      : [a] "w" (a)
		      :);
	return (int16_t)(ret);
}
32 #else
/*
 * Portable float to int16_t conversion used by the reference interleave.
 *
 * a: value to convert; the caller in this file passes a sample that has
 *    already been multiplied by 32768.
 *
 * Half a unit is added in the direction of the input's sign, the result is
 * limited to [-32768, 32767] and then truncated by the integer cast, which
 * together round to nearest with ties going away from zero.
 */
int16_t float_to_short(float a)
{
	if (a >= 0)
		a += 0.5f;
	else
		a += -0.5f;

	return (int16_t)(max(-32768, min(32767, a)));
}
37 #endif
38
/*
 * Reference (plain C) conversion of an interleaved int16_t stream into
 * per-channel float buffers.
 *
 * input:    frames * channels samples, stored frame by frame with the
 *           channels of each frame adjacent.
 * output:   array of `channels` pointers; output[c] receives `frames` floats.
 * channels: number of channels in the stream.
 * frames:   number of samples per channel.
 *
 * Every sample is divided by 32768.0f. The channel buffers are indexed
 * directly instead of through a variable-length array of cursors, so a
 * channel count of zero or less simply converts nothing.
 */
void dsp_util_deinterleave_reference(int16_t *input, float *const *output,
				     int channels, int frames)
{
	int frame, ch;

	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++)
			output[ch][frame] = *input++ / 32768.0f;
	}
}
52
/*
 * Reference (plain C) conversion of per-channel float buffers into one
 * interleaved int16_t stream.
 *
 * input:    array of `channels` pointers; input[c] supplies `frames` floats.
 * output:   receives frames * channels samples, stored frame by frame with
 *           the channels of each frame adjacent.
 * channels: number of channel buffers in `input`.
 * frames:   number of samples to convert per channel.
 *
 * Every sample is multiplied by 32768.0f and converted with float_to_short().
 * The channel buffers are indexed directly instead of through a
 * variable-length array of cursors, so a channel count of zero or less simply
 * converts nothing.
 */
void dsp_util_interleave_reference(float *const *input, int16_t *output,
				   int channels, int frames)
{
	int frame, ch;

	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++) {
			float scaled = input[ch][frame] * 32768.0f;

			*output++ = float_to_short(scaled);
		}
	}
}
68
/* Use fixed size allocation to avoid performance fluctuation of allocation.
 * Buffers are sized for MAXSAMPLES frames; the benchmarks start at MAXSAMPLES
 * frames and halve the size down to MINSAMPLES.
 */
#define MAXSAMPLES 4096
#define MINSAMPLES 256
/* PAD buffer to check for overflows: extra bytes allocated after each buffer.
 * The interleave output comparisons span this padding as well.
 */
#define PAD 4096
74
TestRounding(float in,int16_t expected,int samples)75 void TestRounding(float in, int16_t expected, int samples)
76 {
77 int i;
78 int max_diff;
79 int d;
80
81 short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
82 float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
83 float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
84 float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
85 float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
86 short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
87 short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
88
89 memset(in_shorts, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
90 memset(out_floats_left_c, 0xfb, MAXSAMPLES * 4 + PAD);
91 memset(out_floats_right_c, 0xfb, MAXSAMPLES * 4 + PAD);
92 memset(out_floats_left_opt, 0xfb, MAXSAMPLES * 4 + PAD);
93 memset(out_floats_right_opt, 0xfb, MAXSAMPLES * 4 + PAD);
94 memset(out_shorts_c, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
95 memset(out_shorts_opt, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
96
97 float *out_floats_ptr_c[2];
98 float *out_floats_ptr_opt[2];
99
100 out_floats_ptr_c[0] = out_floats_left_c;
101 out_floats_ptr_c[1] = out_floats_right_c;
102 out_floats_ptr_opt[0] = out_floats_left_opt;
103 out_floats_ptr_opt[1] = out_floats_right_opt;
104
105 for (i = 0; i < MAXSAMPLES; ++i) {
106 out_floats_left_c[i] = in;
107 out_floats_right_c[i] = in;
108 }
109
110 /* reference C interleave */
111 dsp_util_interleave_reference(out_floats_ptr_c, out_shorts_c, 2,
112 samples);
113
114 /* measure optimized interleave */
115 for (i = 0; i < ITERATIONS; ++i) {
116 dsp_util_interleave(out_floats_ptr_c, out_shorts_opt, 2,
117 samples);
118 }
119
120 max_diff = 0;
121 for (i = 0; i < (MAXSAMPLES * 2 + PAD / 2); ++i) {
122 d = abs(out_shorts_c[i] - out_shorts_opt[i]);
123 if (d > max_diff) {
124 max_diff = d;
125 }
126 }
127 printf("test interleave compare %6d, %10f %13f %6d %6d %6d %s\n",
128 max_diff, in, in * 32768.0f, out_shorts_c[0], out_shorts_opt[0],
129 expected,
130 max_diff == 0 ? "PASS" : (out_shorts_opt[0] == expected ?
131 "EXPECTED DIFFERENCE" : "UNEXPECTED DIFFERENCE"));
132
133 /* measure reference C deinterleave */
134 dsp_util_deinterleave_reference(in_shorts, out_floats_ptr_c, 2,
135 samples);
136
137 /* measure optimized deinterleave */
138 dsp_util_deinterleave(in_shorts, out_floats_ptr_opt, 2, samples);
139
140 d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], samples * 4);
141 if (d) printf("left compare %d, %f %f\n", d, out_floats_ptr_c[0][0],
142 out_floats_ptr_opt[0][0]);
143 d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], samples * 4);
144 if (d) printf("right compare %d, %f %f\n", d, out_floats_ptr_c[1][0],
145 out_floats_ptr_opt[1][0]);
146
147 free(in_shorts);
148 free(out_floats_left_c);
149 free(out_floats_right_c);
150 free(out_floats_left_opt);
151 free(out_floats_right_opt);
152 free(out_shorts_c);
153 free(out_shorts_opt);
154 }
155
main(int argc,char ** argv)156 int main(int argc, char **argv)
157 {
158 float e = 0.000000001f;
159 int samples = 16;
160
161 dsp_enable_flush_denormal_to_zero();
162
163 // Print headings for TestRounding output.
164 printf("test interleave compare maxdif, float, float * 32k "
165 "C SIMD expect pass\n");
166
167 // test clamping
168 TestRounding(1.0f, 32767, samples);
169 TestRounding(-1.0f, -32768, samples);
170 TestRounding(1.1f, 32767, samples);
171 TestRounding(-1.1f, -32768, samples);
172 TestRounding(2000000000.f / 32768.f, 32767, samples);
173 TestRounding(-2000000000.f / 32768.f, -32768, samples);
174
175 /* Infinity produces zero on arm64. */
176 #if defined(__aarch64__)
177 #define EXPECTED_INF_RESULT 0
178 #define EXPECTED_NEGINF_RESULT 0
179 #elif defined(__i386__) || defined(__x86_64__)
180 #define EXPECTED_INF_RESULT -32768
181 #define EXPECTED_NEGINF_RESULT 0
182 #else
183 #define EXPECTED_INF_RESULT 32767
184 #define EXPECTED_NEGINF_RESULT -32768
185 #endif
186
187 TestRounding(5000000000.f / 32768.f, EXPECTED_INF_RESULT, samples);
188 TestRounding(-5000000000.f / 32768.f, EXPECTED_NEGINF_RESULT, samples);
189
190 // test infinity
191 union ieee754_float inf;
192 inf.ieee.negative = 0;
193 inf.ieee.exponent = 0xfe;
194 inf.ieee.mantissa = 0x7fffff;
195 TestRounding(inf.f, EXPECTED_INF_RESULT, samples); // expect fail
196 inf.ieee.negative = 1;
197 inf.ieee.exponent = 0xfe;
198 inf.ieee.mantissa = 0x7fffff;
199 TestRounding(inf.f, EXPECTED_NEGINF_RESULT, samples); // expect fail
200
201 // test rounding
202 TestRounding(0.25f, 8192, samples);
203 TestRounding(-0.25f, -8192, samples);
204 TestRounding(0.50f, 16384, samples);
205 TestRounding(-0.50f, -16384, samples);
206 TestRounding(1.0f / 32768.0f, 1, samples);
207 TestRounding(-1.0f / 32768.0f, -1, samples);
208 TestRounding(1.0f / 32768.0f + e, 1, samples);
209 TestRounding(-1.0f / 32768.0f - e, -1, samples);
210 TestRounding(1.0f / 32768.0f - e, 1, samples);
211 TestRounding(-1.0f / 32768.0f + e, -1, samples);
212
213 /* Rounding on 'tie' is different for Intel. */
214 #if defined(__i386__) || defined(__x86_64__)
215 TestRounding(0.5f / 32768.0f, 0, samples); /* Expect round to even */
216 TestRounding(-0.5f / 32768.0f, 0, samples);
217 #else
218 TestRounding(0.5f / 32768.0f, 1, samples); /* Expect round away */
219 TestRounding(-0.5f / 32768.0f, -1, samples);
220 #endif
221
222 TestRounding(0.5f / 32768.0f + e, 1, samples);
223 TestRounding(-0.5f / 32768.0f - e, 1, samples);
224 TestRounding(0.5f / 32768.0f - e, 0, samples);
225 TestRounding(-0.5f / 32768.0f + e, 0, samples);
226
227 TestRounding(1.5f / 32768.0f, 2, samples);
228 TestRounding(-1.5f / 32768.0f, -2, samples);
229 TestRounding(1.5f / 32768.0f + e, 2, samples);
230 TestRounding(-1.5f / 32768.0f - e, -2, samples);
231 TestRounding(1.5f / 32768.0f - e, 1, samples);
232 TestRounding(-1.5f / 32768.0f + e, -1, samples);
233
234 /* Test denormals */
235 union ieee754_float denorm;
236 denorm.ieee.negative = 0;
237 denorm.ieee.exponent = 0;
238 denorm.ieee.mantissa = 1;
239 TestRounding(denorm.f, 0, samples);
240 denorm.ieee.negative = 1;
241 denorm.ieee.exponent = 0;
242 denorm.ieee.mantissa = 1;
243 TestRounding(denorm.f, 0, samples);
244
245 /* Test NaNs. Caveat Results vary by implementation. */
246 #if defined(__i386__) || defined(__x86_64__)
247 #define EXPECTED_NAN_RESULT -32768
248 #else
249 #define EXPECTED_NAN_RESULT 0
250 #endif
251 union ieee754_float nan; /* Quiet NaN */
252 nan.ieee.negative = 0;
253 nan.ieee.exponent = 0xff;
254 nan.ieee.mantissa = 0x400001;
255 TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
256 nan.ieee.negative = 0;
257 nan.ieee.exponent = 0xff;
258 nan.ieee.mantissa = 0x000001; /* Signalling NaN */
259 TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
260
261 /* Test Performance */
262 uint64_t diff;
263 struct timespec start, end;
264 int i;
265 int d;
266
267 short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
268 float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
269 float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
270 float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
271 float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
272 short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
273 short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
274
275 memset(in_shorts, 0x11, MAXSAMPLES * 2 * 2 + PAD);
276 memset(out_floats_left_c, 0x22, MAXSAMPLES * 4 + PAD);
277 memset(out_floats_right_c, 0x33, MAXSAMPLES * 4 + PAD);
278 memset(out_floats_left_opt, 0x44, MAXSAMPLES * 4 + PAD);
279 memset(out_floats_right_opt, 0x55, MAXSAMPLES * 4 + PAD);
280 memset(out_shorts_c, 0x66, MAXSAMPLES * 2 * 2 + PAD);
281 memset(out_shorts_opt, 0x66, MAXSAMPLES * 2 * 2 + PAD);
282
283 float *out_floats_ptr_c[2];
284 float *out_floats_ptr_opt[2];
285
286 out_floats_ptr_c[0] = out_floats_left_c;
287 out_floats_ptr_c[1] = out_floats_right_c;
288 out_floats_ptr_opt[0] = out_floats_left_opt;
289 out_floats_ptr_opt[1] = out_floats_right_opt;
290
291 /* Benchmark dsp_util_interleave */
292 for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
293
294 /* measure original C interleave */
295 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
296 for (i = 0; i < ITERATIONS; ++i) {
297 dsp_util_interleave_reference(out_floats_ptr_c,
298 out_shorts_c,
299 2, samples);
300 }
301 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
302 diff = (BILLION * (end.tv_sec - start.tv_sec) +
303 end.tv_nsec - start.tv_nsec) / 1000000;
304 printf("interleave ORIG size = %6d, elapsed time = %llu ms\n",
305 samples, (long long unsigned int) diff);
306
307 /* measure optimized interleave */
308 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
309 for (i = 0; i < ITERATIONS; ++i) {
310 dsp_util_interleave(out_floats_ptr_c, out_shorts_opt, 2,
311 samples);
312 }
313 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
314 diff = (BILLION * (end.tv_sec - start.tv_sec) +
315 end.tv_nsec - start.tv_nsec) / 1000000;
316 printf("interleave SIMD size = %6d, elapsed time = %llu ms\n",
317 samples, (long long unsigned int) diff);
318
319 /* Test C and SIMD output match */
320 d = memcmp(out_shorts_c, out_shorts_opt,
321 MAXSAMPLES * 2 * 2 + PAD);
322 if (d) printf("interleave compare %d, %d %d, %d %d\n", d,
323 out_shorts_c[0], out_shorts_c[1],
324 out_shorts_opt[0], out_shorts_opt[1]);
325 }
326
327 /* Benchmark dsp_util_deinterleave */
328 for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
329
330 /* Measure original C deinterleave */
331 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
332 for (i = 0; i < ITERATIONS; ++i) {
333 dsp_util_deinterleave_reference(in_shorts,
334 out_floats_ptr_c,
335 2, samples);
336 }
337 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
338 diff = (BILLION * (end.tv_sec - start.tv_sec) +
339 end.tv_nsec - start.tv_nsec) / 1000000;
340 printf("deinterleave ORIG size = %6d, "
341 "elapsed time = %llu ms\n",
342 samples, (long long unsigned int) diff);
343
344 /* Measure optimized deinterleave */
345 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
346 for (i = 0; i < ITERATIONS; ++i) {
347 dsp_util_deinterleave(in_shorts, out_floats_ptr_opt, 2,
348 samples);
349 }
350 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
351 diff = (BILLION * (end.tv_sec - start.tv_sec) +
352 end.tv_nsec - start.tv_nsec) / 1000000;
353 printf("deinterleave SIMD size = %6d, elapsed time = %llu ms\n",
354 samples, (long long unsigned int) diff);
355
356 /* Test C and SIMD output match */
357 d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0],
358 samples * 4);
359 if (d) printf("left compare %d, %f %f\n", d,
360 out_floats_ptr_c[0][0], out_floats_ptr_opt[0][0]);
361 d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1],
362 samples * 4);
363 if (d) printf("right compare %d, %f %f\n", d,
364 out_floats_ptr_c[1][0], out_floats_ptr_opt[1][0]);
365 }
366
367 free(in_shorts);
368 free(out_floats_left_c);
369 free(out_floats_right_c);
370 free(out_floats_left_opt);
371 free(out_floats_right_opt);
372 free(out_shorts_c);
373 free(out_shorts_opt);
374
375 return 0;
376 }