• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The Chromium OS Authors. All rights reserved.
2  * Use of this source code is governed by a BSD-style license that can be
3  * found in the LICENSE file.
4  */
5 
6 #include <math.h> /* for abs() */
7 #include <stdio.h> /* for printf() */
8 #include <string.h> /* for memset() */
9 #include <stdint.h> /* for uint64 definition */
10 #include <stdlib.h> /* for exit() definition */
11 #include <time.h> /* for clock_gettime */
12 
13 #include "../drc_math.h"
14 #include "../dsp_util.h"
15 
/* Nanoseconds per second; used when converting timespec deltas to ms. */
17 #define BILLION 1000000000LL
18 /* Number of iterations for performance testing. */
19 #define ITERATIONS 400000
20 
#if defined(__aarch64__)
/*
 * Converts a float to a signed 16-bit sample on arm64.
 *
 * FCVTAS converts to a 32-bit signed integer rounding to nearest with ties
 * away from zero; SQXTN then narrows to 16 bits with signed saturation.
 * The operands and ordering of the asm must not be altered.
 */
int16_t float_to_short(float a)
{
	int32_t ret;
	asm volatile("fcvtas %s[ret], %s[a]\n"
		     "sqxtn %h[ret], %s[ret]\n"
		     : [ret] "=w"(ret)
		     : [a] "w"(a)
		     :);
	return (int16_t)(ret);
}
#else
/*
 * Portable conversion of a float to a signed 16-bit sample.
 *
 * Half a unit is added in the direction of the sign so that the truncating
 * cast rounds to nearest, then the value is limited to [-32768, 32767]
 * using the project's max()/min() macros.
 * NOTE(review): the result for NaN depends on how those macros compare;
 * the tests in this file treat NaN output as implementation-specific.
 */
int16_t float_to_short(float a)
{
	if (a >= 0)
		a += 0.5f;
	else
		a += -0.5f;

	return (int16_t)(max(-32768, min(32767, a)));
}
#endif
39 
/*
 * Reference (plain C) conversion of interleaved signed 16-bit samples into
 * per-channel float buffers. Each sample is divided by 32768.0f.
 *
 * input:    interleaved samples, one frame after another; channels * frames
 *           values are read.
 * output:   array of `channels` pointers; each destination receives
 *           `frames` floats.
 * channels: number of channels. Nothing is written when <= 0.
 * frames:   number of frames. Nothing is written when <= 0.
 */
void dsp_util_deinterleave_reference(int16_t *input, float *const *output,
				     int channels, int frames)
{
	int frame, ch;

	/*
	 * The destination buffers are indexed directly rather than through
	 * a variable-length array of cursors: a VLA sized by a non-positive
	 * `channels` would be undefined behaviour.
	 */
	for (frame = 0; frame < frames; frame++)
		for (ch = 0; ch < channels; ch++)
			output[ch][frame] = *input++ / 32768.0f;
}
53 
/*
 * Reference (plain C) conversion of per-channel float buffers into
 * interleaved signed 16-bit samples. Each float is multiplied by 32768.0f
 * and converted with float_to_short().
 *
 * input:    array of `channels` pointers; `frames` floats are read from
 *           each.
 * output:   destination for channels * frames interleaved samples.
 * channels: number of channels. Nothing is written when <= 0.
 * frames:   number of frames. Nothing is written when <= 0.
 */
void dsp_util_interleave_reference(float *const *input, int16_t *output,
				   int channels, int frames)
{
	int frame, ch;

	/*
	 * The source buffers are indexed directly rather than through a
	 * variable-length array of cursors: a VLA sized by a non-positive
	 * `channels` would be undefined behaviour.
	 */
	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++) {
			float f = input[ch][frame] * 32768.0f;

			*output++ = float_to_short(f);
		}
	}
}
69 
70 /* Use fixed size allocation to avoid performance fluctuation of allocation. */
71 #define MAXSAMPLES 4096
72 #define MINSAMPLES 256
73 /* PAD buffer to check for overflows. */
74 #define PAD 4096
75 
TestRounding(float in,int16_t expected,int samples)76 void TestRounding(float in, int16_t expected, int samples)
77 {
78 	int i;
79 	int max_diff;
80 	int d;
81 
82 	short *in_shorts = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
83 	float *out_floats_left_c = (float *)malloc(MAXSAMPLES * 4 + PAD);
84 	float *out_floats_right_c = (float *)malloc(MAXSAMPLES * 4 + PAD);
85 	float *out_floats_left_opt = (float *)malloc(MAXSAMPLES * 4 + PAD);
86 	float *out_floats_right_opt = (float *)malloc(MAXSAMPLES * 4 + PAD);
87 	short *out_shorts_c = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
88 	short *out_shorts_opt = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
89 
90 	memset(in_shorts, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
91 	memset(out_floats_left_c, 0xfb, MAXSAMPLES * 4 + PAD);
92 	memset(out_floats_right_c, 0xfb, MAXSAMPLES * 4 + PAD);
93 	memset(out_floats_left_opt, 0xfb, MAXSAMPLES * 4 + PAD);
94 	memset(out_floats_right_opt, 0xfb, MAXSAMPLES * 4 + PAD);
95 	memset(out_shorts_c, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
96 	memset(out_shorts_opt, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
97 
98 	float *out_floats_ptr_c[2];
99 	float *out_floats_ptr_opt[2];
100 
101 	out_floats_ptr_c[0] = out_floats_left_c;
102 	out_floats_ptr_c[1] = out_floats_right_c;
103 	out_floats_ptr_opt[0] = out_floats_left_opt;
104 	out_floats_ptr_opt[1] = out_floats_right_opt;
105 
106 	for (i = 0; i < MAXSAMPLES; ++i) {
107 		out_floats_left_c[i] = in;
108 		out_floats_right_c[i] = in;
109 	}
110 
111 	/*  reference C interleave */
112 	dsp_util_interleave_reference(out_floats_ptr_c, out_shorts_c, 2,
113 				      samples);
114 
115 	/* measure optimized interleave */
116 	for (i = 0; i < ITERATIONS; ++i) {
117 		dsp_util_interleave(out_floats_ptr_c, (uint8_t *)out_shorts_opt,
118 				    2, SND_PCM_FORMAT_S16_LE, samples);
119 	}
120 
121 	max_diff = 0;
122 	for (i = 0; i < (MAXSAMPLES * 2 + PAD / 2); ++i) {
123 		d = abs(out_shorts_c[i] - out_shorts_opt[i]);
124 		if (d > max_diff) {
125 			max_diff = d;
126 		}
127 	}
128 	printf("test interleave compare %6d, %10f %13f %6d %6d %6d %s\n",
129 	       max_diff, in, in * 32768.0f, out_shorts_c[0], out_shorts_opt[0],
130 	       expected,
131 	       max_diff == 0 ? "PASS" :
132 			       (out_shorts_opt[0] == expected ?
133 					"EXPECTED DIFFERENCE" :
134 					"UNEXPECTED DIFFERENCE"));
135 
136 	/* measure reference C deinterleave */
137 	dsp_util_deinterleave_reference(in_shorts, out_floats_ptr_c, 2,
138 					samples);
139 
140 	/* measure optimized deinterleave */
141 	dsp_util_deinterleave((uint8_t *)in_shorts, out_floats_ptr_opt, 2,
142 			      SND_PCM_FORMAT_S16_LE, samples);
143 
144 	d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], samples * 4);
145 	if (d)
146 		printf("left compare %d, %f %f\n", d, out_floats_ptr_c[0][0],
147 		       out_floats_ptr_opt[0][0]);
148 	d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], samples * 4);
149 	if (d)
150 		printf("right compare %d, %f %f\n", d, out_floats_ptr_c[1][0],
151 		       out_floats_ptr_opt[1][0]);
152 
153 	free(in_shorts);
154 	free(out_floats_left_c);
155 	free(out_floats_right_c);
156 	free(out_floats_left_opt);
157 	free(out_floats_right_opt);
158 	free(out_shorts_c);
159 	free(out_shorts_opt);
160 }
161 
main(int argc,char ** argv)162 int main(int argc, char **argv)
163 {
164 	float e = 0.000000001f;
165 	int samples = 16;
166 
167 	dsp_enable_flush_denormal_to_zero();
168 
169 	// Print headings for TestRounding output.
170 	printf("test interleave compare maxdif,     float,   float * 32k      "
171 	       "C   SIMD expect pass\n");
172 
173 	// test clamping
174 	TestRounding(1.0f, 32767, samples);
175 	TestRounding(-1.0f, -32768, samples);
176 	TestRounding(1.1f, 32767, samples);
177 	TestRounding(-1.1f, -32768, samples);
178 	TestRounding(2000000000.f / 32768.f, 32767, samples);
179 	TestRounding(-2000000000.f / 32768.f, -32768, samples);
180 
181 	/* Infinity produces zero on arm64. */
182 #if defined(__aarch64__)
183 #define EXPECTED_INF_RESULT 0
184 #define EXPECTED_NEGINF_RESULT 0
185 #elif defined(__i386__) || defined(__x86_64__)
186 #define EXPECTED_INF_RESULT -32768
187 #define EXPECTED_NEGINF_RESULT 0
188 #else
189 #define EXPECTED_INF_RESULT 32767
190 #define EXPECTED_NEGINF_RESULT -32768
191 #endif
192 
193 	TestRounding(5000000000.f / 32768.f, EXPECTED_INF_RESULT, samples);
194 	TestRounding(-5000000000.f / 32768.f, EXPECTED_NEGINF_RESULT, samples);
195 
196 	// test infinity
197 	union ieee754_float inf;
198 	inf.ieee.negative = 0;
199 	inf.ieee.exponent = 0xfe;
200 	inf.ieee.mantissa = 0x7fffff;
201 	TestRounding(inf.f, EXPECTED_INF_RESULT, samples); // expect fail
202 	inf.ieee.negative = 1;
203 	inf.ieee.exponent = 0xfe;
204 	inf.ieee.mantissa = 0x7fffff;
205 	TestRounding(inf.f, EXPECTED_NEGINF_RESULT, samples); // expect fail
206 
207 	// test rounding
208 	TestRounding(0.25f, 8192, samples);
209 	TestRounding(-0.25f, -8192, samples);
210 	TestRounding(0.50f, 16384, samples);
211 	TestRounding(-0.50f, -16384, samples);
212 	TestRounding(1.0f / 32768.0f, 1, samples);
213 	TestRounding(-1.0f / 32768.0f, -1, samples);
214 	TestRounding(1.0f / 32768.0f + e, 1, samples);
215 	TestRounding(-1.0f / 32768.0f - e, -1, samples);
216 	TestRounding(1.0f / 32768.0f - e, 1, samples);
217 	TestRounding(-1.0f / 32768.0f + e, -1, samples);
218 
219 	/* Rounding on 'tie' is different for Intel. */
220 #if defined(__i386__) || defined(__x86_64__)
221 	TestRounding(0.5f / 32768.0f, 0, samples); /* Expect round to even */
222 	TestRounding(-0.5f / 32768.0f, 0, samples);
223 #else
224 	TestRounding(0.5f / 32768.0f, 1, samples); /* Expect round away */
225 	TestRounding(-0.5f / 32768.0f, -1, samples);
226 #endif
227 
228 	TestRounding(0.5f / 32768.0f + e, 1, samples);
229 	TestRounding(-0.5f / 32768.0f - e, 1, samples);
230 	TestRounding(0.5f / 32768.0f - e, 0, samples);
231 	TestRounding(-0.5f / 32768.0f + e, 0, samples);
232 
233 	TestRounding(1.5f / 32768.0f, 2, samples);
234 	TestRounding(-1.5f / 32768.0f, -2, samples);
235 	TestRounding(1.5f / 32768.0f + e, 2, samples);
236 	TestRounding(-1.5f / 32768.0f - e, -2, samples);
237 	TestRounding(1.5f / 32768.0f - e, 1, samples);
238 	TestRounding(-1.5f / 32768.0f + e, -1, samples);
239 
240 	/* Test denormals */
241 	union ieee754_float denorm;
242 	denorm.ieee.negative = 0;
243 	denorm.ieee.exponent = 0;
244 	denorm.ieee.mantissa = 1;
245 	TestRounding(denorm.f, 0, samples);
246 	denorm.ieee.negative = 1;
247 	denorm.ieee.exponent = 0;
248 	denorm.ieee.mantissa = 1;
249 	TestRounding(denorm.f, 0, samples);
250 
251 	/* Test NaNs. Caveat Results vary by implementation. */
252 #if defined(__i386__) || defined(__x86_64__)
253 #define EXPECTED_NAN_RESULT -32768
254 #else
255 #define EXPECTED_NAN_RESULT 0
256 #endif
257 	union ieee754_float nan; /* Quiet NaN */
258 	nan.ieee.negative = 0;
259 	nan.ieee.exponent = 0xff;
260 	nan.ieee.mantissa = 0x400001;
261 	TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
262 	nan.ieee.negative = 0;
263 	nan.ieee.exponent = 0xff;
264 	nan.ieee.mantissa = 0x000001; /* Signalling NaN */
265 	TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
266 
267 	/* Test Performance */
268 	uint64_t diff;
269 	struct timespec start, end;
270 	int i;
271 	int d;
272 
273 	short *in_shorts = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
274 	float *out_floats_left_c = (float *)malloc(MAXSAMPLES * 4 + PAD);
275 	float *out_floats_right_c = (float *)malloc(MAXSAMPLES * 4 + PAD);
276 	float *out_floats_left_opt = (float *)malloc(MAXSAMPLES * 4 + PAD);
277 	float *out_floats_right_opt = (float *)malloc(MAXSAMPLES * 4 + PAD);
278 	short *out_shorts_c = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
279 	short *out_shorts_opt = (short *)malloc(MAXSAMPLES * 2 * 2 + PAD);
280 
281 	memset(in_shorts, 0x11, MAXSAMPLES * 2 * 2 + PAD);
282 	memset(out_floats_left_c, 0x22, MAXSAMPLES * 4 + PAD);
283 	memset(out_floats_right_c, 0x33, MAXSAMPLES * 4 + PAD);
284 	memset(out_floats_left_opt, 0x44, MAXSAMPLES * 4 + PAD);
285 	memset(out_floats_right_opt, 0x55, MAXSAMPLES * 4 + PAD);
286 	memset(out_shorts_c, 0x66, MAXSAMPLES * 2 * 2 + PAD);
287 	memset(out_shorts_opt, 0x66, MAXSAMPLES * 2 * 2 + PAD);
288 
289 	float *out_floats_ptr_c[2];
290 	float *out_floats_ptr_opt[2];
291 
292 	out_floats_ptr_c[0] = out_floats_left_c;
293 	out_floats_ptr_c[1] = out_floats_right_c;
294 	out_floats_ptr_opt[0] = out_floats_left_opt;
295 	out_floats_ptr_opt[1] = out_floats_right_opt;
296 
297 	/* Benchmark dsp_util_interleave */
298 	for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
299 		/* measure original C interleave */
300 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
301 		for (i = 0; i < ITERATIONS; ++i) {
302 			dsp_util_interleave_reference(out_floats_ptr_c,
303 						      out_shorts_c, 2, samples);
304 		}
305 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
306 		diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec -
307 			start.tv_nsec) /
308 		       1000000;
309 		printf("interleave   ORIG size = %6d, elapsed time = %llu ms\n",
310 		       samples, (long long unsigned int)diff);
311 
312 		/* measure optimized interleave */
313 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
314 		for (i = 0; i < ITERATIONS; ++i) {
315 			dsp_util_interleave(out_floats_ptr_c,
316 					    (uint8_t *)out_shorts_opt, 2,
317 					    SND_PCM_FORMAT_S16_LE, samples);
318 		}
319 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
320 		diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec -
321 			start.tv_nsec) /
322 		       1000000;
323 		printf("interleave   SIMD size = %6d, elapsed time = %llu ms\n",
324 		       samples, (long long unsigned int)diff);
325 
326 		/* Test C and SIMD output match */
327 		d = memcmp(out_shorts_c, out_shorts_opt,
328 			   MAXSAMPLES * 2 * 2 + PAD);
329 		if (d)
330 			printf("interleave compare %d, %d %d, %d %d\n", d,
331 			       out_shorts_c[0], out_shorts_c[1],
332 			       out_shorts_opt[0], out_shorts_opt[1]);
333 	}
334 
335 	/* Benchmark dsp_util_deinterleave */
336 	for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
337 		/* Measure original C deinterleave */
338 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
339 		for (i = 0; i < ITERATIONS; ++i) {
340 			dsp_util_deinterleave_reference(
341 				in_shorts, out_floats_ptr_c, 2, samples);
342 		}
343 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
344 		diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec -
345 			start.tv_nsec) /
346 		       1000000;
347 		printf("deinterleave ORIG size = %6d, "
348 		       "elapsed time = %llu ms\n",
349 		       samples, (long long unsigned int)diff);
350 
351 		/* Measure optimized deinterleave */
352 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
353 		for (i = 0; i < ITERATIONS; ++i) {
354 			dsp_util_deinterleave((uint8_t *)in_shorts,
355 					      out_floats_ptr_opt, 2,
356 					      SND_PCM_FORMAT_S16_LE, samples);
357 		}
358 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
359 		diff = (BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec -
360 			start.tv_nsec) /
361 		       1000000;
362 		printf("deinterleave SIMD size = %6d, elapsed time = %llu ms\n",
363 		       samples, (long long unsigned int)diff);
364 
365 		/* Test C and SIMD output match */
366 		d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0],
367 			   samples * 4);
368 		if (d)
369 			printf("left compare %d, %f %f\n", d,
370 			       out_floats_ptr_c[0][0],
371 			       out_floats_ptr_opt[0][0]);
372 		d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1],
373 			   samples * 4);
374 		if (d)
375 			printf("right compare %d, %f %f\n", d,
376 			       out_floats_ptr_c[1][0],
377 			       out_floats_ptr_opt[1][0]);
378 	}
379 
380 	free(in_shorts);
381 	free(out_floats_left_c);
382 	free(out_floats_right_c);
383 	free(out_floats_left_opt);
384 	free(out_floats_right_opt);
385 	free(out_shorts_c);
386 	free(out_shorts_opt);
387 
388 	return 0;
389 }
390