• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The Chromium OS Authors. All rights reserved.
2  * Use of this source code is governed by a BSD-style license that can be
3  * found in the LICENSE file.
4  */
5 
6 #include <math.h>  /* for abs() */
7 #include <stdio.h>  /* for printf() */
8 #include <string.h> /* for memset() */
9 #include <stdint.h> /* for uint64 definition */
10 #include <stdlib.h> /* for exit() definition */
11 #include <time.h> /* for clock_gettime */
12 
13 #include "../drc_math.h"
14 #include "../dsp_util.h"
15 
16 
17 /* Nanoseconds per second; scales timespec seconds to nanoseconds. */
18 #define BILLION 1000000000LL
19 /* Number of iterations for performance testing. */
20 #define ITERATIONS 400000
21 
/*
 * Convert a float sample that has already been scaled to the int16 range
 * into an int16_t.
 *
 * The value is rounded to nearest with ties away from zero and saturated to
 * [-32768, 32767]. NaN converts to 0.
 *
 * a: sample value to convert.
 * Returns the rounded, saturated 16-bit sample.
 */
#if defined(__aarch64__)
int16_t float_to_short(float a)
{
	int32_t ret;

	/*
	 * fcvtas converts to a signed 32-bit integer (round to nearest, ties
	 * away); sqxtn then narrows that to 16 bits with signed saturation.
	 * Both operate in the same SIMD/FP register, hence the "w"
	 * constraints.
	 */
	asm volatile ("fcvtas %s[ret], %s[a]\n"
		      "sqxtn %h[ret], %s[ret]\n"
		      : [ret] "=w" (ret)
		      : [a] "w" (a)
		      :);
	return (int16_t)(ret);
}
#else
int16_t float_to_short(float a)
{
	/*
	 * NaN compares false against every bound, so it would slip through the
	 * clamp below and reach the float -> int16_t cast, which is undefined
	 * behaviour. Return 0, the same value the aarch64 fcvtas path yields.
	 */
	if (isnan(a))
		return 0;

	/* Bias by half a step so the truncating cast rounds ties away. */
	a += (a >= 0) ? 0.5f : -0.5f;

	if (a >= 32767.0f)
		return INT16_MAX;
	if (a <= -32768.0f)
		return INT16_MIN;
	return (int16_t)a;
}
#endif
38 
/*
 * Reference (plain C) deinterleave.
 *
 * Reads frames * channels interleaved int16 samples from input and writes
 * each one, divided by 32768.0f, to the per-channel float buffers.
 *
 * input:    interleaved samples, channel-minor order.
 * output:   array of `channels` pointers; each buffer receives `frames`
 *           floats.
 * channels: number of channels; nothing is written when <= 0.
 * frames:   number of frames; nothing is written when <= 0.
 */
void dsp_util_deinterleave_reference(int16_t *input, float *const *output,
				     int channels, int frames)
{
	int frame, ch;

	/*
	 * Index the channel buffers directly instead of copying the pointers
	 * into a variable-length array: a VLA sized by an unchecked `channels`
	 * is undefined for channels <= 0 and unbounded on the stack.
	 */
	for (frame = 0; frame < frames; frame++)
		for (ch = 0; ch < channels; ch++)
			output[ch][frame] = *input++ / 32768.0f;
}
52 
/*
 * Reference (plain C) interleave.
 *
 * Reads `frames` floats from each of the `channels` input buffers, multiplies
 * each by 32768.0f, converts it with float_to_short() and writes the results
 * to output in channel-minor order.
 *
 * input:    array of `channels` pointers to per-channel float buffers.
 * output:   receives frames * channels int16 samples.
 * channels: number of channels; nothing is written when <= 0.
 * frames:   number of frames; nothing is written when <= 0.
 */
void dsp_util_interleave_reference(float *const *input, int16_t *output,
				   int channels, int frames)
{
	int frame, ch;

	/*
	 * Index the channel buffers directly instead of copying the pointers
	 * into a variable-length array: a VLA sized by an unchecked `channels`
	 * is undefined for channels <= 0 and unbounded on the stack.
	 */
	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++) {
			float scaled = input[ch][frame] * 32768.0f;

			*output++ = float_to_short(scaled);
		}
	}
}
68 
69 /* Use fixed size allocation to avoid performance fluctuation of allocation. */
70 #define MAXSAMPLES 4096
71 #define MINSAMPLES 256
72 /* PAD buffer to check for overflows. */
73 #define PAD 4096
74 
TestRounding(float in,int16_t expected,int samples)75 void TestRounding(float in, int16_t expected, int samples)
76 {
77 	int i;
78 	int max_diff;
79 	int d;
80 
81 	short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
82 	float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
83 	float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
84 	float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
85 	float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
86 	short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
87 	short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
88 
89 	memset(in_shorts, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
90 	memset(out_floats_left_c, 0xfb, MAXSAMPLES * 4 + PAD);
91 	memset(out_floats_right_c, 0xfb, MAXSAMPLES * 4 + PAD);
92 	memset(out_floats_left_opt, 0xfb, MAXSAMPLES * 4 + PAD);
93 	memset(out_floats_right_opt, 0xfb, MAXSAMPLES * 4 + PAD);
94 	memset(out_shorts_c, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
95 	memset(out_shorts_opt, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
96 
97 	float *out_floats_ptr_c[2];
98 	float *out_floats_ptr_opt[2];
99 
100 	out_floats_ptr_c[0] = out_floats_left_c;
101 	out_floats_ptr_c[1] = out_floats_right_c;
102 	out_floats_ptr_opt[0] = out_floats_left_opt;
103 	out_floats_ptr_opt[1] = out_floats_right_opt;
104 
105 	for (i = 0; i < MAXSAMPLES; ++i) {
106 		out_floats_left_c[i] = in;
107 		out_floats_right_c[i] = in;
108 	}
109 
110 	/*  reference C interleave */
111 	dsp_util_interleave_reference(out_floats_ptr_c, out_shorts_c, 2,
112 				      samples);
113 
114 	/* measure optimized interleave */
115 	for (i = 0; i < ITERATIONS; ++i) {
116 		dsp_util_interleave(out_floats_ptr_c, out_shorts_opt, 2,
117 				    samples);
118 	}
119 
120 	max_diff = 0;
121 	for (i = 0; i < (MAXSAMPLES * 2 + PAD / 2); ++i) {
122 		d = abs(out_shorts_c[i] - out_shorts_opt[i]);
123 		if (d > max_diff) {
124 			max_diff = d;
125 		}
126 	}
127 	printf("test interleave compare %6d, %10f %13f %6d %6d %6d %s\n",
128 		max_diff, in, in * 32768.0f, out_shorts_c[0], out_shorts_opt[0],
129 		expected,
130 		max_diff == 0 ? "PASS" : (out_shorts_opt[0] == expected ?
131 		"EXPECTED DIFFERENCE" : "UNEXPECTED DIFFERENCE"));
132 
133 	/* measure reference C deinterleave */
134 	dsp_util_deinterleave_reference(in_shorts, out_floats_ptr_c, 2,
135 					samples);
136 
137 	/* measure optimized deinterleave */
138 	dsp_util_deinterleave(in_shorts, out_floats_ptr_opt, 2, samples);
139 
140 	d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], samples * 4);
141 	if (d) printf("left compare %d, %f %f\n", d, out_floats_ptr_c[0][0],
142 		      out_floats_ptr_opt[0][0]);
143 	d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], samples * 4);
144 	if (d) printf("right compare %d, %f %f\n", d, out_floats_ptr_c[1][0],
145 		      out_floats_ptr_opt[1][0]);
146 
147 	free(in_shorts);
148 	free(out_floats_left_c);
149 	free(out_floats_right_c);
150 	free(out_floats_left_opt);
151 	free(out_floats_right_opt);
152 	free(out_shorts_c);
153 	free(out_shorts_opt);
154 }
155 
main(int argc,char ** argv)156 int main(int argc, char **argv)
157 {
158 	float e = 0.000000001f;
159 	int samples = 16;
160 
161 	dsp_enable_flush_denormal_to_zero();
162 
163 	// Print headings for TestRounding output.
164 	printf("test interleave compare maxdif,     float,   float * 32k      "
165 	       "C   SIMD expect pass\n");
166 
167 	// test clamping
168 	TestRounding(1.0f, 32767, samples);
169 	TestRounding(-1.0f, -32768, samples);
170 	TestRounding(1.1f, 32767, samples);
171 	TestRounding(-1.1f, -32768, samples);
172 	TestRounding(2000000000.f / 32768.f, 32767, samples);
173 	TestRounding(-2000000000.f / 32768.f, -32768, samples);
174 
175 	/* Infinity produces zero on arm64. */
176 #if defined(__aarch64__)
177 #define EXPECTED_INF_RESULT 0
178 #define EXPECTED_NEGINF_RESULT 0
179 #elif defined(__i386__) || defined(__x86_64__)
180 #define EXPECTED_INF_RESULT -32768
181 #define EXPECTED_NEGINF_RESULT 0
182 #else
183 #define EXPECTED_INF_RESULT 32767
184 #define EXPECTED_NEGINF_RESULT -32768
185 #endif
186 
187 	TestRounding(5000000000.f / 32768.f, EXPECTED_INF_RESULT, samples);
188 	TestRounding(-5000000000.f / 32768.f, EXPECTED_NEGINF_RESULT, samples);
189 
190 	// test infinity
191 	union ieee754_float inf;
192 	inf.ieee.negative = 0;
193 	inf.ieee.exponent = 0xfe;
194 	inf.ieee.mantissa = 0x7fffff;
195 	TestRounding(inf.f, EXPECTED_INF_RESULT, samples);  // expect fail
196 	inf.ieee.negative = 1;
197 	inf.ieee.exponent = 0xfe;
198 	inf.ieee.mantissa = 0x7fffff;
199 	TestRounding(inf.f, EXPECTED_NEGINF_RESULT, samples);  // expect fail
200 
201 	// test rounding
202 	TestRounding(0.25f, 8192, samples);
203 	TestRounding(-0.25f, -8192, samples);
204 	TestRounding(0.50f, 16384, samples);
205 	TestRounding(-0.50f, -16384, samples);
206 	TestRounding(1.0f / 32768.0f, 1, samples);
207 	TestRounding(-1.0f / 32768.0f, -1, samples);
208 	TestRounding(1.0f / 32768.0f + e, 1, samples);
209 	TestRounding(-1.0f / 32768.0f - e, -1, samples);
210 	TestRounding(1.0f / 32768.0f - e, 1, samples);
211 	TestRounding(-1.0f / 32768.0f + e, -1, samples);
212 
213 	/* Rounding on 'tie' is different for Intel. */
214 #if defined(__i386__) || defined(__x86_64__)
215 	TestRounding(0.5f / 32768.0f, 0, samples);  /* Expect round to even */
216 	TestRounding(-0.5f / 32768.0f, 0, samples);
217 #else
218 	TestRounding(0.5f / 32768.0f, 1, samples);  /* Expect round away */
219 	TestRounding(-0.5f / 32768.0f, -1, samples);
220 #endif
221 
222 	TestRounding(0.5f / 32768.0f + e, 1, samples);
223 	TestRounding(-0.5f / 32768.0f - e, 1, samples);
224 	TestRounding(0.5f / 32768.0f - e, 0, samples);
225 	TestRounding(-0.5f / 32768.0f + e, 0, samples);
226 
227 	TestRounding(1.5f / 32768.0f, 2, samples);
228 	TestRounding(-1.5f / 32768.0f, -2, samples);
229 	TestRounding(1.5f / 32768.0f + e, 2, samples);
230 	TestRounding(-1.5f / 32768.0f - e, -2, samples);
231 	TestRounding(1.5f / 32768.0f - e, 1, samples);
232 	TestRounding(-1.5f / 32768.0f + e, -1, samples);
233 
234 	/* Test denormals */
235 	union ieee754_float denorm;
236 	denorm.ieee.negative = 0;
237 	denorm.ieee.exponent = 0;
238 	denorm.ieee.mantissa = 1;
239 	TestRounding(denorm.f, 0, samples);
240 	denorm.ieee.negative = 1;
241 	denorm.ieee.exponent = 0;
242 	denorm.ieee.mantissa = 1;
243 	TestRounding(denorm.f, 0, samples);
244 
245 	/* Test NaNs. Caveat Results vary by implementation. */
246 #if defined(__i386__) || defined(__x86_64__)
247 #define EXPECTED_NAN_RESULT -32768
248 #else
249 #define EXPECTED_NAN_RESULT 0
250 #endif
251 	union ieee754_float nan;  /* Quiet NaN */
252 	nan.ieee.negative = 0;
253 	nan.ieee.exponent = 0xff;
254 	nan.ieee.mantissa = 0x400001;
255 	TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
256 	nan.ieee.negative = 0;
257 	nan.ieee.exponent = 0xff;
258 	nan.ieee.mantissa = 0x000001;  /* Signalling NaN */
259 	TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
260 
261 	/* Test Performance */
262 	uint64_t diff;
263 	struct timespec start, end;
264 	int i;
265 	int d;
266 
267 	short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
268 	float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
269 	float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
270 	float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
271 	float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
272 	short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
273 	short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
274 
275 	memset(in_shorts, 0x11, MAXSAMPLES * 2 * 2 + PAD);
276 	memset(out_floats_left_c, 0x22, MAXSAMPLES * 4 + PAD);
277 	memset(out_floats_right_c, 0x33, MAXSAMPLES * 4 + PAD);
278 	memset(out_floats_left_opt, 0x44, MAXSAMPLES * 4 + PAD);
279 	memset(out_floats_right_opt, 0x55, MAXSAMPLES * 4 + PAD);
280 	memset(out_shorts_c, 0x66, MAXSAMPLES * 2 * 2 + PAD);
281 	memset(out_shorts_opt, 0x66, MAXSAMPLES * 2 * 2 + PAD);
282 
283 	float *out_floats_ptr_c[2];
284 	float *out_floats_ptr_opt[2];
285 
286 	out_floats_ptr_c[0] = out_floats_left_c;
287 	out_floats_ptr_c[1] = out_floats_right_c;
288 	out_floats_ptr_opt[0] = out_floats_left_opt;
289 	out_floats_ptr_opt[1] = out_floats_right_opt;
290 
291 	/* Benchmark dsp_util_interleave */
292 	for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
293 
294 		/* measure original C interleave */
295 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
296 		for (i = 0; i < ITERATIONS; ++i) {
297 			dsp_util_interleave_reference(out_floats_ptr_c,
298 						      out_shorts_c,
299 						      2, samples);
300 		}
301 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
302 		diff = (BILLION * (end.tv_sec - start.tv_sec) +
303 			end.tv_nsec - start.tv_nsec) / 1000000;
304 		printf("interleave   ORIG size = %6d, elapsed time = %llu ms\n",
305 		       samples, (long long unsigned int) diff);
306 
307 		/* measure optimized interleave */
308 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
309 		for (i = 0; i < ITERATIONS; ++i) {
310 			dsp_util_interleave(out_floats_ptr_c, out_shorts_opt, 2,
311 					    samples);
312 		}
313 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
314 		diff = (BILLION * (end.tv_sec - start.tv_sec) +
315 			end.tv_nsec - start.tv_nsec) / 1000000;
316 		printf("interleave   SIMD size = %6d, elapsed time = %llu ms\n",
317 		       samples, (long long unsigned int) diff);
318 
319 		/* Test C and SIMD output match */
320 		d = memcmp(out_shorts_c, out_shorts_opt,
321 			   MAXSAMPLES * 2 * 2 + PAD);
322 		if (d) printf("interleave compare %d, %d %d, %d %d\n", d,
323 			      out_shorts_c[0], out_shorts_c[1],
324 			      out_shorts_opt[0], out_shorts_opt[1]);
325 	}
326 
327 	/* Benchmark dsp_util_deinterleave */
328 	for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
329 
330 		/* Measure original C deinterleave */
331 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
332 		for (i = 0; i < ITERATIONS; ++i) {
333 			dsp_util_deinterleave_reference(in_shorts,
334 							out_floats_ptr_c,
335 							2, samples);
336 		}
337 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
338 		diff = (BILLION * (end.tv_sec - start.tv_sec) +
339 			end.tv_nsec - start.tv_nsec) / 1000000;
340 			printf("deinterleave ORIG size = %6d, "
341 			       "elapsed time = %llu ms\n",
342 			       samples, (long long unsigned int) diff);
343 
344 		/* Measure optimized deinterleave */
345 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
346 		for (i = 0; i < ITERATIONS; ++i) {
347 			dsp_util_deinterleave(in_shorts, out_floats_ptr_opt, 2,
348 					      samples);
349 		}
350 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
351 		diff = (BILLION * (end.tv_sec - start.tv_sec) +
352 			end.tv_nsec - start.tv_nsec) / 1000000;
353 		printf("deinterleave SIMD size = %6d, elapsed time = %llu ms\n",
354 			samples, (long long unsigned int) diff);
355 
356 		/* Test C and SIMD output match */
357 		d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0],
358 			   samples * 4);
359 		if (d) printf("left compare %d, %f %f\n", d,
360 			      out_floats_ptr_c[0][0], out_floats_ptr_opt[0][0]);
361 		d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1],
362 			   samples * 4);
363 		if (d) printf("right compare %d, %f %f\n", d,
364 			      out_floats_ptr_c[1][0], out_floats_ptr_opt[1][0]);
365 	}
366 
367 	free(in_shorts);
368 	free(out_floats_left_c);
369 	free(out_floats_right_c);
370 	free(out_floats_left_opt);
371 	free(out_floats_right_opt);
372 	free(out_shorts_c);
373 	free(out_shorts_opt);
374 
375 	return 0;
376 }