• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*  Copyright (C) 2006 Dave Nomura
3        dcnltc@us.ibm.com
4 
5     This program is free software; you can redistribute it and/or
6     modify it under the terms of the GNU General Public License as
7     published by the Free Software Foundation; either version 2 of the
8     License, or (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful, but
11     WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18     02111-1307, USA.
19 
20     The GNU General Public License is contained in the file COPYING.
21 */
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <limits.h>
26 
27 typedef enum { FALSE=0, TRUE } bool_t;
28 
29 typedef enum {
30 	FADDS, FSUBS, FMULS, FDIVS,
31 	FMADDS, FMSUBS, FNMADDS, FNMSUBS,
32 	FADD, FSUB, FMUL, FDIV, FMADD,
33 	FMSUB, FNMADD, FNMSUB, FSQRT
34 } flt_op_t;
35 
36 typedef enum {
37 	TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
38 char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
39 
40 const char *flt_op_names[] = {
41 	"fadds", "fsubs", "fmuls", "fdivs",
42 	"fmadds", "fmsubs", "fnmadds", "fnmsubs",
43 	"fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
44 	"fnmsub", "fsqrt"
45 };
46 
47 typedef unsigned int fpscr_t;
48 
49 typedef union {
50 	float flt;
51 	struct {
52 		unsigned int sign:1;
53 		unsigned int exp:8;
54 		unsigned int frac:23;
55 	} layout;
56 } flt_overlay;
57 
58 typedef union {
59 	double dbl;
60 	struct {
61 		unsigned int sign:1;
62 		unsigned int exp:11;
63 		unsigned int frac_hi:20;
64 		unsigned int frac_lo:32;
65 	} layout;
66 	struct {
67 		unsigned int hi;
68 		unsigned int lo;
69 	} dbl_pair;
70 } dbl_overlay;
71 
72 void assert_fail(const char *msg,
73 	const char* expr, const char* file, int line, const char*fn);
74 
75 #define STRING(__str)  #__str
76 #define assert(msg, expr)                                           \
77   ((void) ((expr) ? 0 :                                         \
78            (assert_fail (msg, STRING(expr),                  \
79                              __FILE__, __LINE__,                \
80                              __PRETTY_FUNCTION__), 0)))
81 float denorm_small;
82 double dbl_denorm_small;
83 float norm_small;
84 bool_t debug = FALSE;
85 bool_t long_is_64_bits = sizeof(long) == 8;
86 
assert_fail(msg,expr,file,line,fn)87 void assert_fail (msg, expr, file, line, fn)
88 const char* msg;
89 const char* expr;
90 const char* file;
91 int line;
92 const char*fn;
93 {
94    printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
95                msg, file, line, fn, expr );
96    exit( 1 );
97 }
set_rounding_mode(round_mode_t mode)98 void set_rounding_mode(round_mode_t mode)
99 {
100 	switch(mode) {
101 	case TO_NEAREST:
102 		asm volatile("mtfsfi 7, 0");
103 		break;
104 	case TO_ZERO:
105 		asm volatile("mtfsfi 7, 1");
106 		break;
107 	case TO_PLUS_INFINITY:
108 		asm volatile("mtfsfi 7, 2");
109 		break;
110 	case TO_MINUS_INFINITY:
111 		asm volatile("mtfsfi 7, 3");
112 		break;
113 	}
114 }
115 
print_double(char * msg,double dbl)116 void print_double(char *msg, double dbl)
117 {
118 	dbl_overlay D;
119 	D.dbl = dbl;
120 
121 	printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
122 			msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
123 			D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
124 }
125 
print_single(char * msg,float * flt)126 void print_single(char *msg, float *flt)
127 {
128 	flt_overlay F;
129 	F.flt = *flt;
130 
131 	/* NOTE: for the purposes of comparing the fraction of a single with
132 	**       a double left shift the .frac so that hex digits are grouped
133 	**	     from left to right.  this is necessary because the size of a
134 	**		 single mantissa (23) bits is not a multiple of 4
135 	*/
136 	printf("%15s : flt %-20a = %c(%4d, %06x)\n",
137 		msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
138 }
139 
check_dbl_to_flt_round(round_mode_t mode,double dbl,float * expected)140 int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
141 {
142 	int status = 0;
143 	flt_overlay R, E;
144 	char *result;
145 
146 	set_rounding_mode(mode);
147 
148 	E.flt = *expected;
149 	R.flt = (float)dbl;
150 
151 	if ((R.layout.sign != E.layout.sign) ||
152 		(R.layout.exp != E.layout.exp) ||
153 		(R.layout.frac != E.layout.frac)) {
154 		result = "FAILED";
155 		status = 1;
156 	} else {
157 		result = "PASSED";
158 		status = 0;
159 	}
160 	printf("%s:%s:(double)(%-20a) = %20a",
161 		round_mode_name[mode], result, R.flt, dbl);
162 	if (status) {
163 		print_single("\n\texpected", &E.flt);
164 		print_single("\n\trounded ", &R.flt);
165 	}
166 	putchar('\n');
167 	return status;
168 }
169 
test_dbl_to_float_convert(char * msg,float * base)170 int test_dbl_to_float_convert(char *msg, float *base)
171 {
172 	int status = 0;
173 	double half = (double)denorm_small/2;
174 	double qtr = half/2;
175 	double D_hi = (double)*base + half + qtr;
176 	double D_lo = (double)*base + half - qtr;
177 	float F_lo = *base;
178 	float F_hi = F_lo + denorm_small;
179 
180 
181 	/*
182 	** .....+-----+-----+-----+-----+---....
183 	**      ^F_lo ^           ^     ^
184 	**            D_lo
185 	**                        D_hi
186 	**                              F_hi
187 	** F_lo and F_hi are two consecutive single float model numbers
188 	** denorm_small distance apart. D_lo and D_hi are two numbers
189 	** within that range that are not representable as single floats
190 	** and will be rounded to either F_lo or F_hi.
191 	*/
192 	printf("-------------------------- %s --------------------------\n", msg);
193 	if (debug) {
194 		print_double("D_lo", D_lo);
195 		print_double("D_hi", D_hi);
196 		print_single("F_lo", &F_lo);
197 		print_single("F_hi", &F_hi);
198 	}
199 
200 	/* round to nearest */
201 	status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
202 	status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
203 
204 	/* round to zero */
205 	status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
206 	status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
207 
208 	/* round to +inf */
209 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
210 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
211 
212 	/* round to -inf */
213 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
214 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
215 	return status;
216 }
217 
218 void
init()219 init()
220 {
221 	flt_overlay F;
222 	dbl_overlay D;
223 
224 	/* small is the smallest denormalized single float number */
225 	F.layout.sign = 0;
226 	F.layout.exp = 0;
227 	F.layout.frac = 1;
228 	denorm_small = F.flt;	/* == 2^(-149) */
229 	if (debug) {
230 		print_double("float small", F.flt);
231 	}
232 
233 	D.layout.sign = 0;
234 	D.layout.exp = 0;
235 	D.layout.frac_hi = 0;
236 	D.layout.frac_lo = 1;
237 	dbl_denorm_small = D.dbl;	/* == 2^(-1022) */
238 	if (debug) {
239 		print_double("double small", D.dbl);
240 	}
241 
242 	/* n_small is the smallest normalized single precision float */
243 	F.layout.exp = 1;
244 	norm_small = F.flt;
245 }
246 
check_int_to_flt_round(round_mode_t mode,long L,float * expected)247 int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
248 {
249 	int status = 0;
250 	int I = L;
251 	char *int_name = "int";
252 	flt_overlay R, E;
253 	char *result;
254 	int iter;
255 
256 	set_rounding_mode(mode);
257 	E.flt = *expected;
258 
259 	for (iter = 0; iter < 2; iter++) {
260 		int stat = 0;
261 		R.flt = (iter == 0 ? (float)I : (float)L);
262 
263 		if ((R.layout.sign != E.layout.sign) ||
264 			(R.layout.exp != E.layout.exp) ||
265 			(R.layout.frac != E.layout.frac)) {
266 			result = "FAILED";
267 			stat = 1;
268 		} else {
269 			result = "PASSED";
270 			stat = 0;
271 		}
272 		printf("%s:%s:(float)(%4s)%9d = %11.1f",
273 			round_mode_name[mode], result, int_name, I, R.flt);
274 		if (stat) {
275 			print_single("\n\texpected: %.1f ", &E.flt);
276 			print_single("\n\trounded ", &R.flt);
277 		}
278 		putchar('\n');
279 		status |= stat;
280 
281 		if (!long_is_64_bits) break;
282 		int_name = "long";
283 	}
284 	return status;
285 }
286 
check_long_to_dbl_round(round_mode_t mode,long L,double * expected)287 int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
288 {
289 	int status = 0;
290 	dbl_overlay R, E;
291 	char *result;
292 
293 	set_rounding_mode(mode);
294 	E.dbl = *expected;
295 
296 	R.dbl = (double)L;
297 
298 	if ((R.layout.sign != E.layout.sign) ||
299 		(R.layout.exp != E.layout.exp) ||
300 		(R.layout.frac_lo != E.layout.frac_lo) ||
301 		(R.layout.frac_hi != E.layout.frac_hi)) {
302 		result = "FAILED";
303 		status = 1;
304 	} else {
305 		result = "PASSED";
306 		status = 0;
307 	}
308 	printf("%s:%s:(double)(%18ld) = %20.1f",
309 		round_mode_name[mode], result, L, R.dbl);
310 	if (status) {
311 		printf("\n\texpected %.1f : ", E.dbl);
312 	}
313 	putchar('\n');
314 	return status;
315 }
316 
test_int_to_float_convert(char * msg)317 int test_int_to_float_convert(char *msg)
318 {
319 	int status = 0;
320 	int int24_hi = 0x03ff0fff;
321 	int int24_lo = 0x03ff0ffd;
322 	float pos_flt_lo = 67047420.0;
323 	float pos_flt_hi = 67047424.0;
324 	float neg_flt_lo = -67047420.0;
325 	float neg_flt_hi = -67047424.0;
326 
327 	printf("-------------------------- %s --------------------------\n", msg);
328 	status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
329 	status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
330 	status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
331 	status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
332 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
333 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
334 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
335 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
336 
337 	status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
338 	status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
339 	status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
340 	status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
341 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
342 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
343 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
344 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
345 	return status;
346 }
347 
348 #ifdef __powerpc64__
test_long_to_double_convert(char * msg)349 int test_long_to_double_convert(char *msg)
350 {
351 	int status = 0;
352 	long long55_hi = 0x07ff0ffffffffff;
353 	long long55_lo = 0x07ff0fffffffffd;
354 	double pos_dbl_lo = 36012304344547324.0;
355 	double pos_dbl_hi = 36012304344547328.0;
356 	double neg_dbl_lo = -36012304344547324.0;
357 	double neg_dbl_hi = -36012304344547328.0;
358 
359 	printf("-------------------------- %s --------------------------\n", msg);
360 	status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
361 	status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
362 	status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
363 	status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
364 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
365 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
366 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
367 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
368 
369 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
370 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
371 	status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
372 	status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
373 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
374 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
375 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
376 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
377 	return status;
378 }
379 #endif
380 
check_single_arithmetic_op(flt_op_t op)381 int check_single_arithmetic_op(flt_op_t op)
382 {
383 		char *result;
384         int status = 0;
385         dbl_overlay R, E;
386         double qtr, half, fA, fB, fD;
387 		round_mode_t mode;
388 		int q, s;
389 		bool_t two_args = TRUE;
390 		float whole = denorm_small;
391 
392 #define BINOP(op) \
393         __asm__ volatile( \
394 					op" %0, %1, %2\n\t" \
395 					: "=f"(fD) : "f"(fA) , "f"(fB));
396 #define UNOP(op) \
397         __asm__ volatile( \
398 					op" %0, %1\n\t" \
399 					: "=f"(fD) : "f"(fA));
400 
401 		half = (double)whole/2;
402 		qtr = half/2;
403 
404 		if (debug) {
405 			print_double("qtr", qtr);
406 			print_double("whole", whole);
407 			print_double("2*whole", 2*whole);
408 		}
409 
410 		for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
411 		for (s = -1; s < 2; s += 2)
412 		for (q = 1; q < 4; q += 2) {
413 			double expected;
414 			double lo = s*whole;
415 			double hi = s*2*whole;
416 
417 			switch(op) {
418 			case FADDS:
419 				fA = s*whole;
420 				fB = s*q*qtr;
421 				break;
422 			case FSUBS:
423 				fA = s*2*whole;
424 				fB = s*(q == 1 ? 3 : 1)*qtr;
425 				break;
426 			case FMULS:
427 				fA = 0.5;
428 				fB = s*(4+q)*half;
429 				break;
430 			case FDIVS:
431 				fA = s*(4+q)*half;
432 				fB = 2.0;
433 				break;
434 			default:
435 				assert("check_single_arithmetic_op: unexpected op",
436 					FALSE);
437 				break;
438 			}
439 
440 			switch(mode) {
441 			case TO_NEAREST:
442 				expected = (q == 1 ? lo : hi);
443 				break;
444 			case TO_ZERO:
445 				expected = lo;
446 				break;
447 			case TO_PLUS_INFINITY:
448 				expected = (s == 1 ? hi : lo);
449 				break;
450 			case TO_MINUS_INFINITY:
451 				expected = (s == 1 ? lo : hi);
452 				break;
453 			}
454 
455 			set_rounding_mode(mode);
456 
457 			/*
458 			** do the double precision dual operation just for comparison
459 			** when debugging
460 			*/
461 			switch(op) {
462 			case FADDS:
463 				BINOP("fadds");
464 				R.dbl = fD;
465 				BINOP("fadd");
466 				break;
467 			case FSUBS:
468 				BINOP("fsubs");
469 				R.dbl = fD;
470 				BINOP("fsub");
471 				break;
472 			case FMULS:
473 				BINOP("fmuls");
474 				R.dbl = fD;
475 				BINOP("fmul");
476 				break;
477 			case FDIVS:
478 				BINOP("fdivs");
479 				R.dbl = fD;
480 				BINOP("fdiv");
481 				break;
482 			default:
483 				assert("check_single_arithmetic_op: unexpected op",
484 					FALSE);
485 				break;
486 			}
487 #undef UNOP
488 #undef BINOP
489 
490 			E.dbl = expected;
491 
492 			if ((R.layout.sign != E.layout.sign) ||
493 				(R.layout.exp != E.layout.exp) ||
494 				(R.layout.frac_lo != E.layout.frac_lo) ||
495 				(R.layout.frac_hi != E.layout.frac_hi)) {
496 				result = "FAILED";
497 				status = 1;
498 			} else {
499 				result = "PASSED";
500 				status = 0;
501 			}
502 
503 			printf("%s:%s:%s(%-13a",
504 				round_mode_name[mode], result, flt_op_names[op], fA);
505 			if (two_args) printf(", %-13a", fB);
506 			printf(") = %-13a", R.dbl);
507 			if (status) printf("\n\texpected %a", E.dbl);
508 			putchar('\n');
509 
510 			if (debug) {
511 				print_double("hi", hi);
512 				print_double("lo", lo);
513 				print_double("expected", expected);
514 				print_double("got", R.dbl);
515 				print_double("double result", fD);
516 			}
517 		}
518 
519 		return status;
520 }
521 
check_single_guarded_arithmetic_op(flt_op_t op)522 int check_single_guarded_arithmetic_op(flt_op_t op)
523 {
524 		typedef struct {
525 			int num, den, frac;
526 		} fdivs_t;
527 
528 		char *result;
529         int status = 0;
530         flt_overlay A, B, Z;
531         dbl_overlay Res, Exp;
532         double fA, fB, fC, fD;
533 		round_mode_t mode;
534 		int g, s;
535 		int arg_count;
536 
537 		fdivs_t divs_guard_cases[16] = {
538 			{ 105, 56, 0x700000 },  /* : 0 */
539 			{ 100, 57, 0x608FB8 },  /* : 1 */
540 			{ 000, 00, 0x000000 },  /* : X */
541 			{ 100, 52, 0x762762 },  /* : 3 */
542 			{ 000, 00, 0x000000 },  /* : X */
543 			{ 100, 55, 0x68BA2E },  /* : 5 */
544 			{ 000, 00, 0x000000 },  /* : X */
545 			{ 100, 51, 0x7AFAFA },  /* : 7 */
546 			{ 000, 00, 0x000000 },  /* : X */
547 			{ 100, 56, 0x649249 },  /* : 9 */
548 			{ 000, 00, 0x000000 },  /* : X */
549 			{ 100, 54, 0x6D097B },  /* : B */
550 			{ 000, 00, 0x000000 },  /* : X */
551 			{ 100, 59, 0x58F2FB },  /* : D */
552 			{ 000, 00, 0x000000 },  /* : X */
553 			{ 101, 52, 0x789D89 }  /* : F */
554 		};
555 
556 		/*	0x1.00000 00000000p-3 */
557 		/* set up the invariant fields of B, the arg to cause rounding */
558 		B.flt = 0.0;
559 		B.layout.exp = 124;  /* -3 */
560 
561 		/* set up args so result is always Z = 1.200000000000<g>p+0 */
562 		Z.flt = 1.0;
563 		Z.layout.sign = 0;
564 
565 #define TERNOP(op) \
566 		arg_count = 3; \
567         __asm__ volatile( \
568 					op" %0, %1, %2, %3\n\t" \
569 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
570 #define BINOP(op) \
571 		arg_count = 2; \
572         __asm__ volatile( \
573 					op" %0, %1, %2\n\t" \
574 					: "=f"(fD) : "f"(fA) , "f"(fB));
575 #define UNOP(op) \
576 		arg_count = 1; \
577         __asm__ volatile( \
578 					op" %0, %1\n\t" \
579 					: "=f"(fD) : "f"(fA));
580 
581 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
582 	for (s = -1; s < 2; s += 2)
583 	for (g = 0; g < 16; g += 1) {
584 		double lo, hi, expected;
585 		int LSB;
586 		int guard = 0;
587 		int z_sign = s;
588 
589 		/*
590 		** one argument will have exponent = 0 as will the result (by
591 		** design) so choose the other argument with exponent -3 to
592 		** force a 3 bit shift for scaling leaving us with 3 guard bits
593 		** and the LSB bit at the bottom of the manitssa.
594 		*/
595 		switch(op) {
596 		case FADDS:
597 			/* 1p+0 + 1.00000<g>p-3 */
598 			B.layout.frac = g;
599 
600 			fB = s*B.flt;
601 			fA = s*1.0;
602 
603 			/* set up Z to be truncated result */
604 
605 			/* mask off LSB from resulting guard bits */
606 			guard = g & 7;
607 
608 			Z.layout.frac = 0x100000 | (g >> 3);
609 			break;
610 		case FSUBS:
611 			/* 1.200002p+0 - 1.000000000000<g>p-3 */
612 			A.flt = 1.125;
613 			/* add enough to avoid scaling of the result */
614 			A.layout.frac |= 0x2;
615 			fA = s*A.flt;
616 
617 			B.layout.frac = g;
618 			fB = s*B.flt;
619 
620 			/* set up Z to be truncated result */
621 			guard = (0x10-g);
622 			Z.layout.frac = guard>>3;
623 
624 			/* mask off LSB from resulting guard bits */
625 			guard &= 7;
626 			break;
627 		case FMULS:
628 			/* 1 + g*2^-23 */
629 			A.flt = 1.0;
630 			A.layout.frac = g;
631 			fA = s*A.flt;
632 			fB = 1.125;
633 
634 			/* set up Z to be truncated result */
635 			Z.flt = 1.0;
636 			Z.layout.frac = 0x100000;
637 			Z.layout.frac |= g + (g>>3);
638 			guard = g & 7;
639 			break;
640 		case FDIVS:
641 			/* g >> 3 == LSB, g & 7 == guard bits */
642 			guard = g & 7;
643 			if ((guard & 1) == 0) {
644 				/* special case: guard bit X = 0 */
645 				A.flt = denorm_small;
646 				A.layout.frac = g;
647 				fA = A.flt;
648 				fB = s*8.0;
649 				Z.flt = 0.0;
650 				Z.layout.frac |= (g >> 3);
651 			} else {
652 				fA = s*divs_guard_cases[g].num;
653 				fB = divs_guard_cases[g].den;
654 
655 				Z.flt = 1.0;
656 				Z.layout.frac = divs_guard_cases[g].frac;
657 			}
658 			break;
659 		case FMADDS:
660 		case FMSUBS:
661 		case FNMADDS:
662 		case FNMSUBS:
663 			/* 1 + g*2^-23 */
664 			A.flt = 1.0;
665 			A.layout.frac = g;
666 			fA = s*A.flt;
667 			fB = 1.125;
668 
669 			/* 1.000001p-1 */
670 			A.flt = 0.5;
671 			A.layout.frac = 1;
672 			fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
673 
674 			/* set up Z to be truncated result */
675 			z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
676 			guard = ((g & 7) + 0x4) & 7;
677 			Z.flt = 1.0;
678 			Z.layout.frac = 0x500000;
679 			Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
680 			break;
681 		default:
682 			assert("check_single_arithmetic_op: unexpected op",
683 				FALSE);
684 			break;
685 		}
686 
687 		/* get LSB for tie breaking */
688 		LSB = Z.layout.frac & 1;
689 
690 		/* set up hi and lo */
691 		lo = z_sign*Z.flt;
692 		Z.layout.frac += 1;
693 		hi = z_sign*Z.flt;
694 
695 		switch(mode) {
696 		case TO_NEAREST:
697 			/* look at 3 guard bits to determine expected rounding */
698 			switch(guard) {
699 			case 0:
700 			case 1: case 2: case 3:
701 				expected = lo;
702 				break;
703 			case 4:	/* tie: round to even */
704 				if (debug) printf("tie: LSB = %d\n", LSB);
705 				expected = (LSB == 0 ? lo : hi);
706 				break;
707 			case 5: case 6: case 7:
708 				expected = hi;
709 				break;
710 			default:
711 				assert("check_single_guarded_arithmetic_op: unexpected guard",
712 					FALSE);
713 			}
714 			break;
715 		case TO_ZERO:
716 			expected = lo;
717 			break;
718 		case TO_PLUS_INFINITY:
719 			if (guard == 0) {
720 				/* no rounding */
721 				expected = lo;
722 			} else {
723 				expected = (s == 1 ? hi : lo);
724 			}
725 			break;
726 		case TO_MINUS_INFINITY:
727 			if (guard == 0) {
728 				/* no rounding */
729 				expected = lo;
730 			} else {
731 				expected = (s == 1 ? lo : hi);
732 			}
733 			break;
734 		}
735 
736 		set_rounding_mode(mode);
737 
738 		/*
739 		** do the double precision dual operation just for comparison
740 		** when debugging
741 		*/
742 		switch(op) {
743 		case FADDS:
744 			BINOP("fadds");
745 			Res.dbl = fD;
746 			break;
747 		case FSUBS:
748 			BINOP("fsubs");
749 			Res.dbl = fD;
750 			break;
751 		case FMULS:
752 			BINOP("fmuls");
753 			Res.dbl = fD;
754 			break;
755 		case FDIVS:
756 			BINOP("fdivs");
757 			Res.dbl = fD;
758 			break;
759 		case FMADDS:
760 			TERNOP("fmadds");
761 			Res.dbl = fD;
762 			break;
763 		case FMSUBS:
764 			TERNOP("fmsubs");
765 			Res.dbl = fD;
766 			break;
767 		case FNMADDS:
768 			TERNOP("fnmadds");
769 			Res.dbl = fD;
770 			break;
771 		case FNMSUBS:
772 			TERNOP("fnmsubs");
773 			Res.dbl = fD;
774 			break;
775 		default:
776 			assert("check_single_guarded_arithmetic_op: unexpected op",
777 				FALSE);
778 			break;
779 		}
780 #undef UNOP
781 #undef BINOP
782 #undef TERNOP
783 
784 		Exp.dbl = expected;
785 
786 		if ((Res.layout.sign != Exp.layout.sign) ||
787 			(Res.layout.exp != Exp.layout.exp) ||
788 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
789 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
790 			result = "FAILED";
791 			status = 1;
792 		} else {
793 			result = "PASSED";
794 			status = 0;
795 		}
796 
797 		printf("%s:%s:%s(%-13f",
798 			round_mode_name[mode], result, flt_op_names[op], fA);
799 		if (arg_count > 1) printf(", %-13a", fB);
800 		if (arg_count > 2) printf(", %-13a", fC);
801 		printf(") = %-13a", Res.dbl);
802 		if (status) printf("\n\texpected %a", Exp.dbl);
803 		putchar('\n');
804 
805 		if (debug) {
806 			print_double("hi", hi);
807 			print_double("lo", lo);
808 			print_double("expected", expected);
809 			print_double("got", Res.dbl);
810 		}
811 	}
812 
813 	return status;
814 }
815 
check_double_guarded_arithmetic_op(flt_op_t op)816 int check_double_guarded_arithmetic_op(flt_op_t op)
817 {
818 	typedef struct {
819 		int num, den, hi, lo;
820 	} fdiv_t;
821 	typedef struct {
822 		double arg;
823 		int exp, hi, lo;
824 	} fsqrt_t;
825 
826 	char *result;
827 	int status = 0;
828 	dbl_overlay A, B, Z;
829 	dbl_overlay Res, Exp;
830 	double fA, fB, fC, fD;
831 	round_mode_t mode;
832 	int g, s;
833 	int arg_count;
834 	fdiv_t div_guard_cases[16] = {
835 		{ 62, 62, 0x00000, 0x00000000 },	/* 0 */
836 		{ 64, 62, 0x08421, 0x08421084 },	/* 1 */
837 		{ 66, 62, 0x10842, 0x10842108 },	/* 2 */
838 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* 3 */
839 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* X */
840 		{ 102, 62, 0xa5294, 0xa5294a52 },	/* 5 */
841 		{ 106, 62, 0xb5ad6, 0xb5ad6b5a },	/* 6 */
842 		{ 108, 62, 0xbdef7, 0xbdef7bde },	/* 7 */
843 		{ 108, 108, 0x00000, 0x00000000 },	/* 8 */
844 		{ 112, 62, 0xce739, 0xce739ce7 },	/* 9 */
845 		{ 114, 62, 0xd6b5a, 0xd6b5ad6b },	/* A */
846 		{ 116, 62, 0xdef7b, 0xdef7bdef },	/* B */
847 		{ 84, 62, 0x5ad6b, 0x5ad6b5ad },	/* X */
848 		{ 118, 62, 0xe739c, 0xe739ce73 },	/* D */
849 		{ 90, 62, 0x739ce, 0x739ce739 },	/* E */
850 		{ 92, 62, 0x7bdef, 0x7bdef7bd }		/* F */
851 	};
852 
853 
854 	fsqrt_t sqrt_guard_cases[16] = {
855 		{ 0x1.08800p0,  0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440  */
856 		{ 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910  */
857 		{ 0x1.A8220p0,  0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411  */
858 		{ 0x1.05A20p0,  0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1  */
859 		{ 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541  */
860 		{ 0x1.DCA20p0,  0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51  */
861 		{ 0x1.02C80p0,  0, 0x01630, 0x9cde7483}, /* :6 B6.8164  */
862 		{ 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40  */
863 		{ 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9  */
864 		{ 0x1.1D020p0,  0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81  */
865 		{ 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4  */
866 		{ 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400  */
867 		{ 0x1.48520p0,  0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429  */
868 		{ 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61  */
869 		{ 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684  */
870 		{ 0x1.76B20p0,  0, 0x35b67, 0x81aed827}  /* :F DB.BB59  */
871 	};
872 
873 	/*	0x1.00000 00000000p-3 */
874 	/* set up the invariant fields of B, the arg to cause rounding */
875 	B.dbl = 0.0;
876 	B.layout.exp = 1020;
877 
878 	/* set up args so result is always Z = 1.200000000000<g>p+0 */
879 	Z.dbl = 1.0;
880 	Z.layout.sign = 0;
881 
882 #define TERNOP(op) \
883 		arg_count = 3; \
884         __asm__ volatile( \
885 					op" %0, %1, %2, %3\n\t" \
886 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
887 #define BINOP(op) \
888 		arg_count = 2; \
889         __asm__ volatile( \
890 					op" %0, %1, %2\n\t" \
891 					: "=f"(fD) : "f"(fA) , "f"(fB));
892 #define UNOP(op) \
893 		arg_count = 1; \
894         __asm__ volatile( \
895 					op" %0, %1\n\t" \
896 					: "=f"(fD) : "f"(fA));
897 
898 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
899 	for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
900 	for (g = 0; g < 16; g += 1) {
901 		double lo, hi, expected;
902 		int LSB;
903 		int guard;
904 		int z_sign = s;
905 
906 		/*
907 		** one argument will have exponent = 0 as will the result (by
908 		** design) so choose the other argument with exponent -3 to
909 		** force a 3 bit shift for scaling leaving us with 3 guard bits
910 		** and the LSB bit at the bottom of the manitssa.
911 		*/
912 		switch(op) {
913 		case FADD:
914 			/* 1p+0 + 1.000000000000<g>p-3 */
915 			B.layout.frac_lo = g;
916 
917 			fB = s*B.dbl;
918 			fA = s*1.0;
919 
920 			/* set up Z to be truncated result */
921 
922 			/* mask off LSB from resulting guard bits */
923 			guard = g & 7;
924 
925 			Z.layout.frac_hi = 0x20000;
926 			Z.layout.frac_lo = g >> 3;
927 
928 			break;
929 		case FSUB:
930 			/* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
931 			A.dbl = 1.125;
932 			/* add enough to avoid scaling of the result */
933 			A.layout.frac_lo = 0x2;
934 			fA = s*A.dbl;
935 
936 			B.layout.frac_lo = g;
937 			fB = s*B.dbl;
938 
939 			/* set up Z to be truncated result */
940 			guard = (0x10-g);
941 			Z.layout.frac_hi = 0x0;
942 			Z.layout.frac_lo = guard>>3;
943 
944 			/* mask off LSB from resulting guard bits */
945 			guard &= 7;
946 			break;
947 		case FMUL:
948 			/* 1 + g*2^-52 */
949 			A.dbl = 1.0;
950 			A.layout.frac_lo = g;
951 			fA = s*A.dbl;
952 			fB = 1.125;
953 
954 			/* set up Z to be truncated result */
955 			Z.dbl = 1.0;
956 			Z.layout.frac_hi = 0x20000;
957 			Z.layout.frac_lo = g + (g>>3);
958 			guard = g & 7;
959 			break;
960 		case FMADD:
961 		case FMSUB:
962 		case FNMADD:
963 		case FNMSUB:
964 			/* 1 + g*2^-52 */
965 			A.dbl = 1.0;
966 			A.layout.frac_lo = g;
967 			fA = s*A.dbl;
968 			fB = 1.125;
969 
970 			/* 1.0000000000001p-1 */
971 			A.dbl = 0.5;
972 			A.layout.frac_lo = 1;
973 			fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
974 
975 			/* set up Z to be truncated result */
976 			z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
977 			guard = ((g & 7) + 0x4) & 7;
978 			Z.dbl = 1.0;
979 			Z.layout.frac_hi = 0xa0000;
980 			Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
981 			break;
982 		case FDIV:
983 			/* g >> 3 == LSB, g & 7 == guard bits */
984 			guard = g & 7;
985 			if (guard == 0x4) {
986 				/* special case guard bits == 4, inexact tie */
987 				fB = s*2.0;
988 				Z.dbl = 0.0;
989 				if (g >> 3) {
990 					fA = dbl_denorm_small + 2*dbl_denorm_small;
991 					Z.layout.frac_lo = 0x1;
992 				} else {
993 					fA = dbl_denorm_small;
994 				}
995 			} else {
996 				fA = s*div_guard_cases[g].num;
997 				fB = div_guard_cases[g].den;
998 
999 				printf("%d/%d\n",
1000 					s*div_guard_cases[g].num,
1001 					div_guard_cases[g].den);
1002 				Z.dbl = 1.0;
1003 				Z.layout.frac_hi = div_guard_cases[g].hi;
1004 				Z.layout.frac_lo = div_guard_cases[g].lo;
1005 			}
1006 			break;
1007 		case FSQRT:
1008 			fA = s*sqrt_guard_cases[g].arg;
1009 			Z.dbl = 1.0;
1010 			Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
1011 			Z.layout.frac_hi = sqrt_guard_cases[g].hi;
1012 			Z.layout.frac_lo = sqrt_guard_cases[g].lo;
1013 			guard = g >> 1;
1014 			if (g & 1) guard |= 1;
1015 			/* don't have test cases for when X bit = 0 */
1016 			if (guard == 0 || guard == 4) continue;
1017 			break;
1018 		default:
1019 			assert("check_double_guarded_arithmetic_op: unexpected op",
1020 				FALSE);
1021 			break;
1022 		}
1023 
1024 		/* get LSB for tie breaking */
1025 		LSB = Z.layout.frac_lo & 1;
1026 
1027 		/* set up hi and lo */
1028 		lo = z_sign*Z.dbl;
1029 		Z.layout.frac_lo += 1;
1030 		hi = z_sign*Z.dbl;
1031 
1032 		switch(mode) {
1033 		case TO_NEAREST:
1034 			/* look at 3 guard bits to determine expected rounding */
1035 			switch(guard) {
1036 			case 0:
1037 			case 1: case 2: case 3:
1038 				expected = lo;
1039 				break;
1040 			case 4:	/* tie: round to even */
1041 				if (debug) printf("tie: LSB = %d\n", LSB);
1042 				expected = (LSB == 0 ? lo : hi);
1043 				break;
1044 			case 5: case 6: case 7:
1045 				expected = hi;
1046 				break;
1047 			default:
1048 				assert("check_double_guarded_arithmetic_op: unexpected guard",
1049 					FALSE);
1050 			}
1051 			break;
1052 		case TO_ZERO:
1053 			expected = lo;
1054 			break;
1055 		case TO_PLUS_INFINITY:
1056 			if (guard == 0) {
1057 				/* no rounding */
1058 				expected = lo;
1059 			} else {
1060 				expected = (s == 1 ? hi : lo);
1061 			}
1062 			break;
1063 		case TO_MINUS_INFINITY:
1064 			if (guard == 0) {
1065 				/* no rounding */
1066 				expected = lo;
1067 			} else {
1068 				expected = (s == 1 ? lo : hi);
1069 			}
1070 			break;
1071 		}
1072 
1073 		set_rounding_mode(mode);
1074 
1075 		/*
1076 		** do the double precision dual operation just for comparison
1077 		** when debugging
1078 		*/
1079 		switch(op) {
1080 		case FADD:
1081 			BINOP("fadd");
1082 			Res.dbl = fD;
1083 			break;
1084 		case FSUB:
1085 			BINOP("fsub");
1086 			Res.dbl = fD;
1087 			break;
1088 		case FMUL:
1089 			BINOP("fmul");
1090 			Res.dbl = fD;
1091 			break;
1092 		case FMADD:
1093 			TERNOP("fmadd");
1094 			Res.dbl = fD;
1095 			break;
1096 		case FMSUB:
1097 			TERNOP("fmsub");
1098 			Res.dbl = fD;
1099 			break;
1100 		case FNMADD:
1101 			TERNOP("fnmadd");
1102 			Res.dbl = fD;
1103 			break;
1104 		case FNMSUB:
1105 			TERNOP("fnmsub");
1106 			Res.dbl = fD;
1107 			break;
1108 		case FDIV:
1109 			BINOP("fdiv");
1110 			Res.dbl = fD;
1111 			break;
1112 		case FSQRT:
1113 			UNOP("fsqrt");
1114 			Res.dbl = fD;
1115 			break;
1116 		default:
1117 			assert("check_double_guarded_arithmetic_op: unexpected op",
1118 				FALSE);
1119 			break;
1120 		}
1121 #undef UNOP
1122 #undef BINOP
1123 #undef TERNOP
1124 
1125 		Exp.dbl = expected;
1126 
1127 		if ((Res.layout.sign != Exp.layout.sign) ||
1128 			(Res.layout.exp != Exp.layout.exp) ||
1129 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
1130 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
1131 			result = "FAILED";
1132 			status = 1;
1133 		} else {
1134 			result = "PASSED";
1135 			status = 0;
1136 		}
1137 
1138 		printf("%s:%s:%s(%-13a",
1139 			round_mode_name[mode], result, flt_op_names[op], fA);
1140 		if (arg_count > 1) printf(", %-13a", fB);
1141 		if (arg_count > 2) printf(", %-13a", fC);
1142 		printf(") = %-13a", Res.dbl);
1143 		if (status) printf("\n\texpected %a", Exp.dbl);
1144 		putchar('\n');
1145 
1146 		if (debug) {
1147 			print_double("hi", hi);
1148 			print_double("lo", lo);
1149 			print_double("expected", expected);
1150 			print_double("got", Res.dbl);
1151 		}
1152 	}
1153 
1154 	return status;
1155 }
1156 
test_float_arithmetic_ops()1157 int test_float_arithmetic_ops()
1158 {
1159 	int status = 0;
1160 	flt_op_t op;
1161 
1162 	/*
1163 	** choose FP operands whose result should be rounded to either
1164 	** lo or hi.
1165 	*/
1166 
1167 	printf("-------------------------- %s --------------------------\n",
1168 		"test rounding of float operators without guard bits");
1169 	for (op = FADDS; op <= FDIVS; op++) {
1170 		status |= check_single_arithmetic_op(op);
1171 	}
1172 
1173 	printf("-------------------------- %s --------------------------\n",
1174 		"test rounding of float operators with guard bits");
1175 	for (op = FADDS; op <= FNMSUBS; op++) {
1176 		status |= check_single_guarded_arithmetic_op(op);
1177 	}
1178 
1179 	printf("-------------------------- %s --------------------------\n",
1180 		"test rounding of double operators with guard bits");
1181 	for (op = FADD; op <= FSQRT; op++) {
1182 		status |= check_double_guarded_arithmetic_op(op);
1183 	}
1184 	return status;
1185 }
1186 
1187 
1188 int
main()1189 main()
1190 {
1191 	int status = 0;
1192 
1193 	init();
1194 
1195 	status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
1196 	status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
1197 	status |= test_int_to_float_convert("test (float)int convert");
1198 	status |= test_int_to_float_convert("test (float)int convert");
1199 
1200 #ifdef __powerpc64__
1201 	status |= test_long_to_double_convert("test (double)long convert");
1202 #endif
1203 	status |= test_float_arithmetic_ops();
1204 	return status;
1205 }
1206