• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*  Copyright (C) 2006 Dave Nomura
3        dcnltc@us.ibm.com
4 
5     This program is free software; you can redistribute it and/or
6     modify it under the terms of the GNU General Public License as
7     published by the Free Software Foundation; either version 2 of the
8     License, or (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful, but
11     WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18     02111-1307, USA.
19 
20     The GNU General Public License is contained in the file COPYING.
21 */
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <limits.h>
26 
27 typedef enum { FALSE=0, TRUE } bool_t;
28 
29 typedef enum {
30 	FADDS, FSUBS, FMULS, FDIVS,
31 	FMADDS, FMSUBS, FNMADDS, FNMSUBS,
32 	FADD, FSUB, FMUL, FDIV, FMADD,
33 	FMSUB, FNMADD, FNMSUB, FSQRT
34 } flt_op_t;
35 
36 typedef enum {
37 	TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
38 char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
39 
40 const char *flt_op_names[] = {
41 	"fadds", "fsubs", "fmuls", "fdivs",
42 	"fmadds", "fmsubs", "fnmadds", "fnmsubs",
43 	"fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
44 	"fnmsub", "fsqrt"
45 };
46 
47 typedef unsigned int fpscr_t;
48 
49 typedef union {
50 	float flt;
51 	struct {
52 		unsigned int sign:1;
53 		unsigned int exp:8;
54 		unsigned int frac:23;
55 	} layout;
56 } flt_overlay;
57 
58 typedef union {
59 	double dbl;
60 	struct {
61 		unsigned int sign:1;
62 		unsigned int exp:11;
63 		unsigned int frac_hi:20;
64 		unsigned int frac_lo:32;
65 	} layout;
66 	struct {
67 		unsigned int hi;
68 		unsigned int lo;
69 	} dbl_pair;
70 } dbl_overlay;
71 
72 void assert_fail(const char *msg,
73 	const char* expr, const char* file, int line, const char*fn);
74 
75 #define STRING(__str)  #__str
76 #define assert(msg, expr)                                           \
77   ((void) ((expr) ? 0 :                                         \
78            (assert_fail (msg, STRING(expr),                  \
79                              __FILE__, __LINE__,                \
80                              __PRETTY_FUNCTION__), 0)))
81 float denorm_small;
82 double dbl_denorm_small;
83 float norm_small;
84 bool_t debug = FALSE;
85 bool_t long_is_64_bits = sizeof(long) == 8;
86 
assert_fail(msg,expr,file,line,fn)87 void assert_fail (msg, expr, file, line, fn)
88 const char* msg;
89 const char* expr;
90 const char* file;
91 int line;
92 const char*fn;
93 {
94    printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
95                msg, file, line, fn, expr );
96    exit( 1 );
97 }
set_rounding_mode(round_mode_t mode)98 void set_rounding_mode(round_mode_t mode)
99 {
100 	switch(mode) {
101 	case TO_NEAREST:
102 		asm volatile("mtfsfi 7, 0");
103 		break;
104 	case TO_ZERO:
105 		asm volatile("mtfsfi 7, 1");
106 		break;
107 	case TO_PLUS_INFINITY:
108 		asm volatile("mtfsfi 7, 2");
109 		break;
110 	case TO_MINUS_INFINITY:
111 		asm volatile("mtfsfi 7, 3");
112 		break;
113 	}
114 }
115 
print_double(char * msg,double dbl)116 void print_double(char *msg, double dbl)
117 {
118 	dbl_overlay D;
119 	D.dbl = dbl;
120 
121 	printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
122 			msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
123 			D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
124 }
125 
print_single(char * msg,float * flt)126 void print_single(char *msg, float *flt)
127 {
128 	flt_overlay F;
129 	F.flt = *flt;
130 
131 	/* NOTE: for the purposes of comparing the fraction of a single with
132 	**       a double left shift the .frac so that hex digits are grouped
133 	**	     from left to right.  this is necessary because the size of a
134 	**		 single mantissa (23) bits is not a multiple of 4
135 	*/
136 	printf("%15s : flt %-20a = %c(%4d, %06x)\n",
137 		msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
138 }
139 
check_dbl_to_flt_round(round_mode_t mode,double dbl,float * expected)140 int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
141 {
142 	int status = 0;
143 	flt_overlay R, E;
144 	char *result;
145 	char *eq_ne;
146 
147 	set_rounding_mode(mode);
148 
149 	E.flt = *expected;
150 	R.flt = (float)dbl;
151 
152 	if ((R.layout.sign != E.layout.sign) ||
153 		(R.layout.exp != E.layout.exp) ||
154 		(R.layout.frac != E.layout.frac)) {
155 		result = "FAILED";
156 		eq_ne = "!=";
157 		status = 1;
158 	} else {
159 		result = "PASSED";
160 		eq_ne = "==";
161 		status = 0;
162 	}
163 	printf("%s:%s:(double)(%-20a) = %20a",
164 		round_mode_name[mode], result, R.flt, dbl);
165 	if (status) {
166 		print_single("\n\texpected", &E.flt);
167 		print_single("\n\trounded ", &R.flt);
168 	}
169 	putchar('\n');
170 	return status;
171 }
172 
test_dbl_to_float_convert(char * msg,float * base)173 int test_dbl_to_float_convert(char *msg, float *base)
174 {
175 	int status = 0;
176 	double half = (double)denorm_small/2;
177 	double qtr = half/2;
178 	double D_hi = (double)*base + half + qtr;
179 	double D_lo = (double)*base + half - qtr;
180 	float F_lo = *base;
181 	float F_hi = F_lo + denorm_small;
182 
183 
184 	/*
185 	** .....+-----+-----+-----+-----+---....
186 	**      ^F_lo ^           ^     ^
187 	**            D_lo
188 	**                        D_hi
189 	**                              F_hi
190 	** F_lo and F_hi are two consecutive single float model numbers
191 	** denorm_small distance apart. D_lo and D_hi are two numbers
192 	** within that range that are not representable as single floats
193 	** and will be rounded to either F_lo or F_hi.
194 	*/
195 	printf("-------------------------- %s --------------------------\n", msg);
196 	if (debug) {
197 		print_double("D_lo", D_lo);
198 		print_double("D_hi", D_hi);
199 		print_single("F_lo", &F_lo);
200 		print_single("F_hi", &F_hi);
201 	}
202 
203 	/* round to nearest */
204 	status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
205 	status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
206 
207 	/* round to zero */
208 	status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
209 	status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
210 
211 	/* round to +inf */
212 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
213 	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
214 
215 	/* round to -inf */
216 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
217 	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
218 	return status;
219 }
220 
221 void
init()222 init()
223 {
224 	flt_overlay F;
225 	dbl_overlay D;
226 
227 	/* small is the smallest denormalized single float number */
228 	F.layout.sign = 0;
229 	F.layout.exp = 0;
230 	F.layout.frac = 1;
231 	denorm_small = F.flt;	/* == 2^(-149) */
232 	if (debug) {
233 		print_double("float small", F.flt);
234 	}
235 
236 	D.layout.sign = 0;
237 	D.layout.exp = 0;
238 	D.layout.frac_hi = 0;
239 	D.layout.frac_lo = 1;
240 	dbl_denorm_small = D.dbl;	/* == 2^(-1022) */
241 	if (debug) {
242 		print_double("double small", D.dbl);
243 	}
244 
245 	/* n_small is the smallest normalized single precision float */
246 	F.layout.exp = 1;
247 	norm_small = F.flt;
248 }
249 
check_int_to_flt_round(round_mode_t mode,long L,float * expected)250 int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
251 {
252 	int status = 0;
253 	int I = L;
254 	char *int_name = "int";
255 	flt_overlay R, E;
256 	char *result;
257 	int iter;
258 
259 	set_rounding_mode(mode);
260 	E.flt = *expected;
261 
262 	for (iter = 0; iter < 2; iter++) {
263 		int stat = 0;
264 		R.flt = (iter == 0 ? (float)I : (float)L);
265 
266 		if ((R.layout.sign != E.layout.sign) ||
267 			(R.layout.exp != E.layout.exp) ||
268 			(R.layout.frac != E.layout.frac)) {
269 			result = "FAILED";
270 			stat = 1;
271 		} else {
272 			result = "PASSED";
273 			stat = 0;
274 		}
275 		printf("%s:%s:(float)(%4s)%9d = %11.1f",
276 			round_mode_name[mode], result, int_name, I, R.flt);
277 		if (stat) {
278 			print_single("\n\texpected: %.1f ", &E.flt);
279 			print_single("\n\trounded ", &R.flt);
280 		}
281 		putchar('\n');
282 		status |= stat;
283 
284 		if (!long_is_64_bits) break;
285 		int_name = "long";
286 	}
287 	return status;
288 }
289 
check_long_to_dbl_round(round_mode_t mode,long L,double * expected)290 int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
291 {
292 	int status = 0;
293 	dbl_overlay R, E;
294 	char *result;
295 
296 	set_rounding_mode(mode);
297 	E.dbl = *expected;
298 
299 	R.dbl = (double)L;
300 
301 	if ((R.layout.sign != E.layout.sign) ||
302 		(R.layout.exp != E.layout.exp) ||
303 		(R.layout.frac_lo != E.layout.frac_lo) ||
304 		(R.layout.frac_hi != E.layout.frac_hi)) {
305 		result = "FAILED";
306 		status = 1;
307 	} else {
308 		result = "PASSED";
309 		status = 0;
310 	}
311 	printf("%s:%s:(double)(%18ld) = %20.1f",
312 		round_mode_name[mode], result, L, R.dbl);
313 	if (status) {
314 		printf("\n\texpected %.1f : ", E.dbl);
315 	}
316 	putchar('\n');
317 	return status;
318 }
319 
test_int_to_float_convert(char * msg)320 int test_int_to_float_convert(char *msg)
321 {
322 	int status = 0;
323 	int int24_hi = 0x03ff0fff;
324 	int int24_lo = 0x03ff0ffd;
325 	float pos_flt_lo = 67047420.0;
326 	float pos_flt_hi = 67047424.0;
327 	float neg_flt_lo = -67047420.0;
328 	float neg_flt_hi = -67047424.0;
329 
330 	printf("-------------------------- %s --------------------------\n", msg);
331 	status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
332 	status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
333 	status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
334 	status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
335 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
336 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
337 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
338 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
339 
340 	status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
341 	status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
342 	status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
343 	status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
344 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
345 	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
346 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
347 	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
348 	return status;
349 }
350 
351 #ifdef __powerpc64__
test_long_to_double_convert(char * msg)352 int test_long_to_double_convert(char *msg)
353 {
354 	int status = 0;
355 	long long55_hi = 0x07ff0ffffffffff;
356 	long long55_lo = 0x07ff0fffffffffd;
357 	double pos_dbl_lo = 36012304344547324.0;
358 	double pos_dbl_hi = 36012304344547328.0;
359 	double neg_dbl_lo = -36012304344547324.0;
360 	double neg_dbl_hi = -36012304344547328.0;
361 
362 	printf("-------------------------- %s --------------------------\n", msg);
363 	status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
364 	status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
365 	status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
366 	status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
367 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
368 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
369 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
370 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
371 
372 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
373 	status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
374 	status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
375 	status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
376 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
377 	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
378 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
379 	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
380 	return status;
381 }
382 #endif
383 
check_single_arithmetic_op(flt_op_t op)384 int check_single_arithmetic_op(flt_op_t op)
385 {
386 		char *result;
387         int status = 0;
388         dbl_overlay R, E;
389         double qtr, half, fA, fB, fD;
390 		round_mode_t mode;
391 		int q, s;
392 		bool_t two_args = TRUE;
393 		float whole = denorm_small;
394 
395 #define BINOP(op) \
396         __asm__ volatile( \
397 					op" %0, %1, %2\n\t" \
398 					: "=f"(fD) : "f"(fA) , "f"(fB));
399 #define UNOP(op) \
400         __asm__ volatile( \
401 					op" %0, %1\n\t" \
402 					: "=f"(fD) : "f"(fA));
403 
404 		half = (double)whole/2;
405 		qtr = half/2;
406 
407 		if (debug) {
408 			print_double("qtr", qtr);
409 			print_double("whole", whole);
410 			print_double("2*whole", 2*whole);
411 		}
412 
413 		for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
414 		for (s = -1; s < 2; s += 2)
415 		for (q = 1; q < 4; q += 2) {
416 			double expected;
417 			double lo = s*whole;
418 			double hi = s*2*whole;
419 
420 			switch(op) {
421 			case FADDS:
422 				fA = s*whole;
423 				fB = s*q*qtr;
424 				break;
425 			case FSUBS:
426 				fA = s*2*whole;
427 				fB = s*(q == 1 ? 3 : 1)*qtr;
428 				break;
429 			case FMULS:
430 				fA = 0.5;
431 				fB = s*(4+q)*half;
432 				break;
433 			case FDIVS:
434 				fA = s*(4+q)*half;
435 				fB = 2.0;
436 				break;
437 			default:
438 				assert("check_single_arithmetic_op: unexpected op",
439 					FALSE);
440 				break;
441 			}
442 
443 			switch(mode) {
444 			case TO_NEAREST:
445 				expected = (q == 1 ? lo : hi);
446 				break;
447 			case TO_ZERO:
448 				expected = lo;
449 				break;
450 			case TO_PLUS_INFINITY:
451 				expected = (s == 1 ? hi : lo);
452 				break;
453 			case TO_MINUS_INFINITY:
454 				expected = (s == 1 ? lo : hi);
455 				break;
456 			}
457 
458 			set_rounding_mode(mode);
459 
460 			/*
461 			** do the double precision dual operation just for comparison
462 			** when debugging
463 			*/
464 			switch(op) {
465 			case FADDS:
466 				BINOP("fadds");
467 				R.dbl = fD;
468 				BINOP("fadd");
469 				break;
470 			case FSUBS:
471 				BINOP("fsubs");
472 				R.dbl = fD;
473 				BINOP("fsub");
474 				break;
475 			case FMULS:
476 				BINOP("fmuls");
477 				R.dbl = fD;
478 				BINOP("fmul");
479 				break;
480 			case FDIVS:
481 				BINOP("fdivs");
482 				R.dbl = fD;
483 				BINOP("fdiv");
484 				break;
485 			default:
486 				assert("check_single_arithmetic_op: unexpected op",
487 					FALSE);
488 				break;
489 			}
490 #undef UNOP
491 #undef BINOP
492 
493 			E.dbl = expected;
494 
495 			if ((R.layout.sign != E.layout.sign) ||
496 				(R.layout.exp != E.layout.exp) ||
497 				(R.layout.frac_lo != E.layout.frac_lo) ||
498 				(R.layout.frac_hi != E.layout.frac_hi)) {
499 				result = "FAILED";
500 				status = 1;
501 			} else {
502 				result = "PASSED";
503 				status = 0;
504 			}
505 
506 			printf("%s:%s:%s(%-13a",
507 				round_mode_name[mode], result, flt_op_names[op], fA);
508 			if (two_args) printf(", %-13a", fB);
509 			printf(") = %-13a", R.dbl);
510 			if (status) printf("\n\texpected %a", E.dbl);
511 			putchar('\n');
512 
513 			if (debug) {
514 				print_double("hi", hi);
515 				print_double("lo", lo);
516 				print_double("expected", expected);
517 				print_double("got", R.dbl);
518 				print_double("double result", fD);
519 			}
520 		}
521 
522 		return status;
523 }
524 
check_single_guarded_arithmetic_op(flt_op_t op)525 int check_single_guarded_arithmetic_op(flt_op_t op)
526 {
527 		typedef struct {
528 			int num, den, frac;
529 		} fdivs_t;
530 
531 		char *result;
532         int status = 0;
533         flt_overlay A, B, Z;
534         dbl_overlay Res, Exp;
535         double fA, fB, fC, fD;
536 		round_mode_t mode;
537 		int g, s;
538 		int arg_count;
539 
540 		fdivs_t divs_guard_cases[16] = {
541 			{ 105, 56, 0x700000 },  /* : 0 */
542 			{ 100, 57, 0x608FB8 },  /* : 1 */
543 			{ 000, 00, 0x000000 },  /* : X */
544 			{ 100, 52, 0x762762 },  /* : 3 */
545 			{ 000, 00, 0x000000 },  /* : X */
546 			{ 100, 55, 0x68BA2E },  /* : 5 */
547 			{ 000, 00, 0x000000 },  /* : X */
548 			{ 100, 51, 0x7AFAFA },  /* : 7 */
549 			{ 000, 00, 0x000000 },  /* : X */
550 			{ 100, 56, 0x649249 },  /* : 9 */
551 			{ 000, 00, 0x000000 },  /* : X */
552 			{ 100, 54, 0x6D097B },  /* : B */
553 			{ 000, 00, 0x000000 },  /* : X */
554 			{ 100, 59, 0x58F2FB },  /* : D */
555 			{ 000, 00, 0x000000 },  /* : X */
556 			{ 101, 52, 0x789D89 }  /* : F */
557 		};
558 
559 		/*	0x1.00000 00000000p-3 */
560 		/* set up the invariant fields of B, the arg to cause rounding */
561 		B.flt = 0.0;
562 		B.layout.exp = 124;  /* -3 */
563 
564 		/* set up args so result is always Z = 1.200000000000<g>p+0 */
565 		Z.flt = 1.0;
566 		Z.layout.sign = 0;
567 
568 #define TERNOP(op) \
569 		arg_count = 3; \
570         __asm__ volatile( \
571 					op" %0, %1, %2, %3\n\t" \
572 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
573 #define BINOP(op) \
574 		arg_count = 2; \
575         __asm__ volatile( \
576 					op" %0, %1, %2\n\t" \
577 					: "=f"(fD) : "f"(fA) , "f"(fB));
578 #define UNOP(op) \
579 		arg_count = 1; \
580         __asm__ volatile( \
581 					op" %0, %1\n\t" \
582 					: "=f"(fD) : "f"(fA));
583 
584 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
585 	for (s = -1; s < 2; s += 2)
586 	for (g = 0; g < 16; g += 1) {
587 		double lo, hi, expected;
588 		int LSB;
589 		int guard = 0;
590 		int z_sign = s;
591 
592 		/*
593 		** one argument will have exponent = 0 as will the result (by
594 		** design) so choose the other argument with exponent -3 to
595 		** force a 3 bit shift for scaling leaving us with 3 guard bits
596 		** and the LSB bit at the bottom of the manitssa.
597 		*/
598 		switch(op) {
599 		case FADDS:
600 			/* 1p+0 + 1.00000<g>p-3 */
601 			B.layout.frac = g;
602 
603 			fB = s*B.flt;
604 			fA = s*1.0;
605 
606 			/* set up Z to be truncated result */
607 
608 			/* mask off LSB from resulting guard bits */
609 			guard = g & 7;
610 
611 			Z.layout.frac = 0x100000 | (g >> 3);
612 			break;
613 		case FSUBS:
614 			/* 1.200002p+0 - 1.000000000000<g>p-3 */
615 			A.flt = 1.125;
616 			/* add enough to avoid scaling of the result */
617 			A.layout.frac |= 0x2;
618 			fA = s*A.flt;
619 
620 			B.layout.frac = g;
621 			fB = s*B.flt;
622 
623 			/* set up Z to be truncated result */
624 			guard = (0x10-g);
625 			Z.layout.frac = guard>>3;
626 
627 			/* mask off LSB from resulting guard bits */
628 			guard &= 7;
629 			break;
630 		case FMULS:
631 			/* 1 + g*2^-23 */
632 			A.flt = 1.0;
633 			A.layout.frac = g;
634 			fA = s*A.flt;
635 			fB = 1.125;
636 
637 			/* set up Z to be truncated result */
638 			Z.flt = 1.0;
639 			Z.layout.frac = 0x100000;
640 			Z.layout.frac |= g + (g>>3);
641 			guard = g & 7;
642 			break;
643 		case FDIVS:
644 			/* g >> 3 == LSB, g & 7 == guard bits */
645 			guard = g & 7;
646 			if ((guard & 1) == 0) {
647 				/* special case: guard bit X = 0 */
648 				A.flt = denorm_small;
649 				A.layout.frac = g;
650 				fA = A.flt;
651 				fB = s*8.0;
652 				Z.flt = 0.0;
653 				Z.layout.frac |= (g >> 3);
654 			} else {
655 				fA = s*divs_guard_cases[g].num;
656 				fB = divs_guard_cases[g].den;
657 
658 				Z.flt = 1.0;
659 				Z.layout.frac = divs_guard_cases[g].frac;
660 			}
661 			break;
662 		case FMADDS:
663 		case FMSUBS:
664 		case FNMADDS:
665 		case FNMSUBS:
666 			/* 1 + g*2^-23 */
667 			A.flt = 1.0;
668 			A.layout.frac = g;
669 			fA = s*A.flt;
670 			fB = 1.125;
671 
672 			/* 1.000001p-1 */
673 			A.flt = 0.5;
674 			A.layout.frac = 1;
675 			fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
676 
677 			/* set up Z to be truncated result */
678 			z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
679 			guard = ((g & 7) + 0x4) & 7;
680 			Z.flt = 1.0;
681 			Z.layout.frac = 0x500000;
682 			Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
683 			break;
684 		default:
685 			assert("check_single_arithmetic_op: unexpected op",
686 				FALSE);
687 			break;
688 		}
689 
690 		/* get LSB for tie breaking */
691 		LSB = Z.layout.frac & 1;
692 
693 		/* set up hi and lo */
694 		lo = z_sign*Z.flt;
695 		Z.layout.frac += 1;
696 		hi = z_sign*Z.flt;
697 
698 		switch(mode) {
699 		case TO_NEAREST:
700 			/* look at 3 guard bits to determine expected rounding */
701 			switch(guard) {
702 			case 0:
703 			case 1: case 2: case 3:
704 				expected = lo;
705 				break;
706 			case 4:	/* tie: round to even */
707 				if (debug) printf("tie: LSB = %d\n", LSB);
708 				expected = (LSB == 0 ? lo : hi);
709 				break;
710 			case 5: case 6: case 7:
711 				expected = hi;
712 				break;
713 			default:
714 				assert("check_single_guarded_arithmetic_op: unexpected guard",
715 					FALSE);
716 			}
717 			break;
718 		case TO_ZERO:
719 			expected = lo;
720 			break;
721 		case TO_PLUS_INFINITY:
722 			if (guard == 0) {
723 				/* no rounding */
724 				expected = lo;
725 			} else {
726 				expected = (s == 1 ? hi : lo);
727 			}
728 			break;
729 		case TO_MINUS_INFINITY:
730 			if (guard == 0) {
731 				/* no rounding */
732 				expected = lo;
733 			} else {
734 				expected = (s == 1 ? lo : hi);
735 			}
736 			break;
737 		}
738 
739 		set_rounding_mode(mode);
740 
741 		/*
742 		** do the double precision dual operation just for comparison
743 		** when debugging
744 		*/
745 		switch(op) {
746 		case FADDS:
747 			BINOP("fadds");
748 			Res.dbl = fD;
749 			break;
750 		case FSUBS:
751 			BINOP("fsubs");
752 			Res.dbl = fD;
753 			break;
754 		case FMULS:
755 			BINOP("fmuls");
756 			Res.dbl = fD;
757 			break;
758 		case FDIVS:
759 			BINOP("fdivs");
760 			Res.dbl = fD;
761 			break;
762 		case FMADDS:
763 			TERNOP("fmadds");
764 			Res.dbl = fD;
765 			break;
766 		case FMSUBS:
767 			TERNOP("fmsubs");
768 			Res.dbl = fD;
769 			break;
770 		case FNMADDS:
771 			TERNOP("fnmadds");
772 			Res.dbl = fD;
773 			break;
774 		case FNMSUBS:
775 			TERNOP("fnmsubs");
776 			Res.dbl = fD;
777 			break;
778 		default:
779 			assert("check_single_guarded_arithmetic_op: unexpected op",
780 				FALSE);
781 			break;
782 		}
783 #undef UNOP
784 #undef BINOP
785 #undef TERNOP
786 
787 		Exp.dbl = expected;
788 
789 		if ((Res.layout.sign != Exp.layout.sign) ||
790 			(Res.layout.exp != Exp.layout.exp) ||
791 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
792 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
793 			result = "FAILED";
794 			status = 1;
795 		} else {
796 			result = "PASSED";
797 			status = 0;
798 		}
799 
800 		printf("%s:%s:%s(%-13f",
801 			round_mode_name[mode], result, flt_op_names[op], fA);
802 		if (arg_count > 1) printf(", %-13a", fB);
803 		if (arg_count > 2) printf(", %-13a", fC);
804 		printf(") = %-13a", Res.dbl);
805 		if (status) printf("\n\texpected %a", Exp.dbl);
806 		putchar('\n');
807 
808 		if (debug) {
809 			print_double("hi", hi);
810 			print_double("lo", lo);
811 			print_double("expected", expected);
812 			print_double("got", Res.dbl);
813 		}
814 	}
815 
816 	return status;
817 }
818 
check_double_guarded_arithmetic_op(flt_op_t op)819 int check_double_guarded_arithmetic_op(flt_op_t op)
820 {
821 	typedef struct {
822 		int num, den, hi, lo;
823 	} fdiv_t;
824 	typedef struct {
825 		double arg;
826 		int exp, hi, lo;
827 	} fsqrt_t;
828 
829 	char *result;
830 	int status = 0;
831 	dbl_overlay A, B, Z;
832 	dbl_overlay Res, Exp;
833 	double fA, fB, fC, fD;
834 	round_mode_t mode;
835 	int g, s;
836 	int arg_count;
837 	fdiv_t div_guard_cases[16] = {
838 		{ 62, 62, 0x00000, 0x00000000 },	/* 0 */
839 		{ 64, 62, 0x08421, 0x08421084 },	/* 1 */
840 		{ 66, 62, 0x10842, 0x10842108 },	/* 2 */
841 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* 3 */
842 		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* X */
843 		{ 102, 62, 0xa5294, 0xa5294a52 },	/* 5 */
844 		{ 106, 62, 0xb5ad6, 0xb5ad6b5a },	/* 6 */
845 		{ 108, 62, 0xbdef7, 0xbdef7bde },	/* 7 */
846 		{ 108, 108, 0x00000, 0x00000000 },	/* 8 */
847 		{ 112, 62, 0xce739, 0xce739ce7 },	/* 9 */
848 		{ 114, 62, 0xd6b5a, 0xd6b5ad6b },	/* A */
849 		{ 116, 62, 0xdef7b, 0xdef7bdef },	/* B */
850 		{ 84, 62, 0x5ad6b, 0x5ad6b5ad },	/* X */
851 		{ 118, 62, 0xe739c, 0xe739ce73 },	/* D */
852 		{ 90, 62, 0x739ce, 0x739ce739 },	/* E */
853 		{ 92, 62, 0x7bdef, 0x7bdef7bd }		/* F */
854 	};
855 
856 
857 	fsqrt_t sqrt_guard_cases[16] = {
858 		{ 0x1.08800p0,  0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440  */
859 		{ 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910  */
860 		{ 0x1.A8220p0,  0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411  */
861 		{ 0x1.05A20p0,  0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1  */
862 		{ 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541  */
863 		{ 0x1.DCA20p0,  0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51  */
864 		{ 0x1.02C80p0,  0, 0x01630, 0x9cde7483}, /* :6 B6.8164  */
865 		{ 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40  */
866 		{ 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9  */
867 		{ 0x1.1D020p0,  0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81  */
868 		{ 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4  */
869 		{ 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400  */
870 		{ 0x1.48520p0,  0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429  */
871 		{ 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61  */
872 		{ 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684  */
873 		{ 0x1.76B20p0,  0, 0x35b67, 0x81aed827}  /* :F DB.BB59  */
874 	};
875 
876 	/*	0x1.00000 00000000p-3 */
877 	/* set up the invariant fields of B, the arg to cause rounding */
878 	B.dbl = 0.0;
879 	B.layout.exp = 1020;
880 
881 	/* set up args so result is always Z = 1.200000000000<g>p+0 */
882 	Z.dbl = 1.0;
883 	Z.layout.sign = 0;
884 
885 #define TERNOP(op) \
886 		arg_count = 3; \
887         __asm__ volatile( \
888 					op" %0, %1, %2, %3\n\t" \
889 					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
890 #define BINOP(op) \
891 		arg_count = 2; \
892         __asm__ volatile( \
893 					op" %0, %1, %2\n\t" \
894 					: "=f"(fD) : "f"(fA) , "f"(fB));
895 #define UNOP(op) \
896 		arg_count = 1; \
897         __asm__ volatile( \
898 					op" %0, %1\n\t" \
899 					: "=f"(fD) : "f"(fA));
900 
901 	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
902 	for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
903 	for (g = 0; g < 16; g += 1) {
904 		double lo, hi, expected;
905 		int LSB;
906 		int guard;
907 		int z_sign = s;
908 
909 		/*
910 		** one argument will have exponent = 0 as will the result (by
911 		** design) so choose the other argument with exponent -3 to
912 		** force a 3 bit shift for scaling leaving us with 3 guard bits
913 		** and the LSB bit at the bottom of the manitssa.
914 		*/
915 		switch(op) {
916 		case FADD:
917 			/* 1p+0 + 1.000000000000<g>p-3 */
918 			B.layout.frac_lo = g;
919 
920 			fB = s*B.dbl;
921 			fA = s*1.0;
922 
923 			/* set up Z to be truncated result */
924 
925 			/* mask off LSB from resulting guard bits */
926 			guard = g & 7;
927 
928 			Z.layout.frac_hi = 0x20000;
929 			Z.layout.frac_lo = g >> 3;
930 
931 			break;
932 		case FSUB:
933 			/* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
934 			A.dbl = 1.125;
935 			/* add enough to avoid scaling of the result */
936 			A.layout.frac_lo = 0x2;
937 			fA = s*A.dbl;
938 
939 			B.layout.frac_lo = g;
940 			fB = s*B.dbl;
941 
942 			/* set up Z to be truncated result */
943 			guard = (0x10-g);
944 			Z.layout.frac_hi = 0x0;
945 			Z.layout.frac_lo = guard>>3;
946 
947 			/* mask off LSB from resulting guard bits */
948 			guard &= 7;
949 			break;
950 		case FMUL:
951 			/* 1 + g*2^-52 */
952 			A.dbl = 1.0;
953 			A.layout.frac_lo = g;
954 			fA = s*A.dbl;
955 			fB = 1.125;
956 
957 			/* set up Z to be truncated result */
958 			Z.dbl = 1.0;
959 			Z.layout.frac_hi = 0x20000;
960 			Z.layout.frac_lo = g + (g>>3);
961 			guard = g & 7;
962 			break;
963 		case FMADD:
964 		case FMSUB:
965 		case FNMADD:
966 		case FNMSUB:
967 			/* 1 + g*2^-52 */
968 			A.dbl = 1.0;
969 			A.layout.frac_lo = g;
970 			fA = s*A.dbl;
971 			fB = 1.125;
972 
973 			/* 1.0000000000001p-1 */
974 			A.dbl = 0.5;
975 			A.layout.frac_lo = 1;
976 			fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
977 
978 			/* set up Z to be truncated result */
979 			z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
980 			guard = ((g & 7) + 0x4) & 7;
981 			Z.dbl = 1.0;
982 			Z.layout.frac_hi = 0xa0000;
983 			Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
984 			break;
985 		case FDIV:
986 			/* g >> 3 == LSB, g & 7 == guard bits */
987 			guard = g & 7;
988 			if (guard == 0x4) {
989 				/* special case guard bits == 4, inexact tie */
990 				fB = s*2.0;
991 				Z.dbl = 0.0;
992 				if (g >> 3) {
993 					fA = dbl_denorm_small + 2*dbl_denorm_small;
994 					Z.layout.frac_lo = 0x1;
995 				} else {
996 					fA = dbl_denorm_small;
997 				}
998 			} else {
999 				fA = s*div_guard_cases[g].num;
1000 				fB = div_guard_cases[g].den;
1001 
1002 				printf("%d/%d\n",
1003 					s*div_guard_cases[g].num,
1004 					div_guard_cases[g].den);
1005 				Z.dbl = 1.0;
1006 				Z.layout.frac_hi = div_guard_cases[g].hi;
1007 				Z.layout.frac_lo = div_guard_cases[g].lo;
1008 			}
1009 			break;
1010 		case FSQRT:
1011 			fA = s*sqrt_guard_cases[g].arg;
1012 			Z.dbl = 1.0;
1013 			Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
1014 			Z.layout.frac_hi = sqrt_guard_cases[g].hi;
1015 			Z.layout.frac_lo = sqrt_guard_cases[g].lo;
1016 			guard = g >> 1;
1017 			if (g & 1) guard |= 1;
1018 			/* don't have test cases for when X bit = 0 */
1019 			if (guard == 0 || guard == 4) continue;
1020 			break;
1021 		default:
1022 			assert("check_double_guarded_arithmetic_op: unexpected op",
1023 				FALSE);
1024 			break;
1025 		}
1026 
1027 		/* get LSB for tie breaking */
1028 		LSB = Z.layout.frac_lo & 1;
1029 
1030 		/* set up hi and lo */
1031 		lo = z_sign*Z.dbl;
1032 		Z.layout.frac_lo += 1;
1033 		hi = z_sign*Z.dbl;
1034 
1035 		switch(mode) {
1036 		case TO_NEAREST:
1037 			/* look at 3 guard bits to determine expected rounding */
1038 			switch(guard) {
1039 			case 0:
1040 			case 1: case 2: case 3:
1041 				expected = lo;
1042 				break;
1043 			case 4:	/* tie: round to even */
1044 				if (debug) printf("tie: LSB = %d\n", LSB);
1045 				expected = (LSB == 0 ? lo : hi);
1046 				break;
1047 			case 5: case 6: case 7:
1048 				expected = hi;
1049 				break;
1050 			default:
1051 				assert("check_double_guarded_arithmetic_op: unexpected guard",
1052 					FALSE);
1053 			}
1054 			break;
1055 		case TO_ZERO:
1056 			expected = lo;
1057 			break;
1058 		case TO_PLUS_INFINITY:
1059 			if (guard == 0) {
1060 				/* no rounding */
1061 				expected = lo;
1062 			} else {
1063 				expected = (s == 1 ? hi : lo);
1064 			}
1065 			break;
1066 		case TO_MINUS_INFINITY:
1067 			if (guard == 0) {
1068 				/* no rounding */
1069 				expected = lo;
1070 			} else {
1071 				expected = (s == 1 ? lo : hi);
1072 			}
1073 			break;
1074 		}
1075 
1076 		set_rounding_mode(mode);
1077 
1078 		/*
1079 		** do the double precision dual operation just for comparison
1080 		** when debugging
1081 		*/
1082 		switch(op) {
1083 		case FADD:
1084 			BINOP("fadd");
1085 			Res.dbl = fD;
1086 			break;
1087 		case FSUB:
1088 			BINOP("fsub");
1089 			Res.dbl = fD;
1090 			break;
1091 		case FMUL:
1092 			BINOP("fmul");
1093 			Res.dbl = fD;
1094 			break;
1095 		case FMADD:
1096 			TERNOP("fmadd");
1097 			Res.dbl = fD;
1098 			break;
1099 		case FMSUB:
1100 			TERNOP("fmsub");
1101 			Res.dbl = fD;
1102 			break;
1103 		case FNMADD:
1104 			TERNOP("fnmadd");
1105 			Res.dbl = fD;
1106 			break;
1107 		case FNMSUB:
1108 			TERNOP("fnmsub");
1109 			Res.dbl = fD;
1110 			break;
1111 		case FDIV:
1112 			BINOP("fdiv");
1113 			Res.dbl = fD;
1114 			break;
1115 		case FSQRT:
1116 			UNOP("fsqrt");
1117 			Res.dbl = fD;
1118 			break;
1119 		default:
1120 			assert("check_double_guarded_arithmetic_op: unexpected op",
1121 				FALSE);
1122 			break;
1123 		}
1124 #undef UNOP
1125 #undef BINOP
1126 #undef TERNOP
1127 
1128 		Exp.dbl = expected;
1129 
1130 		if ((Res.layout.sign != Exp.layout.sign) ||
1131 			(Res.layout.exp != Exp.layout.exp) ||
1132 			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
1133 			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
1134 			result = "FAILED";
1135 			status = 1;
1136 		} else {
1137 			result = "PASSED";
1138 			status = 0;
1139 		}
1140 
1141 		printf("%s:%s:%s(%-13a",
1142 			round_mode_name[mode], result, flt_op_names[op], fA);
1143 		if (arg_count > 1) printf(", %-13a", fB);
1144 		if (arg_count > 2) printf(", %-13a", fC);
1145 		printf(") = %-13a", Res.dbl);
1146 		if (status) printf("\n\texpected %a", Exp.dbl);
1147 		putchar('\n');
1148 
1149 		if (debug) {
1150 			print_double("hi", hi);
1151 			print_double("lo", lo);
1152 			print_double("expected", expected);
1153 			print_double("got", Res.dbl);
1154 		}
1155 	}
1156 
1157 	return status;
1158 }
1159 
test_float_arithmetic_ops()1160 int test_float_arithmetic_ops()
1161 {
1162 	int status = 0;
1163 	flt_op_t op;
1164 
1165 	/*
1166 	** choose FP operands whose result should be rounded to either
1167 	** lo or hi.
1168 	*/
1169 
1170 	printf("-------------------------- %s --------------------------\n",
1171 		"test rounding of float operators without guard bits");
1172 	for (op = FADDS; op <= FDIVS; op++) {
1173 		status |= check_single_arithmetic_op(op);
1174 	}
1175 
1176 	printf("-------------------------- %s --------------------------\n",
1177 		"test rounding of float operators with guard bits");
1178 	for (op = FADDS; op <= FNMSUBS; op++) {
1179 		status |= check_single_guarded_arithmetic_op(op);
1180 	}
1181 
1182 	printf("-------------------------- %s --------------------------\n",
1183 		"test rounding of double operators with guard bits");
1184 	for (op = FADD; op <= FSQRT; op++) {
1185 		status |= check_double_guarded_arithmetic_op(op);
1186 	}
1187 	return status;
1188 }
1189 
1190 
1191 int
main()1192 main()
1193 {
1194 	int status = 0;
1195 
1196 	init();
1197 
1198 	status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
1199 	status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
1200 	status |= test_int_to_float_convert("test (float)int convert");
1201 	status |= test_int_to_float_convert("test (float)int convert");
1202 
1203 #ifdef __powerpc64__
1204 	status |= test_long_to_double_convert("test (double)long convert");
1205 #endif
1206 	status |= test_float_arithmetic_ops();
1207 	return status;
1208 }
1209