1
2 /* Copyright (C) 2006 Dave Nomura
3 dcnltc@us.ibm.com
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307, USA.
19
20 The GNU General Public License is contained in the file COPYING.
21 */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <limits.h>
26
27 typedef enum { FALSE=0, TRUE } bool_t;
28
29 typedef enum {
30 FADDS, FSUBS, FMULS, FDIVS,
31 FMADDS, FMSUBS, FNMADDS, FNMSUBS,
32 FADD, FSUB, FMUL, FDIV, FMADD,
33 FMSUB, FNMADD, FNMSUB, FSQRT
34 } flt_op_t;
35
36 typedef enum {
37 TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
38 char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
39
40 const char *flt_op_names[] = {
41 "fadds", "fsubs", "fmuls", "fdivs",
42 "fmadds", "fmsubs", "fnmadds", "fnmsubs",
43 "fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
44 "fnmsub", "fsqrt"
45 };
46
47 typedef unsigned int fpscr_t;
48
49 typedef union {
50 float flt;
51 struct {
52 unsigned int sign:1;
53 unsigned int exp:8;
54 unsigned int frac:23;
55 } layout;
56 } flt_overlay;
57
58 typedef union {
59 double dbl;
60 struct {
61 unsigned int sign:1;
62 unsigned int exp:11;
63 unsigned int frac_hi:20;
64 unsigned int frac_lo:32;
65 } layout;
66 struct {
67 unsigned int hi;
68 unsigned int lo;
69 } dbl_pair;
70 } dbl_overlay;
71
72 void assert_fail(const char *msg,
73 const char* expr, const char* file, int line, const char*fn);
74
75 #define STRING(__str) #__str
76 #define assert(msg, expr) \
77 ((void) ((expr) ? 0 : \
78 (assert_fail (msg, STRING(expr), \
79 __FILE__, __LINE__, \
80 __PRETTY_FUNCTION__), 0)))
81 float denorm_small;
82 double dbl_denorm_small;
83 float norm_small;
84 bool_t debug = FALSE;
85 bool_t long_is_64_bits = sizeof(long) == 8;
86
assert_fail(msg,expr,file,line,fn)87 void assert_fail (msg, expr, file, line, fn)
88 const char* msg;
89 const char* expr;
90 const char* file;
91 int line;
92 const char*fn;
93 {
94 printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
95 msg, file, line, fn, expr );
96 exit( 1 );
97 }
set_rounding_mode(round_mode_t mode)98 void set_rounding_mode(round_mode_t mode)
99 {
100 switch(mode) {
101 case TO_NEAREST:
102 asm volatile("mtfsfi 7, 0");
103 break;
104 case TO_ZERO:
105 asm volatile("mtfsfi 7, 1");
106 break;
107 case TO_PLUS_INFINITY:
108 asm volatile("mtfsfi 7, 2");
109 break;
110 case TO_MINUS_INFINITY:
111 asm volatile("mtfsfi 7, 3");
112 break;
113 }
114 }
115
print_double(char * msg,double dbl)116 void print_double(char *msg, double dbl)
117 {
118 dbl_overlay D;
119 D.dbl = dbl;
120
121 printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
122 msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
123 D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
124 }
125
print_single(char * msg,float * flt)126 void print_single(char *msg, float *flt)
127 {
128 flt_overlay F;
129 F.flt = *flt;
130
131 /* NOTE: for the purposes of comparing the fraction of a single with
132 ** a double left shift the .frac so that hex digits are grouped
133 ** from left to right. this is necessary because the size of a
134 ** single mantissa (23) bits is not a multiple of 4
135 */
136 printf("%15s : flt %-20a = %c(%4d, %06x)\n",
137 msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
138 }
139
check_dbl_to_flt_round(round_mode_t mode,double dbl,float * expected)140 int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
141 {
142 int status = 0;
143 flt_overlay R, E;
144 char *result;
145 char *eq_ne;
146
147 set_rounding_mode(mode);
148
149 E.flt = *expected;
150 R.flt = (float)dbl;
151
152 if ((R.layout.sign != E.layout.sign) ||
153 (R.layout.exp != E.layout.exp) ||
154 (R.layout.frac != E.layout.frac)) {
155 result = "FAILED";
156 eq_ne = "!=";
157 status = 1;
158 } else {
159 result = "PASSED";
160 eq_ne = "==";
161 status = 0;
162 }
163 printf("%s:%s:(double)(%-20a) = %20a",
164 round_mode_name[mode], result, R.flt, dbl);
165 if (status) {
166 print_single("\n\texpected", &E.flt);
167 print_single("\n\trounded ", &R.flt);
168 }
169 putchar('\n');
170 return status;
171 }
172
test_dbl_to_float_convert(char * msg,float * base)173 int test_dbl_to_float_convert(char *msg, float *base)
174 {
175 int status = 0;
176 double half = (double)denorm_small/2;
177 double qtr = half/2;
178 double D_hi = (double)*base + half + qtr;
179 double D_lo = (double)*base + half - qtr;
180 float F_lo = *base;
181 float F_hi = F_lo + denorm_small;
182
183
184 /*
185 ** .....+-----+-----+-----+-----+---....
186 ** ^F_lo ^ ^ ^
187 ** D_lo
188 ** D_hi
189 ** F_hi
190 ** F_lo and F_hi are two consecutive single float model numbers
191 ** denorm_small distance apart. D_lo and D_hi are two numbers
192 ** within that range that are not representable as single floats
193 ** and will be rounded to either F_lo or F_hi.
194 */
195 printf("-------------------------- %s --------------------------\n", msg);
196 if (debug) {
197 print_double("D_lo", D_lo);
198 print_double("D_hi", D_hi);
199 print_single("F_lo", &F_lo);
200 print_single("F_hi", &F_hi);
201 }
202
203 /* round to nearest */
204 status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
205 status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
206
207 /* round to zero */
208 status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
209 status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
210
211 /* round to +inf */
212 status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
213 status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
214
215 /* round to -inf */
216 status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
217 status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
218 return status;
219 }
220
221 void
init()222 init()
223 {
224 flt_overlay F;
225 dbl_overlay D;
226
227 /* small is the smallest denormalized single float number */
228 F.layout.sign = 0;
229 F.layout.exp = 0;
230 F.layout.frac = 1;
231 denorm_small = F.flt; /* == 2^(-149) */
232 if (debug) {
233 print_double("float small", F.flt);
234 }
235
236 D.layout.sign = 0;
237 D.layout.exp = 0;
238 D.layout.frac_hi = 0;
239 D.layout.frac_lo = 1;
240 dbl_denorm_small = D.dbl; /* == 2^(-1022) */
241 if (debug) {
242 print_double("double small", D.dbl);
243 }
244
245 /* n_small is the smallest normalized single precision float */
246 F.layout.exp = 1;
247 norm_small = F.flt;
248 }
249
check_int_to_flt_round(round_mode_t mode,long L,float * expected)250 int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
251 {
252 int status = 0;
253 int I = L;
254 char *int_name = "int";
255 flt_overlay R, E;
256 char *result;
257 int iter;
258
259 set_rounding_mode(mode);
260 E.flt = *expected;
261
262 for (iter = 0; iter < 2; iter++) {
263 int stat = 0;
264 R.flt = (iter == 0 ? (float)I : (float)L);
265
266 if ((R.layout.sign != E.layout.sign) ||
267 (R.layout.exp != E.layout.exp) ||
268 (R.layout.frac != E.layout.frac)) {
269 result = "FAILED";
270 stat = 1;
271 } else {
272 result = "PASSED";
273 stat = 0;
274 }
275 printf("%s:%s:(float)(%4s)%9d = %11.1f",
276 round_mode_name[mode], result, int_name, I, R.flt);
277 if (stat) {
278 print_single("\n\texpected: %.1f ", &E.flt);
279 print_single("\n\trounded ", &R.flt);
280 }
281 putchar('\n');
282 status |= stat;
283
284 if (!long_is_64_bits) break;
285 int_name = "long";
286 }
287 return status;
288 }
289
check_long_to_dbl_round(round_mode_t mode,long L,double * expected)290 int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
291 {
292 int status = 0;
293 dbl_overlay R, E;
294 char *result;
295
296 set_rounding_mode(mode);
297 E.dbl = *expected;
298
299 R.dbl = (double)L;
300
301 if ((R.layout.sign != E.layout.sign) ||
302 (R.layout.exp != E.layout.exp) ||
303 (R.layout.frac_lo != E.layout.frac_lo) ||
304 (R.layout.frac_hi != E.layout.frac_hi)) {
305 result = "FAILED";
306 status = 1;
307 } else {
308 result = "PASSED";
309 status = 0;
310 }
311 printf("%s:%s:(double)(%18ld) = %20.1f",
312 round_mode_name[mode], result, L, R.dbl);
313 if (status) {
314 printf("\n\texpected %.1f : ", E.dbl);
315 }
316 putchar('\n');
317 return status;
318 }
319
test_int_to_float_convert(char * msg)320 int test_int_to_float_convert(char *msg)
321 {
322 int status = 0;
323 int int24_hi = 0x03ff0fff;
324 int int24_lo = 0x03ff0ffd;
325 float pos_flt_lo = 67047420.0;
326 float pos_flt_hi = 67047424.0;
327 float neg_flt_lo = -67047420.0;
328 float neg_flt_hi = -67047424.0;
329
330 printf("-------------------------- %s --------------------------\n", msg);
331 status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
332 status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
333 status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
334 status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
335 status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
336 status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
337 status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
338 status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
339
340 status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
341 status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
342 status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
343 status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
344 status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
345 status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
346 status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
347 status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
348 return status;
349 }
350
351 #ifdef __powerpc64__
test_long_to_double_convert(char * msg)352 int test_long_to_double_convert(char *msg)
353 {
354 int status = 0;
355 long long55_hi = 0x07ff0ffffffffff;
356 long long55_lo = 0x07ff0fffffffffd;
357 double pos_dbl_lo = 36012304344547324.0;
358 double pos_dbl_hi = 36012304344547328.0;
359 double neg_dbl_lo = -36012304344547324.0;
360 double neg_dbl_hi = -36012304344547328.0;
361
362 printf("-------------------------- %s --------------------------\n", msg);
363 status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
364 status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
365 status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
366 status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
367 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
368 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
369 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
370 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
371
372 status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
373 status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
374 status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
375 status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
376 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
377 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
378 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
379 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
380 return status;
381 }
382 #endif
383
check_single_arithmetic_op(flt_op_t op)384 int check_single_arithmetic_op(flt_op_t op)
385 {
386 char *result;
387 int status = 0;
388 dbl_overlay R, E;
389 double qtr, half, fA, fB, fD;
390 round_mode_t mode;
391 int q, s;
392 bool_t two_args = TRUE;
393 float whole = denorm_small;
394
395 #define BINOP(op) \
396 __asm__ volatile( \
397 op" %0, %1, %2\n\t" \
398 : "=f"(fD) : "f"(fA) , "f"(fB));
399 #define UNOP(op) \
400 __asm__ volatile( \
401 op" %0, %1\n\t" \
402 : "=f"(fD) : "f"(fA));
403
404 half = (double)whole/2;
405 qtr = half/2;
406
407 if (debug) {
408 print_double("qtr", qtr);
409 print_double("whole", whole);
410 print_double("2*whole", 2*whole);
411 }
412
413 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
414 for (s = -1; s < 2; s += 2)
415 for (q = 1; q < 4; q += 2) {
416 double expected;
417 double lo = s*whole;
418 double hi = s*2*whole;
419
420 switch(op) {
421 case FADDS:
422 fA = s*whole;
423 fB = s*q*qtr;
424 break;
425 case FSUBS:
426 fA = s*2*whole;
427 fB = s*(q == 1 ? 3 : 1)*qtr;
428 break;
429 case FMULS:
430 fA = 0.5;
431 fB = s*(4+q)*half;
432 break;
433 case FDIVS:
434 fA = s*(4+q)*half;
435 fB = 2.0;
436 break;
437 default:
438 assert("check_single_arithmetic_op: unexpected op",
439 FALSE);
440 break;
441 }
442
443 switch(mode) {
444 case TO_NEAREST:
445 expected = (q == 1 ? lo : hi);
446 break;
447 case TO_ZERO:
448 expected = lo;
449 break;
450 case TO_PLUS_INFINITY:
451 expected = (s == 1 ? hi : lo);
452 break;
453 case TO_MINUS_INFINITY:
454 expected = (s == 1 ? lo : hi);
455 break;
456 }
457
458 set_rounding_mode(mode);
459
460 /*
461 ** do the double precision dual operation just for comparison
462 ** when debugging
463 */
464 switch(op) {
465 case FADDS:
466 BINOP("fadds");
467 R.dbl = fD;
468 BINOP("fadd");
469 break;
470 case FSUBS:
471 BINOP("fsubs");
472 R.dbl = fD;
473 BINOP("fsub");
474 break;
475 case FMULS:
476 BINOP("fmuls");
477 R.dbl = fD;
478 BINOP("fmul");
479 break;
480 case FDIVS:
481 BINOP("fdivs");
482 R.dbl = fD;
483 BINOP("fdiv");
484 break;
485 default:
486 assert("check_single_arithmetic_op: unexpected op",
487 FALSE);
488 break;
489 }
490 #undef UNOP
491 #undef BINOP
492
493 E.dbl = expected;
494
495 if ((R.layout.sign != E.layout.sign) ||
496 (R.layout.exp != E.layout.exp) ||
497 (R.layout.frac_lo != E.layout.frac_lo) ||
498 (R.layout.frac_hi != E.layout.frac_hi)) {
499 result = "FAILED";
500 status = 1;
501 } else {
502 result = "PASSED";
503 status = 0;
504 }
505
506 printf("%s:%s:%s(%-13a",
507 round_mode_name[mode], result, flt_op_names[op], fA);
508 if (two_args) printf(", %-13a", fB);
509 printf(") = %-13a", R.dbl);
510 if (status) printf("\n\texpected %a", E.dbl);
511 putchar('\n');
512
513 if (debug) {
514 print_double("hi", hi);
515 print_double("lo", lo);
516 print_double("expected", expected);
517 print_double("got", R.dbl);
518 print_double("double result", fD);
519 }
520 }
521
522 return status;
523 }
524
check_single_guarded_arithmetic_op(flt_op_t op)525 int check_single_guarded_arithmetic_op(flt_op_t op)
526 {
527 typedef struct {
528 int num, den, frac;
529 } fdivs_t;
530
531 char *result;
532 int status = 0;
533 flt_overlay A, B, Z;
534 dbl_overlay Res, Exp;
535 double fA, fB, fC, fD;
536 round_mode_t mode;
537 int g, s;
538 int arg_count;
539
540 fdivs_t divs_guard_cases[16] = {
541 { 105, 56, 0x700000 }, /* : 0 */
542 { 100, 57, 0x608FB8 }, /* : 1 */
543 { 000, 00, 0x000000 }, /* : X */
544 { 100, 52, 0x762762 }, /* : 3 */
545 { 000, 00, 0x000000 }, /* : X */
546 { 100, 55, 0x68BA2E }, /* : 5 */
547 { 000, 00, 0x000000 }, /* : X */
548 { 100, 51, 0x7AFAFA }, /* : 7 */
549 { 000, 00, 0x000000 }, /* : X */
550 { 100, 56, 0x649249 }, /* : 9 */
551 { 000, 00, 0x000000 }, /* : X */
552 { 100, 54, 0x6D097B }, /* : B */
553 { 000, 00, 0x000000 }, /* : X */
554 { 100, 59, 0x58F2FB }, /* : D */
555 { 000, 00, 0x000000 }, /* : X */
556 { 101, 52, 0x789D89 } /* : F */
557 };
558
559 /* 0x1.00000 00000000p-3 */
560 /* set up the invariant fields of B, the arg to cause rounding */
561 B.flt = 0.0;
562 B.layout.exp = 124; /* -3 */
563
564 /* set up args so result is always Z = 1.200000000000<g>p+0 */
565 Z.flt = 1.0;
566 Z.layout.sign = 0;
567
568 #define TERNOP(op) \
569 arg_count = 3; \
570 __asm__ volatile( \
571 op" %0, %1, %2, %3\n\t" \
572 : "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
573 #define BINOP(op) \
574 arg_count = 2; \
575 __asm__ volatile( \
576 op" %0, %1, %2\n\t" \
577 : "=f"(fD) : "f"(fA) , "f"(fB));
578 #define UNOP(op) \
579 arg_count = 1; \
580 __asm__ volatile( \
581 op" %0, %1\n\t" \
582 : "=f"(fD) : "f"(fA));
583
584 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
585 for (s = -1; s < 2; s += 2)
586 for (g = 0; g < 16; g += 1) {
587 double lo, hi, expected;
588 int LSB;
589 int guard = 0;
590 int z_sign = s;
591
592 /*
593 ** one argument will have exponent = 0 as will the result (by
594 ** design) so choose the other argument with exponent -3 to
595 ** force a 3 bit shift for scaling leaving us with 3 guard bits
596 ** and the LSB bit at the bottom of the manitssa.
597 */
598 switch(op) {
599 case FADDS:
600 /* 1p+0 + 1.00000<g>p-3 */
601 B.layout.frac = g;
602
603 fB = s*B.flt;
604 fA = s*1.0;
605
606 /* set up Z to be truncated result */
607
608 /* mask off LSB from resulting guard bits */
609 guard = g & 7;
610
611 Z.layout.frac = 0x100000 | (g >> 3);
612 break;
613 case FSUBS:
614 /* 1.200002p+0 - 1.000000000000<g>p-3 */
615 A.flt = 1.125;
616 /* add enough to avoid scaling of the result */
617 A.layout.frac |= 0x2;
618 fA = s*A.flt;
619
620 B.layout.frac = g;
621 fB = s*B.flt;
622
623 /* set up Z to be truncated result */
624 guard = (0x10-g);
625 Z.layout.frac = guard>>3;
626
627 /* mask off LSB from resulting guard bits */
628 guard &= 7;
629 break;
630 case FMULS:
631 /* 1 + g*2^-23 */
632 A.flt = 1.0;
633 A.layout.frac = g;
634 fA = s*A.flt;
635 fB = 1.125;
636
637 /* set up Z to be truncated result */
638 Z.flt = 1.0;
639 Z.layout.frac = 0x100000;
640 Z.layout.frac |= g + (g>>3);
641 guard = g & 7;
642 break;
643 case FDIVS:
644 /* g >> 3 == LSB, g & 7 == guard bits */
645 guard = g & 7;
646 if ((guard & 1) == 0) {
647 /* special case: guard bit X = 0 */
648 A.flt = denorm_small;
649 A.layout.frac = g;
650 fA = A.flt;
651 fB = s*8.0;
652 Z.flt = 0.0;
653 Z.layout.frac |= (g >> 3);
654 } else {
655 fA = s*divs_guard_cases[g].num;
656 fB = divs_guard_cases[g].den;
657
658 Z.flt = 1.0;
659 Z.layout.frac = divs_guard_cases[g].frac;
660 }
661 break;
662 case FMADDS:
663 case FMSUBS:
664 case FNMADDS:
665 case FNMSUBS:
666 /* 1 + g*2^-23 */
667 A.flt = 1.0;
668 A.layout.frac = g;
669 fA = s*A.flt;
670 fB = 1.125;
671
672 /* 1.000001p-1 */
673 A.flt = 0.5;
674 A.layout.frac = 1;
675 fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
676
677 /* set up Z to be truncated result */
678 z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
679 guard = ((g & 7) + 0x4) & 7;
680 Z.flt = 1.0;
681 Z.layout.frac = 0x500000;
682 Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
683 break;
684 default:
685 assert("check_single_arithmetic_op: unexpected op",
686 FALSE);
687 break;
688 }
689
690 /* get LSB for tie breaking */
691 LSB = Z.layout.frac & 1;
692
693 /* set up hi and lo */
694 lo = z_sign*Z.flt;
695 Z.layout.frac += 1;
696 hi = z_sign*Z.flt;
697
698 switch(mode) {
699 case TO_NEAREST:
700 /* look at 3 guard bits to determine expected rounding */
701 switch(guard) {
702 case 0:
703 case 1: case 2: case 3:
704 expected = lo;
705 break;
706 case 4: /* tie: round to even */
707 if (debug) printf("tie: LSB = %d\n", LSB);
708 expected = (LSB == 0 ? lo : hi);
709 break;
710 case 5: case 6: case 7:
711 expected = hi;
712 break;
713 default:
714 assert("check_single_guarded_arithmetic_op: unexpected guard",
715 FALSE);
716 }
717 break;
718 case TO_ZERO:
719 expected = lo;
720 break;
721 case TO_PLUS_INFINITY:
722 if (guard == 0) {
723 /* no rounding */
724 expected = lo;
725 } else {
726 expected = (s == 1 ? hi : lo);
727 }
728 break;
729 case TO_MINUS_INFINITY:
730 if (guard == 0) {
731 /* no rounding */
732 expected = lo;
733 } else {
734 expected = (s == 1 ? lo : hi);
735 }
736 break;
737 }
738
739 set_rounding_mode(mode);
740
741 /*
742 ** do the double precision dual operation just for comparison
743 ** when debugging
744 */
745 switch(op) {
746 case FADDS:
747 BINOP("fadds");
748 Res.dbl = fD;
749 break;
750 case FSUBS:
751 BINOP("fsubs");
752 Res.dbl = fD;
753 break;
754 case FMULS:
755 BINOP("fmuls");
756 Res.dbl = fD;
757 break;
758 case FDIVS:
759 BINOP("fdivs");
760 Res.dbl = fD;
761 break;
762 case FMADDS:
763 TERNOP("fmadds");
764 Res.dbl = fD;
765 break;
766 case FMSUBS:
767 TERNOP("fmsubs");
768 Res.dbl = fD;
769 break;
770 case FNMADDS:
771 TERNOP("fnmadds");
772 Res.dbl = fD;
773 break;
774 case FNMSUBS:
775 TERNOP("fnmsubs");
776 Res.dbl = fD;
777 break;
778 default:
779 assert("check_single_guarded_arithmetic_op: unexpected op",
780 FALSE);
781 break;
782 }
783 #undef UNOP
784 #undef BINOP
785 #undef TERNOP
786
787 Exp.dbl = expected;
788
789 if ((Res.layout.sign != Exp.layout.sign) ||
790 (Res.layout.exp != Exp.layout.exp) ||
791 (Res.layout.frac_lo != Exp.layout.frac_lo) ||
792 (Res.layout.frac_hi != Exp.layout.frac_hi)) {
793 result = "FAILED";
794 status = 1;
795 } else {
796 result = "PASSED";
797 status = 0;
798 }
799
800 printf("%s:%s:%s(%-13f",
801 round_mode_name[mode], result, flt_op_names[op], fA);
802 if (arg_count > 1) printf(", %-13a", fB);
803 if (arg_count > 2) printf(", %-13a", fC);
804 printf(") = %-13a", Res.dbl);
805 if (status) printf("\n\texpected %a", Exp.dbl);
806 putchar('\n');
807
808 if (debug) {
809 print_double("hi", hi);
810 print_double("lo", lo);
811 print_double("expected", expected);
812 print_double("got", Res.dbl);
813 }
814 }
815
816 return status;
817 }
818
check_double_guarded_arithmetic_op(flt_op_t op)819 int check_double_guarded_arithmetic_op(flt_op_t op)
820 {
821 typedef struct {
822 int num, den, hi, lo;
823 } fdiv_t;
824 typedef struct {
825 double arg;
826 int exp, hi, lo;
827 } fsqrt_t;
828
829 char *result;
830 int status = 0;
831 dbl_overlay A, B, Z;
832 dbl_overlay Res, Exp;
833 double fA, fB, fC, fD;
834 round_mode_t mode;
835 int g, s;
836 int arg_count;
837 fdiv_t div_guard_cases[16] = {
838 { 62, 62, 0x00000, 0x00000000 }, /* 0 */
839 { 64, 62, 0x08421, 0x08421084 }, /* 1 */
840 { 66, 62, 0x10842, 0x10842108 }, /* 2 */
841 { 100, 62, 0x9ce73, 0x9ce739ce }, /* 3 */
842 { 100, 62, 0x9ce73, 0x9ce739ce }, /* X */
843 { 102, 62, 0xa5294, 0xa5294a52 }, /* 5 */
844 { 106, 62, 0xb5ad6, 0xb5ad6b5a }, /* 6 */
845 { 108, 62, 0xbdef7, 0xbdef7bde }, /* 7 */
846 { 108, 108, 0x00000, 0x00000000 }, /* 8 */
847 { 112, 62, 0xce739, 0xce739ce7 }, /* 9 */
848 { 114, 62, 0xd6b5a, 0xd6b5ad6b }, /* A */
849 { 116, 62, 0xdef7b, 0xdef7bdef }, /* B */
850 { 84, 62, 0x5ad6b, 0x5ad6b5ad }, /* X */
851 { 118, 62, 0xe739c, 0xe739ce73 }, /* D */
852 { 90, 62, 0x739ce, 0x739ce739 }, /* E */
853 { 92, 62, 0x7bdef, 0x7bdef7bd } /* F */
854 };
855
856
857 fsqrt_t sqrt_guard_cases[16] = {
858 { 0x1.08800p0, 0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440 */
859 { 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910 */
860 { 0x1.A8220p0, 0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411 */
861 { 0x1.05A20p0, 0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1 */
862 { 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541 */
863 { 0x1.DCA20p0, 0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51 */
864 { 0x1.02C80p0, 0, 0x01630, 0x9cde7483}, /* :6 B6.8164 */
865 { 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40 */
866 { 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9 */
867 { 0x1.1D020p0, 0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81 */
868 { 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4 */
869 { 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400 */
870 { 0x1.48520p0, 0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429 */
871 { 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61 */
872 { 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684 */
873 { 0x1.76B20p0, 0, 0x35b67, 0x81aed827} /* :F DB.BB59 */
874 };
875
876 /* 0x1.00000 00000000p-3 */
877 /* set up the invariant fields of B, the arg to cause rounding */
878 B.dbl = 0.0;
879 B.layout.exp = 1020;
880
881 /* set up args so result is always Z = 1.200000000000<g>p+0 */
882 Z.dbl = 1.0;
883 Z.layout.sign = 0;
884
885 #define TERNOP(op) \
886 arg_count = 3; \
887 __asm__ volatile( \
888 op" %0, %1, %2, %3\n\t" \
889 : "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
890 #define BINOP(op) \
891 arg_count = 2; \
892 __asm__ volatile( \
893 op" %0, %1, %2\n\t" \
894 : "=f"(fD) : "f"(fA) , "f"(fB));
895 #define UNOP(op) \
896 arg_count = 1; \
897 __asm__ volatile( \
898 op" %0, %1\n\t" \
899 : "=f"(fD) : "f"(fA));
900
901 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
902 for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
903 for (g = 0; g < 16; g += 1) {
904 double lo, hi, expected;
905 int LSB;
906 int guard;
907 int z_sign = s;
908
909 /*
910 ** one argument will have exponent = 0 as will the result (by
911 ** design) so choose the other argument with exponent -3 to
912 ** force a 3 bit shift for scaling leaving us with 3 guard bits
913 ** and the LSB bit at the bottom of the manitssa.
914 */
915 switch(op) {
916 case FADD:
917 /* 1p+0 + 1.000000000000<g>p-3 */
918 B.layout.frac_lo = g;
919
920 fB = s*B.dbl;
921 fA = s*1.0;
922
923 /* set up Z to be truncated result */
924
925 /* mask off LSB from resulting guard bits */
926 guard = g & 7;
927
928 Z.layout.frac_hi = 0x20000;
929 Z.layout.frac_lo = g >> 3;
930
931 break;
932 case FSUB:
933 /* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
934 A.dbl = 1.125;
935 /* add enough to avoid scaling of the result */
936 A.layout.frac_lo = 0x2;
937 fA = s*A.dbl;
938
939 B.layout.frac_lo = g;
940 fB = s*B.dbl;
941
942 /* set up Z to be truncated result */
943 guard = (0x10-g);
944 Z.layout.frac_hi = 0x0;
945 Z.layout.frac_lo = guard>>3;
946
947 /* mask off LSB from resulting guard bits */
948 guard &= 7;
949 break;
950 case FMUL:
951 /* 1 + g*2^-52 */
952 A.dbl = 1.0;
953 A.layout.frac_lo = g;
954 fA = s*A.dbl;
955 fB = 1.125;
956
957 /* set up Z to be truncated result */
958 Z.dbl = 1.0;
959 Z.layout.frac_hi = 0x20000;
960 Z.layout.frac_lo = g + (g>>3);
961 guard = g & 7;
962 break;
963 case FMADD:
964 case FMSUB:
965 case FNMADD:
966 case FNMSUB:
967 /* 1 + g*2^-52 */
968 A.dbl = 1.0;
969 A.layout.frac_lo = g;
970 fA = s*A.dbl;
971 fB = 1.125;
972
973 /* 1.0000000000001p-1 */
974 A.dbl = 0.5;
975 A.layout.frac_lo = 1;
976 fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
977
978 /* set up Z to be truncated result */
979 z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
980 guard = ((g & 7) + 0x4) & 7;
981 Z.dbl = 1.0;
982 Z.layout.frac_hi = 0xa0000;
983 Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
984 break;
985 case FDIV:
986 /* g >> 3 == LSB, g & 7 == guard bits */
987 guard = g & 7;
988 if (guard == 0x4) {
989 /* special case guard bits == 4, inexact tie */
990 fB = s*2.0;
991 Z.dbl = 0.0;
992 if (g >> 3) {
993 fA = dbl_denorm_small + 2*dbl_denorm_small;
994 Z.layout.frac_lo = 0x1;
995 } else {
996 fA = dbl_denorm_small;
997 }
998 } else {
999 fA = s*div_guard_cases[g].num;
1000 fB = div_guard_cases[g].den;
1001
1002 printf("%d/%d\n",
1003 s*div_guard_cases[g].num,
1004 div_guard_cases[g].den);
1005 Z.dbl = 1.0;
1006 Z.layout.frac_hi = div_guard_cases[g].hi;
1007 Z.layout.frac_lo = div_guard_cases[g].lo;
1008 }
1009 break;
1010 case FSQRT:
1011 fA = s*sqrt_guard_cases[g].arg;
1012 Z.dbl = 1.0;
1013 Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
1014 Z.layout.frac_hi = sqrt_guard_cases[g].hi;
1015 Z.layout.frac_lo = sqrt_guard_cases[g].lo;
1016 guard = g >> 1;
1017 if (g & 1) guard |= 1;
1018 /* don't have test cases for when X bit = 0 */
1019 if (guard == 0 || guard == 4) continue;
1020 break;
1021 default:
1022 assert("check_double_guarded_arithmetic_op: unexpected op",
1023 FALSE);
1024 break;
1025 }
1026
1027 /* get LSB for tie breaking */
1028 LSB = Z.layout.frac_lo & 1;
1029
1030 /* set up hi and lo */
1031 lo = z_sign*Z.dbl;
1032 Z.layout.frac_lo += 1;
1033 hi = z_sign*Z.dbl;
1034
1035 switch(mode) {
1036 case TO_NEAREST:
1037 /* look at 3 guard bits to determine expected rounding */
1038 switch(guard) {
1039 case 0:
1040 case 1: case 2: case 3:
1041 expected = lo;
1042 break;
1043 case 4: /* tie: round to even */
1044 if (debug) printf("tie: LSB = %d\n", LSB);
1045 expected = (LSB == 0 ? lo : hi);
1046 break;
1047 case 5: case 6: case 7:
1048 expected = hi;
1049 break;
1050 default:
1051 assert("check_double_guarded_arithmetic_op: unexpected guard",
1052 FALSE);
1053 }
1054 break;
1055 case TO_ZERO:
1056 expected = lo;
1057 break;
1058 case TO_PLUS_INFINITY:
1059 if (guard == 0) {
1060 /* no rounding */
1061 expected = lo;
1062 } else {
1063 expected = (s == 1 ? hi : lo);
1064 }
1065 break;
1066 case TO_MINUS_INFINITY:
1067 if (guard == 0) {
1068 /* no rounding */
1069 expected = lo;
1070 } else {
1071 expected = (s == 1 ? lo : hi);
1072 }
1073 break;
1074 }
1075
1076 set_rounding_mode(mode);
1077
1078 /*
1079 ** do the double precision dual operation just for comparison
1080 ** when debugging
1081 */
1082 switch(op) {
1083 case FADD:
1084 BINOP("fadd");
1085 Res.dbl = fD;
1086 break;
1087 case FSUB:
1088 BINOP("fsub");
1089 Res.dbl = fD;
1090 break;
1091 case FMUL:
1092 BINOP("fmul");
1093 Res.dbl = fD;
1094 break;
1095 case FMADD:
1096 TERNOP("fmadd");
1097 Res.dbl = fD;
1098 break;
1099 case FMSUB:
1100 TERNOP("fmsub");
1101 Res.dbl = fD;
1102 break;
1103 case FNMADD:
1104 TERNOP("fnmadd");
1105 Res.dbl = fD;
1106 break;
1107 case FNMSUB:
1108 TERNOP("fnmsub");
1109 Res.dbl = fD;
1110 break;
1111 case FDIV:
1112 BINOP("fdiv");
1113 Res.dbl = fD;
1114 break;
1115 case FSQRT:
1116 UNOP("fsqrt");
1117 Res.dbl = fD;
1118 break;
1119 default:
1120 assert("check_double_guarded_arithmetic_op: unexpected op",
1121 FALSE);
1122 break;
1123 }
1124 #undef UNOP
1125 #undef BINOP
1126 #undef TERNOP
1127
1128 Exp.dbl = expected;
1129
1130 if ((Res.layout.sign != Exp.layout.sign) ||
1131 (Res.layout.exp != Exp.layout.exp) ||
1132 (Res.layout.frac_lo != Exp.layout.frac_lo) ||
1133 (Res.layout.frac_hi != Exp.layout.frac_hi)) {
1134 result = "FAILED";
1135 status = 1;
1136 } else {
1137 result = "PASSED";
1138 status = 0;
1139 }
1140
1141 printf("%s:%s:%s(%-13a",
1142 round_mode_name[mode], result, flt_op_names[op], fA);
1143 if (arg_count > 1) printf(", %-13a", fB);
1144 if (arg_count > 2) printf(", %-13a", fC);
1145 printf(") = %-13a", Res.dbl);
1146 if (status) printf("\n\texpected %a", Exp.dbl);
1147 putchar('\n');
1148
1149 if (debug) {
1150 print_double("hi", hi);
1151 print_double("lo", lo);
1152 print_double("expected", expected);
1153 print_double("got", Res.dbl);
1154 }
1155 }
1156
1157 return status;
1158 }
1159
test_float_arithmetic_ops()1160 int test_float_arithmetic_ops()
1161 {
1162 int status = 0;
1163 flt_op_t op;
1164
1165 /*
1166 ** choose FP operands whose result should be rounded to either
1167 ** lo or hi.
1168 */
1169
1170 printf("-------------------------- %s --------------------------\n",
1171 "test rounding of float operators without guard bits");
1172 for (op = FADDS; op <= FDIVS; op++) {
1173 status |= check_single_arithmetic_op(op);
1174 }
1175
1176 printf("-------------------------- %s --------------------------\n",
1177 "test rounding of float operators with guard bits");
1178 for (op = FADDS; op <= FNMSUBS; op++) {
1179 status |= check_single_guarded_arithmetic_op(op);
1180 }
1181
1182 printf("-------------------------- %s --------------------------\n",
1183 "test rounding of double operators with guard bits");
1184 for (op = FADD; op <= FSQRT; op++) {
1185 status |= check_double_guarded_arithmetic_op(op);
1186 }
1187 return status;
1188 }
1189
1190
1191 int
main()1192 main()
1193 {
1194 int status = 0;
1195
1196 init();
1197
1198 status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
1199 status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
1200 status |= test_int_to_float_convert("test (float)int convert");
1201 status |= test_int_to_float_convert("test (float)int convert");
1202
1203 #ifdef __powerpc64__
1204 status |= test_long_to_double_convert("test (double)long convert");
1205 #endif
1206 status |= test_float_arithmetic_ops();
1207 return status;
1208 }
1209