1 /* Copyright (C) 2011 IBM
2
3 Author: Maynard Johnson <maynardj@us.ibm.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307, USA.
19
20 The GNU General Public License is contained in the file COPYING.
21 */
22
23 #ifdef HAS_VSX
24
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <malloc.h>
30 #include <altivec.h>
31 #include <math.h>
32
/* HWord_t: unsigned integer type used for the general-purpose register
 * variables declared below; 64 bits wide when building for powerpc64,
 * 32 bits wide otherwise. */
#ifdef __powerpc64__
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */

/* Minimal boolean type and its two values. */
typedef unsigned char Bool;
#define True  1
#define False 0
42 register HWord_t r14 __asm__ ("r14");
43 register HWord_t r15 __asm__ ("r15");
44 register HWord_t r16 __asm__ ("r16");
45 register HWord_t r17 __asm__ ("r17");
46 register double f14 __asm__ ("fr14");
47 register double f15 __asm__ ("fr15");
48 register double f16 __asm__ ("fr16");
49 register double f17 __asm__ ("fr17");
50
51 static volatile unsigned int div_flags, div_xer;
52
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Write _arg into the condition register (mtcr).
 * NOTE: the expansion already ends with ';'. */
#define SET_CR(_arg)                                           \
   __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR );

/* Write _arg into XER (mtxer).
 * NOTE: the expansion already ends with ';'. */
#define SET_XER(_arg)                                          \
   __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval (mfcr). */
#define GET_CR(_lval)                                          \
   __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) )

/* Read XER into _lval (mfxer). */
#define GET_XER(_lval)                                         \
   __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR, then XER, into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer)                         \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Convenience forms that clear CR, XER, or both. */
#define SET_CR_ZERO                                            \
   SET_CR(0)

#define SET_XER_ZERO                                           \
   SET_XER(0)

#define SET_CR_XER_ZERO                                        \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Clear the FPSCR by moving 0.0 into all eight of its fields (mtfsf 0xFF). */
#define SET_FPSCR_ZERO                                         \
   do { double _d = 0.0;                                       \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) );  \
   } while (0)
83
84
/* Signature shared by the test routines in this file: no arguments and no
 * return value. */
typedef void (*test_func_t)(void);

/* Short name for struct test_table, which is defined later in the file. */
typedef struct test_table test_table_t;
87
88
89 /* These functions below that construct a table of floating point
90 * values were lifted from none/tests/ppc32/jm-insns.c.
91 */
92
/* Debug tracing for the argument-table builders: prints to stderr when the
 * file is compiled with DEBUG_ARGS_BUILD defined, and expands to an empty
 * statement otherwise. */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#else
#define AB_DPRINTF(...) do { } while (0)
#endif

/* Assemble a 64-bit pattern from a sign bit, an exponent field and a
 * fraction field, and store it in the 8-byte object at farg.
 *
 *   farg  destination (callers in this file pass the address of a double)
 *   s     sign, placed at bit 63
 *   _exp  exponent field, placed at bits 62..52
 *   mant  fraction field, OR-ed into the low bits as given (not masked)
 *
 * The pattern is stored with memcpy rather than through a cast pointer so
 * that the double is never written via an incompatible lvalue type.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   const uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;

   memcpy(farg, &bits, sizeof bits);

#if defined (DEBUG_ARGS_BUILD)
   {
      double as_double;

      memcpy(&as_double, &bits, sizeof as_double);
      /* Casts keep the arguments in step with the %llx conversions on
       * targets where uint64_t is not unsigned long long. */
      AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
                 s, _exp, (unsigned long long)mant,
                 (unsigned long long)bits, as_double);
   }
#endif
}
109
/* Single-precision counterpart of register_farg(): assemble a 32-bit
 * pattern and store it in the 4-byte object at farg.
 *
 *   farg  destination (callers in this file pass the address of a float)
 *   s     sign, placed at bit 31
 *   _exp  exponent field, placed at bits 30..23
 *   mant  fraction field, OR-ed into the low bits as given (not masked)
 *
 * memcpy is used for the store so the float is never written through a
 * uint32_t lvalue.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   const uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   memcpy(farg, &bits, sizeof bits);
}
117
118
/* One pair of operands for a two-input test.  Both members are indexes into
 * the special-value tables (spec_fargs / spec_sp_fargs). */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first (A) operand  */
   int frb_idx;   /* index of the second (B) operand */
} fp_test_args_t;


/* Operand pairs for the two-input tests, four pairs per row.  The setup
 * helpers later in the file consume four consecutive pairs per vector for
 * single precision and two for double precision.
 *
 * NOTE(review): a few pairs break the otherwise regular pattern (for
 * example {4, 1}, and {12, 6}/{12, 5} inside the "10" group).  They are
 * reproduced verbatim; confirm whether they are intentional before
 * changing them.
 */
fp_test_args_t two_arg_fp_tests[] = {
   /*  0 */ { 8,  8}, { 8, 14}, {15, 16}, { 8,  5},
   /*  4 */ { 8,  4}, { 8,  7}, { 8,  9}, { 8, 11},
   /*  8 */ {14,  8}, {14, 14}, {14,  6}, {14,  5},
   /* 12 */ {14,  4}, {14,  7}, {14,  9}, {14, 11},
   /* 16 */ { 6,  8}, { 6, 14}, { 6,  6}, { 6,  5},
   /* 20 */ { 6,  4}, { 6,  7}, { 6,  9}, { 6, 11},
   /* 24 */ { 5,  8}, { 5, 14}, { 5,  6}, { 5,  5},
   /* 28 */ { 5,  4}, { 5,  7}, { 5,  9}, { 5, 11},
   /* 32 */ { 4,  8}, { 4, 14}, { 4,  6}, { 4,  5},
   /* 36 */ { 4,  1}, { 4,  7}, { 4,  9}, { 4, 11},
   /* 40 */ { 7,  8}, { 7, 14}, { 7,  6}, { 7,  5},
   /* 44 */ { 7,  4}, { 7,  7}, { 7,  9}, { 7, 11},
   /* 48 */ {10,  8}, {10, 14}, {12,  6}, {12,  5},
   /* 52 */ {10,  4}, {10,  7}, {10,  9}, {10, 11},
   /* 56 */ {12,  8}, {12, 14}, {12,  6}, {15, 16},
   /* 60 */ {15, 16}, { 9, 11}, {11, 11}, {11, 12},
   /* 64 */ {16, 18}, {17, 16}, {19, 19}, {19, 18}
};
195
196
/* Special-value tables filled in by build_special_fargs_table():
 * nb_special_fargs is the number of entries, spec_fargs holds the
 * double-precision values and spec_sp_fargs their single-precision
 * counterparts. */
static int nb_special_fargs;
static double * spec_fargs;
static float * spec_sp_fargs;

/* Allocate and populate spec_fargs / spec_sp_fargs on the first call;
 * later calls return immediately.  Exits the program with status 1 if the
 * tables cannot be allocated.
 */
static void build_special_fargs_table(void)
{
   /*
      Entry  Sign Exp   fraction            Special value
       0      0   3fd   0x8000000000000ULL  Positive finite number
       1      0   404   0xf000000000000ULL  ...
       2      0   001   0x8000000b77501ULL  ...
       3      0   7fe   0x800000000051bULL  ...
       4      0   012   0x3214569900000ULL  ...
       5      0   000   0x0000000000000ULL  +0.0 (+zero)
       6      1   000   0x0000000000000ULL  -0.0 (-zero)
       7      0   7ff   0x0000000000000ULL  +infinity
       8      1   7ff   0x0000000000000ULL  -infinity
       9      0   7ff   0x7FFFFFFFFFFFFULL  +SNaN
      10      1   7ff   0x7FFFFFFFFFFFFULL  -SNaN
      11      0   7ff   0x8000000000000ULL  +QNaN
      12      1   7ff   0x8000000000000ULL  -QNaN
      13      1   000   0x8340000078000ULL  Denormalized val (zero exp and non-zero fraction)
      14      1   40d   0x0650f5a07b353ULL  Negative finite number
      15      0   412   0x32585a9900000ULL  A few more positive finite numbers
      16      0   413   0x82511a2000000ULL  ...
      17      0   403   0x12ef5a9300000ULL  ...
      18      0   405   0x14bf5d2300000ULL  ...
      19      0   409   0x76bf982440000ULL  ...
   */
   static const struct {
      int      sign;
      uint16_t biased_exp;
      uint64_t fraction;
   } dp_parts[] = {
      /*  0 */ { 0, 0x3fd, 0x8000000000000ULL },
      /*  1 */ { 0, 0x404, 0xf000000000000ULL },
      /*  2 */ { 0, 0x001, 0x8000000b77501ULL },
      /*  3 */ { 0, 0x7fe, 0x800000000051bULL },
      /*  4 */ { 0, 0x012, 0x3214569900000ULL },
      /*  5 */ { 0, 0x000, 0x0000000000000ULL },   /* +0.0      */
      /*  6 */ { 1, 0x000, 0x0000000000000ULL },   /* -0.0      */
      /*  7 */ { 0, 0x7FF, 0x0000000000000ULL },   /* +infinity */
      /*  8 */ { 1, 0x7FF, 0x0000000000000ULL },   /* -infinity */
      /*  9 */ { 0, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* +SNaN     */
      /* 10 */ { 1, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* -SNaN     */
      /* 11 */ { 0, 0x7FF, 0x8000000000000ULL },   /* +QNaN     */
      /* 12 */ { 1, 0x7FF, 0x8000000000000ULL },   /* -QNaN     */
      /* 13 */ { 1, 0x000, 0x8340000078000ULL },   /* denormal  */
      /* 14 */ { 1, 0x40d, 0x0650f5a07b353ULL },   /* negative finite */
      /* 15 */ { 0, 0x412, 0x32585a9900000ULL },
      /* 16 */ { 0, 0x413, 0x82511a2000000ULL },
      /* 17 */ { 0, 0x403, 0x12ef5a9300000ULL },
      /* 18 */ { 0, 0x405, 0x14bf5d2300000ULL },
      /* 19 */ { 0, 0x409, 0x76bf982440000ULL }
   };
   enum {
      NB_ENTRIES   = sizeof dp_parts / sizeof dp_parts[0],
      POS_SNAN_IDX = 9,
      NEG_SNAN_IDX = 10
   };
   int i;

   if (spec_fargs)
      return;

   spec_fargs = malloc( NB_ENTRIES * sizeof *spec_fargs );
   spec_sp_fargs = malloc( NB_ENTRIES * sizeof *spec_sp_fargs );
   if (!spec_fargs || !spec_sp_fargs) {
      fprintf(stderr, "build_special_fargs_table: out of memory. Exiting\n");
      exit(1);
   }

   /* Double-precision table: built bit-for-bit from the parts above. */
   for (i = 0; i < NB_ENTRIES; i++)
      register_farg(&spec_fargs[i], dp_parts[i].sign,
                    dp_parts[i].biased_exp, dp_parts[i].fraction);

   nb_special_fargs = NB_ENTRIES;

   /*
    * Single-precision table.  Most entries are simply the double value
    * converted to float.  The two SNaN entries are the exception: when src
    * is a SNaN, it's converted to a QNaN first before rounding to
    * single-precision, so a plain copy would not keep the value signalling.
    * Those two slots are therefore set bit-for-bit with register_sp_farg().
    */
   for (i = 0; i < NB_ENTRIES; i++) {
      if (i == POS_SNAN_IDX || i == NEG_SNAN_IDX)
         register_sp_farg(&spec_sp_fargs[i], dp_parts[i].sign, 0xff, 0x3FFFFF);
      else
         spec_sp_fargs[i] = spec_fargs[i];
   }
}
392
393
394 struct test_table
395 {
396 test_func_t test_category;
397 char * name;
398 };
399
/* Type of input for floating point operations. */
typedef enum {
   SINGLE_TEST = 0,
   DOUBLE_TEST = 1
} precision_type_t;

/* Category of a test-table entry.  VX_ESTIMATE marks the reciprocal /
 * reciprocal-square-root estimate instructions, which are validated with
 * check_estimate().
 * NOTE(review): the remaining values look like they select how the result
 * is printed -- confirm against the driver code. */
typedef enum {
   VX_SCALAR_CONV_TO_WORD = 0,
   VX_CONV_TO_SINGLE      = 1,
   VX_CONV_TO_DOUBLE      = 2,
   VX_ESTIMATE            = 3,
   VX_DEFAULT             = 4
} vx_fp_test_type;
413
414 static vector unsigned int vec_out, vec_inA, vec_inB;
415
416 /* This function is for checking the reciprocal and reciprocal square root
417 * estimate instructions.
418 */
check_estimate(precision_type_t type,Bool is_rsqrte,int idx,int output_vec_idx)419 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
420 {
421 /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
422 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions
423 * does an actual reciprocal calculation versus estimation, so the answer we get back from
424 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
425 * precision) and the estimate may still be within expected tolerances. On top of that,
426 * we can't count on these estimates always being the same across implementations.
427 * For example, with the fre[s] instruction (which should be correct to within one part
428 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
429 * one implementation could return 1.0111_1111_0000 and another implementation could return
430 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a
431 * single bit in common.
432 *
433 * The upshot is we can't validate the VEX output for these instructions by comparing against
434 * stored bit patterns. We must check that the result is within expected tolerances.
435 */
436
437
438 /* A mask to be used for validation as a last resort.
439 * Only use 12 bits of precision for reasons discussed above.
440 */
441 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
442 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
443
444 Bool result = False;
445 Bool dp_test = type == DOUBLE_TEST;
446 double src_dp, res_dp;
447 float src_sp, res_sp;
448 src_dp = res_dp = 0;
449 src_sp = res_sp = 0;
450 #define SRC (dp_test ? src_dp : src_sp)
451 #define RES (dp_test ? res_dp : res_sp)
452 Bool src_is_negative = False;
453 Bool res_is_negative = False;
454 unsigned long long * dst_dp = NULL;
455 unsigned int * dst_sp = NULL;
456 if (dp_test) {
457 unsigned long long * src_dp_ull;
458 dst_dp = (unsigned long long *) &vec_out;
459 src_dp = spec_fargs[idx];
460 src_dp_ull = (unsigned long long *) &src_dp;
461 src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
462 res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
463 memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
464 } else {
465 unsigned int * src_sp_uint;
466 dst_sp = (unsigned int *) &vec_out;
467 src_sp = spec_sp_fargs[idx];
468 src_sp_uint = (unsigned int *) &src_sp;
469 src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
470 res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
471 memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
472 }
473
474 // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
475 if (isnan(SRC))
476 return isnan(RES);
477 if (fpclassify(SRC) == FP_ZERO)
478 return isinf(RES);
479 if (!src_is_negative && isinf(SRC))
480 return !res_is_negative && (fpclassify(RES) == FP_ZERO);
481 if (is_rsqrte) {
482 if (src_is_negative)
483 return isnan(RES);
484 } else {
485 if (src_is_negative && isinf(SRC))
486 return res_is_negative && (fpclassify(RES) == FP_ZERO);
487 }
488 if (dp_test) {
489 double calc_diff;
490 double real_diff;
491 double recip_divisor;
492 double div_result;
493 double calc_diff_tmp;
494
495 if (is_rsqrte)
496 recip_divisor = sqrt(src_dp);
497 else
498 recip_divisor = src_dp;
499
500 div_result = 1.0/recip_divisor;
501 calc_diff_tmp = recip_divisor * 16384.0;
502 if (isnormal(calc_diff_tmp)) {
503 calc_diff = fabs(1.0/calc_diff_tmp);
504 real_diff = fabs(res_dp - div_result);
505 result = ( ( res_dp == div_result )
506 || ( real_diff <= calc_diff ) );
507 } else {
508 /* Unable to compute theoretical difference, so we fall back to masking out
509 * un-precise bits.
510 */
511 unsigned long long * div_result_dp = (unsigned long long *) &div_result;
512 result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
513 }
514 /* For debug use . . .
515 if (!result) {
516 unsigned long long * dv = &div_result;
517 unsigned long long * rd = &real_diff;
518 unsigned long long * cd = &calc_diff;
519 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
520 *dv, *rd, *cd);
521 }
522 */
523 } else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
524 float calc_diff;
525 float real_diff;
526 float div_result;
527 float calc_diff_tmp;
528 float recip_divisor = sqrt(src_sp);
529
530 div_result = 1.0/recip_divisor;
531 calc_diff_tmp = recip_divisor * 16384.0;
532 if (isnormal(calc_diff_tmp)) {
533 calc_diff = fabsf(1.0/calc_diff_tmp);
534 real_diff = fabsf(res_sp - div_result);
535 result = ( ( res_sp == div_result )
536 || ( real_diff <= calc_diff ) );
537 } else {
538 /* Unable to compute theoretical difference, so we fall back to masking out
539 * un-precise bits.
540 */
541 unsigned int * div_result_sp = (unsigned int *) &div_result;
542 result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
543 }
544 /* For debug use . . .
545 if (!result) {
546 unsigned long long * dv = &div_result;
547 unsigned long long * rd = &real_diff;
548 unsigned long long * cd = &calc_diff;
549 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
550 *dv, *rd, *cd);
551 }
552 */
553 }
554 return result;
555 }
556
557 typedef struct vx_fp_test
558 {
559 test_func_t test_func;
560 const char * name;
561 fp_test_args_t * targs;
562 int num_tests;
563 precision_type_t precision;
564 vx_fp_test_type type;
565 const char * op;
566 } vx_fp_test_t;
567
568
569 static Bool do_dot;
570
test_xvredp(void)571 static void test_xvredp(void)
572 {
573 __asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
574 }
575
test_xsredp(void)576 static void test_xsredp(void)
577 {
578 __asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
579 }
580
test_xvrsqrtedp(void)581 static void test_xvrsqrtedp(void)
582 {
583 __asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
584 }
585
test_xsrsqrtedp(void)586 static void test_xsrsqrtedp(void)
587 {
588 __asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
589 }
590
test_xvrsqrtesp(void)591 static void test_xvrsqrtesp(void)
592 {
593 __asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
594 }
595
test_xstsqrtdp(void)596 static void test_xstsqrtdp(void)
597 {
598 __asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB));
599 }
600
test_xvtsqrtdp(void)601 static void test_xvtsqrtdp(void)
602 {
603 __asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB));
604 }
605
test_xvtsqrtsp(void)606 static void test_xvtsqrtsp(void)
607 {
608 __asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB));
609 }
610
test_xvsqrtdp(void)611 static void test_xvsqrtdp(void)
612 {
613 __asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
614 }
615
test_xvsqrtsp(void)616 static void test_xvsqrtsp(void)
617 {
618 __asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
619 }
620
test_xvtdivdp(void)621 static void test_xvtdivdp(void)
622 {
623 __asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
624 }
625
test_xvtdivsp(void)626 static void test_xvtdivsp(void)
627 {
628 __asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
629 }
630
test_xscvdpsp(void)631 static void test_xscvdpsp(void)
632 {
633 __asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
634 }
635
test_xscvdpuxws(void)636 static void test_xscvdpuxws(void)
637 {
638 __asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
639 }
640
test_xscvspdp(void)641 static void test_xscvspdp(void)
642 {
643 __asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
644 }
645
test_xvcvdpsp(void)646 static void test_xvcvdpsp(void)
647 {
648 __asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
649 }
650
test_xvcvdpuxds(void)651 static void test_xvcvdpuxds(void)
652 {
653 __asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
654 }
655
test_xvcvdpuxws(void)656 static void test_xvcvdpuxws(void)
657 {
658 __asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
659 }
660
test_xvcvspdp(void)661 static void test_xvcvspdp(void)
662 {
663 __asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
664 }
665
test_xvcvspsxds(void)666 static void test_xvcvspsxds(void)
667 {
668 __asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
669 }
670
test_xvcvspuxds(void)671 static void test_xvcvspuxds(void)
672 {
673 __asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
674 }
675
test_xvcvdpsxds(void)676 static void test_xvcvdpsxds(void)
677 {
678 __asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
679 }
680
test_xvcvspuxws(void)681 static void test_xvcvspuxws(void)
682 {
683 __asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
684 }
685
test_xvcvsxddp(void)686 static void test_xvcvsxddp(void)
687 {
688 __asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
689 }
690
test_xvcvuxddp(void)691 static void test_xvcvuxddp(void)
692 {
693 __asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
694 }
695
test_xvcvsxdsp(void)696 static void test_xvcvsxdsp(void)
697 {
698 __asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
699 }
700
test_xvcvuxdsp(void)701 static void test_xvcvuxdsp(void)
702 {
703 __asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
704 }
705
test_xvcvsxwdp(void)706 static void test_xvcvsxwdp(void)
707 {
708 __asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
709 }
710
test_xvcvuxwdp(void)711 static void test_xvcvuxwdp(void)
712 {
713 __asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
714 }
715
test_xvcvsxwsp(void)716 static void test_xvcvsxwsp(void)
717 {
718 __asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
719 }
720
test_xvcvuxwsp(void)721 static void test_xvcvuxwsp(void)
722 {
723 __asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
724 }
725
test_xsrdpic(void)726 static void test_xsrdpic(void)
727 {
728 __asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
729 }
730
test_xsrdpiz(void)731 static void test_xsrdpiz(void)
732 {
733 __asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
734 }
735
test_xsrdpi(void)736 static void test_xsrdpi(void)
737 {
738 __asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
739 }
740
test_xvabsdp(void)741 static void test_xvabsdp(void)
742 {
743 __asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
744 }
745
test_xvnabsdp(void)746 static void test_xvnabsdp(void)
747 {
748 __asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
749 }
750
test_xvnegdp(void)751 static void test_xvnegdp(void)
752 {
753 __asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
754 }
755
test_xvabssp(void)756 static void test_xvabssp(void)
757 {
758 __asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
759 }
760
test_xvnabssp(void)761 static void test_xvnabssp(void)
762 {
763 __asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
764 }
765
test_xvrdpi(void)766 static void test_xvrdpi(void)
767 {
768 __asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
769 }
770
test_xvrdpic(void)771 static void test_xvrdpic(void)
772 {
773 __asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
774 }
775
test_xvrdpim(void)776 static void test_xvrdpim(void)
777 {
778 __asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
779 }
780
test_xvrdpip(void)781 static void test_xvrdpip(void)
782 {
783 __asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
784 }
785
test_xvrdpiz(void)786 static void test_xvrdpiz(void)
787 {
788 __asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
789 }
790
test_xvrspi(void)791 static void test_xvrspi(void)
792 {
793 __asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
794 }
795
test_xvrspic(void)796 static void test_xvrspic(void)
797 {
798 __asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
799 }
800
test_xvrspim(void)801 static void test_xvrspim(void)
802 {
803 __asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
804 }
805
test_xvrspip(void)806 static void test_xvrspip(void)
807 {
808 __asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
809 }
810
test_xvrspiz(void)811 static void test_xvrspiz(void)
812 {
813 __asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
814 }
815
816 static vx_fp_test_t
817 vsx_one_fp_arg_tests[] = {
818 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
819 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
820 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
821 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
822 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
823 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
824 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
825 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
826 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
827 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
828 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
829 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
830 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
831 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
832 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
833 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
834 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
835 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
836 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
837 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
838 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
839 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
840 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
841 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
842 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
843 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
844 { &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
845 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
846 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
847 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
848 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
849 { &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
850 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
851 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
852 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
853 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
854 { NULL, NULL, NULL, 0, 0, 0, NULL}
855 };
856
857 static vx_fp_test_t
858 vx_tdivORtsqrt_tests[] = {
859 { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
860 { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
861 { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
862 { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
863 { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
864 { NULL, NULL, NULL, 0 , 0, 0, NULL}
865 };
866
/* Eight 64-bit integer inputs for the doubleword integer-to-FP conversions. */
static unsigned long long doubleWord[] = {
   0x0000000000000000ULL,
   0xffffffff00000000ULL,
   0x00000000ffffffffULL,
   0xffffffffffffffffULL,
   0x89abcde123456789ULL,
   0x0102030405060708ULL,
   0x00000000a0b1c2d3ULL,
   0x1111222233334444ULL
};

/* Eight 32-bit integer inputs for the word integer-to-FP conversions. */
static unsigned int singleWord[] = {
   0x00000000U,
   0xffff0000U,
   0x0000ffffU,
   0xffffffffU,
   0x89a73522U,
   0x01020304U,
   0x0000abcdU,
   0x11223344U
};
886
887 typedef struct vx_intToFp_test
888 {
889 test_func_t test_func;
890 const char * name;
891 void * targs;
892 int num_tests;
893 precision_type_t precision;
894 vx_fp_test_type type;
895 } vx_intToFp_test_t;
896
897 static vx_intToFp_test_t
898 intToFp_tests[] = {
899 { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
900 { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
901 { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
902 { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
903 { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
904 { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
905 { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
906 { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
907 { NULL, NULL, NULL, 0, 0 }
908 };
909
910 static Bool do_OE;
/* Bit flags describing which form of a divide-extended instruction to run. */
typedef enum {
   DIV_BASE = 1 << 0,   /* always set                 */
   DIV_OE   = 1 << 1,   /* OE ('o') form requested    */
   DIV_DOT  = 1 << 2,   /* record ('.') form requested */
} div_type_t;
/* Possible divde type combinations are:
 *   - base
 *   - base+dot
 *   - base+OE
 *   - base+OE+dot
 */
#ifdef __powerpc64__
/* Execute divdeu, or the 'o' / '.' / 'o.' variant selected by do_OE and
 * do_dot, with r14 and r15 as inputs and r17 as the destination.  CR and
 * XER are cleared immediately before the instruction and captured into
 * div_flags / div_xer immediately after it.
 *
 * The record ('.') forms write CR0 and the OE ('o') forms write XER, so
 * those registers are declared as clobbered.
 *
 * The clear/capture pair is deliberately kept inside each case rather than
 * hoisted around the switch: the comparison the compiler emits for the
 * switch itself may write a CR field, which would otherwise land between
 * the clear and the instruction under test.
 */
static void test_divdeu(void)
{
   int divdeu_type = DIV_BASE;
   if (do_OE)
      divdeu_type |= DIV_OE;
   if (do_dot)
      divdeu_type |= DIV_DOT;

   switch (divdeu_type) {
      case DIV_BASE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu %0, %1, %2"
                               : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo %0, %1, %2"
                               : "=r" (r17) : "r" (r14),"r" (r15) : "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu. %0, %1, %2"
                               : "=r" (r17) : "r" (r14),"r" (r15) : "cr0");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo. %0, %1, %2"
                               : "=r" (r17) : "r" (r14),"r" (r15) : "cr0", "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      default:
         fprintf(stderr, "Invalid divdeu type. Exiting\n");
         exit(1);
   }
}
#endif
958
test_divwe(void)959 static void test_divwe(void)
960 {
961 int divwe_type = DIV_BASE;
962 if (do_OE)
963 divwe_type |= DIV_OE;
964 if (do_dot)
965 divwe_type |= DIV_DOT;
966
967 switch (divwe_type) {
968 case 1:
969 SET_CR_XER_ZERO;
970 __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
971 GET_CR_XER(div_flags, div_xer);
972 break;
973 case 3:
974 SET_CR_XER_ZERO;
975 __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
976 GET_CR_XER(div_flags, div_xer);
977 break;
978 case 5:
979 SET_CR_XER_ZERO;
980 __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
981 GET_CR_XER(div_flags, div_xer);
982 break;
983 case 7:
984 SET_CR_XER_ZERO;
985 __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
986 GET_CR_XER(div_flags, div_xer);
987 break;
988 default:
989 fprintf(stderr, "Invalid divweu type. Exiting\n");
990 exit(1);
991 }
992 }
993
994
995 typedef struct simple_test {
996 test_func_t test_func;
997 char * name;
998 precision_type_t precision;
999 } simple_test_t;
1000
1001
/* Load four single-precision operand pairs into the input vectors.
 *
 *   targs        first of four consecutive operand-index pairs
 *   swap_inputs  when True the B operands are written to vec_out instead
 *                of vec_inB
 *
 * For element i, the A operand (spec_sp_fargs[fra_idx]) is copied into
 * vec_inA and the B operand (spec_sp_fargs[frb_idx]) into the chosen B
 * vector.  Byte offsets are computed on unsigned char pointers; arithmetic
 * on void * is a compiler extension rather than standard C.
 */
static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   enum { NB_ELEMS = 4 };
   unsigned char * vec_a = (unsigned char *)&vec_inA;
   unsigned char * vec_b = swap_inputs ? (unsigned char *)&vec_out
                                       : (unsigned char *)&vec_inB;
   int i;

   for (i = 0; i < NB_ELEMS; i++, targs++) {
      // copy single precision FP into vector element i
      memcpy(vec_a + i * sizeof(float), &spec_sp_fargs[targs->fra_idx], sizeof(float));
      memcpy(vec_b + i * sizeof(float), &spec_sp_fargs[targs->frb_idx], sizeof(float));
   }
}
1019
/* Load two double-precision operand pairs into the input vectors.
 *
 *   targs        first of two consecutive operand-index pairs
 *   swap_inputs  when True the B operands are written to vec_out instead
 *                of vec_inB
 *
 * For element i, the A operand (spec_fargs[fra_idx]) is copied into
 * vec_inA and the B operand (spec_fargs[frb_idx]) into the chosen B
 * vector.  Byte offsets are computed on unsigned char pointers; arithmetic
 * on void * is a compiler extension rather than standard C.
 */
static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   enum { NB_ELEMS = 2 };
   unsigned char * vec_a = (unsigned char *)&vec_inA;
   unsigned char * vec_b = swap_inputs ? (unsigned char *)&vec_out
                                       : (unsigned char *)&vec_inB;
   int i;

   for (i = 0; i < NB_ELEMS; i++, targs++) {
      // copy double precision FP into vector element i
      memcpy(vec_a + i * sizeof(double), &spec_fargs[targs->fra_idx], sizeof(double));
      memcpy(vec_b + i * sizeof(double), &spec_fargs[targs->frb_idx], sizeof(double));
   }
}
1037
1038 #define VX_NOT_CMP_OP 0xffffffff
print_vector_fp_result(unsigned int cc,vx_fp_test_t * test_group,int i,Bool print_vec_out)1039 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
1040 {
1041 int a_idx, b_idx, k;
1042 char * name = malloc(20);
1043 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1044 int loops = dp ? 2 : 4;
1045 fp_test_args_t * targs = &test_group->targs[i];
1046 unsigned long long * frA_dp, * frB_dp, * dst_dp;
1047 unsigned int * frA_sp, *frB_sp, * dst_sp;
1048 strcpy(name, test_group->name);
1049 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1050 for (k = 0; k < loops; k++) {
1051 a_idx = targs->fra_idx;
1052 b_idx = targs->frb_idx;
1053 if (k)
1054 printf(" AND ");
1055 if (dp) {
1056 frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1057 frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1058 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1059 } else {
1060 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1061 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1062 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1063 }
1064 targs++;
1065 }
1066 if (cc != VX_NOT_CMP_OP)
1067 printf(" ? cc=%x", cc);
1068
1069 if (print_vec_out) {
1070 if (dp) {
1071 dst_dp = (unsigned long long *) &vec_out;
1072 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1073 } else {
1074 dst_sp = (unsigned int *) &vec_out;
1075 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1076 }
1077 } else {
1078 printf("\n");
1079 }
1080 free(name);
1081 }
1082
1083
1084
test_vsx_one_fp_arg(void)1085 static void test_vsx_one_fp_arg(void)
1086 {
1087 test_func_t func;
1088 int k;
1089 k = 0;
1090 build_special_fargs_table();
1091
1092 while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1093 int idx, i;
1094 vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
1095 Bool estimate = (test_group.type == VX_ESTIMATE);
1096 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1097 Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1098 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1099 Bool sparse_sp = False;
1100 int stride = dp ? 2 : 4;
1101 int loops = is_scalar ? 1 : stride;
1102 stride = is_scalar ? 1: stride;
1103
1104 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1105 * |___ SP___|_Unused_|___SP___|__Unused__| // for vector op
1106 * or
1107 * |___ SP___|_Unused_|_Unused_|__Unused__| // for scalar op
1108 *
1109 * For the vector op case, we need to adjust stride from '4' to '2', since
1110 * we'll only be loading two values per loop into the input register.
1111 */
1112 if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
1113 sparse_sp = True;
1114 stride = 2;
1115 }
1116
1117 for (i = 0; i < test_group.num_tests; i+=stride) {
1118 unsigned int * pv;
1119 void * inB;
1120
1121 pv = (unsigned int *)&vec_out;
1122 // clear vec_out
1123 for (idx = 0; idx < 4; idx++, pv++)
1124 *pv = 0;
1125
1126 if (dp) {
1127 int j;
1128 unsigned long long * frB_dp, *dst_dp;
1129 for (j = 0; j < loops; j++) {
1130 inB = (void *)&spec_fargs[i + j];
1131 // copy double precision FP into vector element i
1132 memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1133 }
1134 // execute test insn
1135 (*func)();
1136 dst_dp = (unsigned long long *) &vec_out;
1137 printf("#%d: %s ", i/stride, test_group.name);
1138 for (j = 0; j < loops; j++) {
1139 if (j)
1140 printf("; ");
1141 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1142 printf("%s(%016llx)", test_group.op, *frB_dp);
1143 if (estimate) {
1144 Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, j);
1145 printf(" ==> %s)", res ? "PASS" : "FAIL");
1146 /* For debugging . . .
1147 printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
1148 */
1149 } else {
1150 vx_fp_test_type type = test_group.type;
1151 switch (type) {
1152 case VX_SCALAR_CONV_TO_WORD:
1153 printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
1154 break;
1155 case VX_CONV_TO_SINGLE:
1156 printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
1157 break;
1158 default: // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
1159 printf(" = %016llx", dst_dp[j]);
1160 }
1161 }
1162 }
1163 printf("\n");
1164 } else {
1165 int j, skip_slot;
1166 unsigned int * frB_sp, * dst_sp = NULL;
1167 unsigned long long * dst_dp = NULL;
1168 if (sparse_sp) {
1169 skip_slot = 1;
1170 loops = 2;
1171 } else {
1172 skip_slot = 0;
1173 }
1174 for (j = 0; j < loops; j++) {
1175 inB = (void *)&spec_sp_fargs[i + j];
1176 // copy single precision FP into vector element i
1177 if (skip_slot && j > 0)
1178 memcpy(((void *)&vec_inB) + ((j + j) * 4), inB, 4);
1179 else
1180 memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1181 }
1182 // execute test insn
1183 (*func)();
1184 if (test_group.type == VX_CONV_TO_DOUBLE)
1185 dst_dp = (unsigned long long *) &vec_out;
1186 else
1187 dst_sp = (unsigned int *) &vec_out;
1188 // print result
1189 printf("#%d: %s ", i/stride, test_group.name);
1190 for (j = 0; j < loops; j++) {
1191 if (j)
1192 printf("; ");
1193 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1194 printf("%s(%08x)", test_group.op, *frB_sp);
1195 if (estimate) {
1196 Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, j);
1197 printf(" ==> %s)", res ? "PASS" : "FAIL");
1198 } else {
1199 if (test_group.type == VX_CONV_TO_DOUBLE)
1200 printf(" = %016llx", dst_dp[j]);
1201 else
1202 /* Special case: Current VEX implementation for fsqrts (single precision)
1203 * uses the same implementation as that used for double precision fsqrt.
1204 * However, I've found that for xvsqrtsp, the result from that implementation
1205 * may be off by the two LSBs. Generally, even this small inaccuracy can cause the
1206 * output to appear very different if you end up with a carry. But for the given
1207 * inputs in this testcase, we can simply mask out these bits.
1208 */
1209 printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
1210 }
1211 }
1212 printf("\n");
1213 }
1214 }
1215 k++;
1216 printf( "\n" );
1217 }
1218 }
1219
test_int_to_fp_convert(void)1220 static void test_int_to_fp_convert(void)
1221 {
1222 test_func_t func;
1223 int k;
1224 k = 0;
1225
1226 while ((func = intToFp_tests[k].test_func)) {
1227 int idx, i;
1228 vx_intToFp_test_t test_group = intToFp_tests[k];
1229 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1230 Bool sparse_sp = False;
1231 int stride = dp ? 2 : 4;
1232 int loops = stride;
1233
1234 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1235 * |___ int___|_Unused_|___int___|__Unused__| // for vector op
1236 * or
1237 * We need to adjust stride from '4' to '2', since we'll only be loading
1238 * two values per loop into the input register.
1239 */
1240 if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
1241 sparse_sp = True;
1242 stride = 2;
1243 }
1244
1245 for (i = 0; i < test_group.num_tests; i+=stride) {
1246 unsigned int * pv;
1247 void * inB;
1248
1249 pv = (unsigned int *)&vec_out;
1250 // clear vec_out
1251 for (idx = 0; idx < 4; idx++, pv++)
1252 *pv = 0;
1253
1254 if (dp) {
1255 int j;
1256 unsigned long long *dst_dw, * targs = test_group.targs;
1257 for (j = 0; j < loops; j++) {
1258 inB = (void *)&targs[i + j];
1259 // copy doubleword into vector element i
1260 memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1261 }
1262 // execute test insn
1263 (*func)();
1264 dst_dw = (unsigned long long *) &vec_out;
1265 printf("#%d: %s ", i/stride, test_group.name);
1266 for (j = 0; j < loops; j++) {
1267 if (j)
1268 printf("; ");
1269 printf("conv(%016llx)", targs[i + j]);
1270
1271 if (test_group.type == VX_CONV_TO_SINGLE)
1272 printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
1273 else
1274 printf(" = %016llx", dst_dw[j]);
1275 }
1276 printf("\n");
1277 } else {
1278 int j, skip_slot;
1279 unsigned int * dst_sp = NULL;
1280 unsigned int * targs = test_group.targs;
1281 unsigned long long * dst_dp = NULL;
1282 if (sparse_sp) {
1283 skip_slot = 1;
1284 loops = 2;
1285 } else {
1286 skip_slot = 0;
1287 }
1288 for (j = 0; j < loops; j++) {
1289 inB = (void *)&targs[i + j];
1290 // copy single word into vector element i
1291 if (skip_slot && j > 0)
1292 memcpy(((void *)&vec_inB) + ((j + j) * 4), inB, 4);
1293 else
1294 memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1295 }
1296 // execute test insn
1297 (*func)();
1298 if (test_group.type == VX_CONV_TO_DOUBLE)
1299 dst_dp = (unsigned long long *) &vec_out;
1300 else
1301 dst_sp = (unsigned int *) &vec_out;
1302 // print result
1303 printf("#%d: %s ", i/stride, test_group.name);
1304 for (j = 0; j < loops; j++) {
1305 if (j)
1306 printf("; ");
1307 printf("conv(%08x)", targs[i + j]);
1308 if (test_group.type == VX_CONV_TO_DOUBLE)
1309 printf(" = %016llx", dst_dp[j]);
1310 else
1311 printf(" = %08x", dst_sp[j]);
1312 }
1313 printf("\n");
1314 }
1315 }
1316 k++;
1317 printf( "\n" );
1318 }
1319 }
1320
1321
1322
// Operand pairs for the doubleword divide tests: column 0 is loaded into
// r14 and column 1 into r15 by test_div_extensions().
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4 },
   { 4,                     -3 },
   { 4,                     4 },
   { 4,                     -5 },
   { 3,                     8 },
   { 0x8000000000000000ULL, 0xa },
   { 0x50c,                 -1 },
   { 0x50c,                 -4096 },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0 },
   { 0,                     2 },
   { 0x77,                  0xa3499 }
};
// Number of operand pairs (rows) in div_dw_tdata
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
1340
// Operand pairs for the word divide tests: column 0 is loaded into r14
// and column 1 into r15 by test_div_extensions().
unsigned int div_w_tdata[6][2] = {
   { 0,          2 },
   { 2,          0 },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5 },
   { 77,         66 },
   { 5,          0xfabc1234 },
};
// Number of operand pairs (rows) in div_w_tdata
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1351
1352 typedef struct div_ext_test
1353 {
1354 test_func_t test_func;
1355 const char *name;
1356 int num_tests;
1357 div_type_t div_type;
1358 precision_type_t precision;
1359 } div_ext_test_t;
1360
1361 static div_ext_test_t div_tests[] = {
1362 #ifdef __powerpc64__
1363 { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1364 { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1365 #endif
1366 { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
1367 { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
1368 { NULL, NULL, 0, 0, 0 }
1369 };
1370
test_div_extensions(void)1371 static void test_div_extensions(void)
1372 {
1373 test_func_t func;
1374 int k;
1375 k = 0;
1376
1377 while ((func = div_tests[k].test_func)) {
1378 int i, repeat = 1;
1379 div_ext_test_t test_group = div_tests[k];
1380 do_dot = False;
1381
1382 again:
1383 for (i = 0; i < test_group.num_tests; i++) {
1384 unsigned int condreg;
1385
1386 if (test_group.div_type == DIV_OE)
1387 do_OE = True;
1388 else
1389 do_OE = False;
1390
1391 if (test_group.precision == DOUBLE_TEST) {
1392 r14 = div_dw_tdata[i][0];
1393 r15 = div_dw_tdata[i][1];
1394 } else {
1395 r14 = div_w_tdata[i][0];
1396 r15 = div_w_tdata[i][1];
1397 }
1398 // execute test insn
1399 (*func)();
1400 condreg = (div_flags & 0xf0000000) >> 28;
1401 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1402 if (test_group.precision == DOUBLE_TEST) {
1403 printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
1404 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1405 } else {
1406 printf("0x%08x00000000 / 0x%08x = 0x%08x;",
1407 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1408 }
1409 printf(" CR=%x; XER=%x\n", condreg, div_xer);
1410 }
1411 printf("\n");
1412 if (repeat) {
1413 repeat = 0;
1414 do_dot = True;
1415 goto again;
1416 }
1417 k++;
1418 printf( "\n" );
1419 }
1420 }
1421
1422
test_vx_tdivORtsqrt(void)1423 static void test_vx_tdivORtsqrt(void)
1424 {
1425 test_func_t func;
1426 int k, crx;
1427 unsigned int flags;
1428 k = 0;
1429 do_dot = False;
1430 build_special_fargs_table();
1431
1432 while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
1433 int idx, i;
1434 vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
1435 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1436 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1437 Bool two_args = test_group.targs ? True : False;
1438 int stride = dp ? 2 : 4;
1439 int loops = is_scalar ? 1 : stride;
1440 stride = is_scalar ? 1: stride;
1441
1442 for (i = 0; i < test_group.num_tests; i+=stride) {
1443 unsigned int * pv;
1444 void * inB;
1445
1446 pv = (unsigned int *)&vec_out;
1447 // clear vec_out
1448 for (idx = 0; idx < 4; idx++, pv++)
1449 *pv = 0;
1450
1451 if (dp) {
1452 int j;
1453 unsigned long long * frB_dp;
1454 if (two_args) {
1455 setup_dp_fp_args(&test_group.targs[i], False);
1456 } else {
1457 for (j = 0; j < loops; j++) {
1458 inB = (void *)&spec_fargs[i + j];
1459 // copy double precision FP into vector element i
1460 memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1461 }
1462 }
1463 // execute test insn
1464 // Must do set/get of CRs immediately before/after calling the asm func
1465 // to avoid CRs being modified by other instructions.
1466 SET_FPSCR_ZERO;
1467 SET_CR_XER_ZERO;
1468 (*func)();
1469 GET_CR(flags);
1470 // assumes using CR1
1471 crx = (flags & 0x0f000000) >> 24;
1472 if (two_args) {
1473 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1474 } else {
1475 printf("#%d: %s ", i/stride, test_group.name);
1476 for (j = 0; j < loops; j++) {
1477 if (j)
1478 printf("; ");
1479 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1480 printf("%s(%016llx)", test_group.op, *frB_dp);
1481 }
1482 printf( " ? %x (CRx)\n", crx);
1483 }
1484 } else {
1485 int j;
1486 unsigned int * frB_sp;
1487 if (two_args) {
1488 setup_sp_fp_args(&test_group.targs[i], False);
1489 } else {
1490 for (j = 0; j < loops; j++) {
1491 inB = (void *)&spec_sp_fargs[i + j];
1492 // copy single precision FP into vector element i
1493 memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1494 }
1495 }
1496 // execute test insn
1497 SET_FPSCR_ZERO;
1498 SET_CR_XER_ZERO;
1499 (*func)();
1500 GET_CR(flags);
1501 crx = (flags & 0x0f000000) >> 24;
1502 // print result
1503 if (two_args) {
1504 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1505 } else {
1506 printf("#%d: %s ", i/stride, test_group.name);
1507 for (j = 0; j < loops; j++) {
1508 if (j)
1509 printf("; ");
1510 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1511 printf("%s(%08x)", test_group.op, *frB_sp);
1512 }
1513 printf( " ? %x (CRx)\n", crx);
1514 }
1515 }
1516 }
1517 k++;
1518 printf( "\n" );
1519 }
1520 }
1521
1522
test_ftsqrt(void)1523 static void test_ftsqrt(void)
1524 {
1525 int i, crx;
1526 unsigned int flags;
1527 unsigned long long * frbp;
1528 build_special_fargs_table();
1529
1530
1531 for (i = 0; i < nb_special_fargs; i++) {
1532 f14 = spec_fargs[i];
1533 frbp = (unsigned long long *)&spec_fargs[i];
1534 SET_FPSCR_ZERO;
1535 SET_CR_XER_ZERO;
1536 __asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14));
1537 GET_CR(flags);
1538 crx = (flags & 0x0f000000) >> 24;
1539 printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
1540 }
1541 printf( "\n" );
1542 }
1543
1544 static void
test_popcntw(void)1545 test_popcntw(void)
1546 {
1547 #ifdef __powerpc64__
1548 uint64_t res;
1549 unsigned long long src = 0x9182736405504536ULL;
1550 r14 = src;
1551 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1552 printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
1553 #else
1554 uint32_t res;
1555 unsigned int src = 0x9182730E;
1556 r14 = src;
1557 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1558 printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
1559 #endif
1560 printf( "\n" );
1561 }
1562
1563
1564 static test_table_t
1565 all_tests[] =
1566 {
1567
1568 { &test_vsx_one_fp_arg,
1569 "Test VSX vector and scalar single argument instructions"} ,
1570 { &test_int_to_fp_convert,
1571 "Test VSX vector integer to float conversion instructions" },
1572 { &test_div_extensions,
1573 "Test div extensions" },
1574 { &test_ftsqrt,
1575 "Test ftsqrt instruction" },
1576 { &test_vx_tdivORtsqrt,
1577 "Test vector and scalar tdiv and tsqrt instructions" },
1578 { &test_popcntw,
1579 "Test popcntw instruction" },
1580 { NULL, NULL }
1581 };
1582 #endif // HAS_VSX
1583
/* Program entry point.  When built with HAS_VSX, prints the name of each
 * category in all_tests and runs it, then releases the spec_fargs and
 * spec_sp_fargs tables.  Without HAS_VSX it does nothing.  Always
 * returns 0. */
int main(int argc, char *argv[])
{
#ifdef HAS_VSX

   test_func_t func;
   int i;

   for (i = 0; (func = all_tests[i].test_category) != NULL; i++) {
      printf( "%s\n", all_tests[i].name );
      (*func)();
   }

   // free() accepts NULL, so no guard is needed
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS_VSX

   return 0;
}
1607