• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*  Copyright (C) 2013 IBM
2 
3  Authors: Carl Love  <carll@us.ibm.com>
4           Maynard Johnson <maynardj@us.ibm.com>
5 
6  This program is free software; you can redistribute it and/or
7  modify it under the terms of the GNU General Public License as
8  published by the Free Software Foundation; either version 2 of the
9  License, or (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful, but
12  WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19  02111-1307, USA.
20 
21  The GNU General Public License is contained in the file COPYING.
22 
23  This program is based heavily on the test_isa_2_06_part*.c source files.
24  */
25 
26 #include <stdio.h>
27 
28 #ifdef HAS_ISA_2_07
29 
30 #include <stdint.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <malloc.h>
34 #include <altivec.h>
35 #include <math.h>
36 
/* HWord_t is a 64-bit unsigned integer when __powerpc64__ is defined and a
 * 32-bit unsigned integer otherwise. */
#ifdef __powerpc64__
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */

/* isLE is 1 when VGP_ppc64le_linux is defined, 0 otherwise. */
#if defined(VGP_ppc64le_linux)
#define isLE 1
#else
#define isLE 0
#endif
48 
49 register HWord_t r14 __asm__ ("r14");
50 register HWord_t r15 __asm__ ("r15");
51 register HWord_t r16 __asm__ ("r16");
52 register HWord_t r17 __asm__ ("r17");
53 register double f14 __asm__ ("fr14");
54 register double f15 __asm__ ("fr15");
55 register double f16 __asm__ ("fr16");
56 register double f17 __asm__ ("fr17");
57 
58 static volatile unsigned int cond_reg;
59 
/* Values stored in a Bool. */
#define True  1
#define False 0

/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Write _arg to the condition register (mtcr).
 * NOTE: the expansion already ends in ';'. */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Write _arg to XER (mtxer).
 * NOTE: the expansion already ends in ';'. */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval (mfcr). */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read XER into _lval (mfxer). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR, then XER. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Zeroing helpers built on the setters above. */
#define SET_CR_ZERO \
      SET_CR(0)

#define SET_XER_ZERO \
      SET_XER(0)

#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Move 0.0 into all FPSCR fields selected by mask 0xFF (mtfsf). */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)

/* Boolean type used throughout this file; holds True or False. */
typedef unsigned char Bool;
95 
96 
97 /* These functions below that construct a table of floating point
98  * values were lifted from none/tests/ppc32/jm-insns.c.
99  */
100 
/* Debug tracing for the argument-table builders: prints to stderr when
 * DEBUG_ARGS_BUILD is defined, otherwise expands to an empty statement.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/*
 * Assemble a 64-bit pattern from a sign bit, an exponent field and a
 * mantissa field and store it at farg.
 *
 *   farg : destination; must have room for 8 bytes
 *   s    : sign, placed in bit 63
 *   _exp : exponent field, placed starting at bit 52
 *   mant : mantissa field, OR-ed into the low bits unchanged
 *
 * The bytes are copied with memcpy rather than stored through a casted
 * pointer, so the function is well defined when farg points at a double.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   double as_double;

   memcpy(farg, &bits, sizeof bits);
   memcpy(&as_double, &bits, sizeof as_double);
   /* Casts keep the %llx conversions correct where uint64_t is unsigned long. */
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, (unsigned long long)mant,
              (unsigned long long)bits, as_double);
}
117 
/*
 * Assemble a 32-bit pattern from a sign bit, an exponent field and a
 * mantissa field and store it at farg.
 *
 *   farg : destination; must have room for 4 bytes
 *   s    : sign, placed in bit 31
 *   _exp : exponent field, placed starting at bit 23
 *   mant : mantissa field, OR-ed into the low bits unchanged
 *
 * memcpy is used instead of a store through a casted pointer so the
 * function is well defined when farg points at a float.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   memcpy(farg, &bits, sizeof bits);
}
125 
126 
/* A pair of argument indices for one test case (presumably indices into
 * the special-argument tables declared below -- confirm against users). */
struct fp_test_args {
   int fra_idx;
   int frb_idx;
};
typedef struct fp_test_args fp_test_args_t;

/* Number of entries in the two tables below; set by
 * build_special_fargs_table(). */
static int nb_special_fargs;
/* Heap-allocated table of double-precision test values. */
static double *spec_fargs;
/* Heap-allocated table of single-precision test values. */
static float *spec_sp_fargs;
135 
build_special_fargs_table(void)136 static void build_special_fargs_table(void)
137 {
138    /*
139     * Double precision:
140     * Sign goes from zero to one               (1 bit)
141     * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
142     * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
143     * + special values:
144     * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
145     * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
146     * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
147     * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
148     * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
149     * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
150     * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
151     * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
152     * (8 values)
153     *
154     * Single precision
155     * Sign:     1 bit
156     * Exponent: 8 bits
157     * Mantissa: 23 bits
158     * +0.0      : 0 0x00 0x000000 => 0x00000000
159     * -0.0      : 1 0x00 0x000000 => 0x80000000
160     * +infinity : 0 0xFF 0x000000 => 0x7F800000
161     * -infinity : 1 0xFF 0x000000 => 0xFF800000
162     * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
163     * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
164     * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
165     * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
166    */
167 
168    uint64_t mant;
169    uint32_t mant_sp;
170    uint16_t _exp;
171    int s;
172    int j, i = 0;
173 
174    if (spec_fargs)
175       return;
176 
177    spec_fargs = malloc( 20 * sizeof(double) );
178    spec_sp_fargs = malloc( 20 * sizeof(float) );
179 
180    // #0
181    s = 0;
182    _exp = 0x3fd;
183    mant = 0x8000000000000ULL;
184    register_farg(&spec_fargs[i++], s, _exp, mant);
185 
186    // #1
187    s = 0;
188    _exp = 0x404;
189    mant = 0xf000000000000ULL;
190    register_farg(&spec_fargs[i++], s, _exp, mant);
191 
192    // #2
193    s = 0;
194    _exp = 0x001;
195    mant = 0x8000000b77501ULL;
196    register_farg(&spec_fargs[i++], s, _exp, mant);
197 
198    // #3
199    s = 0;
200    _exp = 0x7fe;
201    mant = 0x800000000051bULL;
202    register_farg(&spec_fargs[i++], s, _exp, mant);
203 
204    // #4
205    s = 0;
206    _exp = 0x012;
207    mant = 0x3214569900000ULL;
208    register_farg(&spec_fargs[i++], s, _exp, mant);
209 
210    /* Special values */
211    /* +0.0      : 0 0x000 0x0000000000000 */
212    // #5
213    s = 0;
214    _exp = 0x000;
215    mant = 0x0000000000000ULL;
216    register_farg(&spec_fargs[i++], s, _exp, mant);
217 
218    /* -0.0      : 1 0x000 0x0000000000000 */
219    // #6
220    s = 1;
221    _exp = 0x000;
222    mant = 0x0000000000000ULL;
223    register_farg(&spec_fargs[i++], s, _exp, mant);
224 
225    /* +infinity : 0 0x7FF 0x0000000000000  */
226    // #7
227    s = 0;
228    _exp = 0x7FF;
229    mant = 0x0000000000000ULL;
230    register_farg(&spec_fargs[i++], s, _exp, mant);
231 
232    /* -infinity : 1 0x7FF 0x0000000000000 */
233    // #8
234    s = 1;
235    _exp = 0x7FF;
236    mant = 0x0000000000000ULL;
237    register_farg(&spec_fargs[i++], s, _exp, mant);
238 
239    /*
240     * This comment applies to values #9 and #10 below:
241     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
242     * so we can't just copy the double-precision value to the corresponding slot in the
243     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
244     * have to manually set the bits using register_sp_farg().
245     */
246 
247    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
248    // #9
249    s = 0;
250    _exp = 0x7FF;
251    mant = 0x7FFFFFFFFFFFFULL;
252    register_farg(&spec_fargs[i++], s, _exp, mant);
253    _exp = 0xff;
254    mant_sp = 0x3FFFFF;
255    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
256 
257    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
258    // #10
259    s = 1;
260    _exp = 0x7FF;
261    mant = 0x7FFFFFFFFFFFFULL;
262    register_farg(&spec_fargs[i++], s, _exp, mant);
263    _exp = 0xff;
264    mant_sp = 0x3FFFFF;
265    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
266 
267    /* +QNaN     : 0 0x7FF 0x8000000000000 */
268    // #11
269    s = 0;
270    _exp = 0x7FF;
271    mant = 0x8000000000000ULL;
272    register_farg(&spec_fargs[i++], s, _exp, mant);
273 
274    /* -QNaN     : 1 0x7FF 0x8000000000000 */
275    // #12
276    s = 1;
277    _exp = 0x7FF;
278    mant = 0x8000000000000ULL;
279    register_farg(&spec_fargs[i++], s, _exp, mant);
280 
281    /* denormalized value */
282    // #13
283    s = 1;
284    _exp = 0x000;
285    mant = 0x8340000078000ULL;
286    register_farg(&spec_fargs[i++], s, _exp, mant);
287 
288    /* Negative finite number */
289    // #14
290    s = 1;
291    _exp = 0x40d;
292    mant = 0x0650f5a07b353ULL;
293    register_farg(&spec_fargs[i++], s, _exp, mant);
294 
295    /* A few positive finite numbers ... */
296    // #15
297    s = 0;
298    _exp = 0x412;
299    mant = 0x32585a9900000ULL;
300    register_farg(&spec_fargs[i++], s, _exp, mant);
301 
302    // #16
303    s = 0;
304    _exp = 0x413;
305    mant = 0x82511a2000000ULL;
306    register_farg(&spec_fargs[i++], s, _exp, mant);
307 
308    // #17
309    s = 0;
310    _exp = 0x403;
311    mant = 0x12ef5a9300000ULL;
312    register_farg(&spec_fargs[i++], s, _exp, mant);
313 
314    // #18
315    s = 0;
316    _exp = 0x405;
317    mant = 0x14bf5d2300000ULL;
318    register_farg(&spec_fargs[i++], s, _exp, mant);
319 
320    // #19
321    s = 0;
322    _exp = 0x409;
323    mant = 0x76bf982440000ULL;
324    register_farg(&spec_fargs[i++], s, _exp, mant);
325 
326 
327    nb_special_fargs = i;
328    for (j = 0; j < i; j++) {
329       if (!(j == 9 || j == 10))
330          spec_sp_fargs[j] = spec_fargs[j];
331    }
332 }
333 
/* 16-byte aligned scratch area of eight zero-initialised words. */
static unsigned int vstg[] __attribute__ ((aligned (16))) = {
   0, 0, 0, 0,
   0, 0, 0, 0
};

/* 16-byte aligned table of 32-bit integer test inputs. */
static unsigned int viargs[] __attribute__ ((aligned (16))) = {
   0x80000001, 0x89abcdef, 0x00112233, 0x74556677,
   0x00001abb, 0x00000001, 0x31929394, 0xa1a2a3a4,
};
/* Element count of viargs, and the number of 4-word groups in it. */
#define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
#define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)

/* 16-byte aligned table of 64-bit integer test inputs. */
static unsigned long long vdargs[] __attribute__ ((aligned (16))) = {
   0x0102030405060708ULL, 0x090A0B0C0E0D0E0FULL,
   0xF1F2F3F4F5F6F7F8ULL, 0xF9FAFBFCFEFDFEFFULL
};
/* Element count of vdargs, and the number of 2-doubleword groups in it. */
#define NUM_VDARGS_INTS (sizeof vdargs/sizeof vdargs[0])
#define NUM_VDARGS_VECS  (NUM_VDARGS_INTS/2)
359 
/* Signature shared by every test routine: no arguments, no result. */
typedef void (*test_func_t)(void);

/* Associates a test-category routine with a name. */
struct test_table
{
   test_func_t  test_category;
   char        *name;
};
367 
368 
/* Precision of a test's input and result. */
typedef enum {
   SINGLE_TEST,
   SINGLE_TEST_SINGLE_RES,
   DOUBLE_TEST,
   DOUBLE_TEST_SINGLE_RES
} precision_type_t;

/* True for SINGLE_TEST and DOUBLE_TEST, false for the *_SINGLE_RES kinds.
 * The argument is parenthesized so that any expression may be passed. */
#define IS_DP_RESULT(x) (((x) == SINGLE_TEST) || ((x) == DOUBLE_TEST))
376 
/* Kind of floating-point test; values are consecutive starting at 0. */
typedef enum {
   VX_FP_SMAS = 0,                  /* multiply add single precision result */
   VX_FP_SMSS = 1,                  /* multiply sub single precision result */
   VX_FP_SNMAS = 2,                 /* negative multiply add single precision result */
   VX_FP_SNMSS = 3,                 /* negative multiply sub single precision result */
   VX_FP_OTHER = 4,
   VX_CONV_WORD = 5,
   VX_ESTIMATE = 6,
   VX_CONV_TO_SINGLE = 7,
   VX_CONV_TO_DOUBLE = 8,
   VX_SCALAR_CONV_TO_WORD = 9,
   VX_SCALAR_SP_TO_VECTOR_SP = 10,
   VX_DEFAULT = 11
} vx_fp_test_type;

/* Kind of load/store test; values start at 1, so 0 never names a kind. */
typedef enum {
   VSX_LOAD = 1,
   VSX_LOAD_SPLAT = 2,
   VSX_STORE = 3,
} vsx_ldst_type;

/* Logical operation selector; values start at 1, so 0 never names an op. */
typedef enum {
   VSX_AND = 1,
   VSX_NAND = 2,
   VSX_ANDC = 3,
   VSX_OR = 4,
   VSX_ORC = 5,
   VSX_NOR = 6,
   VSX_XOR = 7,
   VSX_EQV = 8,
} vsx_log_op;
408 
409 struct vx_fp_test1
410 {
411    test_func_t test_func;
412    const char *name;
413    fp_test_args_t * targs;
414    int num_tests;
415     vx_fp_test_type test_type;
416  };
417 
418 struct ldst_test
419 {
420    test_func_t test_func;
421    const char *name;
422    precision_type_t precision;
423    void * base_addr;
424    uint32_t offset;
425    vsx_ldst_type type;
426 };
427 
428 struct vx_fp_test2
429 {
430    test_func_t test_func;
431    const char *name;
432    fp_test_args_t * targs;
433    int num_tests;
434    precision_type_t precision;
435    vx_fp_test_type test_type;
436    const char * op;
437 };
438 
439 struct xs_conv_test
440 {
441    test_func_t test_func;
442    const char *name;
443    int num_tests;
444 };
445 
446 struct simple_test
447 {
448    test_func_t test_func;
449    const char *name;
450 };
451 
452 struct vsx_logic_test
453 {
454    test_func_t test_func;
455    const char *name;
456    vsx_log_op op;
457 };
458 
459 typedef struct vsx_logic_test logic_test_t;
460 typedef struct ldst_test ldst_test_t;
461 typedef struct simple_test xs_conv_test_t;
462 typedef struct vx_fp_test1 vx_fp_test_basic_t;
463 typedef struct vx_fp_test2 vx_fp_test2_t;
464 typedef struct test_table test_table_t;
465 
466 
467 static vector unsigned int vec_out, vec_inA, vec_inB;
468 
test_xscvdpspn(void)469 static void test_xscvdpspn(void)
470 {
471    __asm__ __volatile__ ("xscvdpspn   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
472 }
473 
test_xscvspdpn(void)474 static void test_xscvspdpn(void)
475 {
476    __asm__ __volatile__ ("xscvspdpn  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
477 }
478 
479 static int do_asp;
test_xsmadds(void)480 static void test_xsmadds(void)
481 {
482    if (do_asp)
483       __asm__ __volatile__ ("xsmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
484    else
485       __asm__ __volatile__ ("xsmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
486 }
487 
test_xsmsubs(void)488 static void test_xsmsubs(void)
489 {
490    if (do_asp)
491       __asm__ __volatile__ ("xsmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
492    else
493       __asm__ __volatile__ ("xsmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
494 }
495 
test_xscvsxdsp(void)496 static void test_xscvsxdsp (void)
497 {
498    __asm__ __volatile__ ("xscvsxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
499 }
500 
test_xscvuxdsp(void)501 static void test_xscvuxdsp (void)
502 {
503    __asm__ __volatile__ ("xscvuxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
504 }
505 
test_xsnmadds(void)506 static void test_xsnmadds(void)
507 {
508    if (do_asp)
509       __asm__ __volatile__ ("xsnmaddasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
510    else
511       __asm__ __volatile__ ("xsnmaddmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
512 }
513 
test_xsnmsubs(void)514 static void test_xsnmsubs(void)
515 {
516    if (do_asp)
517       __asm__ __volatile__ ("xsnmsubasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
518    else
519       __asm__ __volatile__ ("xsnmsubmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
520 }
521 
test_stxsspx(void)522 static void test_stxsspx(void)
523 {
524    __asm__ __volatile__ ("stxsspx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
525 }
526 
test_stxsiwx(void)527 static void test_stxsiwx(void)
528 {
529    __asm__ __volatile__ ("stxsiwx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
530 }
531 
test_lxsiwax(void)532 static void test_lxsiwax(void)
533 {
534    __asm__ __volatile__ ("lxsiwax          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
535 }
536 
test_lxsiwzx(void)537 static void test_lxsiwzx(void)
538 {
539    __asm__ __volatile__ ("lxsiwzx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
540 }
541 
test_lxsspx(void)542 static void test_lxsspx(void)
543 {
544    __asm__ __volatile__ ("lxsspx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
545 }
546 
test_xssqrtsp(void)547 static void test_xssqrtsp(void)
548 {
549    __asm__ __volatile__ ("xssqrtsp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
550 }
551 
test_xsrsqrtesp(void)552 static void test_xsrsqrtesp(void)
553 {
554    __asm__ __volatile__ ("xsrsqrtesp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
555 }
556 
/* Three argument instructions */
test_xxleqv(void)558 static void test_xxleqv(void)
559 {
560    __asm__ __volatile__ ("xxleqv          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
561 }
562 
test_xxlorc(void)563 static void test_xxlorc(void)
564 {
565    __asm__ __volatile__ ("xxlorc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
566 }
567 
test_xxlnand(void)568 static void test_xxlnand(void)
569 {
570    __asm__ __volatile__ ("xxlnand         %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
571 }
572 
test_xsaddsp(void)573 static void test_xsaddsp(void)
574 {
575   __asm__ __volatile__ ("xsaddsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
576 }
577 
test_xssubsp(void)578 static void test_xssubsp(void)
579 {
580   __asm__ __volatile__ ("xssubsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
581 }
582 
test_xsdivsp(void)583 static void test_xsdivsp(void)
584 {
585   __asm__ __volatile__ ("xsdivsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
586 }
587 
test_xsmulsp(void)588 static void test_xsmulsp(void)
589 {
590    __asm__ __volatile__ ("xsmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
591 }
592 
test_xsresp(void)593 static void test_xsresp(void)
594 {
595    __asm__ __volatile__ ("xsresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
596 }
test_xsrsp(void)597 static void test_xsrsp(void)
598 {
599    __asm__ __volatile__ ("xsrsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
600 }
601 
602 fp_test_args_t vx_math_tests[] = {
603                                   {8, 8},
604                                   {8, 14},
605                                   {8, 6},
606                                   {8, 5},
607                                   {8, 4},
608                                   {8, 7},
609                                   {8, 9},
610                                   {8, 11},
611                                   {14, 8},
612                                   {14, 14},
613                                   {14, 6},
614                                   {14, 5},
615                                   {14, 4},
616                                   {14, 7},
617                                   {14, 9},
618                                   {14, 11},
619                                   {6, 8},
620                                   {6, 14},
621                                   {6, 6},
622                                   {6, 5},
623                                   {6, 4},
624                                   {6, 7},
625                                   {6, 9},
626                                   {6, 11},
627                                   {5, 8},
628                                   {5, 14},
629                                   {5, 6},
630                                   {5, 5},
631                                   {5, 4},
632                                   {5, 7},
633                                   {5, 9},
634                                   {5, 11},
635                                   {4, 8},
636                                   {4, 14},
637                                   {4, 6},
638                                   {4, 5},
639                                   {4, 1},
640                                   {4, 7},
641                                   {4, 9},
642                                   {4, 11},
643                                   {7, 8},
644                                   {7, 14},
645                                   {7, 6},
646                                   {7, 5},
647                                   {7, 4},
648                                   {7, 7},
649                                   {7, 9},
650                                   {7, 11},
651                                   {10, 8},
652                                   {10, 14},
653                                   {10, 6},
654                                   {10, 5},
655                                   {10, 4},
656                                   {10, 7},
657                                   {10, 9},
658                                   {10, 11},
659                                   {12, 8},
660                                   {12, 14},
661                                   {12, 6},
662                                   {12, 5},
663                                   {12, 4},
664                                   {12, 7},
665                                   {12, 9},
666                                   {12, 11},
667                                   {8, 8},
668                                   {8, 14},
669                                   {8, 6},
670                                   {8, 5},
671                                   {8, 4},
672                                   {8, 7},
673                                   {8, 9},
674                                   {8, 11},
675                                   {14, 8},
676                                   {14, 14},
677                                   {14, 6},
678                                   {14, 5},
679                                   {14, 4},
680                                   {14, 7},
681                                   {14, 9},
682                                   {14, 11},
683                                   {6, 8},
684                                   {6, 14},
685                                   {6, 6},
686                                   {6, 5},
687                                   {6, 4},
688                                   {6, 7},
689                                   {6, 9},
690                                   {6, 11},
691                                   {5, 8},
692                                   {5, 14},
693                                   {5, 6},
694                                   {5, 5},
695                                   {5, 4},
696                                   {5, 7},
697                                   {5, 9},
698                                   {5, 11},
699                                   {4, 8},
700                                   {4, 14},
701                                   {4, 6},
702                                   {4, 5},
703                                   {4, 1},
704                                   {4, 7},
705                                   {4, 9},
706                                   {4, 11},
707                                   {7, 8},
708                                   {7, 14},
709                                   {7, 6},
710                                   {7, 5},
711                                   {7, 4},
712                                   {7, 7},
713                                   {7, 9},
714                                   {7, 11},
715                                   {10, 8},
716                                   {10, 14},
717                                   {10, 6},
718                                   {10, 5},
719                                   {10, 4},
720                                   {10, 7},
721                                   {10, 9},
722                                   {10, 11},
723                                   {12, 8},
724                                   {12, 14},
725                                   {12, 6},
726                                   {12, 5},
727                                   {12, 4},
728                                   {12, 7},
729                                   {12, 9},
730                                   {12, 11}
731 };
732 
733 // These are all double precision inputs with double word outputs (mostly converted to single precision)
734 static vx_fp_test_basic_t vx_fp_tests[] = {
735                                      { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
736                                      { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
737                                      { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
738                                      { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
739                                      { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
740                                      { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
741                                      { NULL, NULL, NULL, 0, 0 }
742 };
743 
744 static vx_fp_test2_t
745 vsx_one_fp_arg_tests[] = {
746                           { &test_xscvdpspn, "xscvdpspn", NULL, 20, DOUBLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
747                           { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
748                           { &test_xsresp,    "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
749                           { &test_xsrsp,     "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
750                           { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
751                           { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
752                           { NULL, NULL, NULL, 0, 0, 0, NULL}
753 };
754 
755 // These are all double precision inputs with double word outputs (mostly converted to single precision)
756 static vx_fp_test_basic_t
757 vx_simple_scalar_fp_tests[] = {
758                           { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
759                           { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
760                           { NULL, NULL, NULL, 0 , 0}
761 };
762 
763 static ldst_test_t
764 ldst_tests[] = {
765                     { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
766                     { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
767                     { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
768                     { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 4, VSX_LOAD },
769                     { &test_lxsspx,  "lxsspx",  SINGLE_TEST, NULL, 0, VSX_LOAD },
770                     { NULL, NULL, 0, NULL, 0, 0 } };
771 
772 static xs_conv_test_t
773 xs_conv_tests[] = {
774                    { &test_xscvsxdsp, "xscvsxdsp"},
775                    { &test_xscvuxdsp, "xscvuxdsp"},
776                    { NULL, NULL}
777 };
778 
779 static logic_test_t
780 logic_tests[] = {
781                  { &test_xxleqv,  "xxleqv", VSX_EQV },
782                  { &test_xxlorc,  "xxlorc", VSX_ORC },
783                  { &test_xxlnand, "xxlnand", VSX_NAND },
784                  { NULL, NULL}
785 };
786 
/*
 * Check that the doubleword held in vec_out is an acceptable reciprocal
 * (or reciprocal square root) estimate of spec_fargs[idx].
 *
 *   is_rsqrte      : True to validate against 1/sqrt(src), False for 1/src
 *   idx            : index of the source value in spec_fargs[]
 *   output_vec_idx : which 8-byte element of vec_out holds the result
 *
 * Returns True when the result is acceptable, False otherwise.
 */
Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
{
   /* NOTE:
    * This function has been verified only with the xsresp and xsrsqrtesp instructions.
    *
    * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
    * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
    * does an actual reciprocal calculation versus estimation, so the answer we get back from
    * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
    * precision) and the estimate may still be within expected tolerances.  On top of that,
    * we can't count on these estimates always being the same across implementations.
    * For example, with the fre[s] instruction (which should be correct to within one part
    * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
    * one implementation could return 1.0111_1111_0000 and another implementation could return
    * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
    * single bit in common.
    *
    * The upshot is we can't validate the VEX output for these instructions by comparing against
    * stored bit patterns.  We must check that the result is within expected tolerances.
    */

   /* A mask to be used for validation as a last resort.  It keeps the 17
    * most-significant bits of a single-precision pattern (sign, 8 exponent
    * bits and the top 8 mantissa bits), for the reasons discussed above.
    */
#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000

   Bool result = False;
   double src_dp, res_dp;
   float calc_diff = 0;
   float real_diff = 0;
   double recip_divisor;
   float div_result;
   float calc_diff_tmp;
   Bool src_is_negative, res_is_negative;
   unsigned long long src_bits, res_bits;

   /* Fetch the raw bit patterns with memcpy; reading them through casted
    * pointers would violate the strict-aliasing rules. */
   src_dp = spec_fargs[idx];
   memcpy(&src_bits, &src_dp, sizeof src_bits);
   memcpy(&res_bits,
          (const unsigned char *)&vec_out
             + (size_t)output_vec_idx * sizeof res_bits,
          sizeof res_bits);
   memcpy(&res_dp, &res_bits, sizeof res_dp);

   src_is_negative = (src_bits & 0x8000000000000000ULL) ? True : False;
   res_is_negative = (res_bits & 0x8000000000000000ULL) ? True : False;

   // Below are common rules
   /* isnan()/isinf() only promise a non-zero int for "true"; normalise the
    * value before it is narrowed to the one-byte Bool. */
   if (isnan(src_dp))
      return isnan(res_dp) ? True : False;
   if (fpclassify(src_dp) == FP_ZERO)
      return isinf(res_dp) ? True : False;
   if (!src_is_negative && isinf(src_dp))
      return !res_is_negative && (fpclassify(res_dp) == FP_ZERO);
   if (is_rsqrte) {
      if (src_is_negative)
         return isnan(res_dp) ? True : False;
   } else {
      if (src_is_negative && isinf(src_dp))
         return res_is_negative && (fpclassify(res_dp) == FP_ZERO);
   }

   if (is_rsqrte)
      recip_divisor = sqrt(src_dp);
   else
      recip_divisor = src_dp;

   /* The instructions handled by this function take a double precision
    * input, perform a reciprocal estimate in double-precision, round
    * the result to single precision and store into the destination
    * register in double precision format.  So, to check the result
    * for accuracy, we use float (single precision) values.
    */
   div_result = 1.0/recip_divisor;
   calc_diff_tmp = recip_divisor * 16384.0;
   if (isnormal(calc_diff_tmp)) {
      calc_diff = fabs(1.0/calc_diff_tmp);
      real_diff = fabs((float)res_dp - div_result);
      result = ( ( res_dp == div_result )
               || ( real_diff <= calc_diff ) );
#if FRES_DEBUG
      unsigned int dv, rd, cd;
      memcpy(&dv, &div_result, sizeof dv);
      memcpy(&rd, &real_diff, sizeof rd);
      memcpy(&cd, &calc_diff, sizeof cd);
      printf("\n\t {computed div_result: %08x; real_diff:  %08x; calc_diff:  %08x}\n",
             dv, rd, cd);
#endif

   } else {
      /* Unable to compute theoretical difference, so we fall back to masking out
       * un-precise bits.
       */
      float res_sp = (float)res_dp;
      unsigned int div_result_bits, res_sp_bits;

      memcpy(&div_result_bits, &div_result, sizeof div_result_bits);
      memcpy(&res_sp_bits, &res_sp, sizeof res_sp_bits);
#if FRES_DEBUG
      unsigned int calc_diff_tmp_bits;
      memcpy(&calc_diff_tmp_bits, &calc_diff_tmp, sizeof calc_diff_tmp_bits);
      printf("Unable to compute theoretical difference, so we fall back to masking\n");
      printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
             calc_diff_tmp_bits, div_result_bits, res_sp_bits);
#endif
      result = (res_sp_bits & VSX_RECIP_ESTIMATE_MASK_SP) == (div_result_bits & VSX_RECIP_ESTIMATE_MASK_SP);
   }
   return result;
}
893 
test_vx_fp_ops(void)894 static void test_vx_fp_ops(void)
895 {
896 
897    test_func_t func;
898    int k;
899    char * test_name = (char *)malloc(20);
900    void  * vecA_void_ptr, * vecB_void_ptr, * vecOut_void_ptr;
901 
902    if (isLE) {
903       vecA_void_ptr = (void *)&vec_inA + 8;
904       vecB_void_ptr = (void *)&vec_inB + 8;
905       vecOut_void_ptr = (void *)&vec_out + 8;
906    } else {
907       vecA_void_ptr = (void *)&vec_inA;
908       vecB_void_ptr = (void *)&vec_inB;
909       vecOut_void_ptr = (void *)&vec_out;
910    }
911 
912    k = 0;
913    build_special_fargs_table();
914    while ((func = vx_fp_tests[k].test_func)) {
915       int i, repeat = 0;
916       unsigned long long * frap, * frbp, * dst;
917       vx_fp_test_basic_t test_group = vx_fp_tests[k];
918       vx_fp_test_type test_type = test_group.test_type;
919 
920       switch (test_type) {
921          case VX_FP_SMAS:
922          case VX_FP_SMSS:
923          case VX_FP_SNMAS:
924          case VX_FP_SNMSS:
925             if (test_type == VX_FP_SMAS)
926                strcpy(test_name, "xsmadd");
927             else if (test_type == VX_FP_SMSS)
928                strcpy(test_name, "xsmsub");
929             else if (test_type == VX_FP_SNMAS)
930                strcpy(test_name, "xsnmadd");
931             else
932                strcpy(test_name, "xsnmsub");
933 
934             if (!repeat) {
935                repeat = 1;
936                strcat(test_name, "asp");
937                do_asp = 1;
938             }
939             break;
940          case VX_FP_OTHER:
941             strcpy(test_name, test_group.name);
942             break;
943          default:
944             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
945             exit(1);
946       }
947 
948 again:
949       for (i = 0; i < test_group.num_tests; i++) {
950          unsigned int * inA, * inB, * pv;
951 
952          fp_test_args_t aTest = test_group.targs[i];
953          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
954          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
955          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
956          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
957          int idx;
958          unsigned long long vsr_XT;
959          pv = (unsigned int *)&vec_out;
960 
961          // Only need to copy one doubleword into each vector's element 0
962          memcpy(vecA_void_ptr, inA, 8);
963          memcpy(vecB_void_ptr, inB, 8);
964 
965          // clear vec_out
966          for (idx = 0; idx < 4; idx++, pv++)
967             *pv = 0;
968 
969          if (test_type != VX_FP_OTHER) {
970             /* Then we need a third src argument, which is stored in element 0 of
971              * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
972              * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
973              * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
974              * data (input args, result) contain only two inputs, so I arbitrarily
975              * use spec_fargs elements 4 and 14 (alternating) for the third source
976              * argument.  We can use the same input data for a given pair of
977              * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
978              * the expected result should be the same.
979              */
980             int extra_arg_idx;
981             if (i % 2)
982                extra_arg_idx = 4;
983             else
984                extra_arg_idx = 14;
985 
986             if (repeat) {
987                /* We're on the first time through of one of the VX_FP_SMx
988                 * test types, meaning we're testing a xs<ZZZ>adp case, thus
989                 * we have to swap inputs as described above:
990                 *    src2 <= VSX[XT]
991                 *    src3 <= VSX[XB]
992                 */
993                memcpy(vecOut_void_ptr, inB, 8);  // src2
994                memcpy(vecB_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
995                frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
996             } else {
997                // Don't need to init src2, as it's done before the switch()
998                memcpy(vecOut_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
999             }
1000             memcpy(&vsr_XT, vecOut_void_ptr, 8);
1001          }
1002 
1003          (*func)();
1004          dst = (unsigned long long *) &vec_out;
1005          if (isLE)
1006             dst++;
1007 
1008          if (test_type == VX_FP_OTHER)
1009             printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
1010                    *frap, *frbp, *dst);
1011          else
1012             printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
1013                     test_name, vsr_XT, *frap, *frbp, *dst );
1014 
1015       }
1016       /*
1017            {
1018                // Debug code.  Keep this block commented out except when debugging.
1019                double result, expected;
1020                memcpy(&result, dst, 8);
1021                memcpy(&expected, &aTest.dp_bin_result, 8);
1022                printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
1023                        spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
1024                        expected, result );
1025             }
1026        */
1027       printf( "\n" );
1028 
1029       if (repeat) {
1030          repeat = 0;
1031          strcat(test_name, "UNKNOWN");
1032          switch (test_type) {
1033             case VX_FP_SMAS:
1034             case VX_FP_SMSS:
1035             case VX_FP_SNMAS:
1036             case VX_FP_SNMSS:
1037                if (test_type == VX_FP_SMAS)
1038                   strcpy(test_name, "xsmadd");
1039                else if (test_type == VX_FP_SMSS)
1040                   strcpy(test_name, "xsmsub");
1041                else if (test_type == VX_FP_SNMAS)
1042                   strcpy(test_name, "xsnmadd");
1043                else
1044                   strcpy(test_name, "xsnmsub");
1045 
1046                do_asp = 0;
1047                strcat(test_name, "msp");
1048                break;
1049             default:
1050                break;
1051          }
1052          goto again;
1053       }
1054       k++;
1055    }
1056    printf( "\n" );
1057    free(test_name);
1058 }
1059 
1060 
test_vsx_one_fp_arg(void)1061 static void test_vsx_one_fp_arg(void)
1062 {
1063    test_func_t func;
1064    int k;
1065    void  * vecB_void_ptr;
1066 
1067    k = 0;
1068    build_special_fargs_table();
1069 
1070    while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1071       int idx, i;
1072       unsigned long long *dst_dp;
1073       unsigned int * dst_sp;
1074       vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
1075       /* size of source operands */
1076       Bool dp  = ((test_group.precision == DOUBLE_TEST) ||
1077 		  (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
1078       /* size of result */
1079       Bool dp_res = IS_DP_RESULT(test_group.precision);
1080       Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1081 
1082       vecB_void_ptr = (void *)&vec_inB;
1083       if (isLE) {
1084          vecB_void_ptr += dp? 8 : 12;
1085       }
1086 
1087       for (i = 0; i < test_group.num_tests; i++) {
1088          unsigned int * pv;
1089          void * inB;
1090 
1091          pv = (unsigned int *)&vec_out;
1092          // clear vec_out
1093          for (idx = 0; idx < 4; idx++, pv++)
1094             *pv = 0;
1095 
1096          if (dp) {
1097             int vec_out_idx;
1098             unsigned long long * frB_dp;
1099             if (isLE)
1100                vec_out_idx = dp_res ? 1 : 3;
1101             else
1102                vec_out_idx = 0;
1103 
1104             if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
1105                /* Take a single-precision value stored in double word element 0
1106                 * of src in double-precision format and convert to single-
1107                 * precision and store in word element 0 of dst.
1108                 */
1109                double input = spec_sp_fargs[i];
1110                memcpy(vecB_void_ptr, (void *)&input, 8);
1111             } else {
1112                inB = (void *)&spec_fargs[i];
1113                // copy double precision FP into input vector element 0
1114                memcpy(vecB_void_ptr, inB, 8);
1115             }
1116 
1117             // execute test insn
1118             (*func)();
1119             if (dp_res)
1120                dst_dp = (unsigned long long *) &vec_out;
1121             else
1122                dst_sp = (unsigned int *) &vec_out;
1123 
1124             printf("#%d: %s ", i, test_group.name);
1125             frB_dp = (unsigned long long *)&spec_fargs[i];
1126             printf("%s(%016llx)", test_group.op, *frB_dp);
1127             if (test_group.test_type == VX_ESTIMATE)
1128             {
1129                Bool res;
1130                res = check_reciprocal_estimate(is_sqrt, i, vec_out_idx);
1131                printf(" ==> %s)", res ? "PASS" : "FAIL");
1132             } else if (dp_res) {
1133                printf(" = %016llx", dst_dp[vec_out_idx]);
1134             } else {
1135                printf(" = %08x", dst_sp[vec_out_idx]);
1136             }
1137 
1138             printf("\n");
1139          } else {  // single precision test type
1140             int vec_out_idx;
1141             if (isLE)
1142                vec_out_idx = dp_res ? 1 : 3;
1143             else
1144                vec_out_idx = 0;
1145             // Clear input vector
1146             pv = (unsigned int *)&vec_inB;
1147             for (idx = 0; idx < 4; idx++, pv++)
1148                *pv = 0;
1149             inB = (void *)&spec_sp_fargs[i];
1150             // copy single precision FP into input vector element i
1151             memcpy(vecB_void_ptr, inB, 4);
1152             // execute test insn
1153             (*func)();
1154             if (dp_res)
1155                dst_dp = (unsigned long long *) &vec_out;
1156             else
1157                dst_sp = (unsigned int *) &vec_out;
1158             // print result
1159             printf("#%d: %s ", i, test_group.name);
1160                printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i]));
1161                if (dp_res)
1162                      printf(" = %016llx", dst_dp[vec_out_idx]);
1163                else
1164                   printf(" = %08x", dst_sp[vec_out_idx]);
1165 
1166             printf("\n");
1167          }
1168       }
1169       k++;
1170       printf( "\n" );
1171    }
1172 }
1173 
1174 /* This function currently only supports two double precision input arguments. */
test_vsx_two_fp_arg(void)1175 static void test_vsx_two_fp_arg(void)
1176 {
1177    test_func_t func;
1178    int k = 0;
1179    void  * vecA_void_ptr, * vecB_void_ptr;
1180 
1181    if (isLE) {
1182       vecA_void_ptr = (void *)&vec_inA + 8;
1183       vecB_void_ptr = (void *)&vec_inB + 8;
1184    } else {
1185       vecA_void_ptr = (void *)&vec_inA;
1186       vecB_void_ptr = (void *)&vec_inB;
1187    }
1188 
1189    build_special_fargs_table();
1190    while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
1191       unsigned long long * frap, * frbp, * dst;
1192       unsigned int * pv;
1193       int idx;
1194       vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
1195       pv = (unsigned int *)&vec_out;
1196       // clear vec_out
1197       for (idx = 0; idx < 4; idx++, pv++)
1198          *pv = 0;
1199 
1200       void * inA, * inB;
1201       int i;
1202       for (i = 0; i < test_group.num_tests; i++) {
1203          fp_test_args_t aTest = test_group.targs[i];
1204          inA = (void *)&spec_fargs[aTest.fra_idx];
1205          inB = (void *)&spec_fargs[aTest.frb_idx];
1206          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1207          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1208          // Only need to copy one doubleword into each vector's element 0
1209          memcpy(vecA_void_ptr, inA, 8);
1210          memcpy(vecB_void_ptr, inB, 8);
1211          (*func)();
1212          dst = (unsigned long long *) &vec_out;
1213          if (isLE)
1214             dst++;
1215          printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
1216                 *frap, *frbp, *dst);
1217       }
1218       printf( "\n" );
1219       k++;
1220    }
1221 }
1222 
1223 /* This function handles the following cases:
1224  *   1) Single precision value stored in double-precision
1225  *      floating-point format in doubleword element 0 of src VSX register
1226  *   2) Integer word value stored in word element 1 of src VSX register
1227  */
_do_store_test(ldst_test_t storeTest)1228 static void _do_store_test (ldst_test_t storeTest)
1229 {
1230    test_func_t func;
1231    unsigned int *dst32;
1232    unsigned int i, idx;
1233    unsigned int * pv = (unsigned int *) storeTest.base_addr;
1234    void  * vecA_void_ptr;
1235 
1236    if (isLE) {
1237       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
1238          vecA_void_ptr = (void *)&vec_inA + 8;
1239    } else {
1240       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
1241          vecA_void_ptr = (void *)&vec_inA + 4;
1242       else
1243          vecA_void_ptr = (void *)&vec_inA;
1244    }
1245 
1246    func = storeTest.test_func;
1247    r14 = (HWord_t) storeTest.base_addr;
1248    r15 = (HWord_t) storeTest.offset;
1249 
1250    /* test some of the pre-defined single precision values */
1251    for (i = 0; i < nb_special_fargs; i+=3) {
1252       // clear out storage destination
1253       for (idx = 0; idx < 4; idx++)
1254          *(pv + idx) = 0;
1255 
1256       printf( "%s:", storeTest.name );
1257       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
1258       {
1259          unsigned int * arg_ptr = (unsigned int *)&spec_sp_fargs[i];
1260          memcpy(vecA_void_ptr, arg_ptr, sizeof(unsigned int));
1261          printf(" %08x ==> ", *arg_ptr);
1262       } else {
1263          unsigned long long * dp;
1264          double input = spec_sp_fargs[i];
1265          dp = (unsigned long long *)&input;
1266          memcpy(vecA_void_ptr, dp, sizeof(unsigned long long));
1267          printf(" %016llx ==> ", *dp);
1268       }
1269 
1270       // execute test insn
1271       (*func)();
1272       dst32 = (unsigned int*)(storeTest.base_addr);
1273       dst32 += (storeTest.offset/sizeof(int));
1274       printf( "%08x\n", *dst32);
1275    }
1276 
1277    printf("\n");
1278 }
1279 
/* Run one load test and print "<name>: <source word> ==> <doubleword 0 of
 * vec_out>" per iteration.
 *
 * A NULL base_addr selects the lxsspx case, whose sources are every third
 * pre-defined single precision value in spec_sp_fargs[]; otherwise the
 * sources are the consecutive integer words of viargs[].  In both cases r14
 * is set to the address of element i and r15 to loadTest.offset, and the
 * word printed as the source is the one offset/sizeof(int) elements past i.
 */
static void _do_load_test(ldst_test_t loadTest)
{
   const test_func_t func = loadTest.test_func;
   const int sp_source = (loadTest.base_addr == NULL);
   /* Number of whole words the offset skips over. */
   const size_t word_skip = loadTest.offset/sizeof(int);
   unsigned int i;
   unsigned int step;
   int num_loops;

   r15 = (HWord_t) loadTest.offset;

   /* Stop early enough that element i + word_skip stays inside the table. */
   if (sp_source) {
      num_loops = nb_special_fargs - word_skip;
      step = 3;
   } else {
      num_loops = NUM_VIARGS_INTS - word_skip;
      step = 1;
   }

   for (i = 0; i < num_loops; i += step) {
      unsigned int src_word;
      unsigned long long loaded;

      printf( "%s:", loadTest.name );
      if (sp_source) {
         /* Show the raw bit pattern of the single precision source. */
         memcpy(&src_word, &spec_sp_fargs[i + word_skip], sizeof src_word);
         printf(" %08x ==> ", src_word);
         r14 = (HWord_t)&spec_sp_fargs[i];
      } else {
         // source is an integer word
         printf(" %08x ==> ", viargs[i + word_skip]);
         r14 = (HWord_t)&viargs[i];
      }

      // execute test insn
      (*func)();

      /* Doubleword element 0 is at byte offset 8 when isLE is set. */
      memcpy(&loaded, (char *)&vec_out + (isLE ? 8 : 0), sizeof loaded);
      printf("%016llx\n", loaded);
   }
   printf("\n");
}
1324 
test_ldst(void)1325 static void test_ldst(void)
1326 {
1327    int k = 0;
1328 
1329    while (ldst_tests[k].test_func) {
1330       if (ldst_tests[k].type == VSX_STORE)
1331          _do_store_test(ldst_tests[k]);
1332       else {
1333          _do_load_test(ldst_tests[k]);
1334       }
1335       k++;
1336       printf("\n");
1337    }
1338 }
1339 
test_xs_conv_ops(void)1340 static void test_xs_conv_ops(void)
1341 {
1342 
1343    test_func_t func;
1344    int k = 0;
1345    void  * vecB_void_ptr;
1346 
1347    if (isLE)
1348       vecB_void_ptr = (void *)&vec_inB + 8;
1349    else
1350       vecB_void_ptr = (void *)&vec_inB;
1351 
1352    build_special_fargs_table();
1353    while ((func = xs_conv_tests[k].test_func)) {
1354       int i;
1355       unsigned long long * dst;
1356       xs_conv_test_t test_group = xs_conv_tests[k];
1357       for (i = 0; i < NUM_VDARGS_INTS; i++) {
1358          unsigned long long  * inB, * pv;
1359          int idx;
1360          inB = (unsigned long long *)&vdargs[i];
1361          memcpy(vecB_void_ptr, inB, 8);
1362          pv = (unsigned long long *)&vec_out;
1363          // clear vec_out
1364          for (idx = 0; idx < 2; idx++, pv++)
1365             *pv = 0ULL;
1366          (*func)();
1367          dst = (unsigned long long *) &vec_out;
1368          if (isLE)
1369             dst++;
1370          printf("#%d: %s %016llx => %016llx\n", i, test_group.name, vdargs[i], *dst);
1371       }
1372       k++;
1373       printf("\n");
1374    }
1375    printf( "\n" );
1376 }
1377 
1378 
test_vsx_logic(void)1379 static void test_vsx_logic(void)
1380 {
1381    logic_test_t aTest;
1382    test_func_t func;
1383    int k;
1384    k = 0;
1385 
1386    while ((func = logic_tests[k].test_func)) {
1387 
1388       unsigned int * pv;
1389       unsigned int * inA, * inB, * dst;
1390       int idx, i;
1391       aTest = logic_tests[k];
1392       for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
1393          pv = (unsigned int *)&vec_out;
1394          inA = &viargs[i];
1395          inB = &viargs[i];
1396          memcpy(&vec_inA, inA, sizeof(vector unsigned int));
1397          memcpy(&vec_inB, inB, sizeof(vector unsigned int));
1398          // clear vec_out
1399          for (idx = 0; idx < 4; idx++, pv++)
1400             *pv = 0;
1401 
1402          // execute test insn
1403          (*func)();
1404          dst = (unsigned int*) &vec_out;
1405 
1406          printf( "#%d: %10s ", k, aTest.name);
1407          printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
1408          printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
1409          printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
1410       }
1411       k++;
1412    }
1413    printf( "\n" );
1414 }
1415 
1416 
1417 //----------------------------------------------------------
1418 
1419 static test_table_t all_tests[] = {
1420                                      { &test_vx_fp_ops,
1421                                        "Test VSX floating point instructions"},
1422                                      { &test_vsx_one_fp_arg,
1423                                        "Test VSX vector and scalar single argument instructions"} ,
1424                                      { &test_vsx_logic,
1425                                        "Test VSX logic instructions" },
1426                                      { &test_xs_conv_ops,
1427                                        "Test VSX scalar integer conversion instructions" },
1428                                      { &test_ldst,
1429                                        "Test VSX load/store dp to sp instructions" },
1430                                      { &test_vsx_two_fp_arg,
1431                                        "Test VSX vector and scalar two argument instructions"} ,
1432                                      { NULL, NULL }
1433 };
1434 
1435 #endif
1436 
/* Entry point.  When built with HAS_ISA_2_07, prints each category heading
 * from all_tests[] and runs its driver, in table order; otherwise prints a
 * single line saying the ISA level is unsupported.  Always returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_ISA_2_07
   int i;

   for (i = 0; all_tests[i].test_category; i++) {
      test_func_t run_category = all_tests[i].test_category;

      printf( "%s\n", all_tests[i].name );
      (*run_category)();
   }
#else
   printf("NO ISA 2.07 SUPPORT\n");
#endif
   return 0;
}
1456