• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*  Copyright (C) 2013 IBM
2 
3  Authors: Carl Love  <carll@us.ibm.com>
4           Maynard Johnson <maynardj@us.ibm.com>
5 
6  This program is free software; you can redistribute it and/or
7  modify it under the terms of the GNU General Public License as
8  published by the Free Software Foundation; either version 2 of the
9  License, or (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful, but
12  WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19  02111-1307, USA.
20 
21  The GNU General Public License is contained in the file COPYING.
22 
23  This program is based heavily on the test_isa_2_06_part*.c source files.
24  */
25 
26 #include <stdio.h>
27 
28 #ifdef HAS_ISA_2_07
29 
30 #include <stdint.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <malloc.h>
34 #include <altivec.h>
35 #include <math.h>
36 
37 #ifndef __powerpc64__
38 typedef uint32_t HWord_t;
39 #else
40 typedef uint64_t HWord_t;
41 #endif /* __powerpc64__ */
42 
43 register HWord_t r14 __asm__ ("r14");
44 register HWord_t r15 __asm__ ("r15");
45 register HWord_t r16 __asm__ ("r16");
46 register HWord_t r17 __asm__ ("r17");
47 register double f14 __asm__ ("fr14");
48 register double f15 __asm__ ("fr15");
49 register double f16 __asm__ ("fr16");
50 register double f17 __asm__ ("fr17");
51 
52 static volatile unsigned int cond_reg;
53 
/* Truth values used together with the Bool typedef. */
#define False 0
#define True  1

/* Clobber list that names every condition register field. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Move _arg into CR / XER.
 * NOTE: both expansions already end in ';', unlike the GET_* macros.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read CR / XER into the lvalue _lval. */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR and XER, in that order. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Zeroing helpers built on the setters above. */
#define SET_CR_ZERO  SET_CR(0)
#define SET_XER_ZERO SET_XER(0)
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Issue mtfsf with field mask 0xFF and a 0.0 source operand. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)

typedef unsigned char Bool;
89 
90 
91 /* These functions below that construct a table of floating point
92  * values were lifted from none/tests/ppc32/jm-insns.c.
93  */
94 
/* AB_DPRINTF: printf-style trace to stderr, compiled in only when
 * DEBUG_ARGS_BUILD is defined; otherwise it expands to an empty statement.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, ...) do { fprintf(stderr, fmt , ##__VA_ARGS__); } while (0)
#else
#define AB_DPRINTF(fmt, ...) do { } while (0)
#endif
100 
/*
 * Assemble a 64-bit floating point bit pattern and store it at farg.
 *
 *   farg  destination; must point to at least 8 writable bytes
 *   s     sign, shifted into bit 63
 *   _exp  exponent field, shifted into bits 52 and up
 *   mant  fraction bits, OR-ed in unmasked
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;

   /* Copy the bytes instead of storing through a casted pointer, so the
    * destination (normally a double) is never written as a uint64_t lvalue.
    */
   memcpy(farg, &bits, sizeof bits);

   /* %llx expects unsigned long long; uint64_t may be unsigned long. */
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, (unsigned long long)mant, (unsigned long long)bits,
              *(double *)farg);
}
111 
/*
 * Assemble a 32-bit floating point bit pattern and store it at farg.
 *
 *   farg  destination; must point to at least 4 writable bytes
 *   s     sign, shifted into bit 31
 *   _exp  exponent field, shifted into bits 23 and up
 *   mant  fraction bits, OR-ed in unmasked
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   /* Copy the bytes instead of storing through a casted pointer, so the
    * destination (normally a float) is never written as a uint32_t lvalue.
    */
   memcpy(farg, &bits, sizeof bits);
}
119 
120 
/* One test case: a pair of indices into the special FP argument tables. */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first operand */
   int frb_idx;   /* index of the second operand */
} fp_test_args_t;

/* Tables filled in by build_special_fargs_table(). */
static double * spec_fargs;      /* double precision values */
static float * spec_sp_fargs;    /* single precision values */
static int nb_special_fargs;     /* number of entries in each table */
129 
build_special_fargs_table(void)130 static void build_special_fargs_table(void)
131 {
132    /*
133     * Double precision:
134     * Sign goes from zero to one               (1 bit)
135     * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
136     * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
137     * + special values:
138     * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
139     * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
140     * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
141     * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
142     * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
143     * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
144     * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
145     * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
146     * (8 values)
147     *
148     * Single precision
149     * Sign:     1 bit
150     * Exponent: 8 bits
151     * Mantissa: 23 bits
152     * +0.0      : 0 0x00 0x000000 => 0x00000000
153     * -0.0      : 1 0x00 0x000000 => 0x80000000
154     * +infinity : 0 0xFF 0x000000 => 0x7F800000
155     * -infinity : 1 0xFF 0x000000 => 0xFF800000
156     * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
157     * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
158     * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
159     * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
160    */
161 
162    uint64_t mant;
163    uint32_t mant_sp;
164    uint16_t _exp;
165    int s;
166    int j, i = 0;
167 
168    if (spec_fargs)
169       return;
170 
171    spec_fargs = malloc( 20 * sizeof(double) );
172    spec_sp_fargs = malloc( 20 * sizeof(float) );
173 
174    // #0
175    s = 0;
176    _exp = 0x3fd;
177    mant = 0x8000000000000ULL;
178    register_farg(&spec_fargs[i++], s, _exp, mant);
179 
180    // #1
181    s = 0;
182    _exp = 0x404;
183    mant = 0xf000000000000ULL;
184    register_farg(&spec_fargs[i++], s, _exp, mant);
185 
186    // #2
187    s = 0;
188    _exp = 0x001;
189    mant = 0x8000000b77501ULL;
190    register_farg(&spec_fargs[i++], s, _exp, mant);
191 
192    // #3
193    s = 0;
194    _exp = 0x7fe;
195    mant = 0x800000000051bULL;
196    register_farg(&spec_fargs[i++], s, _exp, mant);
197 
198    // #4
199    s = 0;
200    _exp = 0x012;
201    mant = 0x3214569900000ULL;
202    register_farg(&spec_fargs[i++], s, _exp, mant);
203 
204    /* Special values */
205    /* +0.0      : 0 0x000 0x0000000000000 */
206    // #5
207    s = 0;
208    _exp = 0x000;
209    mant = 0x0000000000000ULL;
210    register_farg(&spec_fargs[i++], s, _exp, mant);
211 
212    /* -0.0      : 1 0x000 0x0000000000000 */
213    // #6
214    s = 1;
215    _exp = 0x000;
216    mant = 0x0000000000000ULL;
217    register_farg(&spec_fargs[i++], s, _exp, mant);
218 
219    /* +infinity : 0 0x7FF 0x0000000000000  */
220    // #7
221    s = 0;
222    _exp = 0x7FF;
223    mant = 0x0000000000000ULL;
224    register_farg(&spec_fargs[i++], s, _exp, mant);
225 
226    /* -infinity : 1 0x7FF 0x0000000000000 */
227    // #8
228    s = 1;
229    _exp = 0x7FF;
230    mant = 0x0000000000000ULL;
231    register_farg(&spec_fargs[i++], s, _exp, mant);
232 
233    /*
234     * This comment applies to values #9 and #10 below:
235     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
236     * so we can't just copy the double-precision value to the corresponding slot in the
237     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
238     * have to manually set the bits using register_sp_farg().
239     */
240 
241    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
242    // #9
243    s = 0;
244    _exp = 0x7FF;
245    mant = 0x7FFFFFFFFFFFFULL;
246    register_farg(&spec_fargs[i++], s, _exp, mant);
247    _exp = 0xff;
248    mant_sp = 0x3FFFFF;
249    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
250 
251    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
252    // #10
253    s = 1;
254    _exp = 0x7FF;
255    mant = 0x7FFFFFFFFFFFFULL;
256    register_farg(&spec_fargs[i++], s, _exp, mant);
257    _exp = 0xff;
258    mant_sp = 0x3FFFFF;
259    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
260 
261    /* +QNaN     : 0 0x7FF 0x8000000000000 */
262    // #11
263    s = 0;
264    _exp = 0x7FF;
265    mant = 0x8000000000000ULL;
266    register_farg(&spec_fargs[i++], s, _exp, mant);
267 
268    /* -QNaN     : 1 0x7FF 0x8000000000000 */
269    // #12
270    s = 1;
271    _exp = 0x7FF;
272    mant = 0x8000000000000ULL;
273    register_farg(&spec_fargs[i++], s, _exp, mant);
274 
275    /* denormalized value */
276    // #13
277    s = 1;
278    _exp = 0x000;
279    mant = 0x8340000078000ULL;
280    register_farg(&spec_fargs[i++], s, _exp, mant);
281 
282    /* Negative finite number */
283    // #14
284    s = 1;
285    _exp = 0x40d;
286    mant = 0x0650f5a07b353ULL;
287    register_farg(&spec_fargs[i++], s, _exp, mant);
288 
289    /* A few positive finite numbers ... */
290    // #15
291    s = 0;
292    _exp = 0x412;
293    mant = 0x32585a9900000ULL;
294    register_farg(&spec_fargs[i++], s, _exp, mant);
295 
296    // #16
297    s = 0;
298    _exp = 0x413;
299    mant = 0x82511a2000000ULL;
300    register_farg(&spec_fargs[i++], s, _exp, mant);
301 
302    // #17
303    s = 0;
304    _exp = 0x403;
305    mant = 0x12ef5a9300000ULL;
306    register_farg(&spec_fargs[i++], s, _exp, mant);
307 
308    // #18
309    s = 0;
310    _exp = 0x405;
311    mant = 0x14bf5d2300000ULL;
312    register_farg(&spec_fargs[i++], s, _exp, mant);
313 
314    // #19
315    s = 0;
316    _exp = 0x409;
317    mant = 0x76bf982440000ULL;
318    register_farg(&spec_fargs[i++], s, _exp, mant);
319 
320 
321    nb_special_fargs = i;
322    for (j = 0; j < i; j++) {
323       if (!(j == 9 || j == 10))
324          spec_sp_fargs[j] = spec_fargs[j];
325    }
326 }
327 
/* 16-byte aligned scratch area of eight zeroed words. */
static unsigned int vstg[8] __attribute__ ((aligned (16))) = { 0 };

/* 16-byte aligned integer input words. */
static unsigned int viargs[] __attribute__ ((aligned (16))) = {
   0x80000001, 0x89abcdef, 0x00112233, 0x74556677,
   0x00001abb, 0x00000001, 0x31929394, 0xa1a2a3a4,
};
/* Number of words in viargs, and number of 4-word groups they form. */
#define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
#define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)
343 
/* Signature shared by every test entry point: no arguments, no result. */
typedef void (*test_func_t)(void);

/* Pairs a test category function with its name. */
struct test_table {
   test_func_t test_category;
   char * name;
};
351 
352 
/* Precision of a test's input and result. */
typedef enum {
   SINGLE_TEST,
   SINGLE_TEST_SINGLE_RES,
   DOUBLE_TEST,
   DOUBLE_TEST_SINGLE_RES
} precision_type_t;

/* True for SINGLE_TEST and DOUBLE_TEST.  The argument is parenthesised so
 * that expressions such as a conditional can be passed safely.
 */
#define IS_DP_RESULT(x) (((x) == SINGLE_TEST) || ((x) == DOUBLE_TEST))
360 
/* Kind of floating point test; selects how a test group is driven. */
typedef enum {
   VX_FP_SMAS = 0,              // multiply add single precision result
   VX_FP_SMSS,                  // multiply sub single precision result
   VX_FP_SNMAS,                 // negative multiply add single precision result
   VX_FP_SNMSS,                 // negative multiply sub single precision result
   VX_FP_OTHER,
   VX_CONV_WORD,
   VX_ESTIMATE,
   VX_CONV_TO_SINGLE,
   VX_CONV_TO_DOUBLE,
   VX_SCALAR_CONV_TO_WORD,
   VX_SCALAR_SP_TO_VECTOR_SP,
   VX_DEFAULT
} vx_fp_test_type;

/* Load/store test flavours; numbering starts at 1. */
typedef enum {
   VSX_LOAD = 1,
   VSX_LOAD_SPLAT,
   VSX_STORE
} vsx_ldst_type;

/* Logical operations; numbering starts at 1. */
typedef enum {
   VSX_AND = 1,
   VSX_NAND,
   VSX_ANDC,
   VSX_OR,
   VSX_ORC,
   VSX_NOR,
   VSX_XOR,
   VSX_EQV
} vsx_log_op;
392 
393 struct vx_fp_test1
394 {
395    test_func_t test_func;
396    const char *name;
397    fp_test_args_t * targs;
398    int num_tests;
399     vx_fp_test_type test_type;
400  };
401 
402 struct ldst_test
403 {
404    test_func_t test_func;
405    const char *name;
406    precision_type_t precision;
407    void * base_addr;
408    uint32_t offset;
409    vsx_ldst_type type;
410 };
411 
412 struct vx_fp_test2
413 {
414    test_func_t test_func;
415    const char *name;
416    fp_test_args_t * targs;
417    int num_tests;
418    precision_type_t precision;
419    vx_fp_test_type test_type;
420    const char * op;
421 };
422 
423 struct xs_conv_test
424 {
425    test_func_t test_func;
426    const char *name;
427    int num_tests;
428 };
429 
430 struct simple_test
431 {
432    test_func_t test_func;
433    const char *name;
434 };
435 
436 struct vsx_logic_test
437 {
438    test_func_t test_func;
439    const char *name;
440    vsx_log_op op;
441 };
442 
443 typedef struct vsx_logic_test logic_test_t;
444 typedef struct ldst_test ldst_test_t;
445 typedef struct simple_test xs_conv_test_t;
446 typedef struct vx_fp_test1 vx_fp_test_basic_t;
447 typedef struct vx_fp_test2 vx_fp_test2_t;
448 typedef struct test_table test_table_t;
449 
450 
/* Operand and result vectors shared by the instruction wrappers:
 * vec_inA / vec_inB are sources, vec_out receives the result.
 */
static vector unsigned int vec_out, vec_inA, vec_inB;

/* Execute xscvdpspn with vec_inB as source; the result goes to vec_out. */
static void test_xscvdpspn(void)
{
   __asm__ __volatile__ (
      "xscvdpspn   %x0, %x1"
      : "=wa" (vec_out)
      : "wa" (vec_inB)
   );
}
457 
test_xscvspdpn(void)458 static void test_xscvspdpn(void)
459 {
460    __asm__ __volatile__ ("xscvspdpn  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
461 }
462 
463 static int do_asp;
test_xsmadds(void)464 static void test_xsmadds(void)
465 {
466    if (do_asp)
467       __asm__ __volatile__ ("xsmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
468    else
469       __asm__ __volatile__ ("xsmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
470 }
471 
test_xsmsubs(void)472 static void test_xsmsubs(void)
473 {
474    if (do_asp)
475       __asm__ __volatile__ ("xsmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
476    else
477       __asm__ __volatile__ ("xsmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
478 }
479 
test_xscvsxdsp(void)480 static void test_xscvsxdsp (void)
481 {
482    __asm__ __volatile__ ("xscvsxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
483 }
484 
test_xscvuxdsp(void)485 static void test_xscvuxdsp (void)
486 {
487    __asm__ __volatile__ ("xscvuxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
488 }
489 
test_xsnmadds(void)490 static void test_xsnmadds(void)
491 {
492    if (do_asp)
493       __asm__ __volatile__ ("xsnmaddasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
494    else
495       __asm__ __volatile__ ("xsnmaddmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
496 }
497 
test_xsnmsubs(void)498 static void test_xsnmsubs(void)
499 {
500    if (do_asp)
501       __asm__ __volatile__ ("xsnmsubasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
502    else
503       __asm__ __volatile__ ("xsnmsubmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
504 }
505 
test_stxsspx(void)506 static void test_stxsspx(void)
507 {
508    __asm__ __volatile__ ("stxsspx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
509 }
510 
test_stxsiwx(void)511 static void test_stxsiwx(void)
512 {
513    __asm__ __volatile__ ("stxsiwx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
514 }
515 
test_lxsiwax(void)516 static void test_lxsiwax(void)
517 {
518    __asm__ __volatile__ ("lxsiwax          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
519 }
520 
test_lxsiwzx(void)521 static void test_lxsiwzx(void)
522 {
523    __asm__ __volatile__ ("lxsiwzx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
524 }
525 
test_lxsspx(void)526 static void test_lxsspx(void)
527 {
528    __asm__ __volatile__ ("lxsspx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
529 }
530 
test_xssqrtsp(void)531 static void test_xssqrtsp(void)
532 {
533    __asm__ __volatile__ ("xssqrtsp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
534 }
535 
test_xsrsqrtesp(void)536 static void test_xsrsqrtesp(void)
537 {
538    __asm__ __volatile__ ("xsrsqrtesp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
539 }
540 
541 /* Three argument instuctions */
test_xxleqv(void)542 static void test_xxleqv(void)
543 {
544    __asm__ __volatile__ ("xxleqv          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
545 }
546 
test_xxlorc(void)547 static void test_xxlorc(void)
548 {
549    __asm__ __volatile__ ("xxlorc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
550 }
551 
test_xxlnand(void)552 static void test_xxlnand(void)
553 {
554    __asm__ __volatile__ ("xxlnand         %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
555 }
556 
test_xsaddsp(void)557 static void test_xsaddsp(void)
558 {
559   __asm__ __volatile__ ("xsaddsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
560 }
561 
test_xssubsp(void)562 static void test_xssubsp(void)
563 {
564   __asm__ __volatile__ ("xssubsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
565 }
566 
test_xsdivsp(void)567 static void test_xsdivsp(void)
568 {
569   __asm__ __volatile__ ("xsdivsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
570 }
571 
test_xsmulsp(void)572 static void test_xsmulsp(void)
573 {
574    __asm__ __volatile__ ("xsmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
575 }
576 
test_xsresp(void)577 static void test_xsresp(void)
578 {
579    __asm__ __volatile__ ("xsresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
580 }
test_xsrsp(void)581 static void test_xsrsp(void)
582 {
583    __asm__ __volatile__ ("xsrsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
584 }
585 
586 fp_test_args_t vx_math_tests[] = {
587                                   {8, 8},
588                                   {8, 14},
589                                   {8, 6},
590                                   {8, 5},
591                                   {8, 4},
592                                   {8, 7},
593                                   {8, 9},
594                                   {8, 11},
595                                   {14, 8},
596                                   {14, 14},
597                                   {14, 6},
598                                   {14, 5},
599                                   {14, 4},
600                                   {14, 7},
601                                   {14, 9},
602                                   {14, 11},
603                                   {6, 8},
604                                   {6, 14},
605                                   {6, 6},
606                                   {6, 5},
607                                   {6, 4},
608                                   {6, 7},
609                                   {6, 9},
610                                   {6, 11},
611                                   {5, 8},
612                                   {5, 14},
613                                   {5, 6},
614                                   {5, 5},
615                                   {5, 4},
616                                   {5, 7},
617                                   {5, 9},
618                                   {5, 11},
619                                   {4, 8},
620                                   {4, 14},
621                                   {4, 6},
622                                   {4, 5},
623                                   {4, 1},
624                                   {4, 7},
625                                   {4, 9},
626                                   {4, 11},
627                                   {7, 8},
628                                   {7, 14},
629                                   {7, 6},
630                                   {7, 5},
631                                   {7, 4},
632                                   {7, 7},
633                                   {7, 9},
634                                   {7, 11},
635                                   {10, 8},
636                                   {10, 14},
637                                   {10, 6},
638                                   {10, 5},
639                                   {10, 4},
640                                   {10, 7},
641                                   {10, 9},
642                                   {10, 11},
643                                   {12, 8},
644                                   {12, 14},
645                                   {12, 6},
646                                   {12, 5},
647                                   {12, 4},
648                                   {12, 7},
649                                   {12, 9},
650                                   {12, 11},
651                                   {8, 8},
652                                   {8, 14},
653                                   {8, 6},
654                                   {8, 5},
655                                   {8, 4},
656                                   {8, 7},
657                                   {8, 9},
658                                   {8, 11},
659                                   {14, 8},
660                                   {14, 14},
661                                   {14, 6},
662                                   {14, 5},
663                                   {14, 4},
664                                   {14, 7},
665                                   {14, 9},
666                                   {14, 11},
667                                   {6, 8},
668                                   {6, 14},
669                                   {6, 6},
670                                   {6, 5},
671                                   {6, 4},
672                                   {6, 7},
673                                   {6, 9},
674                                   {6, 11},
675                                   {5, 8},
676                                   {5, 14},
677                                   {5, 6},
678                                   {5, 5},
679                                   {5, 4},
680                                   {5, 7},
681                                   {5, 9},
682                                   {5, 11},
683                                   {4, 8},
684                                   {4, 14},
685                                   {4, 6},
686                                   {4, 5},
687                                   {4, 1},
688                                   {4, 7},
689                                   {4, 9},
690                                   {4, 11},
691                                   {7, 8},
692                                   {7, 14},
693                                   {7, 6},
694                                   {7, 5},
695                                   {7, 4},
696                                   {7, 7},
697                                   {7, 9},
698                                   {7, 11},
699                                   {10, 8},
700                                   {10, 14},
701                                   {10, 6},
702                                   {10, 5},
703                                   {10, 4},
704                                   {10, 7},
705                                   {10, 9},
706                                   {10, 11},
707                                   {12, 8},
708                                   {12, 14},
709                                   {12, 6},
710                                   {12, 5},
711                                   {12, 4},
712                                   {12, 7},
713                                   {12, 9},
714                                   {12, 11}
715 };
716 
717 // These are all double precision inputs with double word outputs (mostly converted to single precision)
718 static vx_fp_test_basic_t vx_fp_tests[] = {
719                                      { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
720                                      { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
721                                      { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
722                                      { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
723                                      { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
724                                      { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
725                                      { NULL, NULL, NULL, 0, 0 }
726 };
727 
728 static vx_fp_test2_t
729 vsx_one_fp_arg_tests[] = {
730                           { &test_xscvdpspn, "xscvdpspn", NULL, 20, SINGLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
731                           { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
732                           { &test_xsresp,    "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
733                           { &test_xsrsp,     "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
734                           { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
735                           { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
736                           { NULL, NULL, NULL, 0, 0, 0, NULL}
737 };
738 
739 // These are all double precision inputs with double word outputs (mostly converted to single precision)
740 static vx_fp_test_basic_t
741 vx_simple_scalar_fp_tests[] = {
742                           { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
743                           { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
744                           { NULL, NULL, NULL, 0 , 0}
745 };
746 
747 static ldst_test_t
748 ldst_tests[] = {
749                     { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
750                     { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
751                     { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
752                     { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 1, VSX_LOAD },
753                     { &test_lxsspx,  "lxsspx",  SINGLE_TEST, NULL, 0, VSX_LOAD },
754                     { NULL, NULL, 0, NULL, 0, 0 } };
755 
756 static xs_conv_test_t
757 xs_conv_tests[] = {
758                    { &test_xscvsxdsp, "xscvsxdsp"},
759                    { &test_xscvuxdsp, "xscvuxdsp"},
760                    { NULL, NULL}
761 };
762 
763 static logic_test_t
764 logic_tests[] = {
765                  { &test_xxleqv,  "xxleqv", VSX_EQV },
766                  { &test_xxlorc,  "xxlorc", VSX_ORC },
767                  { &test_xxlnand, "xxlnand", VSX_NAND },
768                  { NULL, NULL}
769 };
770 
/*
 * Check whether the estimate currently held in vec_out is acceptable for
 * the source value spec_fargs[idx].
 *
 *   is_rsqrte       non-zero: validate 1/sqrt(src); zero: validate 1/src
 *   idx             index of the source value in spec_fargs
 *   output_vec_idx  which 64-bit element of vec_out holds the result
 *
 * Returns True when the result is acceptable, False otherwise.
 */
Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
{
   /* NOTE:
    * This function has been verified only with the xsresp and xsrsqrtesp instructions.
    *
    * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
    * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
    * does an actual reciprocal calculation versus estimation, so the answer we get back from
    * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
    * precision) and the estimate may still be within expected tolerances.  On top of that,
    * we can't count on these estimates always being the same across implementations.
    * For example, with the fre[s] instruction (which should be correct to within one part
    * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
    * one implementation could return 1.0111_1111_0000 and another implementation could return
    * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
    * single bit in common.
    *
    * The upshot is we can't validate the VEX output for these instructions by comparing against
    * stored bit patterns.  We must check that the result is within expected tolerances.
    */

   /* A mask to be used for validation as a last resort.
    * NOTE(review): the original comment said "only use 12 bits of
    * precision", but 0xFFFF8000 keeps the top 17 bits of the word --
    * confirm which was intended.
    */
#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000

   const unsigned long long sign_mask = 0x8000000000000000ULL;
   const unsigned char * out_bytes = (const unsigned char *) &vec_out;
   unsigned long long src_bits, res_bits;
   double src_dp, res_dp, recip_divisor;
   float div_result, calc_diff_tmp;
   Bool src_is_negative, res_is_negative;
   Bool result = False;

   /* All bit-level views are obtained with memcpy; reading a double, a
    * float or the vector through a differently typed pointer is not
    * valid C.
    */
   src_dp = spec_fargs[idx];
   memcpy(&src_bits, &src_dp, sizeof src_bits);
   memcpy(&res_bits, out_bytes + output_vec_idx * (int)sizeof res_bits,
          sizeof res_bits);
   memcpy(&res_dp, &res_bits, sizeof res_dp);

   src_is_negative = (src_bits & sign_mask) ? True : False;
   res_is_negative = (res_bits & sign_mask) ? True : False;

   // Below are common rules
   /* The classification macros yield an int; normalize to True/False so
    * the value survives the narrowing to Bool (unsigned char).
    */
   if (isnan(src_dp))
      return isnan(res_dp) ? True : False;
   if (fpclassify(src_dp) == FP_ZERO)
      return isinf(res_dp) ? True : False;
   if (!src_is_negative && isinf(src_dp))
      return (!res_is_negative && (fpclassify(res_dp) == FP_ZERO)) ? True : False;
   if (is_rsqrte) {
      if (src_is_negative)
         return isnan(res_dp) ? True : False;
   } else {
      if (src_is_negative && isinf(src_dp))
         return (res_is_negative && (fpclassify(res_dp) == FP_ZERO)) ? True : False;
   }

   if (is_rsqrte)
      recip_divisor = sqrt(src_dp);
   else
      recip_divisor = src_dp;

   /* The instructions handled by this function take a double precision
    * input, perform a reciprocal estimate in double-precision, round
    * the result to single precision and store into the destination
    * register in double precision format.  So, to check the result
    * for accuracy, we use float (single precision) values.
    */
   div_result = 1.0/recip_divisor;
   calc_diff_tmp = recip_divisor * 16384.0;
   if (isnormal(calc_diff_tmp)) {
      /* Accept an exact match, or an error no larger than
       * 1 / (divisor * 16384).
       */
      float calc_diff = fabs(1.0/calc_diff_tmp);
      float real_diff = fabs((float)res_dp - div_result);

      result = ( ( res_dp == div_result )
               || ( real_diff <= calc_diff ) );
#if FRES_DEBUG
      {
         unsigned int dv, rd, cd;
         memcpy(&dv, &div_result, sizeof dv);
         memcpy(&rd, &real_diff, sizeof rd);
         memcpy(&cd, &calc_diff, sizeof cd);
         printf("\n\t {computed div_result: %08x; real_diff:  %08x; calc_diff:  %08x}\n",
                dv, rd, cd);
      }
#endif

   } else {
      /* Unable to compute theoretical difference, so we fall back to masking out
       * un-precise bits.
       */
      float res_sp = (float)res_dp;
      unsigned int div_result_sp, dst_sp;

      memcpy(&div_result_sp, &div_result, sizeof div_result_sp);
      memcpy(&dst_sp, &res_sp, sizeof dst_sp);
#if FRES_DEBUG
      {
         unsigned int calc_diff_tmp_sp;
         memcpy(&calc_diff_tmp_sp, &calc_diff_tmp, sizeof calc_diff_tmp_sp);
         printf("Unable to compute theoretical difference, so we fall back to masking\n");
         printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
                calc_diff_tmp_sp, div_result_sp, dst_sp);
      }
#endif
      result = (dst_sp & VSX_RECIP_ESTIMATE_MASK_SP) == (div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
   }
   return result;
}
877 
test_vx_fp_ops(void)878 static void test_vx_fp_ops(void)
879 {
880 
881    test_func_t func;
882    int k;
883    char * test_name = (char *)malloc(20);
884    k = 0;
885 
886    build_special_fargs_table();
887    while ((func = vx_fp_tests[k].test_func)) {
888       int i, repeat = 0;
889       unsigned long long * frap, * frbp, * dst;
890       vx_fp_test_basic_t test_group = vx_fp_tests[k];
891       vx_fp_test_type test_type = test_group.test_type;
892 
893       switch (test_type) {
894          case VX_FP_SMAS:
895          case VX_FP_SMSS:
896          case VX_FP_SNMAS:
897          case VX_FP_SNMSS:
898             if (test_type == VX_FP_SMAS)
899                strcpy(test_name, "xsmadd");
900             else if (test_type == VX_FP_SMSS)
901                strcpy(test_name, "xsmsub");
902             else if (test_type == VX_FP_SNMAS)
903                strcpy(test_name, "xsnmadd");
904             else
905                strcpy(test_name, "xsnmsub");
906 
907             if (!repeat) {
908                repeat = 1;
909                strcat(test_name, "asp");
910                do_asp = 1;
911             }
912             break;
913          case VX_FP_OTHER:
914             strcpy(test_name, test_group.name);
915             break;
916          default:
917             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
918             exit(1);
919       }
920 
921 again:
922       for (i = 0; i < test_group.num_tests; i++) {
923          unsigned int * inA, * inB, * pv;
924 
925          fp_test_args_t aTest = test_group.targs[i];
926          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
927          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
928          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
929          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
930          int idx;
931          unsigned long long vsr_XT;
932          pv = (unsigned int *)&vec_out;
933 
934          // Only need to copy one doubleword into each vector's element 0
935          memcpy(&vec_inA, inA, 8);
936          memcpy(&vec_inB, inB, 8);
937 
938          // clear vec_out
939          for (idx = 0; idx < 4; idx++, pv++)
940             *pv = 0;
941 
942          if (test_type != VX_FP_OTHER) {
943             /* Then we need a third src argument, which is stored in element 0 of
944              * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
945              * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
946              * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
947              * data (input args, result) contain only two inputs, so I arbitrarily
948              * use spec_fargs elements 4 and 14 (alternating) for the third source
949              * argument.  We can use the same input data for a given pair of
950              * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
951              * the expected result should be the same.
952              */
953             int extra_arg_idx;
954             if (i % 2)
955                extra_arg_idx = 4;
956             else
957                extra_arg_idx = 14;
958 
959             if (repeat) {
960                /* We're on the first time through of one of the VX_FP_SMx
961                 * test types, meaning we're testing a xs<ZZZ>adp case, thus
962                 * we have to swap inputs as described above:
963                 *    src2 <= VSX[XT]
964                 *    src3 <= VSX[XB]
965                 */
966                memcpy(&vec_out, inB, 8);  // src2
967                memcpy(&vec_inB, &spec_fargs[extra_arg_idx], 8);  //src3
968                frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
969             } else {
970                // Don't need to init src2, as it's done before the switch()
971                memcpy(&vec_out, &spec_fargs[extra_arg_idx], 8);  //src3
972             }
973             memcpy(&vsr_XT, &vec_out, 8);
974          }
975 
976          (*func)();
977          dst = (unsigned long long *) &vec_out;
978 
979          if (test_type == VX_FP_OTHER)
980             printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
981                    *frap, *frbp, *dst);
982          else
983             printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
984                     test_name, vsr_XT, *frap, *frbp, *dst );
985 
986       }
987       /*
988            {
989                // Debug code.  Keep this block commented out except when debugging.
990                double result, expected;
991                memcpy(&result, dst, 8);
992                memcpy(&expected, &aTest.dp_bin_result, 8);
993                printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
994                        spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
995                        expected, result );
996             }
997        */
998       printf( "\n" );
999 
1000       if (repeat) {
1001          repeat = 0;
1002          strcat(test_name, "UNKNOWN");
1003          switch (test_type) {
1004             case VX_FP_SMAS:
1005             case VX_FP_SMSS:
1006             case VX_FP_SNMAS:
1007             case VX_FP_SNMSS:
1008                if (test_type == VX_FP_SMAS)
1009                   strcpy(test_name, "xsmadd");
1010                else if (test_type == VX_FP_SMSS)
1011                   strcpy(test_name, "xsmsub");
1012                else if (test_type == VX_FP_SNMAS)
1013                   strcpy(test_name, "xsnmadd");
1014                else
1015                   strcpy(test_name, "xsnmsub");
1016 
1017                do_asp = 0;
1018                strcat(test_name, "msp");
1019                break;
1020             default:
1021                break;
1022          }
1023          goto again;
1024       }
1025       k++;
1026    }
1027    printf( "\n" );
1028    free(test_name);
1029 }
1030 
1031 
test_vsx_one_fp_arg(void)1032 static void test_vsx_one_fp_arg(void)
1033 {
1034    test_func_t func;
1035    int k;
1036    k = 0;
1037    build_special_fargs_table();
1038 
1039    while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1040       int idx, i;
1041       unsigned long long *dst_dp;
1042       unsigned int * dst_sp;
1043       vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
1044       /* size of source operands */
1045       Bool dp  = ((test_group.precision == DOUBLE_TEST) ||
1046 		  (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
1047       /* size of result */
1048       Bool dp_res = IS_DP_RESULT(test_group.precision);
1049       Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1050       Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1051       Bool sparse_sp = False;
1052       int stride = dp ? 2 : 4;
1053       int loops = is_scalar ? 1 : stride;
1054       stride = is_scalar ? 1: stride;
1055 
1056       /* For conversions of single to double, the 128-bit input register is sparsely populated:
1057        *    |___ SP___|_Unused_|___SP___|__Unused__|   // for vector op
1058        *                     or
1059        *    |___ SP___|_Unused_|_Unused_|__Unused__|   // for scalar op
1060        *
1061        * For the vector op case, we need to adjust stride from '4' to '2', since
1062        * we'll only be loading two values per loop into the input register.
1063        */
1064       if (!dp && !is_scalar && test_group.test_type == VX_CONV_TO_DOUBLE) {
1065          sparse_sp = True;
1066          stride = 2;
1067       }
1068 
1069       for (i = 0; i < test_group.num_tests; i+=stride) {
1070          unsigned int * pv;
1071          void * inB;
1072 
1073          pv = (unsigned int *)&vec_out;
1074          // clear vec_out
1075          for (idx = 0; idx < 4; idx++, pv++)
1076             *pv = 0;
1077 
1078          if (dp) {
1079             int j;
1080             unsigned long long * frB_dp;
1081             for (j = 0; j < loops; j++) {
1082                inB = (void *)&spec_fargs[i + j];
1083                // copy double precision FP into vector element i
1084                memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1085             }
1086             // execute test insn
1087             (*func)();
1088             if (dp_res)
1089                dst_dp = (unsigned long long *) &vec_out;
1090             else
1091                dst_sp = (unsigned int *) &vec_out;
1092 
1093             printf("#%d: %s ", i/stride, test_group.name);
1094             for (j = 0; j < loops; j++) {
1095                if (j)
1096                   printf("; ");
1097                frB_dp = (unsigned long long *)&spec_fargs[i + j];
1098                printf("%s(%016llx)", test_group.op, *frB_dp);
1099                if (test_group.test_type == VX_ESTIMATE)
1100                {
1101                   Bool res;
1102                   res = check_reciprocal_estimate(is_sqrt, i + j, j);
1103                   printf(" ==> %s)", res ? "PASS" : "FAIL");
1104                } else if (dp_res) {
1105                   printf(" = %016llx", dst_dp[j]);
1106                } else {
1107                   printf(" = %08x", dst_sp[j]);
1108                }
1109             }
1110             printf("\n");
1111          } else {  // single precision test type
1112             int j;
1113             // Clear input vector
1114             pv = (unsigned int *)&vec_inB;
1115             for (idx = 0; idx < 4; idx++, pv++)
1116                *pv = 0;
1117 
1118             if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
1119                /* Take a single-precision value stored in double word element 0
1120                 * of src in double-precision format and convert to single-
1121                 * precision and store in word element 0 of dst.
1122                 */
1123                double input = spec_sp_fargs[i];
1124                memcpy(((void *)&vec_inB), (void *)&input, 8);
1125             } else {
1126                int skip_slot;
1127                if (sparse_sp) {
1128                   skip_slot = 1;
1129                   loops = 2;
1130                } else {
1131                   skip_slot = 0;
1132                }
1133                for (j = 0; j < loops; j++) {
1134                   inB = (void *)&spec_sp_fargs[i + j];
1135                   // copy single precision FP into vector element i
1136 
1137                   if (skip_slot && j > 0)
1138                      memcpy(((void *)&vec_inB) + ((j + j) * 4), inB, 4);
1139                   else
1140                      memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1141                }
1142             }
1143             // execute test insn
1144             (*func)();
1145             if (dp_res)
1146                dst_dp = (unsigned long long *) &vec_out;
1147             else
1148                dst_sp = (unsigned int *) &vec_out;
1149             // print result
1150             printf("#%d: %s ", i/stride, test_group.name);
1151             for (j = 0; j < loops; j++) {
1152                if (j)
1153                   printf("; ");
1154                printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i + j]));
1155                if (dp_res)
1156                      printf(" = %016llx", dst_dp[j]);
1157                else
1158                   printf(" = %08x", dst_sp[j]);
1159             }
1160             printf("\n");
1161          }
1162       }
1163       k++;
1164       printf( "\n" );
1165    }
1166 }
1167 
1168 /* This function currently only supports two double precision input arguments. */
test_vsx_two_fp_arg(void)1169 static void test_vsx_two_fp_arg(void)
1170 {
1171    test_func_t func;
1172    int k = 0;
1173 
1174    build_special_fargs_table();
1175    while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
1176       unsigned long long * frap, * frbp, * dst;
1177       unsigned int * pv;
1178       int idx;
1179       vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
1180       pv = (unsigned int *)&vec_out;
1181       // clear vec_out
1182       for (idx = 0; idx < 4; idx++, pv++)
1183          *pv = 0;
1184 
1185       void * inA, * inB;
1186       int i;
1187       for (i = 0; i < test_group.num_tests; i++) {
1188          fp_test_args_t aTest = test_group.targs[i];
1189          inA = (void *)&spec_fargs[aTest.fra_idx];
1190          inB = (void *)&spec_fargs[aTest.frb_idx];
1191          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1192          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1193          // Only need to copy one doubleword into each vector's element 0
1194          memcpy(&vec_inA, inA, 8);
1195          memcpy(&vec_inB, inB, 8);
1196          (*func)();
1197          dst = (unsigned long long *) &vec_out;
1198          printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
1199                 *frap, *frbp, *dst);
1200       }
1201       printf( "\n" );
1202       k++;
1203    }
1204 }
1205 
1206 /* This function handles the following cases:
1207  *   1) Single precision value stored in double-precision
1208  *      floating-point format in doubleword element 0 of src VSX register
1209  *   2) Integer word value stored in word element 1 of src VSX register
1210  */
_do_store_test(ldst_test_t storeTest)1211 static void _do_store_test (ldst_test_t storeTest)
1212 {
1213    test_func_t func;
1214    unsigned int *dst32;
1215    unsigned int i, idx;
1216    unsigned int * pv = (unsigned int *) storeTest.base_addr;
1217 
1218    func = storeTest.test_func;
1219    r14 = (HWord_t) storeTest.base_addr;
1220    r15 = (HWord_t) storeTest.offset;
1221 
1222    if (storeTest.precision == DOUBLE_TEST_SINGLE_RES) {
1223       /* source is single precision stored in double precision format */
1224       /* test some of the pre-defined single precision values */
1225       for (i = 0; i < nb_special_fargs; i+=3) {
1226          // clear out storage destination
1227          for (idx = 0; idx < 4; idx++)
1228             *(pv + idx) = 0;
1229 
1230          printf( "%s:", storeTest.name );
1231          unsigned long long * dp;
1232          double input = spec_sp_fargs[i];
1233          dp = (unsigned long long *)&input;
1234          memcpy(&vec_inA, dp, sizeof(unsigned long long));
1235          printf(" %016llx ==> ", *dp);
1236 
1237          // execute test insn
1238          (*func)();
1239          dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset);
1240          printf( "%08x\n", *dst32);
1241       }
1242    } else {
1243       // source is an integer word
1244       for (i = 0; i < NUM_VIARGS_INTS; i++) {
1245          // clear out storage destination
1246          for (idx = 0; idx < 4; idx++)
1247             *(pv + idx) = 0;
1248          printf( "%s:", storeTest.name );
1249          unsigned int * pi = (unsigned int *)&vec_inA;
1250          memcpy(pi + 1, &viargs[i], sizeof(unsigned int));
1251          printf(" %08x ==> ", *(pi + 1));
1252 
1253          // execute test insn
1254          (*func)();
1255          dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset);
1256          printf( "%08x\n", *dst32);
1257       }
1258    }
1259    printf("\n");
1260 }
1261 
/* Execute one load test and print each source word next to doubleword 0 of
 * vec_out after the test insn has run.
 *
 * A NULL base_addr selects the pre-defined single precision values in
 * spec_sp_fargs[] (every third entry); otherwise the integer words in
 * viargs[] are used.  r15 is loaded with the test's offset and r14 with the
 * address of the current source element.
 *
 * NOTE(review): the printed source word is indexed by i + offset while r15
 * also carries offset; confirm the two agree for non-zero offsets.
 */
static void _do_load_test(ldst_test_t storeTest)
{
   test_func_t func = storeTest.test_func;
   unsigned long long loaded;
   unsigned int i;

   r15 = (HWord_t) storeTest.offset;

   if (storeTest.base_addr != NULL) {
      // source is an integer word
      for (i = 0; i < NUM_VIARGS_INTS; i++) {
         printf( "%s:", storeTest.name );
         r14 = (HWord_t)&viargs[i + storeTest.offset];
         printf(" %08x ==> ", viargs[i + storeTest.offset]);

         // execute test insn
         (*func)();
         memcpy(&loaded, &vec_out, sizeof loaded);
         printf("%016llx\n", loaded);
      }
   } else {
      /* Test lxsspx: source is a single precision value */
      for (i = 0; i + storeTest.offset < nb_special_fargs; i+=3) {
         unsigned int src_bits;

         memcpy(&src_bits, &spec_sp_fargs[i + storeTest.offset], sizeof src_bits);
         printf( "%s:", storeTest.name );
         printf(" %08x ==> ", src_bits);
         r14 = (HWord_t)&spec_sp_fargs[i];

         // execute test insn
         (*func)();
         memcpy(&loaded, &vec_out, sizeof loaded);
         printf("%016llx\n", loaded);
      }
   }
   printf("\n");
}
1300 
test_ldst(void)1301 static void test_ldst(void)
1302 {
1303    int k = 0;
1304 
1305    while (ldst_tests[k].test_func) {
1306       if (ldst_tests[k].type == VSX_STORE)
1307          _do_store_test(ldst_tests[k]);
1308       else {
1309          _do_load_test(ldst_tests[k]);
1310       }
1311       k++;
1312       printf("\n");
1313    }
1314 }
1315 
test_xs_conv_ops(void)1316 static void test_xs_conv_ops(void)
1317 {
1318 
1319    test_func_t func;
1320    int k = 0;
1321 
1322    build_special_fargs_table();
1323    while ((func = xs_conv_tests[k].test_func)) {
1324       int i;
1325       unsigned long long * dst;
1326       xs_conv_test_t test_group = xs_conv_tests[k];
1327       for (i = 0; i < NUM_VIARGS_INTS; i++) {
1328          unsigned int * inB, * pv;
1329          int idx;
1330          inB = (unsigned int *)&viargs[i];
1331          memcpy(&vec_inB, inB, 4);
1332          pv = (unsigned int *)&vec_out;
1333          // clear vec_out
1334          for (idx = 0; idx < 4; idx++, pv++)
1335             *pv = 0;
1336          (*func)();
1337          dst = (unsigned long long *) &vec_out;
1338          printf("#%d: %s %08x => %016llx\n", i, test_group.name, viargs[i], *dst);
1339       }
1340       k++;
1341       printf("\n");
1342    }
1343    printf( "\n" );
1344 }
1345 
1346 
test_vsx_logic(void)1347 static void test_vsx_logic(void)
1348 {
1349    logic_test_t aTest;
1350    test_func_t func;
1351    int k;
1352    k = 0;
1353 
1354    while ((func = logic_tests[k].test_func)) {
1355 
1356       unsigned int * pv;
1357       unsigned int * inA, * inB, * dst;
1358       int idx, i;
1359       aTest = logic_tests[k];
1360       for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
1361          pv = (unsigned int *)&vec_out;
1362          inA = &viargs[i];
1363          inB = &viargs[i];
1364          memcpy(&vec_inA, inA, sizeof(vector unsigned int));
1365          memcpy(&vec_inB, inB, sizeof(vector unsigned int));
1366          // clear vec_out
1367          for (idx = 0; idx < 4; idx++, pv++)
1368             *pv = 0;
1369 
1370          // execute test insn
1371          (*func)();
1372          dst = (unsigned int*) &vec_out;
1373 
1374          printf( "#%d: %10s ", k, aTest.name);
1375          printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
1376          printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
1377          printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
1378       }
1379       k++;
1380    }
1381    printf( "\n" );
1382 }
1383 
1384 
1385 //----------------------------------------------------------
1386 
1387 static test_table_t all_tests[] = {
1388                                      { &test_vx_fp_ops,
1389                                        "Test VSX floating point instructions"},
1390                                      { &test_vsx_one_fp_arg,
1391                                        "Test VSX vector and scalar single argument instructions"} ,
1392                                      { &test_vsx_logic,
1393                                        "Test VSX logic instructions" },
1394                                      { &test_xs_conv_ops,
1395                                        "Test VSX scalar integer conversion instructions" },
1396                                      { &test_ldst,
1397                                        "Test VSX load/store dp to sp instructions" },
1398                                      { &test_vsx_two_fp_arg,
1399                                        "Test VSX vector and scalar two argument instructions"} ,
1400                                      { NULL, NULL }
1401 };
1402 
1403 #endif
1404 
/* Entry point.  When built with HAS_ISA_2_07, prints the heading of each
 * all_tests[] entry and runs its driver, in table order, stopping at the
 * NULL terminator.  Otherwise prints a notice that ISA 2.07 is unsupported.
 * argc and argv are not used.  Always returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_ISA_2_07
   int i;

   for (i = 0; all_tests[i].test_category != NULL; i++) {
      printf( "%s\n", all_tests[i].name );
      (*all_tests[i].test_category)();
   }
#else
   printf("NO ISA 2.07 SUPPORT\n");
#endif
   return 0;
}
1424