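/* Exercise FXSAVE/FXRSTOR on amd64: save the x87/SSE state into a
   512-byte area, zero the registers, restore from the saved image,
   and dump the area at each step.  The whole sequence is run twice,
   once with REX.W == 0 and once with REX.W == 1 (fxsave64/fxrstor64). */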
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "tests/asm.h"
#include "tests/malloc.h"
#include <string.h>

const unsigned int vec0[4]
   = { 0x12345678, 0x11223344, 0x55667788, 0x87654321 };

const unsigned int vec1[4]
   = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA };

const unsigned int vecZ[4]
   = { 0, 0, 0, 0 };

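/* Save the current x87/MMX/SSE state into the 512-byte area at p.
   If rexw is nonzero, emit the REX.W form of the instruction
   (fxsave64, or rex64/fxsave for assemblers without fxsave64). */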
__attribute__((noinline))
void do_fxsave ( void* p, int rexw ) {
   if (rexw) {
#ifdef HAVE_AS_AMD64_FXSAVE64
      asm __volatile__("fxsave64 (%0)" : : "r" (p) : "memory" );
#else
      asm __volatile__("rex64/fxsave (%0)" : : "r" (p) : "memory" );
#endif
   } else {
      asm __volatile__("fxsave (%0)" : : "r" (p) : "memory" );
   }
}

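/* Restore the x87/MMX/SSE state from the 512-byte area at p,
   using the REX.W form of the instruction if rexw is nonzero. */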
__attribute__((noinline))
void do_fxrstor ( void* p, int rexw ) {
   if (rexw) {
#ifdef HAVE_AS_AMD64_FXSAVE64
      asm __volatile__("fxrstor64 (%0)" : : "r" (p) : "memory" );
#else
      asm __volatile__("rex64/fxrstor (%0)" : : "r" (p) : "memory" );
#endif
   } else {
      asm __volatile__("fxrstor (%0)" : : "r" (p) : "memory" );
   }
}

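/* Reset the x87 state and clear %xmm0..%xmm15 and MXCSR, so that a
   subsequent FXSAVE shows the registers really have been wiped. */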
void do_zeroise ( void )
{
   asm __volatile__("finit");
   asm __volatile__(
    "fldz\n\t"
    "fldz\n\t"
    "fldz\n\t"
    "fldz\n\t"
    "fldz\n\t"
    "fldz\n\t"
    "fldz\n\t"
    "fldz\n\t"
    "finit\n");
#ifndef VGP_amd64_darwin
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm3");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm4");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm5");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm6");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm7");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm8");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm9");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm10");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm11");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm12");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm13");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm14");
   asm __volatile__("movups " VG_SYM(vecZ) ", %xmm15");
#else
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm0");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm1");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm2");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm3");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm4");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm5");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm6");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm7");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm8");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm9");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm10");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm11");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm12");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm13");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm14");
   asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm15");
#endif
   asm __volatile__(
      "pushq $0\n\t"
      "ldmxcsr 0(%rsp)\n\t"
      "addq $8,%rsp\n");
}

/* set up the FP and SSE state, and then dump it. */
void do_setup_then_fxsave ( void* p, int rexw )
{
   asm __volatile__("finit");
   asm __volatile__("fldpi");
   asm __volatile__("fld1");
   asm __volatile__("fldln2");
   asm __volatile__("fldlg2");
   asm __volatile__("fld %st(3)");
   asm __volatile__("fld %st(3)");
   asm __volatile__("fld1");
   asm __volatile__("movups (%0), %%xmm0" : : "r"(&vec0[0]) : "xmm0" );
   asm __volatile__("movups (%0), %%xmm1" : : "r"(&vec1[0]) : "xmm1" );
   asm __volatile__("xorps  %xmm2, %xmm2");
   asm __volatile__("movaps %xmm0, %xmm3");
   asm __volatile__("movaps %xmm1, %xmm4");
   asm __volatile__("movaps %xmm2, %xmm5");
   asm __volatile__("movaps %xmm0, %xmm6");
   asm __volatile__("movaps %xmm1, %xmm7");
   asm __volatile__("movaps %xmm1, %xmm8");
   asm __volatile__("movaps %xmm2, %xmm9");
   asm __volatile__("movaps %xmm0, %xmm10");
   asm __volatile__("movaps %xmm1, %xmm11");
   asm __volatile__("movaps %xmm1, %xmm12");
   asm __volatile__("movaps %xmm2, %xmm13");
   asm __volatile__("movaps %xmm0, %xmm14");
   asm __volatile__("movaps %xmm1, %xmm15");
   do_fxsave(p, rexw);
}

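/* Return 1 if byte offset i in the FXSAVE image falls on the two
   least-significant bytes of one of the eight saved x87 registers
   (which start at offsets 32, 48, ..., 144), else 0.  These bytes can
   vary between runs, so show() can mask them out on request. */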
int isFPLsbs ( int i )
{
   int q;
   q = 32; if (i == q || i == q+1) return 1;
   q = 48; if (i == q || i == q+1) return 1;
   q = 64; if (i == q || i == q+1) return 1;
   q = 80; if (i == q || i == q+1) return 1;
   q = 96; if (i == q || i == q+1) return 1;
   q = 112; if (i == q || i == q+1) return 1;
   q = 128; if (i == q || i == q+1) return 1;
   q = 144; if (i == q || i == q+1) return 1;
   return 0;
}

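/* Hex-dump the 512-byte FXSAVE image at buf, 16 bytes per line.
   If xx is nonzero, print "xx" in place of the bytes selected by
   isFPLsbs(). */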
void show ( unsigned char* buf, int xx )
{
   int i;
   for (i = 0; i < 512; i++) {
      if ((i % 16) == 0)
         printf("%3d   ", i);
      if (xx && isFPLsbs(i))
         printf("xx ");
      else
         printf("%02x ", buf[i]);
      if (i > 0 && ((i % 16) == 15))
         printf("\n");
   }
}


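/* Run the save/zero/restore sequence twice: once with FXSAVE/FXRSTOR
   (REX.W == 0) and once with the 64-bit forms (REX.W == 1), dumping
   the 512-byte save area at each step. */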
int main ( int argc, char** argv )
{
   unsigned char* buf1 = memalign16(512);
   unsigned char* buf2 = memalign16(512);
   unsigned char* buf3 = memalign16(512);
   int xx = argc > 1;
   printf("Re-run with any arg to suppress least-significant\n"
          "   16 bits of FP numbers\n");

   printf("\n-------- FXSAVE non-64 (REX.W == 0) --------\n");

   memset(buf1, 0x55, 512);
   memset(buf2, 0x55, 512);
   memset(buf3, 0x55, 512);

   /* Load up x87/xmm state and dump it. */
   do_setup_then_fxsave(buf1, 0);
   printf("\nBEFORE\n");
   show(buf1, xx);

   /* Zeroise x87/xmm state and dump it, to show that the
      regs have been cleared out. */
   do_zeroise();
   do_fxsave(buf2, 0);
   printf("\nZEROED\n");
   show(buf2, xx);

   /* Reload x87/xmm state from buf1 and dump it in buf3. */
   do_fxrstor(buf1, 0);
   do_fxsave(buf3, 0);
   printf("\nRESTORED\n");
   show(buf3, xx);

   printf("\n-------- FXSAVE 64 (REX.W == 1) --------\n\n");

   memset(buf1, 0x55, 512);
   memset(buf2, 0x55, 512);
   memset(buf3, 0x55, 512);

   /* Load up x87/xmm state and dump it. */
   do_setup_then_fxsave(buf1, 1);
   printf("\nBEFORE\n");
   show(buf1, xx);

   /* Zeroise x87/xmm state and dump it, to show that the
      regs have been cleared out. */
   do_zeroise();
   do_fxsave(buf2, 1);
   printf("\nZEROED\n");
   show(buf2, xx);

   /* Reload x87/xmm state from buf1 and dump it in buf3. */
   do_fxrstor(buf1, 1);
   do_fxsave(buf3, 1);
   printf("\nRESTORED\n");
   show(buf3, xx);


   free(buf1); free(buf2); free(buf3);

   return 0;
}