1 #include <config.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include "tests/asm.h"
5 #include "tests/malloc.h"
6 #include <string.h>
7
/* 128-bit test pattern; do_setup_then_fxsave loads it into xmm0. */
const unsigned int vec0[4] = {
    0x12345678, 0x11223344, 0x55667788, 0x87654321
};

/* Second 128-bit test pattern; do_setup_then_fxsave loads it into xmm1. */
const unsigned int vec1[4] = {
    0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA
};

/* All-zero 128-bit vector. */
const unsigned int vecZ[4] = { 0 };
16
/* Dump the current x87/SSE state into the buffer at p with FXSAVE.

   p     destination buffer.  FXSAVE writes a 512-byte image and requires
         the address to be 16-byte aligned.
   rexw  zero selects the plain FXSAVE form; non-zero selects the REX.W=1
         (64-bit) form.

   The function is marked noinline, so it is never merged into its
   callers.  `__asm__` is used rather than `asm` so the code also builds
   in strict ISO modes (e.g. -std=c11), where `asm` is not a keyword. */
__attribute__((noinline))
void do_fxsave ( void* p, int rexw )
{
    if (!rexw) {
        __asm__ __volatile__("fxsave (%0)" : : "r" (p) : "memory" );
        return;
    }

#ifdef HAVE_AS_AMD64_FXSAVE64
    __asm__ __volatile__("fxsave64 (%0)" : : "r" (p) : "memory" );
#else
    /* NOTE(review): HAVE_AS_AMD64_FXSAVE64 presumably comes from config.h
       and says whether the assembler knows the fxsave64 mnemonic; without
       it the REX.W prefix is requested explicitly. */
    __asm__ __volatile__("rex64/fxsave (%0)" : : "r" (p) : "memory" );
#endif
}
29
/* Reload the x87/SSE state from the buffer at p with FXRSTOR.

   p     source buffer holding an FXSAVE-format image.  FXRSTOR reads
         512 bytes and requires the address to be 16-byte aligned.
   rexw  zero selects the plain FXRSTOR form; non-zero selects the
         REX.W=1 (64-bit) form.

   The function is marked noinline, so it is never merged into its
   callers.  `__asm__` is used rather than `asm` so the code also builds
   in strict ISO modes (e.g. -std=c11), where `asm` is not a keyword. */
__attribute__((noinline))
void do_fxrstor ( void* p, int rexw )
{
    if (!rexw) {
        __asm__ __volatile__("fxrstor (%0)" : : "r" (p) : "memory" );
        return;
    }

#ifdef HAVE_AS_AMD64_FXSAVE64
    __asm__ __volatile__("fxrstor64 (%0)" : : "r" (p) : "memory" );
#else
    /* NOTE(review): HAVE_AS_AMD64_FXSAVE64 presumably comes from config.h
       and says whether the assembler knows the fxrstor64 mnemonic; without
       it the REX.W prefix is requested explicitly. */
    __asm__ __volatile__("rex64/fxrstor (%0)" : : "r" (p) : "memory" );
#endif
}
42
/* Clear the x87 and SSE state:
     - reinitialise the x87 unit, push +0.0 eight times so every x87
       register holds zero, then reinitialise again;
     - load all-zero bits into xmm0..xmm15;
     - load MXCSR with 0 (note this also clears the exception-mask bits).

   The zero vector and the MXCSR value are passed as "m" operands, so the
   compiler picks an addressing mode that is valid for the current code
   model (including position-independent builds), and the inline asm no
   longer adjusts %rsp or writes below it behind the compiler's back.
   All xmm registers written here are declared as clobbered. */
void do_zeroise ( void )
{
    static const unsigned int zero128[4] = { 0, 0, 0, 0 };
    const unsigned int mxcsr_zero = 0;

    /* x87: finit, fill all eight slots with +0.0, finit again. */
    __asm__ __volatile__(
        "finit\n\t"
        "fldz\n\t"
        "fldz\n\t"
        "fldz\n\t"
        "fldz\n\t"
        "fldz\n\t"
        "fldz\n\t"
        "fldz\n\t"
        "fldz\n\t"
        "finit\n");

    /* SSE: movups has no alignment requirement on its memory operand. */
    __asm__ __volatile__(
        "movups %0, %%xmm0\n\t"
        "movups %0, %%xmm1\n\t"
        "movups %0, %%xmm2\n\t"
        "movups %0, %%xmm3\n\t"
        "movups %0, %%xmm4\n\t"
        "movups %0, %%xmm5\n\t"
        "movups %0, %%xmm6\n\t"
        "movups %0, %%xmm7\n\t"
        "movups %0, %%xmm8\n\t"
        "movups %0, %%xmm9\n\t"
        "movups %0, %%xmm10\n\t"
        "movups %0, %%xmm11\n\t"
        "movups %0, %%xmm12\n\t"
        "movups %0, %%xmm13\n\t"
        "movups %0, %%xmm14\n\t"
        "movups %0, %%xmm15\n"
        : /* no outputs */
        : "m" (zero128)
        : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
          "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
          "xmm15");

    /* MXCSR := 0 */
    __asm__ __volatile__("ldmxcsr %0" : : "m" (mxcsr_zero));
}
96
97 /* set up the FP and SSE state, and then dump it. */
do_setup_then_fxsave(void * p,int rexw)98 void do_setup_then_fxsave ( void* p, int rexw )
99 {
100 asm __volatile__("finit");
101 asm __volatile__("fldpi");
102 asm __volatile__("fld1");
103 asm __volatile__("fldln2");
104 asm __volatile__("fldlg2");
105 asm __volatile__("fld %st(3)");
106 asm __volatile__("fld %st(3)");
107 asm __volatile__("fld1");
108 asm __volatile__("movups (%0), %%xmm0" : : "r"(&vec0[0]) : "xmm0" );
109 asm __volatile__("movups (%0), %%xmm1" : : "r"(&vec1[0]) : "xmm1" );
110 asm __volatile__("xorps %xmm2, %xmm2");
111 asm __volatile__("movaps %xmm0, %xmm3");
112 asm __volatile__("movaps %xmm1, %xmm4");
113 asm __volatile__("movaps %xmm2, %xmm5");
114 asm __volatile__("movaps %xmm0, %xmm6");
115 asm __volatile__("movaps %xmm1, %xmm7");
116 asm __volatile__("movaps %xmm1, %xmm8");
117 asm __volatile__("movaps %xmm2, %xmm9");
118 asm __volatile__("movaps %xmm0, %xmm10");
119 asm __volatile__("movaps %xmm1, %xmm11");
120 asm __volatile__("movaps %xmm1, %xmm12");
121 asm __volatile__("movaps %xmm2, %xmm13");
122 asm __volatile__("movaps %xmm0, %xmm14");
123 asm __volatile__("movaps %xmm1, %xmm15");
124 do_fxsave(p, rexw);
125 }
126
/* Return 1 if i is one of the byte offsets 32, 48, 64, ..., 144 or the
   byte immediately after one of them (33, 49, ..., 145); otherwise 0.

   show() uses this to decide which bytes of a dumped image to mask out.
   NOTE(review): these look like the two least-significant bytes of each
   of the eight 16-byte x87 register slots in an FXSAVE image -- confirm
   against the FXSAVE layout. */
int isFPLsbs ( int i )
{
    int slot;

    for (slot = 32; slot <= 144; slot += 16) {
        if (i == slot || i == slot + 1)
            return 1;
    }
    return 0;
}
140
/* Print the first 512 bytes of buf as a hex dump on stdout: 32 rows of
   16 bytes, each row prefixed with the decimal offset of its first byte.

   buf  buffer to dump; bytes 0..511 are read.
   xx   when non-zero, every byte for which isFPLsbs() returns true is
        printed as "xx" instead of its value. */
void show ( unsigned char* buf, int xx )
{
    int row, col;

    for (row = 0; row < 512 / 16; row++) {
        printf("%3d ", row * 16);
        for (col = 0; col < 16; col++) {
            int off = row * 16 + col;
            if (xx && isFPLsbs(off))
                printf("xx ");
            else
                printf("%02x ", buf[off]);
        }
        printf("\n");
    }
}
155
156
/* Run one save / zero / restore round and print the three images.

   buf1, buf2, buf3  scratch buffers handed to do_fxsave/do_fxrstor; each
                     is filled with 512 bytes of 0x55 first, so bytes the
                     instruction leaves untouched are recognisable.
   rexw              forwarded to the do_* helpers to pick the
                     instruction form.
   xx                forwarded to show(). */
static void run_fxsave_round ( unsigned char* buf1, unsigned char* buf2,
                               unsigned char* buf3, int rexw, int xx )
{
    memset(buf1, 0x55, 512);
    memset(buf2, 0x55, 512);
    memset(buf3, 0x55, 512);

    /* Load up x87/xmm state and dump it. */
    do_setup_then_fxsave(buf1, rexw);
    printf("\nBEFORE\n");
    show(buf1, xx);

    /* Zeroise x87/xmm state and dump it, to show that the
       regs have been cleared out. */
    do_zeroise();
    do_fxsave(buf2, rexw);
    printf("\nZEROED\n");
    show(buf2, xx);

    /* Reload x87/xmm state from buf1 and dump it in buf3. */
    do_fxrstor(buf1, rexw);
    do_fxsave(buf3, rexw);
    printf("\nRESTORED\n");
    show(buf3, xx);
}

/* Exercise FXSAVE/FXRSTOR in both the plain and the REX.W=1 form and
   print the saved images.  Passing any command-line argument makes
   show() mask the bytes selected by isFPLsbs().

   Returns 0 on success, 1 if a buffer could not be allocated. */
int main ( int argc, char** argv )
{
    unsigned char* buf1 = memalign16(512);
    unsigned char* buf2 = memalign16(512);
    unsigned char* buf3 = memalign16(512);
    int xx = argc > 1;

    /* NOTE(review): memalign16 is declared in tests/malloc.h and may
       already abort on failure; the check is kept so that memset never
       sees a null pointer either way. */
    if (buf1 == NULL || buf2 == NULL || buf3 == NULL) {
        fprintf(stderr, "out of memory\n");
        free(buf1); free(buf2); free(buf3);
        return 1;
    }

    printf("Re-run with any arg to suppress least-significant\n"
           " 16 bits of FP numbers\n");

    printf("\n-------- FXSAVE non-64 (REX.W == 0) --------\n");
    run_fxsave_round(buf1, buf2, buf3, 0, xx);

    printf("\n-------- FXSAVE 64 (REX.W == 1) --------\n\n");
    run_fxsave_round(buf1, buf2, buf3, 1, xx);

    free(buf1); free(buf2); free(buf3);

    return 0;
}
219