1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
5 #include "tests/asm.h"
6 #include "tests/malloc.h"
7 #include <string.h>
8
/* Size, in bytes, of every buffer this test hands to XSAVE / XRSTOR. */
#define XSAVE_AREA_SIZE 832

/* Short aliases for the basic unsigned integer types. */
typedef unsigned char      UChar;
typedef unsigned int       UInt;
typedef unsigned long long ULong;

typedef unsigned long      UWord;

/* Byte-sized boolean and its two values. */
typedef unsigned char Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
20
/* Two 8 x 32-bit (256-bit) patterns; do_setup_then_xsave loads them into
   the YMM registers. */
const unsigned int vec0[8] = {
   0x12345678, 0x11223344, 0x55667788, 0x87654321,
   0x15263748, 0x91929394, 0x19293949, 0x48372615
};

const unsigned int vec1[8] = {
   0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA,
   0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB
};

/* An all-zero vector of the same shape. */
const unsigned int vecZ[8] = { 0 };
31
/* Byte-at-a-time replacement for memset, intended not to use XMM or YMM
   registers.  Stores the low 8 bits of |c| into each of the first |n|
   bytes at |s| and returns |s|.  The empty asm after every store is a
   compiler barrier whose purpose is to defeat autovectorisation. */
static __attribute__((noinline))
void* my_memset(void* s, int c, size_t n)
{
   unsigned char*      dst  = s;
   const unsigned char fill = (unsigned char)(unsigned int)c;
   size_t              left;
   for (left = n; left > 0; left--) {
      *dst++ = fill;
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return s;
}
44
/* Byte-at-a-time replacement for memcpy, built the same way as my_memset:
   one byte per iteration, followed by an empty asm acting as a compiler
   barrier.  Copies |n| bytes from |src| to |dest| in ascending address
   order and returns |dest|. */
static __attribute__((noinline))
void* my_memcpy(void *dest, const void *src, size_t n)
{
   unsigned char*       out = dest;
   const unsigned char* in  = src;
   size_t               left;
   for (left = n; left > 0; left--) {
      *out++ = *in++;
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return dest;
}
56
/* Allocate |size| bytes with memalign64 and clear them with my_memset.
   Returns the pointer memalign64 produced; a null result, or a zero
   |size|, skips the clearing step and the pointer is returned as is.
   NOTE(review): presumably memalign64 yields 64-byte-aligned memory --
   confirm against tests/malloc.h. */
static void* memalign_zeroed64(size_t size)
{
   char* p = memalign64(size);
   if (!p || size == 0) {
      return p;
   }
   my_memset(p, 0, size);
   return p;
}
65
66 __attribute__((noinline))
do_xsave(void * p,UInt rfbm)67 static void do_xsave ( void* p, UInt rfbm )
68 {
69 assert(rfbm <= 7);
70 __asm__ __volatile__(
71 "movq %0, %%rax; xorq %%rdx, %%rdx; xsave (%1)"
72 : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
73 : /*TRASH*/ "memory", "rax", "rdx"
74 );
75 }
76
77 __attribute__((noinline))
do_xrstor(void * p,UInt rfbm)78 static void do_xrstor ( void* p, UInt rfbm )
79 {
80 assert(rfbm <= 7);
81 __asm__ __volatile__(
82 "movq %0, %%rax; xorq %%rdx, %%rdx; xrstor (%1)"
83 : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
84 : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */
85 );
86 }
87
88 /* set up the FP, SSE and AVX state, and then dump it. */
do_setup_then_xsave(void * p,UInt rfbm)89 static void do_setup_then_xsave ( void* p, UInt rfbm )
90 {
91 __asm__ __volatile__("finit");
92 __asm__ __volatile__("fldpi");
93 __asm__ __volatile__("fld1");
94 __asm__ __volatile__("fldln2");
95 __asm__ __volatile__("fldlg2");
96 __asm__ __volatile__("fld %st(3)");
97 __asm__ __volatile__("fld %st(3)");
98 __asm__ __volatile__("fld1");
99 __asm__ __volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0[0]) : "xmm0" );
100 __asm__ __volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1[0]) : "xmm1" );
101 __asm__ __volatile__("vxorps %ymm2, %ymm2, %ymm2");
102 __asm__ __volatile__("vmovaps %ymm0, %ymm3");
103 __asm__ __volatile__("vmovaps %ymm1, %ymm4");
104 __asm__ __volatile__("vmovaps %ymm2, %ymm5");
105 __asm__ __volatile__("vmovaps %ymm0, %ymm6");
106 __asm__ __volatile__("vmovaps %ymm1, %ymm7");
107 __asm__ __volatile__("vmovaps %ymm1, %ymm8");
108 __asm__ __volatile__("vmovaps %ymm2, %ymm9");
109 __asm__ __volatile__("vmovaps %ymm0, %ymm10");
110 __asm__ __volatile__("vmovaps %ymm1, %ymm11");
111 __asm__ __volatile__("vmovaps %ymm1, %ymm12");
112 __asm__ __volatile__("vmovaps %ymm2, %ymm13");
113 __asm__ __volatile__("vmovaps %ymm0, %ymm14");
114 __asm__ __volatile__("vmovaps %ymm1, %ymm15");
115 do_xsave(p, rfbm);
116 }
117
/* Return 1 if byte offset |i| is one of the first two bytes of any of the
   eight 16-byte slots that start at offset 32 (i.e. 32, 33, 48, 49, ...,
   144, 145), and 0 otherwise.  test_xrstor places the x87 register
   values in exactly those slots, so these are the two lowest bytes of
   each one. */
static int isFPLsbs ( int i )
{
   const int first = 32;          /* offset of slot 0 */
   const int last  = 144 + 1;     /* second byte of slot 7 */
   if (i < first || i > last) {
      return 0;
   }
   return ((i - first) % 16) <= 1 ? 1 : 0;
}
131
/* Hex-dump the first XSAVE_AREA_SIZE bytes of |buf| to stderr, 16 bytes
   per row, each row prefixed by its starting offset.  When
   |hideBits64to79| is set, bytes at offsets selected by isFPLsbs are
   printed as "xx" instead of their value. */
static void show ( unsigned char* buf, Bool hideBits64to79 )
{
   int off;
   for (off = 0; off < XSAVE_AREA_SIZE; off++) {
      const int col = off % 16;

      if (col == 0) {
         fprintf(stderr, "%3d ", off);
      }

      if (!hideBits64to79 || !isFPLsbs(off)) {
         fprintf(stderr, "%02x ", buf[off]);
      } else {
         fprintf(stderr, "xx ");
      }

      if (col == 15) {
         fprintf(stderr, "\n");
      }
   }
}
146
/* Execute CPUID with EAX = |index| and ECX = |ecx_in|, and store the
   resulting EAX, EBX, ECX and EDX through the four output pointers.

   Written with __asm__ __volatile__, like the rest of this file, so that
   it also builds in strict ISO modes (e.g. -std=c11) where the plain
   |asm| keyword is not available. */
static void cpuid ( UInt* eax, UInt* ebx, UInt* ecx, UInt* edx,
                    UInt index, UInt ecx_in )
{
   UInt a, b, c, d;
   __asm__ __volatile__("cpuid"
      : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
      : "0" (index), "2" (ecx_in) );
   *eax = a;
   *ebx = b;
   *ecx = c;
   *edx = d;
}
158
/* Execute XGETBV with ECX = |ecx_in| and store the resulting EAX and EDX
   through |eax| and |edx|.

   Written with __asm__ __volatile__, like the rest of this file, so that
   it also builds in strict ISO modes (e.g. -std=c11) where the plain
   |asm| keyword is not available. */
static void xgetbv ( UInt* eax, UInt* edx, UInt ecx_in )
{
   UInt a, d;
   __asm__ __volatile__("xgetbv"
      : "=a" (a), "=d" (d)
      : "c" (ecx_in) );
   *eax = a;
   *edx = d;
}
167
check_for_xsave(void)168 static void check_for_xsave ( void )
169 {
170 UInt eax, ebx, ecx, edx;
171 Bool ok = True;
172
173 eax = ebx = ecx = edx = 0;
174 cpuid(&eax, &ebx, &ecx, &edx, 1,0);
175 //fprintf(stderr, "cpuid(1).ecx[26=xsave] = %u\n", (ecx >> 26) & 1);
176 ok = ok && (((ecx >> 26) & 1) == 1);
177
178 eax = ebx = ecx = edx = 0;
179 cpuid(&eax, &ebx, &ecx, &edx, 1,0);
180 //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1);
181 ok = ok && (((ecx >> 27) & 1) == 1);
182
183 eax = ebx = ecx = edx = 0;
184 xgetbv(&eax, &edx, 0);
185 //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax);
186 ok = ok && (edx == 0) && (eax == 7);
187
188 if (ok) return;
189
190 fprintf(stderr,
191 "This program must be run on a CPU that supports AVX and XSAVE.\n");
192 exit(1);
193 }
194
195
/* Testing XSAVE.

   For each RFBM value in 0 .. 7 (that is, every combination of the three
   low bits): load the x87, SSE and AVX registers with known values, dump
   them with XSAVE, and print the resulting buffer to stderr.
   |hideBits64to79| is passed straight through to show(). */
void test_xsave ( Bool hideBits64to79 )
{
   UInt rfbm = 0;
   while (rfbm <= 7) {
      UChar* img = memalign_zeroed64(XSAVE_AREA_SIZE);

      /* Pre-fill with 0xAA so that bytes XSAVE leaves untouched are
         recognisable in the dump; byte 512 (where test_xrstor puts
         XSTATE_BV) starts out as zero. */
      my_memset(img, 0xAA, XSAVE_AREA_SIZE);
      img[512] = 0;

      do_setup_then_xsave(img, rfbm);

      fprintf(stderr,
              "------------------ XSAVE, rfbm = %u ------------------\n", rfbm);
      show(img, hideBits64to79);
      fprintf(stderr, "\n");

      free(img);
      rfbm++;
   }
}
220
221
/* Testing XRSTOR is more complex than testing XSAVE, because the
   loaded value(s) depend not only on what bits are requested (by
   RFBM) but also on what bits are actually present in the image
   (defined by XSTATE_BV).  So we have to test all 64 (8 x 8)
   combinations.

   The approach is to fill a memory buffer with data, do XRSTOR
   from the buffer, then dump all components with XSAVE in a new
   buffer, and print the result.  This is complicated by the fact
   that we need to be able to see which parts of the state (in
   registers) are neither overwritten nor zeroed by the restore.
   Hence the registers must be pre-filled with values which are
   neither zero nor the data to be loaded.  We choose to use 0x55
   where possible.

   |hideBits64to79| is passed straight through to show().  All buffers
   allocated here are released before returning. */
void test_xrstor ( Bool hideBits64to79 )
{
   UInt i;
   UInt xstate_bv, rfbm;

   /* The "all 0x55" image used to pre-fill the register state. */
   UChar* fives = memalign_zeroed64(XSAVE_AREA_SIZE);
   my_memset(fives, 0x55, XSAVE_AREA_SIZE);
   /* Set MXCSR so that the insn doesn't fault */
   fives[24] = 0x80;
   fives[25] = 0x1f;
   fives[26] = 0;
   fives[27] = 0;
   /* Ditto for the XSAVE header area.  Also set XSTATE_BV. */
   fives[512] = 7;
   for (i = 1; i <= 23; i++) fives[512+i] = 0;
   /* Fill the x87 register values with something that VEX's
      80-vs-64-bit kludging won't mess up -- an 80 bit number which is
      representable also as 64 bit: 123456789.0123 */
   for (i = 0; i <= 7; i++) {
      UChar* p = &fives[32 + 16 * i];
      p[0]=0x00; p[1]=0xf8; p[2]=0xc2; p[3]=0x64; p[4]=0xa0;
      p[5]=0xa2; p[6]=0x79; p[7]=0xeb; p[8]=0x19; p[9]=0x40;
   }
   /* And mark the tags for all 8 dumped regs as "valid". */
   fives[4/*FTW*/] = 0xFF;

   /* (1) (see comment in loop below) */
   UChar* standard_test_data = memalign_zeroed64(XSAVE_AREA_SIZE);
   do_setup_then_xsave(standard_test_data, 7);

   for (xstate_bv = 0; xstate_bv <= 7; xstate_bv++) {
      for (rfbm = 0; rfbm <= 7; rfbm++) {
         /* 1. Copy the "standard test data" into registers, and dump
               it with XSAVE.  This gives us an image we can try
               restoring from.

            2. Set the register state to all-0x55s (as far as is
               possible), so we can see which parts get overwritten
               and which parts get zeroed on the test restore.

            3. Do the restore from the image prepared in (1).

            4. Dump the state with XSAVE and print it.
         */

         /* (3a).  We can't use |standard_test_data| directly, since we
            need to put in the required |xstate_bv| value.  So make a
            copy and modify that instead. */
         UChar* img_to_restore_from = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memcpy(img_to_restore_from, standard_test_data, XSAVE_AREA_SIZE);
         img_to_restore_from[512] = xstate_bv;

         /* (4a) */
         UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE);
         my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE);
         saved_img[512] = 0;

         /* From here until the do_xsave call, nothing other than the
            two restores may run: the register state is the thing under
            test. */

         /* (2) */
         do_xrstor(fives, 7);

         // X87, SSE, AVX state LIVE

         /* (3b) */
         /* and this is what we're actually trying to test */
         do_xrstor(img_to_restore_from, rfbm);

         // X87, SSE, AVX state LIVE

         /* (4b) */
         do_xsave(saved_img, 7);

         fprintf(stderr,
                 "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n",
                 xstate_bv, rfbm);
         show(saved_img, hideBits64to79);
         fprintf(stderr, "\n");

         free(saved_img);
         free(img_to_restore_from);
      }
   }

   /* Release the two images shared by every iteration. */
   free(standard_test_data);
   free(fives);
}
320
321
main(int argc,char ** argv)322 int main ( int argc, char** argv )
323 {
324 Bool hideBits64to79 = argc > 1;
325 fprintf(stderr, "Re-run with any arg to suppress least-significant\n"
326 " 16 bits of 80-bit FP numbers\n");
327
328 check_for_xsave();
329
330 if (1)
331 test_xsave(hideBits64to79);
332
333 if (1)
334 test_xrstor(hideBits64to79);
335
336 return 0;
337 }
338