• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include "tests/asm.h"
5 #include "tests/malloc.h"
6 #include <string.h>
7 
/* First 128-bit test pattern (four 32-bit words); do_setup_then_fxsave
   loads it into %xmm0. */
const unsigned int vec0[4] = {
   0x12345678u, 0x11223344u, 0x55667788u, 0x87654321u
};
10 
/* Second 128-bit test pattern (four 32-bit words); do_setup_then_fxsave
   loads it into %xmm1. */
const unsigned int vec1[4] = {
   0xABCDEF01u, 0xAABBCCDDu, 0xEEFF0011u, 0x10FEDCBAu
};
13 
/* All-zero 128-bit pattern; do_zeroise loads it into every XMM register. */
const unsigned int vecZ[4] = { 0u, 0u, 0u, 0u };
16 
/* Execute FXSAVE with p as its memory operand, i.e. dump the current
   x87/SSE state into the save area at p.

   p    - destination save area.  The callers in this file pass 512-byte
          buffers obtained from memalign16().
   rexw - non-zero selects the REX.W-prefixed ("rex64/") form of the
          instruction, zero the plain form.

   The "memory" clobber tells the compiler that the asm writes memory it
   cannot see through the register operand.  The keyword is spelled
   __asm__ so the function also compiles in strict ISO modes
   (e.g. -std=c11), where plain "asm" is not a keyword. */
__attribute__((noinline))
void do_fxsave ( void* p, int rexw ) {
   if (rexw) {
      __asm__ __volatile__("rex64/fxsave (%0)" : : "r" (p) : "memory" );
   } else {
      __asm__ __volatile__("fxsave (%0)" : : "r" (p) : "memory" );
   }
}
25 
/* Execute FXRSTOR with p as its memory operand, i.e. reload the x87/SSE
   state from the save area at p.

   p    - source save area; in this file it is always a buffer previously
          filled in by do_fxsave().
   rexw - non-zero selects the REX.W-prefixed ("rex64/") form of the
          instruction, zero the plain form.

   The keyword is spelled __asm__ so the function also compiles in strict
   ISO modes (e.g. -std=c11), where plain "asm" is not a keyword. */
__attribute__((noinline))
void do_fxrstor ( void* p, int rexw ) {
   if (rexw) {
      __asm__ __volatile__("rex64/fxrstor (%0)" : : "r" (p) : "memory" );
   } else {
      __asm__ __volatile__("fxrstor (%0)" : : "r" (p) : "memory" );
   }
}
34 
do_zeroise(void)35 void do_zeroise ( void )
36 {
37    asm __volatile__("finit");
38    asm __volatile__(
39     "fldz\n\t"
40     "fldz\n\t"
41     "fldz\n\t"
42     "fldz\n\t"
43     "fldz\n\t"
44     "fldz\n\t"
45     "fldz\n\t"
46     "fldz\n\t"
47     "finit\n");
48 #ifndef VGP_amd64_darwin
49    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0");
50    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1");
51    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2");
52    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm3");
53    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm4");
54    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm5");
55    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm6");
56    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm7");
57    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm8");
58    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm9");
59    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm10");
60    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm11");
61    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm12");
62    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm13");
63    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm14");
64    asm __volatile__("movups " VG_SYM(vecZ) ", %xmm15");
65 #else
66    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm0");
67    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm1");
68    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm2");
69    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm3");
70    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm4");
71    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm5");
72    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm6");
73    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm7");
74    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm8");
75    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm9");
76    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm10");
77    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm11");
78    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm12");
79    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm13");
80    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm14");
81    asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm15");
82 #endif
83    asm __volatile__(
84       "pushq $0\n\t"
85       "ldmxcsr 0(%rsp)\n\t"
86       "addq $8,%rsp\n");
87 }
88 
89 /* set up the FP and SSE state, and then dump it. */
do_setup_then_fxsave(void * p,int rexw)90 void do_setup_then_fxsave ( void* p, int rexw )
91 {
92    asm __volatile__("finit");
93    asm __volatile__("fldpi");
94    asm __volatile__("fld1");
95    asm __volatile__("fldln2");
96    asm __volatile__("fldlg2");
97    asm __volatile__("fld %st(3)");
98    asm __volatile__("fld %st(3)");
99    asm __volatile__("fld1");
100    asm __volatile__("movups (%0), %%xmm0" : : "r"(&vec0[0]) : "xmm0" );
101    asm __volatile__("movups (%0), %%xmm1" : : "r"(&vec1[0]) : "xmm1" );
102    asm __volatile__("xorps  %xmm2, %xmm2");
103    asm __volatile__("movaps %xmm0, %xmm3");
104    asm __volatile__("movaps %xmm1, %xmm4");
105    asm __volatile__("movaps %xmm2, %xmm5");
106    asm __volatile__("movaps %xmm0, %xmm6");
107    asm __volatile__("movaps %xmm1, %xmm7");
108    asm __volatile__("movaps %xmm1, %xmm8");
109    asm __volatile__("movaps %xmm2, %xmm9");
110    asm __volatile__("movaps %xmm0, %xmm10");
111    asm __volatile__("movaps %xmm1, %xmm11");
112    asm __volatile__("movaps %xmm1, %xmm12");
113    asm __volatile__("movaps %xmm2, %xmm13");
114    asm __volatile__("movaps %xmm0, %xmm14");
115    asm __volatile__("movaps %xmm1, %xmm15");
116    do_fxsave(p, rexw);
117 }
118 
/* Return 1 if byte offset i of a save image is one of the bytes that
   show() masks out as "xx", else 0.

   The selected offsets are the first two bytes of each of the eight
   16-byte slots starting at offset 32, i.e. 32,33, 48,49, ... 144,145.
   main() describes these as the least-significant 16 bits of the FP
   numbers.  Any other i, including negative values, yields 0. */
int isFPLsbs ( int i )
{
   enum {
      FIRST_SLOT   = 32,   /* offset of the first FP register slot */
      SLOT_BYTES   = 16,   /* distance between consecutive slots */
      NUM_SLOTS    = 8,    /* number of FP register slots */
      MASKED_BYTES = 2     /* low-order bytes hidden in each slot */
   };

   /* Range check first, so the modulo below never sees a negative value. */
   if (i < FIRST_SLOT || i >= FIRST_SLOT + NUM_SLOTS * SLOT_BYTES)
      return 0;
   return ((i - FIRST_SLOT) % SLOT_BYTES) < MASKED_BYTES;
}
132 
/* Print the 512 bytes at buf to stdout as a hex dump: 32 rows of 16
   bytes, each row prefixed with the decimal offset of its first byte.
   When xx is non-zero, the bytes selected by isFPLsbs() are printed as
   "xx" instead of their value. */
void show ( unsigned char* buf, int xx )
{
   int row_start, col;
   for (row_start = 0; row_start < 512; row_start += 16) {
      printf("%3d   ", row_start);
      for (col = 0; col < 16; col++) {
         int off = row_start + col;
         if (xx && isFPLsbs(off)) {
            printf("xx ");
         } else {
            printf("%02x ", buf[off]);
         }
      }
      printf("\n");
   }
}
147 
148 
/* Run one save / zero / restore round and print the three resulting
   images.  buf1, buf2 and buf3 are the 512-byte save areas; rexw selects
   the instruction form used by do_fxsave/do_fxrstor; xx is passed on to
   show().

   All three buffers are first filled with 0x55, so any byte still
   holding 0x55 in a dump was not written by the save. */
static void run_fxsave_round ( unsigned char* buf1, unsigned char* buf2,
                               unsigned char* buf3, int rexw, int xx )
{
   memset(buf1, 0x55, 512);
   memset(buf2, 0x55, 512);
   memset(buf3, 0x55, 512);

   /* Load up x87/xmm state and dump it. */
   do_setup_then_fxsave(buf1, rexw);
   printf("\nBEFORE\n");
   show(buf1, xx);

   /* Zeroise x87/xmm state and dump it, to show that the
      regs have been cleared out. */
   do_zeroise();
   do_fxsave(buf2, rexw);
   printf("\nZEROED\n");
   show(buf2, xx);

   /* Reload x87/xmm state from buf1 and dump it in buf3. */
   do_fxrstor(buf1, rexw);
   do_fxsave(buf3, rexw);
   printf("\nRESTORED\n");
   show(buf3, xx);
}

/* Test driver: runs the save / zero / restore round once with the plain
   instruction forms and once with the REX.W forms, printing every image.
   Passing any command-line argument makes show() mask the bytes selected
   by isFPLsbs().  Returns 0 on success, 1 if a buffer could not be
   allocated. */
int main ( int argc, char** argv )
{
   unsigned char* buf1 = memalign16(512);
   unsigned char* buf2 = memalign16(512);
   unsigned char* buf3 = memalign16(512);
   int xx = argc > 1;

   (void)argv;

   /* memalign16 comes from tests/malloc.h and may already abort on
      failure (not visible from here); checking again is harmless. */
   if (buf1 == NULL || buf2 == NULL || buf3 == NULL) {
      fprintf(stderr, "memalign16(512) failed\n");
      free(buf1); free(buf2); free(buf3);
      return 1;
   }

   printf("Re-run with any arg to suppress least-significant\n"
          "   16 bits of FP numbers\n");

   printf("\n-------- FXSAVE non-64 (REX.W == 0) --------\n");
   run_fxsave_round(buf1, buf2, buf3, 0, xx);

   printf("\n-------- FXSAVE 64 (REX.W == 1) --------\n\n");
   run_fxsave_round(buf1, buf2, buf3, 1, xx);

   free(buf1); free(buf2); free(buf3);

   return 0;
}
211