• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
   check the core arithmetic in any detail.  This file checks the 16-bit
   character versions (w is for wide). */
5 
6 #include <string.h>
7 #include <stdio.h>
8 #include <assert.h>
9 
/* Basic scalar types used throughout this test. */
typedef unsigned char          UChar;
typedef unsigned int           UInt;
typedef signed int             Int;
typedef unsigned long long int ULong;

/* A 128-bit vector held as 16 individual bytes. */
typedef UChar V128[16];

/* Byte-sized boolean and its two values. */
typedef UChar Bool;
#define True  ((Bool)1)
#define False ((Bool)0)
18 
/* Print the 16 bytes of *vec to stdout as 32 lowercase hex digits,
   starting with byte 15 and ending with byte 0.  No separator and no
   trailing newline are written. */
void show_V128 ( V128* vec )
{
   const UChar* bytes = *vec;
   Int idx = 16;
   while (idx-- > 0) {
      printf("%02x", (UInt)bytes[idx]);
   }
}
25 
/* Build a 128-bit vector from a 16-character hex "summary" string.
   Each hex digit d (0-9, a-f or A-F) becomes the byte 0xdd.  The last
   character of the string fills byte 0 of *dst and the first character
   fills byte 15.  Asserts that the string is exactly 16 characters long
   and contains only hex digits. */
void expand ( V128* dst, char* summary )
{
   Int pos;
   assert( strlen(summary) == 16 );

   /* Walk the string back to front so that dst is filled from byte 0
      upwards. */
   for (pos = 15; pos >= 0; pos--) {
      UChar ch     = summary[pos];
      UChar nibble = 0;

      if (ch >= '0' && ch <= '9') {
         nibble = ch - '0';
      } else if (ch >= 'A' && ch <= 'F') {
         nibble = ch - 'A' + 10;
      } else if (ch >= 'a' && ch <= 'f') {
         nibble = ch - 'a' + 10;
      } else {
         assert(0);
      }
      assert(nibble < 16);

      /* Replicate the nibble into both halves of the byte. */
      (*dst)[15 - pos] = (UChar)((nibble << 4) | nibble);
   }
}
44 
one_test(char * summL,ULong rdxIN,char * summR,ULong raxIN)45 void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
46 {
47    V128 argL, argR;
48    expand( &argL, summL );
49    expand( &argR, summR );
50    printf("\n");
51    printf("rdx %016llx  argL ", rdxIN);
52    show_V128(&argL);
53    printf("  rax %016llx  argR ", raxIN);
54    show_V128(&argR);
55    printf("\n");
56 
57    ULong block[ 2/*in:argL*/          // 0  0
58                 + 2/*in:argR*/        // 2  16
59                 + 1/*in:rdx*/         // 4  32
60                 + 1/*in:rax*/         // 5  40
61                 + 2/*inout:xmm0*/     // 6  48
62                 + 1/*inout:rcx*/      // 8  64
63                 + 1/*out:rflags*/ ];  // 9  72
64    assert(sizeof(block) == 80);
65 
66    UChar* blockC = (UChar*)&block[0];
67 
68    /* ---------------- ISTRI_4B ---------------- */
69    memset(blockC, 0x55, 80);
70    memcpy(blockC + 0,  &argL,  16);
71    memcpy(blockC + 16, &argR,  16);
72    memcpy(blockC + 24, &rdxIN, 8);
73    memcpy(blockC + 32, &raxIN, 8);
74    memcpy(blockC + 40, &rdxIN, 8);
75    __asm__ __volatile__(
76       "movupd    0(%0), %%xmm2"           "\n\t"
77       "movupd    16(%0), %%xmm13"         "\n\t"
78       "movq      32(%0), %%rdx"           "\n\t"
79       "movq      40(%0), %%rax"           "\n\t"
80       "movupd    48(%0), %%xmm0"          "\n\t"
81       "movw      64(%0), %%cx"            "\n\t"
82       "pcmpistri $0x4B, %%xmm2, %%xmm13"  "\n\t"
83       "movupd    %%xmm0, 48(%0)"          "\n\t"
84       "movw      %%cx, 64(%0)"            "\n\t"
85       "pushfq"                            "\n\t"
86       "popq      %%r15"                   "\n\t"
87       "movq      %%r15, 72(%0)"           "\n\t"
88       : /*out*/
89       : /*in*/"r"(blockC)
90       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
91    );
92    printf("  istri $0x4B:  ");
93    printf("    xmm0 ");
94    show_V128( (V128*)(blockC+48) );
95    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
96 
97    /* ---------------- ISTRI_0B ---------------- */
98    memset(blockC, 0x55, 80);
99    memcpy(blockC + 0,  &argL,  16);
100    memcpy(blockC + 16, &argR,  16);
101    memcpy(blockC + 24, &rdxIN, 8);
102    memcpy(blockC + 32, &raxIN, 8);
103    memcpy(blockC + 40, &rdxIN, 8);
104    __asm__ __volatile__(
105       "movupd    0(%0), %%xmm2"           "\n\t"
106       "movupd    16(%0), %%xmm13"         "\n\t"
107       "movq      32(%0), %%rdx"           "\n\t"
108       "movq      40(%0), %%rax"           "\n\t"
109       "movupd    48(%0), %%xmm0"          "\n\t"
110       "movw      64(%0), %%cx"            "\n\t"
111       "pcmpistri $0x0B, %%xmm2, %%xmm13"  "\n\t"
112       "movupd    %%xmm0, 48(%0)"          "\n\t"
113       "movw      %%cx, 64(%0)"            "\n\t"
114       "pushfq"                            "\n\t"
115       "popq      %%r15"                   "\n\t"
116       "movq      %%r15, 72(%0)"           "\n\t"
117       : /*out*/
118       : /*in*/"r"(blockC)
119       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
120    );
121    printf("  istri $0x0B:  ");
122    printf("    xmm0 ");
123    show_V128( (V128*)(blockC+48) );
124    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
125 
126    /* ---------------- ISTRM_4B ---------------- */
127    memset(blockC, 0x55, 80);
128    memcpy(blockC + 0,  &argL,  16);
129    memcpy(blockC + 16, &argR,  16);
130    memcpy(blockC + 24, &rdxIN, 8);
131    memcpy(blockC + 32, &raxIN, 8);
132    memcpy(blockC + 40, &rdxIN, 8);
133    __asm__ __volatile__(
134       "movupd    0(%0), %%xmm2"           "\n\t"
135       "movupd    16(%0), %%xmm13"         "\n\t"
136       "movq      32(%0), %%rdx"           "\n\t"
137       "movq      40(%0), %%rax"           "\n\t"
138       "movupd    48(%0), %%xmm0"          "\n\t"
139       "movw      64(%0), %%cx"            "\n\t"
140       "pcmpistrm $0x4B, %%xmm2, %%xmm13"  "\n\t"
141       "movupd    %%xmm0, 48(%0)"          "\n\t"
142       "movw      %%cx, 64(%0)"            "\n\t"
143       "pushfq"                            "\n\t"
144       "popq      %%r15"                   "\n\t"
145       "movq      %%r15, 72(%0)"           "\n\t"
146       : /*out*/
147       : /*in*/"r"(blockC)
148       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
149    );
150    printf("  istrm $0x4B:  ");
151    printf("    xmm0 ");
152    show_V128( (V128*)(blockC+48) );
153    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
154 
155    /* ---------------- ISTRM_0B ---------------- */
156    memset(blockC, 0x55, 80);
157    memcpy(blockC + 0,  &argL,  16);
158    memcpy(blockC + 16, &argR,  16);
159    memcpy(blockC + 24, &rdxIN, 8);
160    memcpy(blockC + 32, &raxIN, 8);
161    memcpy(blockC + 40, &rdxIN, 8);
162    __asm__ __volatile__(
163       "movupd    0(%0), %%xmm2"           "\n\t"
164       "movupd    16(%0), %%xmm13"         "\n\t"
165       "movq      32(%0), %%rdx"           "\n\t"
166       "movq      40(%0), %%rax"           "\n\t"
167       "movupd    48(%0), %%xmm0"          "\n\t"
168       "movw      64(%0), %%cx"            "\n\t"
169       "pcmpistrm $0x0B, %%xmm2, %%xmm13"  "\n\t"
170       "movupd    %%xmm0, 48(%0)"          "\n\t"
171       "movw      %%cx, 64(%0)"            "\n\t"
172       "pushfq"                            "\n\t"
173       "popq      %%r15"                   "\n\t"
174       "movq      %%r15, 72(%0)"           "\n\t"
175       : /*out*/
176       : /*in*/"r"(blockC)
177       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
178    );
179    printf("  istrm $0x0B:  ");
180    printf("    xmm0 ");
181    show_V128( (V128*)(blockC+48) );
182    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
183 
184    /* ---------------- ESTRI_4B ---------------- */
185    memset(blockC, 0x55, 80);
186    memcpy(blockC + 0,  &argL,  16);
187    memcpy(blockC + 16, &argR,  16);
188    memcpy(blockC + 24, &rdxIN, 8);
189    memcpy(blockC + 32, &raxIN, 8);
190    memcpy(blockC + 40, &rdxIN, 8);
191    __asm__ __volatile__(
192       "movupd    0(%0), %%xmm2"           "\n\t"
193       "movupd    16(%0), %%xmm13"         "\n\t"
194       "movq      32(%0), %%rdx"           "\n\t"
195       "movq      40(%0), %%rax"           "\n\t"
196       "movupd    48(%0), %%xmm0"          "\n\t"
197       "movw      64(%0), %%cx"            "\n\t"
198       "pcmpestri $0x4B, %%xmm2, %%xmm13"  "\n\t"
199       "movupd    %%xmm0, 48(%0)"          "\n\t"
200       "movw      %%cx, 64(%0)"            "\n\t"
201       "pushfq"                            "\n\t"
202       "popq      %%r15"                   "\n\t"
203       "movq      %%r15, 72(%0)"           "\n\t"
204       : /*out*/
205       : /*in*/"r"(blockC)
206       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
207    );
208    printf("  estri $0x4B:  ");
209    printf("    xmm0 ");
210    show_V128( (V128*)(blockC+48) );
211    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
212 
213    /* ---------------- ESTRI_0B ---------------- */
214    memset(blockC, 0x55, 80);
215    memcpy(blockC + 0,  &argL,  16);
216    memcpy(blockC + 16, &argR,  16);
217    memcpy(blockC + 24, &rdxIN, 8);
218    memcpy(blockC + 32, &raxIN, 8);
219    memcpy(blockC + 40, &rdxIN, 8);
220    __asm__ __volatile__(
221       "movupd    0(%0), %%xmm2"           "\n\t"
222       "movupd    16(%0), %%xmm13"         "\n\t"
223       "movq      32(%0), %%rdx"           "\n\t"
224       "movq      40(%0), %%rax"           "\n\t"
225       "movupd    48(%0), %%xmm0"          "\n\t"
226       "movw      64(%0), %%cx"            "\n\t"
227       "pcmpestri $0x0B, %%xmm2, %%xmm13"  "\n\t"
228       "movupd    %%xmm0, 48(%0)"          "\n\t"
229       "movw      %%cx, 64(%0)"            "\n\t"
230       "pushfq"                            "\n\t"
231       "popq      %%r15"                   "\n\t"
232       "movq      %%r15, 72(%0)"           "\n\t"
233       : /*out*/
234       : /*in*/"r"(blockC)
235       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
236    );
237    printf("  estri $0x0B:  ");
238    printf("    xmm0 ");
239    show_V128( (V128*)(blockC+48) );
240    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
241 
242    /* ---------------- ESTRM_4B ---------------- */
243    memset(blockC, 0x55, 80);
244    memcpy(blockC + 0,  &argL,  16);
245    memcpy(blockC + 16, &argR,  16);
246    memcpy(blockC + 24, &rdxIN, 8);
247    memcpy(blockC + 32, &raxIN, 8);
248    memcpy(blockC + 40, &rdxIN, 8);
249    __asm__ __volatile__(
250       "movupd    0(%0), %%xmm2"           "\n\t"
251       "movupd    16(%0), %%xmm13"         "\n\t"
252       "movq      32(%0), %%rdx"           "\n\t"
253       "movq      40(%0), %%rax"           "\n\t"
254       "movupd    48(%0), %%xmm0"          "\n\t"
255       "movw      64(%0), %%cx"            "\n\t"
256       "pcmpestrm $0x4B, %%xmm2, %%xmm13"  "\n\t"
257       "movupd    %%xmm0, 48(%0)"          "\n\t"
258       "movw      %%cx, 64(%0)"            "\n\t"
259       "pushfq"                            "\n\t"
260       "popq      %%r15"                   "\n\t"
261       "movq      %%r15, 72(%0)"           "\n\t"
262       : /*out*/
263       : /*in*/"r"(blockC)
264       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
265    );
266    printf("  estrm $0x4B:  ");
267    printf("    xmm0 ");
268    show_V128( (V128*)(blockC+48) );
269    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
270 
271    /* ---------------- ESTRM_0B ---------------- */
272    memset(blockC, 0x55, 80);
273    memcpy(blockC + 0,  &argL,  16);
274    memcpy(blockC + 16, &argR,  16);
275    memcpy(blockC + 24, &rdxIN, 8);
276    memcpy(blockC + 32, &raxIN, 8);
277    memcpy(blockC + 40, &rdxIN, 8);
278    __asm__ __volatile__(
279       "movupd    0(%0), %%xmm2"           "\n\t"
280       "movupd    16(%0), %%xmm13"         "\n\t"
281       "movq      32(%0), %%rdx"           "\n\t"
282       "movq      40(%0), %%rax"           "\n\t"
283       "movupd    48(%0), %%xmm0"          "\n\t"
284       "movw      64(%0), %%cx"            "\n\t"
285       "pcmpestrm $0x0B, %%xmm2, %%xmm13"  "\n\t"
286       "movupd    %%xmm0, 48(%0)"          "\n\t"
287       "movw      %%cx, 64(%0)"            "\n\t"
288       "pushfq"                            "\n\t"
289       "popq      %%r15"                   "\n\t"
290       "movq      %%r15, 72(%0)"           "\n\t"
291       : /*out*/
292       : /*in*/"r"(blockC)
293       : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
294    );
295    printf("  estrm $0x0B:  ");
296    printf("    xmm0 ");
297    show_V128( (V128*)(blockC+48) );
298    printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
299 
300 
301 
302 
303 }
304 
main(void)305 int main ( void )
306 {
307    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa00aaaaaa", 0 );
308    one_test("0000000000000000", 0, "aaaaaaaa00aaaaaa", 0 );
309 
310    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
311    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
312    one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
313 
314    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
315    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
316    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
317    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
318 
319    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
320    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
321    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
322    one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
323 
324    one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
325    one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
326    one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
327    one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
328 
329    one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
330    one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
331    one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
332    one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
333 
334    return 0;
335 }
336