1
2 /* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}. Does not
3 check the core arithmetic in any detail. */
4
5 #include <string.h>
6 #include <stdio.h>
7 #include <assert.h>
8
9 typedef unsigned char V128[16];
10 typedef unsigned int UInt;
11 typedef signed int Int;
12 typedef unsigned char UChar;
13 typedef unsigned long long int ULong;
14 typedef UChar Bool;
15 #define False ((Bool)0)
16 #define True ((Bool)1)
17
/* Print the 16 bytes of *vec as 32 lowercase hex digits, starting with
   byte 15 and ending with byte 0.  No separator or newline is emitted. */
void show_V128 ( V128* vec )
{
   const UChar* bytes = *vec;
   Int remaining = 16;
   while (remaining-- > 0) {
      /* After the post-decrement 'remaining' is the index to print. */
      printf("%02x", (UInt)bytes[remaining]);
   }
}
24
/* Build a 16-byte vector from a 16-character summary of hex digits.
   Each digit d (0-9, a-f or A-F) becomes the byte 0xdd.  The string is
   consumed back to front: the last character fills byte 0 of *dst and
   the first character fills byte 15.  Asserts if the summary is not
   exactly 16 characters long or contains a non-hex character. */
void expand ( V128* dst, char* summary )
{
   Int lane;
   assert( strlen(summary) == 16 );
   for (lane = 0; lane < 16; lane++) {
      const UChar digit = (UChar)summary[15 - lane];
      UInt nibble = 0;
      UInt byte;

      if ('0' <= digit && digit <= '9') {
         nibble = (UInt)(digit - '0');
      } else if ('a' <= digit && digit <= 'f') {
         nibble = 10u + (UInt)(digit - 'a');
      } else if ('A' <= digit && digit <= 'F') {
         nibble = 10u + (UInt)(digit - 'A');
      } else {
         assert(0);
      }

      assert(nibble < 16);
      /* Replicate the nibble into both halves of the byte. */
      byte = (nibble << 4) | nibble;
      assert(byte < 256);
      (*dst)[lane] = (UChar)byte;
   }
}
43
one_test(char * summL,ULong rdxIN,char * summR,ULong raxIN)44 void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
45 {
46 V128 argL, argR;
47 expand( &argL, summL );
48 expand( &argR, summR );
49 printf("\n");
50 printf("rdx %016llx argL ", rdxIN);
51 show_V128(&argL);
52 printf(" rax %016llx argR ", raxIN);
53 show_V128(&argR);
54 printf("\n");
55
56 ULong block[ 2/*in:argL*/ // 0 0
57 + 2/*in:argR*/ // 2 16
58 + 1/*in:rdx*/ // 4 32
59 + 1/*in:rax*/ // 5 40
60 + 2/*inout:xmm0*/ // 6 48
61 + 1/*inout:rcx*/ // 8 64
62 + 1/*out:rflags*/ ]; // 9 72
63 assert(sizeof(block) == 80);
64
65 UChar* blockC = (UChar*)&block[0];
66
67 /* ---------------- ISTRI_4A ---------------- */
68 memset(blockC, 0x55, 80);
69 memcpy(blockC + 0, &argL, 16);
70 memcpy(blockC + 16, &argR, 16);
71 memcpy(blockC + 24, &rdxIN, 8);
72 memcpy(blockC + 32, &raxIN, 8);
73 memcpy(blockC + 40, &rdxIN, 8);
74 __asm__ __volatile__(
75 "movupd 0(%0), %%xmm2" "\n\t"
76 "movupd 16(%0), %%xmm13" "\n\t"
77 "movq 32(%0), %%rdx" "\n\t"
78 "movq 40(%0), %%rax" "\n\t"
79 "movupd 48(%0), %%xmm0" "\n\t"
80 "movw 64(%0), %%rcx" "\n\t"
81 "pcmpistri $0x4A, %%xmm2, %%xmm13" "\n\t"
82 "movupd %%xmm0, 48(%0)" "\n\t"
83 "movw %%rcx, 64(%0)" "\n\t"
84 "pushfq" "\n\t"
85 "popq %%r15" "\n\t"
86 "movq %%r15, 72(%0)" "\n\t"
87 : /*out*/
88 : /*in*/"r"(blockC)
89 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
90 );
91 printf(" istri $0x4A: ");
92 printf(" xmm0 ");
93 show_V128( (V128*)(blockC+48) );
94 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
95
96 /* ---------------- ISTRI_0A ---------------- */
97 memset(blockC, 0x55, 80);
98 memcpy(blockC + 0, &argL, 16);
99 memcpy(blockC + 16, &argR, 16);
100 memcpy(blockC + 24, &rdxIN, 8);
101 memcpy(blockC + 32, &raxIN, 8);
102 memcpy(blockC + 40, &rdxIN, 8);
103 __asm__ __volatile__(
104 "movupd 0(%0), %%xmm2" "\n\t"
105 "movupd 16(%0), %%xmm13" "\n\t"
106 "movq 32(%0), %%rdx" "\n\t"
107 "movq 40(%0), %%rax" "\n\t"
108 "movupd 48(%0), %%xmm0" "\n\t"
109 "movw 64(%0), %%rcx" "\n\t"
110 "pcmpistri $0x0A, %%xmm2, %%xmm13" "\n\t"
111 "movupd %%xmm0, 48(%0)" "\n\t"
112 "movw %%rcx, 64(%0)" "\n\t"
113 "pushfq" "\n\t"
114 "popq %%r15" "\n\t"
115 "movq %%r15, 72(%0)" "\n\t"
116 : /*out*/
117 : /*in*/"r"(blockC)
118 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
119 );
120 printf(" istri $0x0A: ");
121 printf(" xmm0 ");
122 show_V128( (V128*)(blockC+48) );
123 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
124
125 /* ---------------- ISTRM_4A ---------------- */
126 memset(blockC, 0x55, 80);
127 memcpy(blockC + 0, &argL, 16);
128 memcpy(blockC + 16, &argR, 16);
129 memcpy(blockC + 24, &rdxIN, 8);
130 memcpy(blockC + 32, &raxIN, 8);
131 memcpy(blockC + 40, &rdxIN, 8);
132 __asm__ __volatile__(
133 "movupd 0(%0), %%xmm2" "\n\t"
134 "movupd 16(%0), %%xmm13" "\n\t"
135 "movq 32(%0), %%rdx" "\n\t"
136 "movq 40(%0), %%rax" "\n\t"
137 "movupd 48(%0), %%xmm0" "\n\t"
138 "movw 64(%0), %%rcx" "\n\t"
139 "pcmpistrm $0x4A, %%xmm2, %%xmm13" "\n\t"
140 "movupd %%xmm0, 48(%0)" "\n\t"
141 "movw %%rcx, 64(%0)" "\n\t"
142 "pushfq" "\n\t"
143 "popq %%r15" "\n\t"
144 "movq %%r15, 72(%0)" "\n\t"
145 : /*out*/
146 : /*in*/"r"(blockC)
147 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
148 );
149 printf(" istrm $0x4A: ");
150 printf(" xmm0 ");
151 show_V128( (V128*)(blockC+48) );
152 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
153
154 /* ---------------- ISTRM_0A ---------------- */
155 memset(blockC, 0x55, 80);
156 memcpy(blockC + 0, &argL, 16);
157 memcpy(blockC + 16, &argR, 16);
158 memcpy(blockC + 24, &rdxIN, 8);
159 memcpy(blockC + 32, &raxIN, 8);
160 memcpy(blockC + 40, &rdxIN, 8);
161 __asm__ __volatile__(
162 "movupd 0(%0), %%xmm2" "\n\t"
163 "movupd 16(%0), %%xmm13" "\n\t"
164 "movq 32(%0), %%rdx" "\n\t"
165 "movq 40(%0), %%rax" "\n\t"
166 "movupd 48(%0), %%xmm0" "\n\t"
167 "movw 64(%0), %%rcx" "\n\t"
168 "pcmpistrm $0x0A, %%xmm2, %%xmm13" "\n\t"
169 "movupd %%xmm0, 48(%0)" "\n\t"
170 "movw %%rcx, 64(%0)" "\n\t"
171 "pushfq" "\n\t"
172 "popq %%r15" "\n\t"
173 "movq %%r15, 72(%0)" "\n\t"
174 : /*out*/
175 : /*in*/"r"(blockC)
176 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
177 );
178 printf(" istrm $0x0A: ");
179 printf(" xmm0 ");
180 show_V128( (V128*)(blockC+48) );
181 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
182
183 /* ---------------- ESTRI_4A ---------------- */
184 memset(blockC, 0x55, 80);
185 memcpy(blockC + 0, &argL, 16);
186 memcpy(blockC + 16, &argR, 16);
187 memcpy(blockC + 24, &rdxIN, 8);
188 memcpy(blockC + 32, &raxIN, 8);
189 memcpy(blockC + 40, &rdxIN, 8);
190 __asm__ __volatile__(
191 "movupd 0(%0), %%xmm2" "\n\t"
192 "movupd 16(%0), %%xmm13" "\n\t"
193 "movq 32(%0), %%rdx" "\n\t"
194 "movq 40(%0), %%rax" "\n\t"
195 "movupd 48(%0), %%xmm0" "\n\t"
196 "movw 64(%0), %%rcx" "\n\t"
197 "pcmpestri $0x4A, %%xmm2, %%xmm13" "\n\t"
198 "movupd %%xmm0, 48(%0)" "\n\t"
199 "movw %%rcx, 64(%0)" "\n\t"
200 "pushfq" "\n\t"
201 "popq %%r15" "\n\t"
202 "movq %%r15, 72(%0)" "\n\t"
203 : /*out*/
204 : /*in*/"r"(blockC)
205 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
206 );
207 printf(" estri $0x4A: ");
208 printf(" xmm0 ");
209 show_V128( (V128*)(blockC+48) );
210 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
211
212 /* ---------------- ESTRI_0A ---------------- */
213 memset(blockC, 0x55, 80);
214 memcpy(blockC + 0, &argL, 16);
215 memcpy(blockC + 16, &argR, 16);
216 memcpy(blockC + 24, &rdxIN, 8);
217 memcpy(blockC + 32, &raxIN, 8);
218 memcpy(blockC + 40, &rdxIN, 8);
219 __asm__ __volatile__(
220 "movupd 0(%0), %%xmm2" "\n\t"
221 "movupd 16(%0), %%xmm13" "\n\t"
222 "movq 32(%0), %%rdx" "\n\t"
223 "movq 40(%0), %%rax" "\n\t"
224 "movupd 48(%0), %%xmm0" "\n\t"
225 "movw 64(%0), %%rcx" "\n\t"
226 "pcmpestri $0x0A, %%xmm2, %%xmm13" "\n\t"
227 "movupd %%xmm0, 48(%0)" "\n\t"
228 "movw %%rcx, 64(%0)" "\n\t"
229 "pushfq" "\n\t"
230 "popq %%r15" "\n\t"
231 "movq %%r15, 72(%0)" "\n\t"
232 : /*out*/
233 : /*in*/"r"(blockC)
234 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
235 );
236 printf(" estri $0x0A: ");
237 printf(" xmm0 ");
238 show_V128( (V128*)(blockC+48) );
239 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
240
241 /* ---------------- ESTRM_4A ---------------- */
242 memset(blockC, 0x55, 80);
243 memcpy(blockC + 0, &argL, 16);
244 memcpy(blockC + 16, &argR, 16);
245 memcpy(blockC + 24, &rdxIN, 8);
246 memcpy(blockC + 32, &raxIN, 8);
247 memcpy(blockC + 40, &rdxIN, 8);
248 __asm__ __volatile__(
249 "movupd 0(%0), %%xmm2" "\n\t"
250 "movupd 16(%0), %%xmm13" "\n\t"
251 "movq 32(%0), %%rdx" "\n\t"
252 "movq 40(%0), %%rax" "\n\t"
253 "movupd 48(%0), %%xmm0" "\n\t"
254 "movw 64(%0), %%rcx" "\n\t"
255 "pcmpestrm $0x4A, %%xmm2, %%xmm13" "\n\t"
256 "movupd %%xmm0, 48(%0)" "\n\t"
257 "movw %%rcx, 64(%0)" "\n\t"
258 "pushfq" "\n\t"
259 "popq %%r15" "\n\t"
260 "movq %%r15, 72(%0)" "\n\t"
261 : /*out*/
262 : /*in*/"r"(blockC)
263 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
264 );
265 printf(" estrm $0x4A: ");
266 printf(" xmm0 ");
267 show_V128( (V128*)(blockC+48) );
268 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
269
270 /* ---------------- ESTRM_0A ---------------- */
271 memset(blockC, 0x55, 80);
272 memcpy(blockC + 0, &argL, 16);
273 memcpy(blockC + 16, &argR, 16);
274 memcpy(blockC + 24, &rdxIN, 8);
275 memcpy(blockC + 32, &raxIN, 8);
276 memcpy(blockC + 40, &rdxIN, 8);
277 __asm__ __volatile__(
278 "movupd 0(%0), %%xmm2" "\n\t"
279 "movupd 16(%0), %%xmm13" "\n\t"
280 "movq 32(%0), %%rdx" "\n\t"
281 "movq 40(%0), %%rax" "\n\t"
282 "movupd 48(%0), %%xmm0" "\n\t"
283 "movw 64(%0), %%rcx" "\n\t"
284 "pcmpestrm $0x0A, %%xmm2, %%xmm13" "\n\t"
285 "movupd %%xmm0, 48(%0)" "\n\t"
286 "movw %%rcx, 64(%0)" "\n\t"
287 "pushfq" "\n\t"
288 "popq %%r15" "\n\t"
289 "movq %%r15, 72(%0)" "\n\t"
290 : /*out*/
291 : /*in*/"r"(blockC)
292 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
293 );
294 printf(" estrm $0x0A: ");
295 printf(" xmm0 ");
296 show_V128( (V128*)(blockC+48) );
297 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
298
299
300
301
302 }
303
main(void)304 int main ( void )
305 {
306 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
307 one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
308
309 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
310 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
311 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
312
313 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
314 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
315 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
316 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
317
318 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
319 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
320 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
321 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
322
323 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
324 one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
325 one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
326 one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
327
328 one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
329 one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
330 one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
331 one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
332
333 return 0;
334 }
335
336 /* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}. Does not
337 check the core arithmetic in any detail. */
338
339 #include <string.h>
340 #include <stdio.h>
341 #include <assert.h>
342
343 typedef unsigned char V128[16];
344 typedef unsigned int UInt;
345 typedef signed int Int;
346 typedef unsigned char UChar;
347 typedef unsigned long long int ULong;
348 typedef UChar Bool;
349 #define False ((Bool)0)
350 #define True ((Bool)1)
351
/* Print the 16 bytes of *vec as 32 lowercase hex digits, starting with
   byte 15 and ending with byte 0.  No separator or newline is emitted. */
void show_V128 ( V128* vec )
{
   const UChar* bytes = *vec;
   Int remaining = 16;
   while (remaining-- > 0) {
      /* After the post-decrement 'remaining' is the index to print. */
      printf("%02x", (UInt)bytes[remaining]);
   }
}
358
/* Build a 16-byte vector from a 16-character summary of hex digits.
   Each digit d (0-9, a-f or A-F) becomes the byte 0xdd.  The string is
   consumed back to front: the last character fills byte 0 of *dst and
   the first character fills byte 15.  Asserts if the summary is not
   exactly 16 characters long or contains a non-hex character. */
void expand ( V128* dst, char* summary )
{
   Int lane;
   assert( strlen(summary) == 16 );
   for (lane = 0; lane < 16; lane++) {
      const UChar digit = (UChar)summary[15 - lane];
      UInt nibble = 0;
      UInt byte;

      if ('0' <= digit && digit <= '9') {
         nibble = (UInt)(digit - '0');
      } else if ('a' <= digit && digit <= 'f') {
         nibble = 10u + (UInt)(digit - 'a');
      } else if ('A' <= digit && digit <= 'F') {
         nibble = 10u + (UInt)(digit - 'A');
      } else {
         assert(0);
      }

      assert(nibble < 16);
      /* Replicate the nibble into both halves of the byte. */
      byte = (nibble << 4) | nibble;
      assert(byte < 256);
      (*dst)[lane] = (UChar)byte;
   }
}
377
one_test(char * summL,ULong rdxIN,char * summR,ULong raxIN)378 void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
379 {
380 V128 argL, argR;
381 expand( &argL, summL );
382 expand( &argR, summR );
383 printf("\n");
384 printf("rdx %016llx argL ", rdxIN);
385 show_V128(&argL);
386 printf(" rax %016llx argR ", raxIN);
387 show_V128(&argR);
388 printf("\n");
389
390 ULong block[ 2/*in:argL*/ // 0 0
391 + 2/*in:argR*/ // 2 16
392 + 1/*in:rdx*/ // 4 32
393 + 1/*in:rax*/ // 5 40
394 + 2/*inout:xmm0*/ // 6 48
395 + 1/*inout:rcx*/ // 8 64
396 + 1/*out:rflags*/ ]; // 9 72
397 assert(sizeof(block) == 80);
398
399 UChar* blockC = (UChar*)&block[0];
400
401 /* ---------------- ISTRI_4A ---------------- */
402 memset(blockC, 0x55, 80);
403 memcpy(blockC + 0, &argL, 16);
404 memcpy(blockC + 16, &argR, 16);
405 memcpy(blockC + 24, &rdxIN, 8);
406 memcpy(blockC + 32, &raxIN, 8);
407 memcpy(blockC + 40, &rdxIN, 8);
408 __asm__ __volatile__(
409 "movupd 0(%0), %%xmm2" "\n\t"
410 "movupd 16(%0), %%xmm13" "\n\t"
411 "movq 32(%0), %%rdx" "\n\t"
412 "movq 40(%0), %%rax" "\n\t"
413 "movupd 48(%0), %%xmm0" "\n\t"
414 "movw 64(%0), %%rcx" "\n\t"
415 "pcmpistri $0x4A, %%xmm2, %%xmm13" "\n\t"
416 "movupd %%xmm0, 48(%0)" "\n\t"
417 "movw %%rcx, 64(%0)" "\n\t"
418 "pushfq" "\n\t"
419 "popq %%r15" "\n\t"
420 "movq %%r15, 72(%0)" "\n\t"
421 : /*out*/
422 : /*in*/"r"(blockC)
423 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
424 );
425 printf(" istri $0x4A: ");
426 printf(" xmm0 ");
427 show_V128( (V128*)(blockC+48) );
428 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
429
430 /* ---------------- ISTRI_0A ---------------- */
431 memset(blockC, 0x55, 80);
432 memcpy(blockC + 0, &argL, 16);
433 memcpy(blockC + 16, &argR, 16);
434 memcpy(blockC + 24, &rdxIN, 8);
435 memcpy(blockC + 32, &raxIN, 8);
436 memcpy(blockC + 40, &rdxIN, 8);
437 __asm__ __volatile__(
438 "movupd 0(%0), %%xmm2" "\n\t"
439 "movupd 16(%0), %%xmm13" "\n\t"
440 "movq 32(%0), %%rdx" "\n\t"
441 "movq 40(%0), %%rax" "\n\t"
442 "movupd 48(%0), %%xmm0" "\n\t"
443 "movw 64(%0), %%rcx" "\n\t"
444 "pcmpistri $0x0A, %%xmm2, %%xmm13" "\n\t"
445 "movupd %%xmm0, 48(%0)" "\n\t"
446 "movw %%rcx, 64(%0)" "\n\t"
447 "pushfq" "\n\t"
448 "popq %%r15" "\n\t"
449 "movq %%r15, 72(%0)" "\n\t"
450 : /*out*/
451 : /*in*/"r"(blockC)
452 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
453 );
454 printf(" istri $0x0A: ");
455 printf(" xmm0 ");
456 show_V128( (V128*)(blockC+48) );
457 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
458
459 /* ---------------- ISTRM_4A ---------------- */
460 memset(blockC, 0x55, 80);
461 memcpy(blockC + 0, &argL, 16);
462 memcpy(blockC + 16, &argR, 16);
463 memcpy(blockC + 24, &rdxIN, 8);
464 memcpy(blockC + 32, &raxIN, 8);
465 memcpy(blockC + 40, &rdxIN, 8);
466 __asm__ __volatile__(
467 "movupd 0(%0), %%xmm2" "\n\t"
468 "movupd 16(%0), %%xmm13" "\n\t"
469 "movq 32(%0), %%rdx" "\n\t"
470 "movq 40(%0), %%rax" "\n\t"
471 "movupd 48(%0), %%xmm0" "\n\t"
472 "movw 64(%0), %%rcx" "\n\t"
473 "pcmpistrm $0x4A, %%xmm2, %%xmm13" "\n\t"
474 "movupd %%xmm0, 48(%0)" "\n\t"
475 "movw %%rcx, 64(%0)" "\n\t"
476 "pushfq" "\n\t"
477 "popq %%r15" "\n\t"
478 "movq %%r15, 72(%0)" "\n\t"
479 : /*out*/
480 : /*in*/"r"(blockC)
481 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
482 );
483 printf(" istrm $0x4A: ");
484 printf(" xmm0 ");
485 show_V128( (V128*)(blockC+48) );
486 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
487
488 /* ---------------- ISTRM_0A ---------------- */
489 memset(blockC, 0x55, 80);
490 memcpy(blockC + 0, &argL, 16);
491 memcpy(blockC + 16, &argR, 16);
492 memcpy(blockC + 24, &rdxIN, 8);
493 memcpy(blockC + 32, &raxIN, 8);
494 memcpy(blockC + 40, &rdxIN, 8);
495 __asm__ __volatile__(
496 "movupd 0(%0), %%xmm2" "\n\t"
497 "movupd 16(%0), %%xmm13" "\n\t"
498 "movq 32(%0), %%rdx" "\n\t"
499 "movq 40(%0), %%rax" "\n\t"
500 "movupd 48(%0), %%xmm0" "\n\t"
501 "movw 64(%0), %%rcx" "\n\t"
502 "pcmpistrm $0x0A, %%xmm2, %%xmm13" "\n\t"
503 "movupd %%xmm0, 48(%0)" "\n\t"
504 "movw %%rcx, 64(%0)" "\n\t"
505 "pushfq" "\n\t"
506 "popq %%r15" "\n\t"
507 "movq %%r15, 72(%0)" "\n\t"
508 : /*out*/
509 : /*in*/"r"(blockC)
510 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
511 );
512 printf(" istrm $0x0A: ");
513 printf(" xmm0 ");
514 show_V128( (V128*)(blockC+48) );
515 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
516
517 /* ---------------- ESTRI_4A ---------------- */
518 memset(blockC, 0x55, 80);
519 memcpy(blockC + 0, &argL, 16);
520 memcpy(blockC + 16, &argR, 16);
521 memcpy(blockC + 24, &rdxIN, 8);
522 memcpy(blockC + 32, &raxIN, 8);
523 memcpy(blockC + 40, &rdxIN, 8);
524 __asm__ __volatile__(
525 "movupd 0(%0), %%xmm2" "\n\t"
526 "movupd 16(%0), %%xmm13" "\n\t"
527 "movq 32(%0), %%rdx" "\n\t"
528 "movq 40(%0), %%rax" "\n\t"
529 "movupd 48(%0), %%xmm0" "\n\t"
530 "movw 64(%0), %%rcx" "\n\t"
531 "pcmpestri $0x4A, %%xmm2, %%xmm13" "\n\t"
532 "movupd %%xmm0, 48(%0)" "\n\t"
533 "movw %%rcx, 64(%0)" "\n\t"
534 "pushfq" "\n\t"
535 "popq %%r15" "\n\t"
536 "movq %%r15, 72(%0)" "\n\t"
537 : /*out*/
538 : /*in*/"r"(blockC)
539 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
540 );
541 printf(" estri $0x4A: ");
542 printf(" xmm0 ");
543 show_V128( (V128*)(blockC+48) );
544 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
545
546 /* ---------------- ESTRI_0A ---------------- */
547 memset(blockC, 0x55, 80);
548 memcpy(blockC + 0, &argL, 16);
549 memcpy(blockC + 16, &argR, 16);
550 memcpy(blockC + 24, &rdxIN, 8);
551 memcpy(blockC + 32, &raxIN, 8);
552 memcpy(blockC + 40, &rdxIN, 8);
553 __asm__ __volatile__(
554 "movupd 0(%0), %%xmm2" "\n\t"
555 "movupd 16(%0), %%xmm13" "\n\t"
556 "movq 32(%0), %%rdx" "\n\t"
557 "movq 40(%0), %%rax" "\n\t"
558 "movupd 48(%0), %%xmm0" "\n\t"
559 "movw 64(%0), %%rcx" "\n\t"
560 "pcmpestri $0x0A, %%xmm2, %%xmm13" "\n\t"
561 "movupd %%xmm0, 48(%0)" "\n\t"
562 "movw %%rcx, 64(%0)" "\n\t"
563 "pushfq" "\n\t"
564 "popq %%r15" "\n\t"
565 "movq %%r15, 72(%0)" "\n\t"
566 : /*out*/
567 : /*in*/"r"(blockC)
568 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
569 );
570 printf(" estri $0x0A: ");
571 printf(" xmm0 ");
572 show_V128( (V128*)(blockC+48) );
573 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
574
575 /* ---------------- ESTRM_4A ---------------- */
576 memset(blockC, 0x55, 80);
577 memcpy(blockC + 0, &argL, 16);
578 memcpy(blockC + 16, &argR, 16);
579 memcpy(blockC + 24, &rdxIN, 8);
580 memcpy(blockC + 32, &raxIN, 8);
581 memcpy(blockC + 40, &rdxIN, 8);
582 __asm__ __volatile__(
583 "movupd 0(%0), %%xmm2" "\n\t"
584 "movupd 16(%0), %%xmm13" "\n\t"
585 "movq 32(%0), %%rdx" "\n\t"
586 "movq 40(%0), %%rax" "\n\t"
587 "movupd 48(%0), %%xmm0" "\n\t"
588 "movw 64(%0), %%rcx" "\n\t"
589 "pcmpestrm $0x4A, %%xmm2, %%xmm13" "\n\t"
590 "movupd %%xmm0, 48(%0)" "\n\t"
591 "movw %%rcx, 64(%0)" "\n\t"
592 "pushfq" "\n\t"
593 "popq %%r15" "\n\t"
594 "movq %%r15, 72(%0)" "\n\t"
595 : /*out*/
596 : /*in*/"r"(blockC)
597 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
598 );
599 printf(" estrm $0x4A: ");
600 printf(" xmm0 ");
601 show_V128( (V128*)(blockC+48) );
602 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
603
604 /* ---------------- ESTRM_0A ---------------- */
605 memset(blockC, 0x55, 80);
606 memcpy(blockC + 0, &argL, 16);
607 memcpy(blockC + 16, &argR, 16);
608 memcpy(blockC + 24, &rdxIN, 8);
609 memcpy(blockC + 32, &raxIN, 8);
610 memcpy(blockC + 40, &rdxIN, 8);
611 __asm__ __volatile__(
612 "movupd 0(%0), %%xmm2" "\n\t"
613 "movupd 16(%0), %%xmm13" "\n\t"
614 "movq 32(%0), %%rdx" "\n\t"
615 "movq 40(%0), %%rax" "\n\t"
616 "movupd 48(%0), %%xmm0" "\n\t"
617 "movw 64(%0), %%rcx" "\n\t"
618 "pcmpestrm $0x0A, %%xmm2, %%xmm13" "\n\t"
619 "movupd %%xmm0, 48(%0)" "\n\t"
620 "movw %%rcx, 64(%0)" "\n\t"
621 "pushfq" "\n\t"
622 "popq %%r15" "\n\t"
623 "movq %%r15, 72(%0)" "\n\t"
624 : /*out*/
625 : /*in*/"r"(blockC)
626 : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
627 );
628 printf(" estrm $0x0A: ");
629 printf(" xmm0 ");
630 show_V128( (V128*)(blockC+48) );
631 printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
632
633
634
635
636 }
637
main(void)638 int main ( void )
639 {
640 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
641 one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
642
643 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
644 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
645 one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
646
647 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
648 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
649 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
650 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
651
652 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
653 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
654 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
655 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
656
657 one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
658 one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
659 one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
660 one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
661
662 one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
663 one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
664 one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
665 one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
666
667 return 0;
668 }
669