1
2 /* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
3 pcmpistri to drive it. Does not check the e-vs-i or i-vs-m
4 aspect. */
5
6 #include <string.h>
7 #include <stdio.h>
8 #include <assert.h>
9
/* Scalar aliases used throughout this file. */
typedef unsigned int           UInt;
typedef signed int             Int;
typedef unsigned char          UChar;
typedef signed char            Char;
typedef unsigned long long int ULong;

/* Boolean type (one byte wide) and its two values. */
typedef UChar Bool;
#define True  ((Bool)1)
#define False ((Bool)0)

/* A vector operand, addressable either as 16 bytes or as four UInt
   lanes.  (A union is used rather than a bare 16-byte array so that
   the lane view is available.) */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;
26
/* Bit positions of the C, P, A, Z, S and O condition flags, listed
   from least to most significant. */
#define SHIFT_C 0
#define SHIFT_P 2
#define SHIFT_A 4
#define SHIFT_Z 6
#define SHIFT_S 7
#define SHIFT_O 11

/* Single-bit masks for the same flags (unsigned long long typed). */
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_O (1ULL << SHIFT_O)
40
41
/* Count the leading zero bits in the low 32 bits of x.

   Returns a value in 0 .. 32; the result is 32 exactly when x == 0.

   This is a plain binary search done entirely in unsigned arithmetic.
   It deliberately avoids converting large unsigned values to a signed
   type and right-shifting negative numbers, both of which have
   implementation-defined results in C. */
UInt clz32 ( UInt x )
{
   UInt n = 0;

   x &= 0xFFFFFFFFu;   /* only the low 32 bits take part */
   if (x == 0) {
      return 32;
   }

   /* At each step, if the top half of the remaining window is empty,
      account for it and slide the lower half up. */
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }

   return n;
}
65
/* Count the trailing zero bits of x, expressed in terms of clz32.
   For x == 0 the mask below is all ones, so the result is
   32 - clz32(all ones). */
UInt ctz32 ( UInt x )
{
   /* Ones exactly in the positions below the lowest set bit of x. */
   UInt below_lowest = (x - 1) & ~x;
   UInt leading      = clz32(below_lowest);
   return 32 - leading;
}
70
/* Build a 16-byte vector from a 16-character hexadecimal summary.

   dst      receives the vector.
   summary  must be exactly 16 hex digits (either case); this is
            checked with assert.  It is only read, never modified.

   The last character of the summary becomes byte 0 of the vector, the
   first character becomes byte 15.  Each hex digit d is expanded to
   the byte 0xdd, so '0' yields a zero byte and 'a' yields 0xAA. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar nibble = 0;
      UChar c      = (UChar)summary[15-i];

      if      (c >= '0' && c <= '9') { nibble = (UChar)(c - '0'); }
      else if (c >= 'A' && c <= 'F') { nibble = (UChar)(c - 'A' + 10); }
      else if (c >= 'a' && c <= 'f') { nibble = (UChar)(c - 'a' + 10); }
      else assert(0);   /* not a hex digit */

      assert(nibble < 16);
      /* Replicate the digit into both halves of the byte. */
      dst->uChar[i] = (UChar)((nibble << 4) | nibble);
   }
}
89
/* Run one test case and print a line comparing two implementations.

   which         two-character label for the imm8 under test
   h_fn, s_fn    the two implementations to compare; both receive the
                 same pair of expanded vectors
   summL, summR  16-hex-digit summaries of the two operands

   The printed line ends in "!!!!" when the two results differ. */
void try_istri ( char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 char* summL, char* summR )
{
   V128        vecL, vecR;
   UInt        first, second;
   const char* verdict;

   assert(strlen(which) == 2);

   expand(&vecL, summL);
   expand(&vecR, summR);

   first  = h_fn(&vecL, &vecR);
   second = s_fn(&vecL, &vecR);

   if (first == second) {
      verdict = "";
   } else {
      verdict = "!!!!";
   }

   printf("istri %s %s %s -> %08x %08x %s\n",
          which, summL, summR, first, second, verdict);
}
104
zmask_from_V128(V128 * arg)105 UInt zmask_from_V128 ( V128* arg )
106 {
107 UInt i, res = 0;
108 for (i = 0; i < 16; i++) {
109 res |= ((arg->uChar[i] == 0) ? 1 : 0) << i;
110 }
111 return res;
112 }
113
114 //////////////////////////////////////////////////////////
115 // //
116 // GENERAL //
117 // //
118 //////////////////////////////////////////////////////////
119
120
121 /* Given partial results from a pcmpXstrX operation (intRes1,
122 basically), generate an I format (index value for ECX) output, and
123 also the new OSZACP flags.
124 */
125 static
pcmpXstrX_WRK_gen_output_fmt_I(V128 * resV,UInt * resOSZACP,UInt intRes1,UInt zmaskL,UInt zmaskR,UInt validL,UInt pol,UInt idx)126 void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
127 /*OUT*/UInt* resOSZACP,
128 UInt intRes1,
129 UInt zmaskL, UInt zmaskR,
130 UInt validL,
131 UInt pol, UInt idx )
132 {
133 assert((pol >> 2) == 0);
134 assert((idx >> 1) == 0);
135
136 UInt intRes2 = 0;
137 switch (pol) {
138 case 0: intRes2 = intRes1; break; // pol +
139 case 1: intRes2 = ~intRes1; break; // pol -
140 case 2: intRes2 = intRes1; break; // pol m+
141 case 3: intRes2 = intRes1 ^ validL; break; // pol m-
142 }
143 intRes2 &= 0xFFFF;
144
145 // generate ecx value
146 UInt newECX = 0;
147 if (idx) {
148 // index of ms-1-bit
149 newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
150 } else {
151 // index of ls-1-bit
152 newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
153 }
154
155 *(UInt*)(&resV[0]) = newECX;
156
157 // generate new flags, common to all ISTRI and ISTRM cases
158 *resOSZACP // A, P are zero
159 = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
160 | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
161 | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
162 | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
163 }
164
165
166 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
167 variants.
168
169 For xSTRI variants, the new ECX value is placed in the 32 bits
170 pointed to by *resV. For xSTRM variants, the result is a 128 bit
171 value and is placed at *resV in the obvious way.
172
173 For all variants, the new OSZACP value is placed at *resOSZACP.
174
175 argLV and argRV are the vector args. The caller must prepare a
176 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
177 must be 1 for each zero byte of the respective arg. For ESTRx
178 variants this is derived from the explicit length indication, and
179 must be 0 in all places except at the bit index corresponding to
180 the valid length (0 .. 16). If the valid length is 16 then the
181 mask must be all zeroes. In all cases, bits 31:16 must be zero.
182
183 imm8 is the original immediate from the instruction. isSTRM
184 indicates whether this is a xSTRM or xSTRI variant, which controls
185 how much of *res is written.
186
187 If the given imm8 case can be handled, the return value is True.
188 If not, False is returned, and neither *res nor *resOSZACP are
189 altered.
190 */
191
Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
                     /*OUT*/UInt* resOSZACP,
                     V128* argLV, V128* argRV,
                     UInt zmaskL, UInt zmaskR,
                     UInt imm8, Bool isSTRM )
{
   assert(imm8 < 0x80);
   assert((zmaskL >> 16) == 0);
   assert((zmaskR >> 16) == 0);

   /* Only imm8 values that have actually been validated are accepted;
      everything else is refused, even if it would probably work. */
   switch (imm8) {
      case 0x00: case 0x02:
      case 0x08: case 0x0A: case 0x0C: case 0x0E:
      case 0x12: case 0x14: case 0x1A:
      case 0x30: case 0x34: case 0x38: case 0x3A:
      case 0x40: case 0x44: case 0x46: case 0x4A:
         break;
      default:
         return False;
   }

   const UInt fmt = imm8 & 3;          /* imm8[1:0]  data format       */
   const UInt agg = (imm8 >> 2) & 3;   /* imm8[3:2]  aggregation fn    */
   const UInt pol = (imm8 >> 4) & 3;   /* imm8[5:4]  polarity          */
   const UInt idx = (imm8 >> 6) & 1;   /* imm8[6]    1==msb/bytemask   */

   /* Only the index-producing form on byte data (unsigned, fmt 0, or
      signed, fmt 2) is implemented. */
   if (isSTRM || (fmt != 0 && fmt != 2)) {
      return False;
   }

   /* Byte views of the operands, unsigned and signed. */
   const UChar* ubL = (const UChar*)argLV;
   const UChar* ubR = (const UChar*)argRV;
   const Char*  sbL = (const Char*)argLV;
   const Char*  sbR = (const Char*)argRV;

   /* Bit i is set when element i lies before the first zero element:
      not(left(zmask)). */
   const UInt validL = ~(zmaskL | -zmaskL);
   const UInt validR = ~(zmaskR | -zmaskR);

   UInt intRes1 = 0;
   UInt pos, k;

   switch (agg) {

      /*-- set membership: argL is the string, argR the charset --*/
      case 0:
         for (pos = 0; pos < 16; pos++) {
            if ((validL & (1u << pos)) == 0) {
               break;   /* ran off the end of the string */
            }
            UInt found = 0;
            for (k = 0; k < 16 && (validR & (1u << k)) != 0; k++) {
               if (ubR[k] == ubL[pos]) { found = 1; break; }
            }
            intRes1 |= found << pos;
         }
         break;

      /*-- ranges: argL is the string, argR holds (lo,hi) pairs --*/
      case 1:
         for (pos = 0; pos < 16; pos++) {
            if ((validL & (1u << pos)) == 0) {
               break;   /* ran off the end of the string */
            }
            UInt inside = 0;
            for (k = 0; k < 16; k += 2) {
               const UInt pair = 3u << k;
               if ((validR & pair) != pair) {
                  break;   /* both ends of the pair must be valid */
               }
               if (fmt == 0) {
                  inside = (ubR[k] <= ubL[pos] && ubL[pos] <= ubR[k+1]);
               } else {
                  inside = (sbR[k] <= sbL[pos] && sbL[pos] <= sbR[k+1]);
               }
               if (inside) break;
            }
            intRes1 |= inside << pos;
         }
         break;

      /*-- equal each, aka strcmp --*/
      case 2: {
         UInt same = 0;
         for (k = 0; k < 16; k++) {
            if (ubL[k] == ubR[k]) { same |= 1u << k; }
         }
         /* Both valid: use the comparison.  Both invalid: force 1.
            Exactly one valid: force 0. */
         intRes1 = (same & validL & validR) | ~(validL | validR);
         break;
      }

      /*-- equal ordered, aka substring search: argL is the haystack,
           argR the needle --*/
      default:
         for (pos = 0; pos < 16; pos++) {
            UInt matches = 1;
            for (k = 0; k + pos < 16 && (validR & (1u << k)) != 0; k++) {
               if (ubL[k + pos] != ubR[k]) { matches = 0; break; }
            }
            intRes1 |= matches << pos;
            if ((validL & (1u << pos)) == 0) {
               break;   /* ran off the end of the haystack */
            }
         }
         break;
   }

   intRes1 &= 0xFFFF;

   /* generate I-format output */
   pcmpXstrX_WRK_gen_output_fmt_I(
      resV, resOSZACP,
      intRes1, zmaskL, zmaskR, validL, pol, idx
   );

   return True;
}
419
420
421 //////////////////////////////////////////////////////////
422 // //
423 // ISTRI_4A //
424 // //
425 //////////////////////////////////////////////////////////
426
h_pcmpistri_4A(V128 * argL,V128 * argR)427 UInt h_pcmpistri_4A ( V128* argL, V128* argR )
428 {
429 V128 block[2];
430 memcpy(&block[0], argL, sizeof(V128));
431 memcpy(&block[1], argR, sizeof(V128));
432 ULong res, flags;
433 __asm__ __volatile__(
434 "subq $1024, %%rsp" "\n\t"
435 "movdqu 0(%2), %%xmm2" "\n\t"
436 "movdqu 16(%2), %%xmm11" "\n\t"
437 "pcmpistri $0x4A, %%xmm2, %%xmm11" "\n\t"
438 "pushfq" "\n\t"
439 "popq %%rdx" "\n\t"
440 "movq %%rcx, %0" "\n\t"
441 "movq %%rdx, %1" "\n\t"
442 "addq $1024, %%rsp" "\n\t"
443 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
444 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
445 );
446 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
447 }
448
s_pcmpistri_4A(V128 * argLU,V128 * argRU)449 UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
450 {
451 V128 resV;
452 UInt resOSZACP, resECX;
453 Bool ok
454 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
455 zmask_from_V128(argLU),
456 zmask_from_V128(argRU),
457 0x4A, False/*!isSTRM*/
458 );
459 assert(ok);
460 resECX = resV.uInt[0];
461 return (resOSZACP << 16) | resECX;
462 }
463
istri_4A(void)464 void istri_4A ( void )
465 {
466 char* wot = "4A";
467 UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
468 UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
469
470 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
471
472 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
473 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
474 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
475 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
476
477 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
478 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
479 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
480
481 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
482 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
483 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
484 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
485
486 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
487 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
488 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
489
490 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
491
492 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
493 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
494 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
495
496 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
497 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
498 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
499
500 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
501 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
502 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
503
504 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
505 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
506 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
507
508 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
509 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
510 }
511
512 //////////////////////////////////////////////////////////
513 // //
514 // ISTRI_3A //
515 // //
516 //////////////////////////////////////////////////////////
517
h_pcmpistri_3A(V128 * argL,V128 * argR)518 UInt h_pcmpistri_3A ( V128* argL, V128* argR )
519 {
520 V128 block[2];
521 memcpy(&block[0], argL, sizeof(V128));
522 memcpy(&block[1], argR, sizeof(V128));
523 ULong res, flags;
524 __asm__ __volatile__(
525 "subq $1024, %%rsp" "\n\t"
526 "movdqu 0(%2), %%xmm2" "\n\t"
527 "movdqu 16(%2), %%xmm11" "\n\t"
528 "pcmpistri $0x3A, %%xmm2, %%xmm11" "\n\t"
529 "pushfq" "\n\t"
530 "popq %%rdx" "\n\t"
531 "movq %%rcx, %0" "\n\t"
532 "movq %%rdx, %1" "\n\t"
533 "addq $1024, %%rsp" "\n\t"
534 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
535 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
536 );
537 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
538 }
539
s_pcmpistri_3A(V128 * argLU,V128 * argRU)540 UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
541 {
542 V128 resV;
543 UInt resOSZACP, resECX;
544 Bool ok
545 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
546 zmask_from_V128(argLU),
547 zmask_from_V128(argRU),
548 0x3A, False/*!isSTRM*/
549 );
550 assert(ok);
551 resECX = resV.uInt[0];
552 return (resOSZACP << 16) | resECX;
553 }
554
istri_3A(void)555 void istri_3A ( void )
556 {
557 char* wot = "3A";
558 UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
559 UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
560
561 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
562
563 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
564 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
565 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
566 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
567
568 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
569 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
570 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
571
572 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
573 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
574 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
575 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
576
577 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
578 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
579 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
580
581 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
582
583 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
584 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
585 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
586
587 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
588 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
589 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
590
591 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
592 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
593 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
594
595 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
596 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
597 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
598
599 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
600 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
601 }
602
603
604
605 //////////////////////////////////////////////////////////
606 // //
607 // ISTRI_0C //
608 // //
609 //////////////////////////////////////////////////////////
610
611 __attribute__((noinline))
h_pcmpistri_0C(V128 * argL,V128 * argR)612 UInt h_pcmpistri_0C ( V128* argL, V128* argR )
613 {
614 V128 block[2];
615 memcpy(&block[0], argL, sizeof(V128));
616 memcpy(&block[1], argR, sizeof(V128));
617 ULong res = 0, flags = 0;
618 __asm__ __volatile__(
619 "movdqu 0(%2), %%xmm2" "\n\t"
620 "movdqu 16(%2), %%xmm11" "\n\t"
621 "pcmpistri $0x0C, %%xmm2, %%xmm11" "\n\t"
622 //"pcmpistrm $0x0C, %%xmm2, %%xmm11" "\n\t"
623 //"movd %%xmm0, %%ecx" "\n\t"
624 "pushfq" "\n\t"
625 "popq %%rdx" "\n\t"
626 "movq %%rcx, %0" "\n\t"
627 "movq %%rdx, %1" "\n\t"
628 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
629 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
630 );
631 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
632 }
633
s_pcmpistri_0C(V128 * argLU,V128 * argRU)634 UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
635 {
636 V128 resV;
637 UInt resOSZACP, resECX;
638 Bool ok
639 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
640 zmask_from_V128(argLU),
641 zmask_from_V128(argRU),
642 0x0C, False/*!isSTRM*/
643 );
644 assert(ok);
645 resECX = resV.uInt[0];
646 return (resOSZACP << 16) | resECX;
647 }
648
istri_0C(void)649 void istri_0C ( void )
650 {
651 char* wot = "0C";
652 UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
653 UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
654
655 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
656
657 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
658
659 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
660 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
661 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
662
663 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
664
665 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
666 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
667 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
668 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
669 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
670
671 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
672 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
673 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
674
675 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
676 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
677
678 try_istri(wot,h,s, "1111111111111234", "0000000000000000");
679 try_istri(wot,h,s, "1111111111111234", "0000000000000001");
680 try_istri(wot,h,s, "1111111111111234", "0000000000000011");
681
682 try_istri(wot,h,s, "1111111111111234", "1111111111111234");
683 try_istri(wot,h,s, "a111111111111111", "000000000000000a");
684 try_istri(wot,h,s, "b111111111111111", "000000000000000a");
685
686 try_istri(wot,h,s, "b111111111111111", "0000000000000000");
687 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
688 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
689 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
690 }
691
692
693 //////////////////////////////////////////////////////////
694 // //
695 // ISTRI_08 //
696 // //
697 //////////////////////////////////////////////////////////
698
h_pcmpistri_08(V128 * argL,V128 * argR)699 UInt h_pcmpistri_08 ( V128* argL, V128* argR )
700 {
701 V128 block[2];
702 memcpy(&block[0], argL, sizeof(V128));
703 memcpy(&block[1], argR, sizeof(V128));
704 ULong res, flags;
705 __asm__ __volatile__(
706 "subq $1024, %%rsp" "\n\t"
707 "movdqu 0(%2), %%xmm2" "\n\t"
708 "movdqu 16(%2), %%xmm11" "\n\t"
709 "pcmpistri $0x08, %%xmm2, %%xmm11" "\n\t"
710 "pushfq" "\n\t"
711 "popq %%rdx" "\n\t"
712 "movq %%rcx, %0" "\n\t"
713 "movq %%rdx, %1" "\n\t"
714 "addq $1024, %%rsp" "\n\t"
715 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
716 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
717 );
718 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
719 }
720
s_pcmpistri_08(V128 * argLU,V128 * argRU)721 UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
722 {
723 V128 resV;
724 UInt resOSZACP, resECX;
725 Bool ok
726 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
727 zmask_from_V128(argLU),
728 zmask_from_V128(argRU),
729 0x08, False/*!isSTRM*/
730 );
731 assert(ok);
732 resECX = resV.uInt[0];
733 return (resOSZACP << 16) | resECX;
734 }
735
istri_08(void)736 void istri_08 ( void )
737 {
738 char* wot = "08";
739 UInt(*h)(V128*,V128*) = h_pcmpistri_08;
740 UInt(*s)(V128*,V128*) = s_pcmpistri_08;
741
742 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
743
744 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
745 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
746 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
747 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
748
749 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
750 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
751 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
752
753 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
754 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
755 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
756 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
757
758 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
759 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
760 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
761
762 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
763
764 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
765 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
766 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
767
768 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
769 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
770 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
771
772 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
773 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
774 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
775
776 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
777 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
778 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
779
780 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
781 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
782 }
783
784
785
786 //////////////////////////////////////////////////////////
787 // //
788 // ISTRI_1A //
789 // //
790 //////////////////////////////////////////////////////////
791
h_pcmpistri_1A(V128 * argL,V128 * argR)792 UInt h_pcmpistri_1A ( V128* argL, V128* argR )
793 {
794 V128 block[2];
795 memcpy(&block[0], argL, sizeof(V128));
796 memcpy(&block[1], argR, sizeof(V128));
797 ULong res, flags;
798 __asm__ __volatile__(
799 "subq $1024, %%rsp" "\n\t"
800 "movdqu 0(%2), %%xmm2" "\n\t"
801 "movdqu 16(%2), %%xmm11" "\n\t"
802 "pcmpistri $0x1A, %%xmm2, %%xmm11" "\n\t"
803 "pushfq" "\n\t"
804 "popq %%rdx" "\n\t"
805 "movq %%rcx, %0" "\n\t"
806 "movq %%rdx, %1" "\n\t"
807 "addq $1024, %%rsp" "\n\t"
808 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
809 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
810 );
811 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
812 }
813
s_pcmpistri_1A(V128 * argLU,V128 * argRU)814 UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
815 {
816 V128 resV;
817 UInt resOSZACP, resECX;
818 Bool ok
819 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
820 zmask_from_V128(argLU),
821 zmask_from_V128(argRU),
822 0x1A, False/*!isSTRM*/
823 );
824 assert(ok);
825 resECX = resV.uInt[0];
826 return (resOSZACP << 16) | resECX;
827 }
828
istri_1A(void)829 void istri_1A ( void )
830 {
831 char* wot = "1A";
832 UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
833 UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
834
835 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
836
837 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
838 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
839 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
840 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
841
842 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
843 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
844 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
845
846 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
847 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
848 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
849 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
850
851 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
852 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
853 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
854
855 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
856
857 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
858 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
859 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
860
861 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
862 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
863 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
864
865 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
866 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
867 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
868
869 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
870 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
871 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
872
873 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
874 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
875 }
876
877
878
879 //////////////////////////////////////////////////////////
880 // //
881 // ISTRI_02 //
882 // //
883 //////////////////////////////////////////////////////////
884
h_pcmpistri_02(V128 * argL,V128 * argR)885 UInt h_pcmpistri_02 ( V128* argL, V128* argR )
886 {
887 V128 block[2];
888 memcpy(&block[0], argL, sizeof(V128));
889 memcpy(&block[1], argR, sizeof(V128));
890 ULong res, flags;
891 __asm__ __volatile__(
892 "subq $1024, %%rsp" "\n\t"
893 "movdqu 0(%2), %%xmm2" "\n\t"
894 "movdqu 16(%2), %%xmm11" "\n\t"
895 "pcmpistri $0x02, %%xmm2, %%xmm11" "\n\t"
896 //"pcmpistrm $0x02, %%xmm2, %%xmm11" "\n\t"
897 //"movd %%xmm0, %%ecx" "\n\t"
898 "pushfq" "\n\t"
899 "popq %%rdx" "\n\t"
900 "movq %%rcx, %0" "\n\t"
901 "movq %%rdx, %1" "\n\t"
902 "addq $1024, %%rsp" "\n\t"
903 : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
904 : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
905 );
906 return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
907 }
908
s_pcmpistri_02(V128 * argLU,V128 * argRU)909 UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
910 {
911 V128 resV;
912 UInt resOSZACP, resECX;
913 Bool ok
914 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
915 zmask_from_V128(argLU),
916 zmask_from_V128(argRU),
917 0x02, False/*!isSTRM*/
918 );
919 assert(ok);
920 resECX = resV.uInt[0];
921 return (resOSZACP << 16) | resECX;
922 }
923
istri_02(void)924 void istri_02 ( void )
925 {
926 char* wot = "02";
927 UInt(*h)(V128*,V128*) = h_pcmpistri_02;
928 UInt(*s)(V128*,V128*) = s_pcmpistri_02;
929
930 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
931 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
932 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
933 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
934
935 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
936 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
937 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
938 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
939 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
940
941 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
942 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
943 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
944 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
945
946 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
947 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
948
949 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
950 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
951 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
952 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
953
954 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
955
956 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
957 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
958 }
959
960
961 //////////////////////////////////////////////////////////
962 // //
963 // ISTRI_12 //
964 // //
965 //////////////////////////////////////////////////////////
966
/* Executes a real pcmpistri with control byte 0x12 on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
UInt h_pcmpistri_12 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered for the duration of the sequence; presumably so
         that pushfq cannot overwrite compiler-owned data just below
         the stack pointer -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x12, %%xmm2, %%xmm11" "\n\t"
      /* Disabled alternative left in place: pcmpistrm followed by a
         copy of the low dword of xmm0 into ecx. */
      //"pcmpistrm $0x12, %%xmm2, %%xmm11" "\n\t"
      //"movd %%xmm0, %%ecx" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
990
s_pcmpistri_12(V128 * argLU,V128 * argRU)991 UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
992 {
993 V128 resV;
994 UInt resOSZACP, resECX;
995 Bool ok
996 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
997 zmask_from_V128(argLU),
998 zmask_from_V128(argRU),
999 0x12, False/*!isSTRM*/
1000 );
1001 assert(ok);
1002 resECX = resV.uInt[0];
1003 return (resOSZACP << 16) | resECX;
1004 }
1005
istri_12(void)1006 void istri_12 ( void )
1007 {
1008 char* wot = "12";
1009 UInt(*h)(V128*,V128*) = h_pcmpistri_12;
1010 UInt(*s)(V128*,V128*) = s_pcmpistri_12;
1011
1012 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1013 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1014 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1015 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1016
1017 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1018 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1019 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1020 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1021 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1022
1023 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1024 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1025 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1026 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1027
1028 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1029 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1030
1031 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1032 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1033 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1034 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1035
1036 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1037
1038 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1039 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1040 }
1041
1042
1043
1044 //////////////////////////////////////////////////////////
1045 // //
1046 // ISTRI_44 //
1047 // //
1048 //////////////////////////////////////////////////////////
1049
/* Executes a real pcmpistri with control byte 0x44 on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
UInt h_pcmpistri_44 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered for the duration of the sequence; presumably so
         that pushfq cannot overwrite compiler-owned data just below
         the stack pointer -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x44, %%xmm2, %%xmm11" "\n\t"
      /* Disabled alternative left in place: pcmpistrm followed by a
         copy of the low dword of xmm0 into ecx. */
      //"pcmpistrm $0x04, %%xmm2, %%xmm11" "\n\t"
      //"movd %%xmm0, %%ecx" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1073
s_pcmpistri_44(V128 * argLU,V128 * argRU)1074 UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1075 {
1076 V128 resV;
1077 UInt resOSZACP, resECX;
1078 Bool ok
1079 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1080 zmask_from_V128(argLU),
1081 zmask_from_V128(argRU),
1082 0x44, False/*!isSTRM*/
1083 );
1084 assert(ok);
1085 resECX = resV.uInt[0];
1086 return (resOSZACP << 16) | resECX;
1087 }
1088
istri_44(void)1089 void istri_44 ( void )
1090 {
1091 char* wot = "44";
1092 UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1093 UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1094
1095 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1096 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1097 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1098 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1099
1100 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1101 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1102 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1103 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1104 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1105
1106 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1107
1108 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1109 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1110 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1111
1112 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1113 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1114 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1115
1116 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1117 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1118
1119 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1120 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1121
1122 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1123 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1124 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1125 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1126 }
1127
1128
1129 //////////////////////////////////////////////////////////
1130 // //
1131 // ISTRI_00 //
1132 // //
1133 //////////////////////////////////////////////////////////
1134
/* Executes a real pcmpistri with control byte 0x00 on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
UInt h_pcmpistri_00 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered for the duration of the sequence; presumably so
         that pushfq cannot overwrite compiler-owned data just below
         the stack pointer -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x00, %%xmm2, %%xmm11" "\n\t"
      /* Disabled alternative left in place: pcmpistrm followed by a
         copy of the low dword of xmm0 into ecx. */
      //"pcmpistrm $0x00, %%xmm2, %%xmm11" "\n\t"
      //"movd %%xmm0, %%ecx" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1158
s_pcmpistri_00(V128 * argLU,V128 * argRU)1159 UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1160 {
1161 V128 resV;
1162 UInt resOSZACP, resECX;
1163 Bool ok
1164 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1165 zmask_from_V128(argLU),
1166 zmask_from_V128(argRU),
1167 0x00, False/*!isSTRM*/
1168 );
1169 assert(ok);
1170 resECX = resV.uInt[0];
1171 return (resOSZACP << 16) | resECX;
1172 }
1173
istri_00(void)1174 void istri_00 ( void )
1175 {
1176 char* wot = "00";
1177 UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1178 UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1179
1180 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1181 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1182 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1183 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1184
1185 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1186 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1187 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1188 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1189 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1190
1191 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1192 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1193 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1194 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1195
1196 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1197 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1198
1199 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1200 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1201 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1202 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1203
1204 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1205
1206 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1207 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1208 }
1209
1210
1211 //////////////////////////////////////////////////////////
1212 // //
1213 // ISTRI_38 //
1214 // //
1215 //////////////////////////////////////////////////////////
1216
/* Executes a real pcmpistri with control byte 0x38 on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
UInt h_pcmpistri_38 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered for the duration of the sequence; presumably so
         that pushfq cannot overwrite compiler-owned data just below
         the stack pointer -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x38, %%xmm2, %%xmm11" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1238
s_pcmpistri_38(V128 * argLU,V128 * argRU)1239 UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
1240 {
1241 V128 resV;
1242 UInt resOSZACP, resECX;
1243 Bool ok
1244 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1245 zmask_from_V128(argLU),
1246 zmask_from_V128(argRU),
1247 0x38, False/*!isSTRM*/
1248 );
1249 assert(ok);
1250 resECX = resV.uInt[0];
1251 return (resOSZACP << 16) | resECX;
1252 }
1253
istri_38(void)1254 void istri_38 ( void )
1255 {
1256 char* wot = "38";
1257 UInt(*h)(V128*,V128*) = h_pcmpistri_38;
1258 UInt(*s)(V128*,V128*) = s_pcmpistri_38;
1259
1260 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1261
1262 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1263 try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1264 try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1265 try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1266
1267 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1268 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1269 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1270
1271 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1272 try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1273 try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1274 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1275
1276 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1277 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1278 try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1279
1280 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1281
1282 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1283 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1284 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1285
1286 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1287 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1288 try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1289
1290 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1291 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1292 try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1293
1294 try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
1295 try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
1296 try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
1297
1298 try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
1299 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
1300 }
1301
1302
1303
1304 //////////////////////////////////////////////////////////
1305 // //
1306 // ISTRI_46 //
1307 // //
1308 //////////////////////////////////////////////////////////
1309
/* Executes a real pcmpistri with control byte 0x46 on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
UInt h_pcmpistri_46 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered for the duration of the sequence; presumably so
         that pushfq cannot overwrite compiler-owned data just below
         the stack pointer -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x46, %%xmm2, %%xmm11" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1331
s_pcmpistri_46(V128 * argLU,V128 * argRU)1332 UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
1333 {
1334 V128 resV;
1335 UInt resOSZACP, resECX;
1336 Bool ok
1337 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1338 zmask_from_V128(argLU),
1339 zmask_from_V128(argRU),
1340 0x46, False/*!isSTRM*/
1341 );
1342 assert(ok);
1343 resECX = resV.uInt[0];
1344 return (resOSZACP << 16) | resECX;
1345 }
1346
istri_46(void)1347 void istri_46 ( void )
1348 {
1349 char* wot = "46";
1350 UInt(*h)(V128*,V128*) = h_pcmpistri_46;
1351 UInt(*s)(V128*,V128*) = s_pcmpistri_46;
1352
1353 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1354 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1355 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1356 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1357
1358 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1359 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1360 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1361 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1362 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1363
1364 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1365
1366 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1367 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1368 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1369
1370 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1371 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1372 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1373
1374 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1375 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1376
1377 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1378 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1379
1380 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1381 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1382 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1383 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1384 }
1385
1386
1387 //////////////////////////////////////////////////////////
1388 // //
1389 // ISTRI_30 //
1390 // //
1391 //////////////////////////////////////////////////////////
1392
/* Executes a real pcmpistri with control byte 0x30 on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
UInt h_pcmpistri_30 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered for the duration of the sequence; presumably so
         that pushfq cannot overwrite compiler-owned data just below
         the stack pointer -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x30, %%xmm2, %%xmm11" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1414
s_pcmpistri_30(V128 * argLU,V128 * argRU)1415 UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
1416 {
1417 V128 resV;
1418 UInt resOSZACP, resECX;
1419 Bool ok
1420 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1421 zmask_from_V128(argLU),
1422 zmask_from_V128(argRU),
1423 0x30, False/*!isSTRM*/
1424 );
1425 assert(ok);
1426 resECX = resV.uInt[0];
1427 return (resOSZACP << 16) | resECX;
1428 }
1429
istri_30(void)1430 void istri_30 ( void )
1431 {
1432 char* wot = "30";
1433 UInt(*h)(V128*,V128*) = h_pcmpistri_30;
1434 UInt(*s)(V128*,V128*) = s_pcmpistri_30;
1435
1436 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1437 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1438 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1439 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1440
1441 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1442 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1443 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1444 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1445 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1446
1447 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1448 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1449 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1450 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1451
1452 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1453 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1454
1455 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1456 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1457 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1458 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1459
1460 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1461
1462 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1463 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1464 }
1465
1466
1467 //////////////////////////////////////////////////////////
1468 // //
1469 // ISTRI_40 //
1470 // //
1471 //////////////////////////////////////////////////////////
1472
/* Executes a real pcmpistri with control byte 0x40 on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
UInt h_pcmpistri_40 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered for the duration of the sequence; presumably so
         that pushfq cannot overwrite compiler-owned data just below
         the stack pointer -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x40, %%xmm2, %%xmm11" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1494
s_pcmpistri_40(V128 * argLU,V128 * argRU)1495 UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
1496 {
1497 V128 resV;
1498 UInt resOSZACP, resECX;
1499 Bool ok
1500 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1501 zmask_from_V128(argLU),
1502 zmask_from_V128(argRU),
1503 0x40, False/*!isSTRM*/
1504 );
1505 assert(ok);
1506 resECX = resV.uInt[0];
1507 return (resOSZACP << 16) | resECX;
1508 }
1509
istri_40(void)1510 void istri_40 ( void )
1511 {
1512 char* wot = "40";
1513 UInt(*h)(V128*,V128*) = h_pcmpistri_40;
1514 UInt(*s)(V128*,V128*) = s_pcmpistri_40;
1515
1516 try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1517 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1518 try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1519 try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1520
1521 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1522 try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1523 try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1524 try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1525 try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1526
1527 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1528 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1529 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1530 try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1531
1532 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1533 try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1534
1535 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1536 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1537 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1538 try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1539
1540 try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1541
1542 try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1543 try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1544 }
1545
1546
1547 //////////////////////////////////////////////////////////
1548 // //
1549 // ISTRI_0E //
1550 // //
1551 //////////////////////////////////////////////////////////
1552
/* Executes a real pcmpistri with control byte 0x0E on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C.

   NOTE(review): unlike the other h_pcmpistri_* helpers in this file,
   this one is marked noinline, zero-initialises res and flags, and
   does not move rsp around the pushfq/popq pair.  The reason is not
   visible here -- confirm before making it uniform with the others. */
__attribute__((noinline))
UInt h_pcmpistri_0E ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res = 0, flags = 0;
   __asm__ __volatile__(
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x0E, %%xmm2, %%xmm11" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1573
s_pcmpistri_0E(V128 * argLU,V128 * argRU)1574 UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
1575 {
1576 V128 resV;
1577 UInt resOSZACP, resECX;
1578 Bool ok
1579 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1580 zmask_from_V128(argLU),
1581 zmask_from_V128(argRU),
1582 0x0E, False/*!isSTRM*/
1583 );
1584 assert(ok);
1585 resECX = resV.uInt[0];
1586 return (resOSZACP << 16) | resECX;
1587 }
1588
istri_0E(void)1589 void istri_0E ( void )
1590 {
1591 char* wot = "0E";
1592 UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
1593 UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
1594
1595 try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
1596
1597 try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
1598
1599 try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
1600 try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
1601 try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
1602
1603 try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
1604
1605 try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
1606 try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
1607 try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
1608 try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
1609 try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
1610
1611 try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
1612 try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
1613 try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
1614
1615 try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
1616 try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
1617
1618 try_istri(wot,h,s, "1111111111111234", "0000000000000000");
1619 try_istri(wot,h,s, "1111111111111234", "0000000000000001");
1620 try_istri(wot,h,s, "1111111111111234", "0000000000000011");
1621
1622 try_istri(wot,h,s, "1111111111111234", "1111111111111234");
1623 try_istri(wot,h,s, "a111111111111111", "000000000000000a");
1624 try_istri(wot,h,s, "b111111111111111", "000000000000000a");
1625
1626 try_istri(wot,h,s, "b111111111111111", "0000000000000000");
1627 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1628 try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
1629 try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
1630 }
1631
1632
1633 //////////////////////////////////////////////////////////
1634 // //
1635 // ISTRI_34 //
1636 // //
1637 //////////////////////////////////////////////////////////
1638
/* Executes a real pcmpistri with control byte 0x34 on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
UInt h_pcmpistri_34 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered for the duration of the sequence; presumably so
         that pushfq cannot overwrite compiler-owned data just below
         the stack pointer -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x34, %%xmm2, %%xmm11" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1660
s_pcmpistri_34(V128 * argLU,V128 * argRU)1661 UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
1662 {
1663 V128 resV;
1664 UInt resOSZACP, resECX;
1665 Bool ok
1666 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1667 zmask_from_V128(argLU),
1668 zmask_from_V128(argRU),
1669 0x34, False/*!isSTRM*/
1670 );
1671 assert(ok);
1672 resECX = resV.uInt[0];
1673 return (resOSZACP << 16) | resECX;
1674 }
1675
istri_34(void)1676 void istri_34 ( void )
1677 {
1678 char* wot = "34";
1679 UInt(*h)(V128*,V128*) = h_pcmpistri_34;
1680 UInt(*s)(V128*,V128*) = s_pcmpistri_34;
1681
1682 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1683 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1684 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1685 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1686
1687 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1688 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1689 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1690 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1691 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1692
1693 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1694
1695 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1696 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1697 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1698
1699 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1700 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1701 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1702
1703 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1704 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1705
1706 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1707 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1708
1709 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1710 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1711 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1712 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1713 }
1714
1715
1716 //////////////////////////////////////////////////////////
1717 // //
1718 // ISTRI_14 //
1719 // //
1720 //////////////////////////////////////////////////////////
1721
/* Executes a real pcmpistri with control byte 0x14 on copies of the two
   operands and packs what the instruction left behind into one word.

   argL is loaded into xmm2 and argR into xmm11; after the instruction
   the contents of rcx and of the flags register are captured.

   Returns ((flags & 0x8D5) << 16) | (rcx & 0xFFFF).  The mask 0x8D5
   keeps flag bits 11, 7, 6, 4, 2 and 0, which are the positions named
   by SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
UInt h_pcmpistri_14 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base-pointer input (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered for the duration of the sequence; presumably so
         that pushfq cannot overwrite compiler-owned data just below
         the stack pointer -- TODO confirm. */
      "subq $1024, %%rsp" "\n\t"
      "movdqu 0(%2), %%xmm2" "\n\t"
      "movdqu 16(%2), %%xmm11" "\n\t"
      "pcmpistri $0x14, %%xmm2, %%xmm11" "\n\t"
      /* Fetch the flags register through the stack into rdx. */
      "pushfq" "\n\t"
      "popq %%rdx" "\n\t"
      "movq %%rcx, %0" "\n\t"
      "movq %%rdx, %1" "\n\t"
      "addq $1024, %%rsp" "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* Selected flag bits in the upper half, low 16 bits of rcx below. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1743
s_pcmpistri_14(V128 * argLU,V128 * argRU)1744 UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
1745 {
1746 V128 resV;
1747 UInt resOSZACP, resECX;
1748 Bool ok
1749 = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1750 zmask_from_V128(argLU),
1751 zmask_from_V128(argRU),
1752 0x14, False/*!isSTRM*/
1753 );
1754 assert(ok);
1755 resECX = resV.uInt[0];
1756 return (resOSZACP << 16) | resECX;
1757 }
1758
istri_14(void)1759 void istri_14 ( void )
1760 {
1761 char* wot = "14";
1762 UInt(*h)(V128*,V128*) = h_pcmpistri_14;
1763 UInt(*s)(V128*,V128*) = s_pcmpistri_14;
1764
1765 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1766 try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1767 try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1768 try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1769
1770 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1771 try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1772 try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1773 try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1774 try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1775
1776 try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1777
1778 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1779 try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1780 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1781
1782 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1783 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1784 try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1785
1786 try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1787 try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1788
1789 try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1790 try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1791
1792 try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1793 try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1794 try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1795 try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1796 }
1797
1798
1799 //////////////////////////////////////////////////////////
1800 // //
1801 // main //
1802 // //
1803 //////////////////////////////////////////////////////////
1804
main(void)1805 int main ( void )
1806 {
1807 istri_4A();
1808 istri_3A();
1809 istri_08();
1810 istri_1A();
1811 istri_02();
1812 istri_0C();
1813 istri_12();
1814 istri_44();
1815 istri_00();
1816 istri_38();
1817 istri_46();
1818 istri_30();
1819 istri_40();
1820 istri_0E();
1821 istri_14();
1822 istri_34();
1823 return 0;
1824 }
1825