
/* Tests in detail the core arithmetic for pcmp{e,i}str{i,m} using
   pcmpistri to drive it.  Does not check the e-vs-i or i-vs-m
   aspect. */
5 
6 #include <string.h>
7 #include <stdio.h>
8 #include <assert.h>
9 
/* Short aliases for the basic integer types used throughout this file. */
typedef  unsigned int   UInt;
typedef  signed int     Int;
typedef  unsigned char  UChar;
typedef  signed char    Char;
typedef  unsigned long long int ULong;
/* Boolean stored in a single unsigned byte. */
typedef  UChar          Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

//typedef  unsigned char  V128[16];
/* One 16-byte vector operand, addressable either as sixteen bytes or
   as four UInt words. */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;

/* Bit positions of the O, S, Z, A, C and P flags within a flags word.
   The h_pcmpistri_* functions mask the value obtained via pushfq with
   0x8D5, which is exactly the OR of the six MASK_* values below, so
   these positions have to match the hardware flags register layout. */
#define SHIFT_O   11
#define SHIFT_S   7
#define SHIFT_Z   6
#define SHIFT_A   4
#define SHIFT_C   0
#define SHIFT_P   2

/* Single-bit masks for the same flags.  Note these are 64-bit (ULL)
   constants. */
#define MASK_O    (1ULL << SHIFT_O)
#define MASK_S    (1ULL << SHIFT_S)
#define MASK_Z    (1ULL << SHIFT_Z)
#define MASK_A    (1ULL << SHIFT_A)
#define MASK_C    (1ULL << SHIFT_C)
#define MASK_P    (1ULL << SHIFT_P)
40 
41 
/* Count the leading (most significant) zero bits of a 32-bit value.
   Returns 32 for x == 0 and 0 when bit 31 is set.

   Only unsigned arithmetic is used.  The previous branch-free version
   converted out-of-range unsigned values to Int and right-shifted
   negative Ints, both of which are implementation-defined in C.
   UInt is assumed to be exactly 32 bits wide. */
UInt clz32 ( UInt x )
{
   UInt n = 0;

   if (x == 0)
      return 32;

   /* Binary search: whenever the upper part of the remaining window is
      empty, count those zeros and slide the interesting bits upwards. */
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }

   return n;
}
65 
/* Count the trailing (least significant) zero bits of a 32-bit value.
   Returns 32 when x is zero, otherwise the index of the lowest set bit. */
UInt ctz32 ( UInt x )
{
   UInt count = 0;

   if (x == 0)
      return 32;

   /* x is non-zero here, so the loop is guaranteed to stop at a set bit */
   while ((x & 1) == 0) {
      x >>= 1;
      count++;
   }
   return count;
}
70 
/* Build a 16-byte vector from a 16-character hex summary.

   Each hex digit d becomes the byte 0xdd.  The summary is written with
   the highest-numbered byte first, so its last character lands in
   dst->uChar[0] and its first character in dst->uChar[15].  Asserts
   that the summary is exactly 16 characters long and contains only hex
   digits (either case). */
void expand ( V128* dst, char* summary )
{
   Int lane;

   assert( strlen(summary) == 16 );

   for (lane = 0; lane < 16; lane++) {
      UChar digit  = (UChar)summary[15 - lane];
      UChar nibble = 0;

      if (digit >= '0' && digit <= '9') {
         nibble = digit - '0';
      } else if (digit >= 'a' && digit <= 'f') {
         nibble = 10 + (digit - 'a');
      } else if (digit >= 'A' && digit <= 'F') {
         nibble = 10 + (digit - 'A');
      } else {
         assert(0);
      }

      assert(nibble < 16);
      /* replicate the nibble into both halves of the byte */
      dst->uChar[lane] = (UChar)((nibble << 4) | nibble);
   }
}
89 
/* Run one test case and print one result line.

   summL and summR are expanded into vectors with expand(); both h_fn
   and s_fn are then called on the same pair of vectors.  The printed
   line shows the two-character tag `which`, both summaries, both
   results in hex, and "!!!!" when the two results differ. */
void try_istri ( char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 char* summL, char* summR )
{
   V128        vecL, vecR;
   UInt        resH, resS;
   const char* verdict;

   assert(strlen(which) == 2);

   expand(&vecL, summL);
   expand(&vecR, summR);

   resH = h_fn(&vecL, &vecR);
   resS = s_fn(&vecL, &vecR);

   verdict = (resH == resS) ? "" : "!!!!";
   printf("istri %s  %s %s -> %08x %08x %s\n",
          which, summL, summR, resH, resS, verdict);
}
104 
/* Return a 16-bit mask in which bit i is set iff byte i of *arg is zero. */
UInt zmask_from_V128 ( V128* arg )
{
   UInt mask = 0;
   Int  lane;

   /* Go from the top byte down, so that each shift pushes the bits
      already collected towards their final positions. */
   for (lane = 15; lane >= 0; lane--) {
      mask <<= 1;
      if (arg->uChar[lane] == 0)
         mask |= 1;
   }
   return mask;
}
113 
114 //////////////////////////////////////////////////////////
115 //                                                      //
116 //                       GENERAL                        //
117 //                                                      //
118 //////////////////////////////////////////////////////////
119 
120 
121 /* Given partial results from a pcmpXstrX operation (intRes1,
122    basically), generate an I format (index value for ECX) output, and
123    also the new OSZACP flags.
124 */
125 static
pcmpXstrX_WRK_gen_output_fmt_I(V128 * resV,UInt * resOSZACP,UInt intRes1,UInt zmaskL,UInt zmaskR,UInt validL,UInt pol,UInt idx)126 void pcmpXstrX_WRK_gen_output_fmt_I(/*OUT*/V128* resV,
127                                     /*OUT*/UInt* resOSZACP,
128                                     UInt intRes1,
129                                     UInt zmaskL, UInt zmaskR,
130                                     UInt validL,
131                                     UInt pol, UInt idx )
132 {
133    assert((pol >> 2) == 0);
134    assert((idx >> 1) == 0);
135 
136    UInt intRes2 = 0;
137    switch (pol) {
138       case 0: intRes2 = intRes1;          break; // pol +
139       case 1: intRes2 = ~intRes1;         break; // pol -
140       case 2: intRes2 = intRes1;          break; // pol m+
141       case 3: intRes2 = intRes1 ^ validL; break; // pol m-
142    }
143    intRes2 &= 0xFFFF;
144 
145    // generate ecx value
146    UInt newECX = 0;
147    if (idx) {
148      // index of ms-1-bit
149      newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
150    } else {
151      // index of ls-1-bit
152      newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
153    }
154 
155    *(UInt*)(&resV[0]) = newECX;
156 
157    // generate new flags, common to all ISTRI and ISTRM cases
158    *resOSZACP    // A, P are zero
159      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
160      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
161      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
162      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
163 }
164 
165 
166 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
167    variants.
168 
169    For xSTRI variants, the new ECX value is placed in the 32 bits
170    pointed to by *resV.  For xSTRM variants, the result is a 128 bit
171    value and is placed at *resV in the obvious way.
172 
173    For all variants, the new OSZACP value is placed at *resOSZACP.
174 
175    argLV and argRV are the vector args.  The caller must prepare a
176    16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
177    must be 1 for each zero byte of of the respective arg.  For ESTRx
178    variants this is derived from the explicit length indication, and
179    must be 0 in all places except at the bit index corresponding to
180    the valid length (0 .. 16).  If the valid length is 16 then the
181    mask must be all zeroes.  In all cases, bits 31:16 must be zero.
182 
183    imm8 is the original immediate from the instruction.  isSTRM
184    indicates whether this is a xSTRM or xSTRI variant, which controls
185    how much of *res is written.
186 
187    If the given imm8 case can be handled, the return value is True.
188    If not, False is returned, and neither *res not *resOSZACP are
189    altered.
190 */
191 
pcmpXstrX_WRK(V128 * resV,UInt * resOSZACP,V128 * argLV,V128 * argRV,UInt zmaskL,UInt zmaskR,UInt imm8,Bool isSTRM)192 Bool pcmpXstrX_WRK ( /*OUT*/V128* resV,
193                      /*OUT*/UInt* resOSZACP,
194                      V128* argLV,  V128* argRV,
195                      UInt zmaskL, UInt zmaskR,
196                      UInt imm8,   Bool isSTRM )
197 {
198    assert(imm8 < 0x80);
199    assert((zmaskL >> 16) == 0);
200    assert((zmaskR >> 16) == 0);
201 
202    /* Explicitly reject any imm8 values that haven't been validated,
203       even if they would probably work.  Life is too short to have
204       unvalidated cases in the code base. */
205    switch (imm8) {
206       case 0x00: case 0x02:
207       case 0x08: case 0x0A: case 0x0C: case 0x0E:
208       case 0x10: case 0x12: case 0x14:
209       case 0x18: case 0x1A:
210       case 0x30:            case 0x34:
211       case 0x38: case 0x3A:
212       case 0x40: case 0x42: case 0x44: case 0x46:
213                  case 0x4A:
214                  case 0x62:
215       case 0x70: case 0x72:
216          break;
217       default:
218          return False;
219    }
220 
221    UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
222    UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
223    UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
224    UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
225 
226    /*----------------------------------------*/
227    /*-- strcmp on byte data                --*/
228    /*----------------------------------------*/
229 
230    if (agg == 2/*equal each, aka strcmp*/
231        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
232        && !isSTRM) {
233       Int    i;
234       UChar* argL = (UChar*)argLV;
235       UChar* argR = (UChar*)argRV;
236       UInt boolResII = 0;
237       for (i = 15; i >= 0; i--) {
238          UChar cL  = argL[i];
239          UChar cR  = argR[i];
240          boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
241       }
242       UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
243       UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
244 
245       // do invalidation, common to all equal-each cases
246       UInt intRes1
247          = (boolResII & validL & validR)  // if both valid, use cmpres
248            | (~ (validL | validR));       // if both invalid, force 1
249                                           // else force 0
250       intRes1 &= 0xFFFF;
251 
252       // generate I-format output
253       pcmpXstrX_WRK_gen_output_fmt_I(
254          resV, resOSZACP,
255          intRes1, zmaskL, zmaskR, validL, pol, idx
256       );
257 
258       return True;
259    }
260 
261    /*----------------------------------------*/
262    /*-- set membership on byte data        --*/
263    /*----------------------------------------*/
264 
265    if (agg == 0/*equal any, aka find chars in a set*/
266        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
267        && !isSTRM) {
268       /* argL: the string,  argR: charset */
269       UInt   si, ci;
270       UChar* argL    = (UChar*)argLV;
271       UChar* argR    = (UChar*)argRV;
272       UInt   boolRes = 0;
273       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
274       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
275 
276       for (si = 0; si < 16; si++) {
277          if ((validL & (1 << si)) == 0)
278             // run off the end of the string.
279             break;
280          UInt m = 0;
281          for (ci = 0; ci < 16; ci++) {
282             if ((validR & (1 << ci)) == 0) break;
283             if (argR[ci] == argL[si]) { m = 1; break; }
284          }
285          boolRes |= (m << si);
286       }
287 
288       // boolRes is "pre-invalidated"
289       UInt intRes1 = boolRes & 0xFFFF;
290 
291       // generate I-format output
292       pcmpXstrX_WRK_gen_output_fmt_I(
293          resV, resOSZACP,
294          intRes1, zmaskL, zmaskR, validL, pol, idx
295       );
296 
297       return True;
298    }
299 
300    /*----------------------------------------*/
301    /*-- substring search on byte data      --*/
302    /*----------------------------------------*/
303 
304    if (agg == 3/*equal ordered, aka substring search*/
305        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)
306        && !isSTRM) {
307 
308       /* argL: haystack,  argR: needle */
309       UInt   ni, hi;
310       UChar* argL    = (UChar*)argLV;
311       UChar* argR    = (UChar*)argRV;
312       UInt   boolRes = 0;
313       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
314       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
315       for (hi = 0; hi < 16; hi++) {
316          UInt m = 1;
317          for (ni = 0; ni < 16; ni++) {
318             if ((validR & (1 << ni)) == 0) break;
319             UInt i = ni + hi;
320             if (i >= 16) break;
321             if (argL[i] != argR[ni]) { m = 0; break; }
322          }
323          boolRes |= (m << hi);
324          if ((validL & (1 << hi)) == 0)
325             // run off the end of the haystack
326             break;
327       }
328 
329       // boolRes is "pre-invalidated"
330       UInt intRes1 = boolRes & 0xFFFF;
331 
332       // generate I-format output
333       pcmpXstrX_WRK_gen_output_fmt_I(
334          resV, resOSZACP,
335          intRes1, zmaskL, zmaskR, validL, pol, idx
336       );
337 
338       return True;
339    }
340 
341    /*----------------------------------------*/
342    /*-- ranges, unsigned byte data         --*/
343    /*----------------------------------------*/
344 
345    if (agg == 1/*ranges*/
346        && fmt == 0/*ub*/
347        && !isSTRM) {
348 
349       /* argL: string,  argR: range-pairs */
350       UInt   ri, si;
351       UChar* argL    = (UChar*)argLV;
352       UChar* argR    = (UChar*)argRV;
353       UInt   boolRes = 0;
354       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
355       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
356       for (si = 0; si < 16; si++) {
357          if ((validL & (1 << si)) == 0)
358             // run off the end of the string
359             break;
360          UInt m = 0;
361          for (ri = 0; ri < 16; ri += 2) {
362             if ((validR & (3 << ri)) != (3 << ri)) break;
363             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
364                m = 1; break;
365             }
366          }
367          boolRes |= (m << si);
368       }
369 
370       // boolRes is "pre-invalidated"
371       UInt intRes1 = boolRes & 0xFFFF;
372 
373       // generate I-format output
374       pcmpXstrX_WRK_gen_output_fmt_I(
375          resV, resOSZACP,
376          intRes1, zmaskL, zmaskR, validL, pol, idx
377       );
378 
379       return True;
380    }
381 
382    /*----------------------------------------*/
383    /*-- ranges, signed byte data           --*/
384    /*----------------------------------------*/
385 
386    if (agg == 1/*ranges*/
387        && fmt == 2/*sb*/
388        && !isSTRM) {
389 
390       /* argL: string,  argR: range-pairs */
391       UInt   ri, si;
392       Char*  argL    = (Char*)argLV;
393       Char*  argR    = (Char*)argRV;
394       UInt   boolRes = 0;
395       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
396       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
397       for (si = 0; si < 16; si++) {
398          if ((validL & (1 << si)) == 0)
399             // run off the end of the string
400             break;
401          UInt m = 0;
402          for (ri = 0; ri < 16; ri += 2) {
403             if ((validR & (3 << ri)) != (3 << ri)) break;
404             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
405                m = 1; break;
406             }
407          }
408          boolRes |= (m << si);
409       }
410 
411       // boolRes is "pre-invalidated"
412       UInt intRes1 = boolRes & 0xFFFF;
413 
414       // generate I-format output
415       pcmpXstrX_WRK_gen_output_fmt_I(
416          resV, resOSZACP,
417          intRes1, zmaskL, zmaskR, validL, pol, idx
418       );
419 
420       return True;
421    }
422 
423    return False;
424 }
425 
426 
427 //////////////////////////////////////////////////////////
428 //                                                      //
429 //                       ISTRI_4A                       //
430 //                                                      //
431 //////////////////////////////////////////////////////////
432 
/* Execute the real PCMPISTRI instruction with immediate 0x4A, with
   *argL loaded into %xmm2 and *argR into %xmm11.

   Returns (flags << 16) | ECX[15:0], where only the flag bits selected
   by 0x8D5 (bits 0, 2, 4, 6, 7 and 11) are kept.  This is the same
   layout that s_pcmpistri_4A produces, so the two can be compared
   directly.  x86-64 only. */
UInt h_pcmpistri_4A ( V128* argL, V128* argR )
{
   V128 block[2];
   /* Put both operands in one contiguous block so that a single base
      register (%2) can address them as 0(%2) and 16(%2). */
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* NOTE(review): %rsp is lowered by 1024 around the sequence,
         presumably so that the pushfq below cannot overwrite live data
         kept just under the stack pointer -- confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x4A,  %%xmm2, %%xmm11"   "\n\t"
      /* copy the flags into %rdx via the stack */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* keep only the six masked flag bits and the low 16 bits of the index */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
454 
s_pcmpistri_4A(V128 * argLU,V128 * argRU)455 UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
456 {
457    V128 resV;
458    UInt resOSZACP, resECX;
459    Bool ok
460       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
461                        zmask_from_V128(argLU),
462                        zmask_from_V128(argRU),
463                        0x4A, False/*!isSTRM*/
464         );
465    assert(ok);
466    resECX = resV.uInt[0];
467    return (resOSZACP << 16) | resECX;
468 }
469 
istri_4A(void)470 void istri_4A ( void )
471 {
472    char* wot = "4A";
473    UInt(*h)(V128*,V128*) = h_pcmpistri_4A;
474    UInt(*s)(V128*,V128*) = s_pcmpistri_4A;
475 
476    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
477 
478    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
479    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
480    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
481    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
482 
483    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
484    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
485    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
486 
487    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
488    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
489    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
490    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
491 
492    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
493    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
494    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
495 
496    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
497 
498    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
499    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
500    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
501 
502    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
503    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
504    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
505 
506    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
507    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
508    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
509 
510    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
511    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
512    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
513 
514    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
515    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
516 }
517 
518 //////////////////////////////////////////////////////////
519 //                                                      //
520 //                       ISTRI_3A                       //
521 //                                                      //
522 //////////////////////////////////////////////////////////
523 
/* Execute the real PCMPISTRI instruction with immediate 0x3A, with
   *argL loaded into %xmm2 and *argR into %xmm11.

   Returns (flags << 16) | ECX[15:0], where only the flag bits selected
   by 0x8D5 (bits 0, 2, 4, 6, 7 and 11) are kept.  This is the same
   layout that s_pcmpistri_3A produces, so the two can be compared
   directly.  x86-64 only. */
UInt h_pcmpistri_3A ( V128* argL, V128* argR )
{
   V128 block[2];
   /* Put both operands in one contiguous block so that a single base
      register (%2) can address them as 0(%2) and 16(%2). */
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* NOTE(review): %rsp is lowered by 1024 around the sequence,
         presumably so that the pushfq below cannot overwrite live data
         kept just under the stack pointer -- confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x3A,  %%xmm2, %%xmm11"   "\n\t"
      /* copy the flags into %rdx via the stack */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* keep only the six masked flag bits and the low 16 bits of the index */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
545 
s_pcmpistri_3A(V128 * argLU,V128 * argRU)546 UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
547 {
548    V128 resV;
549    UInt resOSZACP, resECX;
550    Bool ok
551       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
552                        zmask_from_V128(argLU),
553                        zmask_from_V128(argRU),
554                        0x3A, False/*!isSTRM*/
555         );
556    assert(ok);
557    resECX = resV.uInt[0];
558    return (resOSZACP << 16) | resECX;
559 }
560 
istri_3A(void)561 void istri_3A ( void )
562 {
563    char* wot = "3A";
564    UInt(*h)(V128*,V128*) = h_pcmpistri_3A;
565    UInt(*s)(V128*,V128*) = s_pcmpistri_3A;
566 
567    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
568 
569    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
570    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
571    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
572    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
573 
574    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
575    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
576    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
577 
578    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
579    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
580    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
581    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
582 
583    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
584    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
585    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
586 
587    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
588 
589    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
590    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
591    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
592 
593    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
594    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
595    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
596 
597    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
598    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
599    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
600 
601    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
602    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
603    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
604 
605    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
606    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
607 }
608 
609 
610 
611 //////////////////////////////////////////////////////////
612 //                                                      //
613 //                       ISTRI_0C                       //
614 //                                                      //
615 //////////////////////////////////////////////////////////
616 
617 __attribute__((noinline))
h_pcmpistri_0C(V128 * argL,V128 * argR)618 UInt h_pcmpistri_0C ( V128* argL, V128* argR )
619 {
620    V128 block[2];
621    memcpy(&block[0], argL, sizeof(V128));
622    memcpy(&block[1], argR, sizeof(V128));
623    ULong res = 0, flags = 0;
624    __asm__ __volatile__(
625       "movdqu    0(%2),  %%xmm2"            "\n\t"
626       "movdqu    16(%2), %%xmm11"           "\n\t"
627       "pcmpistri $0x0C,  %%xmm2, %%xmm11"   "\n\t"
628       //"pcmpistrm $0x0C,  %%xmm2, %%xmm11"   "\n\t"
629       //"movd %%xmm0, %%ecx" "\n\t"
630       "pushfq"                              "\n\t"
631       "popq      %%rdx"                     "\n\t"
632       "movq      %%rcx,  %0"                "\n\t"
633       "movq      %%rdx,  %1"                "\n\t"
634       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
635       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
636    );
637    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
638 }
639 
s_pcmpistri_0C(V128 * argLU,V128 * argRU)640 UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
641 {
642    V128 resV;
643    UInt resOSZACP, resECX;
644    Bool ok
645       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
646                        zmask_from_V128(argLU),
647                        zmask_from_V128(argRU),
648                        0x0C, False/*!isSTRM*/
649         );
650    assert(ok);
651    resECX = resV.uInt[0];
652    return (resOSZACP << 16) | resECX;
653 }
654 
istri_0C(void)655 void istri_0C ( void )
656 {
657    char* wot = "0C";
658    UInt(*h)(V128*,V128*) = h_pcmpistri_0C;
659    UInt(*s)(V128*,V128*) = s_pcmpistri_0C;
660 
661    try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
662 
663    try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
664 
665    try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
666    try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
667    try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
668 
669    try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
670 
671    try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
672    try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
673    try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
674    try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
675    try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
676 
677    try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
678    try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
679    try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
680 
681    try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
682    try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
683 
684    try_istri(wot,h,s, "1111111111111234", "0000000000000000");
685    try_istri(wot,h,s, "1111111111111234", "0000000000000001");
686    try_istri(wot,h,s, "1111111111111234", "0000000000000011");
687 
688    try_istri(wot,h,s, "1111111111111234", "1111111111111234");
689    try_istri(wot,h,s, "a111111111111111", "000000000000000a");
690    try_istri(wot,h,s, "b111111111111111", "000000000000000a");
691 
692    try_istri(wot,h,s, "b111111111111111", "0000000000000000");
693    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
694    try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
695    try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
696 }
697 
698 
699 //////////////////////////////////////////////////////////
700 //                                                      //
701 //                       ISTRI_08                       //
702 //                                                      //
703 //////////////////////////////////////////////////////////
704 
/* Execute the real PCMPISTRI instruction with immediate 0x08, with
   *argL loaded into %xmm2 and *argR into %xmm11.

   Returns (flags << 16) | ECX[15:0], where only the flag bits selected
   by 0x8D5 (bits 0, 2, 4, 6, 7 and 11) are kept.  This is the same
   layout that s_pcmpistri_08 produces, so the two can be compared
   directly.  x86-64 only. */
UInt h_pcmpistri_08 ( V128* argL, V128* argR )
{
   V128 block[2];
   /* Put both operands in one contiguous block so that a single base
      register (%2) can address them as 0(%2) and 16(%2). */
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* NOTE(review): %rsp is lowered by 1024 around the sequence,
         presumably so that the pushfq below cannot overwrite live data
         kept just under the stack pointer -- confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x08,  %%xmm2, %%xmm11"   "\n\t"
      /* copy the flags into %rdx via the stack */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* keep only the six masked flag bits and the low 16 bits of the index */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
726 
s_pcmpistri_08(V128 * argLU,V128 * argRU)727 UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
728 {
729    V128 resV;
730    UInt resOSZACP, resECX;
731    Bool ok
732       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
733                        zmask_from_V128(argLU),
734                        zmask_from_V128(argRU),
735                        0x08, False/*!isSTRM*/
736         );
737    assert(ok);
738    resECX = resV.uInt[0];
739    return (resOSZACP << 16) | resECX;
740 }
741 
istri_08(void)742 void istri_08 ( void )
743 {
744    char* wot = "08";
745    UInt(*h)(V128*,V128*) = h_pcmpistri_08;
746    UInt(*s)(V128*,V128*) = s_pcmpistri_08;
747 
748    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
749 
750    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
751    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
752    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
753    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
754 
755    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
756    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
757    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
758 
759    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
760    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
761    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
762    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
763 
764    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
765    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
766    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
767 
768    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
769 
770    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
771    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
772    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
773 
774    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
775    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
776    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
777 
778    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
779    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
780    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
781 
782    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
783    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
784    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
785 
786    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
787    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
788 }
789 
790 
791 
792 //////////////////////////////////////////////////////////
793 //                                                      //
794 //                       ISTRI_18                       //
795 //                                                      //
796 //////////////////////////////////////////////////////////
797 
/* Execute the real PCMPISTRI instruction with immediate 0x18, with
   *argL loaded into %xmm2 and *argR into %xmm11.

   Returns (flags << 16) | ECX[15:0], where only the flag bits selected
   by 0x8D5 (bits 0, 2, 4, 6, 7 and 11) are kept.  This is the same
   layout that s_pcmpistri_18 produces, so the two can be compared
   directly.  x86-64 only. */
UInt h_pcmpistri_18 ( V128* argL, V128* argR )
{
   V128 block[2];
   /* Put both operands in one contiguous block so that a single base
      register (%2) can address them as 0(%2) and 16(%2). */
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* NOTE(review): %rsp is lowered by 1024 around the sequence,
         presumably so that the pushfq below cannot overwrite live data
         kept just under the stack pointer -- confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x18,  %%xmm2, %%xmm11"   "\n\t"
      /* copy the flags into %rdx via the stack */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* keep only the six masked flag bits and the low 16 bits of the index */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
819 
s_pcmpistri_18(V128 * argLU,V128 * argRU)820 UInt s_pcmpistri_18 ( V128* argLU, V128* argRU )
821 {
822    V128 resV;
823    UInt resOSZACP, resECX;
824    Bool ok
825       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
826                        zmask_from_V128(argLU),
827                        zmask_from_V128(argRU),
828                        0x18, False/*!isSTRM*/
829         );
830    assert(ok);
831    resECX = resV.uInt[0];
832    return (resOSZACP << 16) | resECX;
833 }
834 
istri_18(void)835 void istri_18 ( void )
836 {
837    char* wot = "18";
838    UInt(*h)(V128*,V128*) = h_pcmpistri_18;
839    UInt(*s)(V128*,V128*) = s_pcmpistri_18;
840 
841    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
842 
843    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
844    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
845    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
846    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
847 
848    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
849    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
850    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
851 
852    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
853    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
854    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
855    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
856 
857    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
858    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
859    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
860 
861    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
862 
863    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
864    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
865    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
866 
867    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
868    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
869    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
870 
871    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
872    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
873    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
874 
875    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
876    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
877    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
878 
879    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
880    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
881 }
882 
883 
884 
885 //////////////////////////////////////////////////////////
886 //                                                      //
887 //                       ISTRI_1A                       //
888 //                                                      //
889 //////////////////////////////////////////////////////////
890 
/* Executes a real pcmpistri with immediate 0x1A on the host CPU.
   *argL is loaded into xmm2 and *argR into xmm11.  Returns the captured
   rflags, masked with 0x8D5, in bits 16 and up, OR-ed with the low 16
   bits of rcx as read right after the instruction. */
UInt h_pcmpistri_1A ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered by 1024 for the duration of the sequence and
         restored at the end; presumably this keeps pushfq from
         overwriting data the compiler keeps just below rsp -- TODO
         confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x1A,  %%xmm2, %%xmm11"   "\n\t"
      /* Copy rflags into rdx by way of the stack. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 has bits 11, 7, 6, 4, 2 and 0 set: the positions named by
      SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
912 
s_pcmpistri_1A(V128 * argLU,V128 * argRU)913 UInt s_pcmpistri_1A ( V128* argLU, V128* argRU )
914 {
915    V128 resV;
916    UInt resOSZACP, resECX;
917    Bool ok
918       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
919                        zmask_from_V128(argLU),
920                        zmask_from_V128(argRU),
921                        0x1A, False/*!isSTRM*/
922         );
923    assert(ok);
924    resECX = resV.uInt[0];
925    return (resOSZACP << 16) | resECX;
926 }
927 
istri_1A(void)928 void istri_1A ( void )
929 {
930    char* wot = "1A";
931    UInt(*h)(V128*,V128*) = h_pcmpistri_1A;
932    UInt(*s)(V128*,V128*) = s_pcmpistri_1A;
933 
934    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
935 
936    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
937    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
938    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
939    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
940 
941    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
942    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
943    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
944 
945    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
946    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
947    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
948    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
949 
950    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
951    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
952    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
953 
954    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
955 
956    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
957    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
958    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
959 
960    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
961    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
962    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
963 
964    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
965    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
966    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
967 
968    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
969    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
970    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
971 
972    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
973    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
974 }
975 
976 
977 
978 //////////////////////////////////////////////////////////
979 //                                                      //
980 //                       ISTRI_02                       //
981 //                                                      //
982 //////////////////////////////////////////////////////////
983 
/* Executes a real pcmpistri with immediate 0x02 on the host CPU.
   *argL is loaded into xmm2 and *argR into xmm11.  Returns the captured
   rflags, masked with 0x8D5, in bits 16 and up, OR-ed with the low 16
   bits of rcx as read right after the instruction. */
UInt h_pcmpistri_02 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered by 1024 for the duration of the sequence and
         restored at the end; presumably this keeps pushfq from
         overwriting data the compiler keeps just below rsp -- TODO
         confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x02,  %%xmm2, %%xmm11"   "\n\t"
      /* Disabled alternative: run pcmpistrm instead and copy the low
         32 bits of xmm0 into ecx. */
//"pcmpistrm $0x02, %%xmm2, %%xmm11"   "\n\t"
//"movd %%xmm0, %%ecx" "\n\t"
      /* Copy rflags into rdx by way of the stack. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 has bits 11, 7, 6, 4, 2 and 0 set: the positions named by
      SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1007 
s_pcmpistri_02(V128 * argLU,V128 * argRU)1008 UInt s_pcmpistri_02 ( V128* argLU, V128* argRU )
1009 {
1010    V128 resV;
1011    UInt resOSZACP, resECX;
1012    Bool ok
1013       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1014                        zmask_from_V128(argLU),
1015                        zmask_from_V128(argRU),
1016                        0x02, False/*!isSTRM*/
1017         );
1018    assert(ok);
1019    resECX = resV.uInt[0];
1020    return (resOSZACP << 16) | resECX;
1021 }
1022 
istri_02(void)1023 void istri_02 ( void )
1024 {
1025    char* wot = "02";
1026    UInt(*h)(V128*,V128*) = h_pcmpistri_02;
1027    UInt(*s)(V128*,V128*) = s_pcmpistri_02;
1028 
1029    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1030    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1031    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1032    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1033 
1034    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1035    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1036    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1037    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1038    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1039 
1040    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1041    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1042    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1043    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1044 
1045    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1046    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1047 
1048    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1049    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1050    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1051    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1052 
1053    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1054 
1055    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1056    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1057 }
1058 
1059 
1060 //////////////////////////////////////////////////////////
1061 //                                                      //
1062 //                       ISTRI_12                       //
1063 //                                                      //
1064 //////////////////////////////////////////////////////////
1065 
/* Executes a real pcmpistri with immediate 0x12 on the host CPU.
   *argL is loaded into xmm2 and *argR into xmm11.  Returns the captured
   rflags, masked with 0x8D5, in bits 16 and up, OR-ed with the low 16
   bits of rcx as read right after the instruction. */
UInt h_pcmpistri_12 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered by 1024 for the duration of the sequence and
         restored at the end; presumably this keeps pushfq from
         overwriting data the compiler keeps just below rsp -- TODO
         confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x12,  %%xmm2, %%xmm11"   "\n\t"
      /* Disabled alternative: run pcmpistrm instead and copy the low
         32 bits of xmm0 into ecx. */
//"pcmpistrm $0x12, %%xmm2, %%xmm11"   "\n\t"
//"movd %%xmm0, %%ecx" "\n\t"
      /* Copy rflags into rdx by way of the stack. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 has bits 11, 7, 6, 4, 2 and 0 set: the positions named by
      SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1089 
s_pcmpistri_12(V128 * argLU,V128 * argRU)1090 UInt s_pcmpistri_12 ( V128* argLU, V128* argRU )
1091 {
1092    V128 resV;
1093    UInt resOSZACP, resECX;
1094    Bool ok
1095       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1096                        zmask_from_V128(argLU),
1097                        zmask_from_V128(argRU),
1098                        0x12, False/*!isSTRM*/
1099         );
1100    assert(ok);
1101    resECX = resV.uInt[0];
1102    return (resOSZACP << 16) | resECX;
1103 }
1104 
istri_12(void)1105 void istri_12 ( void )
1106 {
1107    char* wot = "12";
1108    UInt(*h)(V128*,V128*) = h_pcmpistri_12;
1109    UInt(*s)(V128*,V128*) = s_pcmpistri_12;
1110 
1111    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1112    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1113    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1114    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1115 
1116    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1117    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1118    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1119    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1120    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1121 
1122    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1123    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1124    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1125    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1126 
1127    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1128    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1129 
1130    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1131    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1132    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1133    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1134 
1135    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1136 
1137    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1138    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1139 }
1140 
1141 
1142 
1143 //////////////////////////////////////////////////////////
1144 //                                                      //
1145 //                       ISTRI_44                       //
1146 //                                                      //
1147 //////////////////////////////////////////////////////////
1148 
/* Executes a real pcmpistri with immediate 0x44 on the host CPU.
   *argL is loaded into xmm2 and *argR into xmm11.  Returns the captured
   rflags, masked with 0x8D5, in bits 16 and up, OR-ed with the low 16
   bits of rcx as read right after the instruction. */
UInt h_pcmpistri_44 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered by 1024 for the duration of the sequence and
         restored at the end; presumably this keeps pushfq from
         overwriting data the compiler keeps just below rsp -- TODO
         confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x44,  %%xmm2, %%xmm11"   "\n\t"
      /* Disabled alternative: run pcmpistrm (note: with immediate
         0x04, not 0x44) and copy the low 32 bits of xmm0 into ecx. */
//"pcmpistrm $0x04, %%xmm2, %%xmm11"   "\n\t"
//"movd %%xmm0, %%ecx" "\n\t"
      /* Copy rflags into rdx by way of the stack. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 has bits 11, 7, 6, 4, 2 and 0 set: the positions named by
      SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1172 
s_pcmpistri_44(V128 * argLU,V128 * argRU)1173 UInt s_pcmpistri_44 ( V128* argLU, V128* argRU )
1174 {
1175    V128 resV;
1176    UInt resOSZACP, resECX;
1177    Bool ok
1178       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1179                        zmask_from_V128(argLU),
1180                        zmask_from_V128(argRU),
1181                        0x44, False/*!isSTRM*/
1182         );
1183    assert(ok);
1184    resECX = resV.uInt[0];
1185    return (resOSZACP << 16) | resECX;
1186 }
1187 
istri_44(void)1188 void istri_44 ( void )
1189 {
1190    char* wot = "44";
1191    UInt(*h)(V128*,V128*) = h_pcmpistri_44;
1192    UInt(*s)(V128*,V128*) = s_pcmpistri_44;
1193 
1194    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1195    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1196    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1197    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1198 
1199    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1200    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1201    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1202    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1203    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1204 
1205    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1206 
1207    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1208    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1209    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1210 
1211    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1212    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1213    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1214 
1215    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1216    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1217 
1218    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1219    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1220 
1221    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1222    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1223    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1224    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1225 }
1226 
1227 
1228 //////////////////////////////////////////////////////////
1229 //                                                      //
1230 //                       ISTRI_00                       //
1231 //                                                      //
1232 //////////////////////////////////////////////////////////
1233 
/* Executes a real pcmpistri with immediate 0x00 on the host CPU.
   *argL is loaded into xmm2 and *argR into xmm11.  Returns the captured
   rflags, masked with 0x8D5, in bits 16 and up, OR-ed with the low 16
   bits of rcx as read right after the instruction. */
UInt h_pcmpistri_00 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered by 1024 for the duration of the sequence and
         restored at the end; presumably this keeps pushfq from
         overwriting data the compiler keeps just below rsp -- TODO
         confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x00,  %%xmm2, %%xmm11"   "\n\t"
      /* Disabled alternative: run pcmpistrm instead and copy the low
         32 bits of xmm0 into ecx. */
//"pcmpistrm $0x00, %%xmm2, %%xmm11"   "\n\t"
//"movd %%xmm0, %%ecx" "\n\t"
      /* Copy rflags into rdx by way of the stack. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 has bits 11, 7, 6, 4, 2 and 0 set: the positions named by
      SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1257 
s_pcmpistri_00(V128 * argLU,V128 * argRU)1258 UInt s_pcmpistri_00 ( V128* argLU, V128* argRU )
1259 {
1260    V128 resV;
1261    UInt resOSZACP, resECX;
1262    Bool ok
1263       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1264                        zmask_from_V128(argLU),
1265                        zmask_from_V128(argRU),
1266                        0x00, False/*!isSTRM*/
1267         );
1268    assert(ok);
1269    resECX = resV.uInt[0];
1270    return (resOSZACP << 16) | resECX;
1271 }
1272 
istri_00(void)1273 void istri_00 ( void )
1274 {
1275    char* wot = "00";
1276    UInt(*h)(V128*,V128*) = h_pcmpistri_00;
1277    UInt(*s)(V128*,V128*) = s_pcmpistri_00;
1278 
1279    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1280    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1281    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1282    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1283 
1284    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1285    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1286    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1287    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1288    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1289 
1290    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1291    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1292    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1293    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1294 
1295    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1296    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1297 
1298    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1299    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1300    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1301    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1302 
1303    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1304 
1305    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1306    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1307 }
1308 
1309 
1310 //////////////////////////////////////////////////////////
1311 //                                                      //
1312 //                       ISTRI_38                       //
1313 //                                                      //
1314 //////////////////////////////////////////////////////////
1315 
/* Executes a real pcmpistri with immediate 0x38 on the host CPU.
   *argL is loaded into xmm2 and *argR into xmm11.  Returns the captured
   rflags, masked with 0x8D5, in bits 16 and up, OR-ed with the low 16
   bits of rcx as read right after the instruction. */
UInt h_pcmpistri_38 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered by 1024 for the duration of the sequence and
         restored at the end; presumably this keeps pushfq from
         overwriting data the compiler keeps just below rsp -- TODO
         confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x38,  %%xmm2, %%xmm11"   "\n\t"
      /* Copy rflags into rdx by way of the stack. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 has bits 11, 7, 6, 4, 2 and 0 set: the positions named by
      SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1337 
s_pcmpistri_38(V128 * argLU,V128 * argRU)1338 UInt s_pcmpistri_38 ( V128* argLU, V128* argRU )
1339 {
1340    V128 resV;
1341    UInt resOSZACP, resECX;
1342    Bool ok
1343       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1344                        zmask_from_V128(argLU),
1345                        zmask_from_V128(argRU),
1346                        0x38, False/*!isSTRM*/
1347         );
1348    assert(ok);
1349    resECX = resV.uInt[0];
1350    return (resOSZACP << 16) | resECX;
1351 }
1352 
istri_38(void)1353 void istri_38 ( void )
1354 {
1355    char* wot = "38";
1356    UInt(*h)(V128*,V128*) = h_pcmpistri_38;
1357    UInt(*s)(V128*,V128*) = s_pcmpistri_38;
1358 
1359    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1360 
1361    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1362    try_istri(wot,h,s, "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1363    try_istri(wot,h,s, "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa");
1364    try_istri(wot,h,s, "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa");
1365 
1366    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa");
1367    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa");
1368    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a");
1369 
1370    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1371    try_istri(wot,h,s, "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1372    try_istri(wot,h,s, "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1373    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1374 
1375    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1376    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa");
1377    try_istri(wot,h,s, "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa");
1378 
1379    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1380 
1381    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1382    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1383    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa");
1384 
1385    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa");
1386    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa");
1387    try_istri(wot,h,s, "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa");
1388 
1389    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa");
1390    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa");
1391    try_istri(wot,h,s, "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa");
1392 
1393    try_istri(wot,h,s, "0000000000000000", "aaaaaaaa0aaaaaaa");
1394    try_istri(wot,h,s, "8000000000000000", "aaaaaaaa0aaaaaaa");
1395    try_istri(wot,h,s, "0000000000000001", "aaaaaaaa0aaaaaaa");
1396 
1397    try_istri(wot,h,s, "0000000000000000", "aaaaaaaaaaaaaaaa");
1398    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "0000000000000000");
1399 }
1400 
1401 
1402 
1403 //////////////////////////////////////////////////////////
1404 //                                                      //
1405 //                       ISTRI_46                       //
1406 //                                                      //
1407 //////////////////////////////////////////////////////////
1408 
/* Executes a real pcmpistri with immediate 0x46 on the host CPU.
   *argL is loaded into xmm2 and *argR into xmm11.  Returns the captured
   rflags, masked with 0x8D5, in bits 16 and up, OR-ed with the low 16
   bits of rcx as read right after the instruction. */
UInt h_pcmpistri_46 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered by 1024 for the duration of the sequence and
         restored at the end; presumably this keeps pushfq from
         overwriting data the compiler keeps just below rsp -- TODO
         confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x46,  %%xmm2, %%xmm11"   "\n\t"
      /* Copy rflags into rdx by way of the stack. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 has bits 11, 7, 6, 4, 2 and 0 set: the positions named by
      SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1430 
s_pcmpistri_46(V128 * argLU,V128 * argRU)1431 UInt s_pcmpistri_46 ( V128* argLU, V128* argRU )
1432 {
1433    V128 resV;
1434    UInt resOSZACP, resECX;
1435    Bool ok
1436       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1437                        zmask_from_V128(argLU),
1438                        zmask_from_V128(argRU),
1439                        0x46, False/*!isSTRM*/
1440         );
1441    assert(ok);
1442    resECX = resV.uInt[0];
1443    return (resOSZACP << 16) | resECX;
1444 }
1445 
istri_46(void)1446 void istri_46 ( void )
1447 {
1448    char* wot = "46";
1449    UInt(*h)(V128*,V128*) = h_pcmpistri_46;
1450    UInt(*s)(V128*,V128*) = s_pcmpistri_46;
1451 
1452    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1453    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1454    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1455    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1456 
1457    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1458    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1459    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1460    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1461    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1462 
1463    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1464 
1465    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1466    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1467    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1468 
1469    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1470    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1471    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1472 
1473    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1474    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1475 
1476    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1477    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1478 
1479    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1480    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1481    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1482    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1483 }
1484 
1485 
1486 //////////////////////////////////////////////////////////
1487 //                                                      //
1488 //                       ISTRI_30                       //
1489 //                                                      //
1490 //////////////////////////////////////////////////////////
1491 
/* Executes a real pcmpistri with immediate 0x30 on the host CPU.
   *argL is loaded into xmm2 and *argR into xmm11.  Returns the captured
   rflags, masked with 0x8D5, in bits 16 and up, OR-ed with the low 16
   bits of rcx as read right after the instruction. */
UInt h_pcmpistri_30 ( V128* argL, V128* argR )
{
   /* Both operands go into one contiguous buffer so the asm needs only
      a single base register (%2). */
   V128 block[2];
   memcpy(&block[0], argL, sizeof(V128));
   memcpy(&block[1], argR, sizeof(V128));
   ULong res, flags;
   __asm__ __volatile__(
      /* rsp is lowered by 1024 for the duration of the sequence and
         restored at the end; presumably this keeps pushfq from
         overwriting data the compiler keeps just below rsp -- TODO
         confirm. */
      "subq      $1024,  %%rsp"             "\n\t"
      "movdqu    0(%2),  %%xmm2"            "\n\t"
      "movdqu    16(%2), %%xmm11"           "\n\t"
      "pcmpistri $0x30,  %%xmm2, %%xmm11"   "\n\t"
      /* Copy rflags into rdx by way of the stack. */
      "pushfq"                              "\n\t"
      "popq      %%rdx"                     "\n\t"
      "movq      %%rcx,  %0"                "\n\t"
      "movq      %%rdx,  %1"                "\n\t"
      "addq      $1024,  %%rsp"             "\n\t"
      : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
      : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
   );
   /* 0x8D5 has bits 11, 7, 6, 4, 2 and 0 set: the positions named by
      SHIFT_O, SHIFT_S, SHIFT_Z, SHIFT_A, SHIFT_P and SHIFT_C. */
   return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
}
1513 
s_pcmpistri_30(V128 * argLU,V128 * argRU)1514 UInt s_pcmpistri_30 ( V128* argLU, V128* argRU )
1515 {
1516    V128 resV;
1517    UInt resOSZACP, resECX;
1518    Bool ok
1519       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1520                        zmask_from_V128(argLU),
1521                        zmask_from_V128(argRU),
1522                        0x30, False/*!isSTRM*/
1523         );
1524    assert(ok);
1525    resECX = resV.uInt[0];
1526    return (resOSZACP << 16) | resECX;
1527 }
1528 
istri_30(void)1529 void istri_30 ( void )
1530 {
1531    char* wot = "30";
1532    UInt(*h)(V128*,V128*) = h_pcmpistri_30;
1533    UInt(*s)(V128*,V128*) = s_pcmpistri_30;
1534 
1535    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1536    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1537    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1538    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1539 
1540    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1541    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1542    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1543    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1544    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1545 
1546    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1547    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1548    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1549    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1550 
1551    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1552    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1553 
1554    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1555    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1556    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1557    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1558 
1559    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1560 
1561    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1562    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1563 }
1564 
1565 
1566 //////////////////////////////////////////////////////////
1567 //                                                      //
1568 //                       ISTRI_40                       //
1569 //                                                      //
1570 //////////////////////////////////////////////////////////
1571 
h_pcmpistri_40(V128 * argL,V128 * argR)1572 UInt h_pcmpistri_40 ( V128* argL, V128* argR )
1573 {
1574    V128 block[2];
1575    memcpy(&block[0], argL, sizeof(V128));
1576    memcpy(&block[1], argR, sizeof(V128));
1577    ULong res, flags;
1578    __asm__ __volatile__(
1579       "subq      $1024,  %%rsp"             "\n\t"
1580       "movdqu    0(%2),  %%xmm2"            "\n\t"
1581       "movdqu    16(%2), %%xmm11"           "\n\t"
1582       "pcmpistri $0x40,  %%xmm2, %%xmm11"   "\n\t"
1583       "pushfq"                              "\n\t"
1584       "popq      %%rdx"                     "\n\t"
1585       "movq      %%rcx,  %0"                "\n\t"
1586       "movq      %%rdx,  %1"                "\n\t"
1587       "addq      $1024,  %%rsp"             "\n\t"
1588       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1589       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1590    );
1591    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1592 }
1593 
s_pcmpistri_40(V128 * argLU,V128 * argRU)1594 UInt s_pcmpistri_40 ( V128* argLU, V128* argRU )
1595 {
1596    V128 resV;
1597    UInt resOSZACP, resECX;
1598    Bool ok
1599       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1600                        zmask_from_V128(argLU),
1601                        zmask_from_V128(argRU),
1602                        0x40, False/*!isSTRM*/
1603         );
1604    assert(ok);
1605    resECX = resV.uInt[0];
1606    return (resOSZACP << 16) | resECX;
1607 }
1608 
istri_40(void)1609 void istri_40 ( void )
1610 {
1611    char* wot = "40";
1612    UInt(*h)(V128*,V128*) = h_pcmpistri_40;
1613    UInt(*s)(V128*,V128*) = s_pcmpistri_40;
1614 
1615    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1616    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1617    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1618    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1619 
1620    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1621    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1622    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1623    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1624    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1625 
1626    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1627    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1628    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1629    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1630 
1631    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1632    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1633 
1634    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1635    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1636    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1637    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1638 
1639    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1640 
1641    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1642    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1643 }
1644 
1645 
1646 //////////////////////////////////////////////////////////
1647 //                                                      //
1648 //                       ISTRI_42                       //
1649 //                                                      //
1650 //////////////////////////////////////////////////////////
1651 
h_pcmpistri_42(V128 * argL,V128 * argR)1652 UInt h_pcmpistri_42 ( V128* argL, V128* argR )
1653 {
1654    V128 block[2];
1655    memcpy(&block[0], argL, sizeof(V128));
1656    memcpy(&block[1], argR, sizeof(V128));
1657    ULong res, flags;
1658    __asm__ __volatile__(
1659       "subq      $1024,  %%rsp"             "\n\t"
1660       "movdqu    0(%2),  %%xmm2"            "\n\t"
1661       "movdqu    16(%2), %%xmm11"           "\n\t"
1662       "pcmpistri $0x42,  %%xmm2, %%xmm11"   "\n\t"
1663       "pushfq"                              "\n\t"
1664       "popq      %%rdx"                     "\n\t"
1665       "movq      %%rcx,  %0"                "\n\t"
1666       "movq      %%rdx,  %1"                "\n\t"
1667       "addq      $1024,  %%rsp"             "\n\t"
1668       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1669       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1670    );
1671    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1672 }
1673 
s_pcmpistri_42(V128 * argLU,V128 * argRU)1674 UInt s_pcmpistri_42 ( V128* argLU, V128* argRU )
1675 {
1676    V128 resV;
1677    UInt resOSZACP, resECX;
1678    Bool ok
1679       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1680                        zmask_from_V128(argLU),
1681                        zmask_from_V128(argRU),
1682                        0x42, False/*!isSTRM*/
1683         );
1684    assert(ok);
1685    resECX = resV.uInt[0];
1686    return (resOSZACP << 16) | resECX;
1687 }
1688 
istri_42(void)1689 void istri_42 ( void )
1690 {
1691    char* wot = "42";
1692    UInt(*h)(V128*,V128*) = h_pcmpistri_42;
1693    UInt(*s)(V128*,V128*) = s_pcmpistri_42;
1694 
1695    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
1696    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
1697    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
1698    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
1699 
1700    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1701    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
1702    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
1703    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
1704    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
1705 
1706    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
1707    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
1708    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
1709    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
1710 
1711    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1712    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
1713 
1714    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
1715    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
1716    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
1717    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
1718 
1719    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
1720 
1721    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
1722    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
1723 }
1724 
1725 
1726 //////////////////////////////////////////////////////////
1727 //                                                      //
1728 //                       ISTRI_0E                       //
1729 //                                                      //
1730 //////////////////////////////////////////////////////////
1731 
1732 __attribute__((noinline))
h_pcmpistri_0E(V128 * argL,V128 * argR)1733 UInt h_pcmpistri_0E ( V128* argL, V128* argR )
1734 {
1735    V128 block[2];
1736    memcpy(&block[0], argL, sizeof(V128));
1737    memcpy(&block[1], argR, sizeof(V128));
1738    ULong res = 0, flags = 0;
1739    __asm__ __volatile__(
1740       "movdqu    0(%2),  %%xmm2"            "\n\t"
1741       "movdqu    16(%2), %%xmm11"           "\n\t"
1742       "pcmpistri $0x0E,  %%xmm2, %%xmm11"   "\n\t"
1743       "pushfq"                              "\n\t"
1744       "popq      %%rdx"                     "\n\t"
1745       "movq      %%rcx,  %0"                "\n\t"
1746       "movq      %%rdx,  %1"                "\n\t"
1747       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1748       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1749    );
1750    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1751 }
1752 
s_pcmpistri_0E(V128 * argLU,V128 * argRU)1753 UInt s_pcmpistri_0E ( V128* argLU, V128* argRU )
1754 {
1755    V128 resV;
1756    UInt resOSZACP, resECX;
1757    Bool ok
1758       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1759                        zmask_from_V128(argLU),
1760                        zmask_from_V128(argRU),
1761                        0x0E, False/*!isSTRM*/
1762         );
1763    assert(ok);
1764    resECX = resV.uInt[0];
1765    return (resOSZACP << 16) | resECX;
1766 }
1767 
istri_0E(void)1768 void istri_0E ( void )
1769 {
1770    char* wot = "0E";
1771    UInt(*h)(V128*,V128*) = h_pcmpistri_0E;
1772    UInt(*s)(V128*,V128*) = s_pcmpistri_0E;
1773 
1774    try_istri(wot,h,s, "111111111abcde11", "00000000000abcde");
1775 
1776    try_istri(wot,h,s, "111111111abcde11", "0000abcde00abcde");
1777 
1778    try_istri(wot,h,s, "1111111111abcde1", "00000000000abcde");
1779    try_istri(wot,h,s, "11111111111abcde", "00000000000abcde");
1780    try_istri(wot,h,s, "111111111111abcd", "00000000000abcde");
1781 
1782    try_istri(wot,h,s, "111abcde1abcde11", "00000000000abcde");
1783 
1784    try_istri(wot,h,s, "11abcde11abcde11", "00000000000abcde");
1785    try_istri(wot,h,s, "1abcde111abcde11", "00000000000abcde");
1786    try_istri(wot,h,s, "abcde1111abcde11", "00000000000abcde");
1787    try_istri(wot,h,s, "bcde11111abcde11", "00000000000abcde");
1788    try_istri(wot,h,s, "cde111111abcde11", "00000000000abcde");
1789 
1790    try_istri(wot,h,s, "01abcde11abcde11", "00000000000abcde");
1791    try_istri(wot,h,s, "00abcde11abcde11", "00000000000abcde");
1792    try_istri(wot,h,s, "000bcde11abcde11", "00000000000abcde");
1793 
1794    try_istri(wot,h,s, "00abcde10abcde11", "00000000000abcde");
1795    try_istri(wot,h,s, "00abcde100bcde11", "00000000000abcde");
1796 
1797    try_istri(wot,h,s, "1111111111111234", "0000000000000000");
1798    try_istri(wot,h,s, "1111111111111234", "0000000000000001");
1799    try_istri(wot,h,s, "1111111111111234", "0000000000000011");
1800 
1801    try_istri(wot,h,s, "1111111111111234", "1111111111111234");
1802    try_istri(wot,h,s, "a111111111111111", "000000000000000a");
1803    try_istri(wot,h,s, "b111111111111111", "000000000000000a");
1804 
1805    try_istri(wot,h,s, "b111111111111111", "0000000000000000");
1806    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1807    try_istri(wot,h,s, "123456789abcdef1", "0000000000000000");
1808    try_istri(wot,h,s, "0000000000000000", "123456789abcdef1");
1809 }
1810 
1811 
1812 //////////////////////////////////////////////////////////
1813 //                                                      //
1814 //                       ISTRI_34                       //
1815 //                                                      //
1816 //////////////////////////////////////////////////////////
1817 
h_pcmpistri_34(V128 * argL,V128 * argR)1818 UInt h_pcmpistri_34 ( V128* argL, V128* argR )
1819 {
1820    V128 block[2];
1821    memcpy(&block[0], argL, sizeof(V128));
1822    memcpy(&block[1], argR, sizeof(V128));
1823    ULong res, flags;
1824    __asm__ __volatile__(
1825       "subq      $1024,  %%rsp"             "\n\t"
1826       "movdqu    0(%2),  %%xmm2"            "\n\t"
1827       "movdqu    16(%2), %%xmm11"           "\n\t"
1828       "pcmpistri $0x34,  %%xmm2, %%xmm11"   "\n\t"
1829       "pushfq"                              "\n\t"
1830       "popq      %%rdx"                     "\n\t"
1831       "movq      %%rcx,  %0"                "\n\t"
1832       "movq      %%rdx,  %1"                "\n\t"
1833       "addq      $1024,  %%rsp"             "\n\t"
1834       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1835       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1836    );
1837    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1838 }
1839 
s_pcmpistri_34(V128 * argLU,V128 * argRU)1840 UInt s_pcmpistri_34 ( V128* argLU, V128* argRU )
1841 {
1842    V128 resV;
1843    UInt resOSZACP, resECX;
1844    Bool ok
1845       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1846                        zmask_from_V128(argLU),
1847                        zmask_from_V128(argRU),
1848                        0x34, False/*!isSTRM*/
1849         );
1850    assert(ok);
1851    resECX = resV.uInt[0];
1852    return (resOSZACP << 16) | resECX;
1853 }
1854 
istri_34(void)1855 void istri_34 ( void )
1856 {
1857    char* wot = "34";
1858    UInt(*h)(V128*,V128*) = h_pcmpistri_34;
1859    UInt(*s)(V128*,V128*) = s_pcmpistri_34;
1860 
1861    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1862    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1863    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1864    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1865 
1866    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1867    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1868    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1869    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1870    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1871 
1872    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1873 
1874    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1875    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1876    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1877 
1878    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1879    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1880    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1881 
1882    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1883    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1884 
1885    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1886    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1887 
1888    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1889    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1890    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1891    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1892 }
1893 
1894 
1895 //////////////////////////////////////////////////////////
1896 //                                                      //
1897 //                       ISTRI_14                       //
1898 //                                                      //
1899 //////////////////////////////////////////////////////////
1900 
h_pcmpistri_14(V128 * argL,V128 * argR)1901 UInt h_pcmpistri_14 ( V128* argL, V128* argR )
1902 {
1903    V128 block[2];
1904    memcpy(&block[0], argL, sizeof(V128));
1905    memcpy(&block[1], argR, sizeof(V128));
1906    ULong res, flags;
1907    __asm__ __volatile__(
1908       "subq      $1024,  %%rsp"             "\n\t"
1909       "movdqu    0(%2),  %%xmm2"            "\n\t"
1910       "movdqu    16(%2), %%xmm11"           "\n\t"
1911       "pcmpistri $0x14,  %%xmm2, %%xmm11"   "\n\t"
1912       "pushfq"                              "\n\t"
1913       "popq      %%rdx"                     "\n\t"
1914       "movq      %%rcx,  %0"                "\n\t"
1915       "movq      %%rdx,  %1"                "\n\t"
1916       "addq      $1024,  %%rsp"             "\n\t"
1917       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
1918       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
1919    );
1920    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
1921 }
1922 
s_pcmpistri_14(V128 * argLU,V128 * argRU)1923 UInt s_pcmpistri_14 ( V128* argLU, V128* argRU )
1924 {
1925    V128 resV;
1926    UInt resOSZACP, resECX;
1927    Bool ok
1928       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
1929                        zmask_from_V128(argLU),
1930                        zmask_from_V128(argRU),
1931                        0x14, False/*!isSTRM*/
1932         );
1933    assert(ok);
1934    resECX = resV.uInt[0];
1935    return (resOSZACP << 16) | resECX;
1936 }
1937 
istri_14(void)1938 void istri_14 ( void )
1939 {
1940    char* wot = "14";
1941    UInt(*h)(V128*,V128*) = h_pcmpistri_14;
1942    UInt(*s)(V128*,V128*) = s_pcmpistri_14;
1943 
1944    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000bc");
1945    try_istri(wot,h,s, "aaaabbbbccccdddd", "00000000000000cb");
1946    try_istri(wot,h,s, "baaabbbbccccdddd", "00000000000000cb");
1947    try_istri(wot,h,s, "baaabbbbccccdddc", "00000000000000cb");
1948 
1949    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1950    try_istri(wot,h,s, "bbbbbbbb0bbbbbbb", "00000000000000cb");
1951    try_istri(wot,h,s, "bbbbbbbbbbbbbb0b", "00000000000000cb");
1952    try_istri(wot,h,s, "bbbbbbbbbbbbbbb0", "00000000000000cb");
1953    try_istri(wot,h,s, "0000000000000000", "00000000000000cb");
1954 
1955    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
1956 
1957    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "00000000000000cb");
1958    try_istri(wot,h,s, "bbbbbbbbbbbbbbbb", "000000000000000b");
1959    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000062cb");
1960 
1961    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000002cb");
1962    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "00000000000000cb");
1963    try_istri(wot,h,s, "b4b4b4b4b4b4b4b4", "000000000000000b");
1964 
1965    try_istri(wot,h,s, "0123456789abcdef", "000000fecb975421");
1966    try_istri(wot,h,s, "123456789abcdef1", "000000fecb975421");
1967 
1968    try_istri(wot,h,s, "0123456789abcdef", "00000000dca86532");
1969    try_istri(wot,h,s, "123456789abcdef1", "00000000dca86532");
1970 
1971    try_istri(wot,h,s, "163887ec041a9b72", "fcd75adb9b3e895a");
1972    try_istri(wot,h,s, "fc937cbfbf53f8e2", "0d136bcb024d3fb7");
1973    try_istri(wot,h,s, "2ca34182c29a82ab", "302ebd646775ab54");
1974    try_istri(wot,h,s, "3f2987608c11be6f", "a9ecb661f8e0a8cb");
1975 }
1976 
1977 
1978 //////////////////////////////////////////////////////////
1979 //                                                      //
1980 //                       ISTRI_70                       //
1981 //                                                      //
1982 //////////////////////////////////////////////////////////
1983 
h_pcmpistri_70(V128 * argL,V128 * argR)1984 UInt h_pcmpistri_70 ( V128* argL, V128* argR )
1985 {
1986    V128 block[2];
1987    memcpy(&block[0], argL, sizeof(V128));
1988    memcpy(&block[1], argR, sizeof(V128));
1989    ULong res, flags;
1990    __asm__ __volatile__(
1991       "subq      $1024,  %%rsp"             "\n\t"
1992       "movdqu    0(%2),  %%xmm2"            "\n\t"
1993       "movdqu    16(%2), %%xmm11"           "\n\t"
1994       "pcmpistri $0x70,  %%xmm2, %%xmm11"   "\n\t"
1995       "pushfq"                              "\n\t"
1996       "popq      %%rdx"                     "\n\t"
1997       "movq      %%rcx,  %0"                "\n\t"
1998       "movq      %%rdx,  %1"                "\n\t"
1999       "addq      $1024,  %%rsp"             "\n\t"
2000       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2001       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2002    );
2003    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2004 }
2005 
s_pcmpistri_70(V128 * argLU,V128 * argRU)2006 UInt s_pcmpistri_70 ( V128* argLU, V128* argRU )
2007 {
2008    V128 resV;
2009    UInt resOSZACP, resECX;
2010    Bool ok
2011       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2012                        zmask_from_V128(argLU),
2013                        zmask_from_V128(argRU),
2014                        0x70, False/*!isSTRM*/
2015         );
2016    assert(ok);
2017    resECX = resV.uInt[0];
2018    return (resOSZACP << 16) | resECX;
2019 }
2020 
istri_70(void)2021 void istri_70 ( void )
2022 {
2023    char* wot = "70";
2024    UInt(*h)(V128*,V128*) = h_pcmpistri_70;
2025    UInt(*s)(V128*,V128*) = s_pcmpistri_70;
2026 
2027    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2028    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2029    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2030    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2031 
2032    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2033    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2034    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2035    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2036    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2037 
2038    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2039    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2040    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2041    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2042 
2043    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2044    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2045 
2046    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2047    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2048    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2049    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2050 
2051    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2052 
2053    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2054    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2055 }
2056 
2057 
2058 //////////////////////////////////////////////////////////
2059 //                                                      //
2060 //                       ISTRI_62                       //
2061 //                                                      //
2062 //////////////////////////////////////////////////////////
2063 
h_pcmpistri_62(V128 * argL,V128 * argR)2064 UInt h_pcmpistri_62 ( V128* argL, V128* argR )
2065 {
2066    V128 block[2];
2067    memcpy(&block[0], argL, sizeof(V128));
2068    memcpy(&block[1], argR, sizeof(V128));
2069    ULong res, flags;
2070    __asm__ __volatile__(
2071       "subq      $1024,  %%rsp"             "\n\t"
2072       "movdqu    0(%2),  %%xmm2"            "\n\t"
2073       "movdqu    16(%2), %%xmm11"           "\n\t"
2074       "pcmpistri $0x62,  %%xmm2, %%xmm11"   "\n\t"
2075       "pushfq"                              "\n\t"
2076       "popq      %%rdx"                     "\n\t"
2077       "movq      %%rcx,  %0"                "\n\t"
2078       "movq      %%rdx,  %1"                "\n\t"
2079       "addq      $1024,  %%rsp"             "\n\t"
2080       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2081       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2082    );
2083    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2084 }
2085 
s_pcmpistri_62(V128 * argLU,V128 * argRU)2086 UInt s_pcmpistri_62 ( V128* argLU, V128* argRU )
2087 {
2088    V128 resV;
2089    UInt resOSZACP, resECX;
2090    Bool ok
2091       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2092                        zmask_from_V128(argLU),
2093                        zmask_from_V128(argRU),
2094                        0x62, False/*!isSTRM*/
2095         );
2096    assert(ok);
2097    resECX = resV.uInt[0];
2098    return (resOSZACP << 16) | resECX;
2099 }
2100 
istri_62(void)2101 void istri_62 ( void )
2102 {
2103    char* wot = "62";
2104    UInt(*h)(V128*,V128*) = h_pcmpistri_62;
2105    UInt(*s)(V128*,V128*) = s_pcmpistri_62;
2106 
2107    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2108    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2109    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2110    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2111 
2112    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2113    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2114    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2115    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2116    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2117 
2118    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2119    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2120    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2121    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2122 
2123    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2124    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2125 
2126    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2127    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2128    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2129    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2130 
2131    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2132 
2133    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2134    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2135 }
2136 
2137 
2138 //////////////////////////////////////////////////////////
2139 //                                                      //
2140 //                       ISTRI_72                       //
2141 //                                                      //
2142 //////////////////////////////////////////////////////////
2143 
h_pcmpistri_72(V128 * argL,V128 * argR)2144 UInt h_pcmpistri_72 ( V128* argL, V128* argR )
2145 {
2146    V128 block[2];
2147    memcpy(&block[0], argL, sizeof(V128));
2148    memcpy(&block[1], argR, sizeof(V128));
2149    ULong res, flags;
2150    __asm__ __volatile__(
2151       "subq      $1024,  %%rsp"             "\n\t"
2152       "movdqu    0(%2),  %%xmm2"            "\n\t"
2153       "movdqu    16(%2), %%xmm11"           "\n\t"
2154       "pcmpistri $0x72,  %%xmm2, %%xmm11"   "\n\t"
2155       "pushfq"                              "\n\t"
2156       "popq      %%rdx"                     "\n\t"
2157       "movq      %%rcx,  %0"                "\n\t"
2158       "movq      %%rdx,  %1"                "\n\t"
2159       "addq      $1024,  %%rsp"             "\n\t"
2160       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2161       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2162    );
2163    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2164 }
2165 
s_pcmpistri_72(V128 * argLU,V128 * argRU)2166 UInt s_pcmpistri_72 ( V128* argLU, V128* argRU )
2167 {
2168    V128 resV;
2169    UInt resOSZACP, resECX;
2170    Bool ok
2171       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2172                        zmask_from_V128(argLU),
2173                        zmask_from_V128(argRU),
2174                        0x72, False/*!isSTRM*/
2175         );
2176    assert(ok);
2177    resECX = resV.uInt[0];
2178    return (resOSZACP << 16) | resECX;
2179 }
2180 
istri_72(void)2181 void istri_72 ( void )
2182 {
2183    char* wot = "72";
2184    UInt(*h)(V128*,V128*) = h_pcmpistri_72;
2185    UInt(*s)(V128*,V128*) = s_pcmpistri_72;
2186 
2187    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2188    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2189    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2190    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2191 
2192    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2193    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2194    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2195    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2196    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2197 
2198    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2199    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2200    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2201    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2202 
2203    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2204    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2205 
2206    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2207    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2208    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2209    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2210 
2211    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2212 
2213    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2214    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2215 }
2216 
2217 
2218 //////////////////////////////////////////////////////////
2219 //                                                      //
2220 //                       ISTRI_10                       //
2221 //                                                      //
2222 //////////////////////////////////////////////////////////
2223 
h_pcmpistri_10(V128 * argL,V128 * argR)2224 UInt h_pcmpistri_10 ( V128* argL, V128* argR )
2225 {
2226    V128 block[2];
2227    memcpy(&block[0], argL, sizeof(V128));
2228    memcpy(&block[1], argR, sizeof(V128));
2229    ULong res, flags;
2230    __asm__ __volatile__(
2231       "subq      $1024,  %%rsp"             "\n\t"
2232       "movdqu    0(%2),  %%xmm2"            "\n\t"
2233       "movdqu    16(%2), %%xmm11"           "\n\t"
2234       "pcmpistri $0x10,  %%xmm2, %%xmm11"   "\n\t"
2235 //"pcmpistrm $0x10, %%xmm2, %%xmm11"   "\n\t"
2236 //"movd %%xmm0, %%ecx" "\n\t"
2237       "pushfq"                              "\n\t"
2238       "popq      %%rdx"                     "\n\t"
2239       "movq      %%rcx,  %0"                "\n\t"
2240       "movq      %%rdx,  %1"                "\n\t"
2241       "addq      $1024,  %%rsp"             "\n\t"
2242       : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
2243       : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
2244    );
2245    return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
2246 }
2247 
s_pcmpistri_10(V128 * argLU,V128 * argRU)2248 UInt s_pcmpistri_10 ( V128* argLU, V128* argRU )
2249 {
2250    V128 resV;
2251    UInt resOSZACP, resECX;
2252    Bool ok
2253       = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
2254                        zmask_from_V128(argLU),
2255                        zmask_from_V128(argRU),
2256                        0x10, False/*!isSTRM*/
2257         );
2258    assert(ok);
2259    resECX = resV.uInt[0];
2260    return (resOSZACP << 16) | resECX;
2261 }
2262 
istri_10(void)2263 void istri_10 ( void )
2264 {
2265    char* wot = "10";
2266    UInt(*h)(V128*,V128*) = h_pcmpistri_10;
2267    UInt(*s)(V128*,V128*) = s_pcmpistri_10;
2268 
2269    try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
2270    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
2271    try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
2272    try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
2273 
2274    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2275    try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
2276    try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
2277    try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
2278    try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
2279 
2280    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
2281    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
2282    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
2283    try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
2284 
2285    try_istri(wot,h,s, "0000000000000000", "0000000000000000");
2286    try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
2287 
2288    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
2289    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
2290    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
2291    try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
2292 
2293    try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
2294 
2295    try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
2296    try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
2297 }
2298 
2299 
2300 //////////////////////////////////////////////////////////
2301 //                                                      //
2302 //                         main                         //
2303 //                                                      //
2304 //////////////////////////////////////////////////////////
2305 
main(void)2306 int main ( void )
2307 {
2308    istri_4A();
2309    istri_3A();
2310    istri_08();
2311    istri_18();
2312    istri_1A();
2313    istri_02();
2314    istri_0C();
2315    istri_12();
2316    istri_44();
2317    istri_00();
2318    istri_38();
2319    istri_46();
2320    istri_30();
2321    istri_40();
2322    istri_42();
2323    istri_0E();
2324    istri_14();
2325    istri_34();
2326    istri_70();
2327    istri_62();
2328    istri_72();
2329    istri_10();
2330    return 0;
2331 }
2332