• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                               guest_generic_x87.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2015 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 /* This file contains functions for doing some x87-specific
37    operations.  Both the amd64 and x86 front ends (guests) indirectly
38    call these functions via guest helper calls.  By putting them here,
39    code duplication is avoided.  Some of these functions are tricky
40    and hard to verify, so there is much to be said for only having one
41    copy thereof.
42 */
43 
44 #include "libvex_basictypes.h"
45 
46 #include "main_util.h"
47 #include "guest_generic_x87.h"
48 
49 
50 /* 80 and 64-bit floating point formats:
51 
52    80-bit:
53 
54     S  0       0-------0      zero
55     S  0       0X------X      denormals
56     S  1-7FFE  1X------X      normals (all normals have leading 1)
57     S  7FFF    10------0      infinity
58     S  7FFF    10X-----X      snan
59     S  7FFF    11X-----X      qnan
60 
61    S is the sign bit.  For runs X----X, at least one of the Xs must be
62    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
63    there is an explicitly represented leading 1, and a sign bit,
64    giving 80 in total.
65 
66    64-bit avoids the confusion of an explicitly represented leading 1
67    and so is simpler:
68 
69     S  0      0------0   zero
70     S  0      X------X   denormals
71     S  1-7FE  any        normals
72     S  7FF    0------0   infinity
73     S  7FF    0X-----X   snan
74     S  7FF    1X-----X   qnan
75 
76    Exponent is 11 bits, fractional part is 52 bits, and there is a
77    sign bit, giving 64 in total.
78 */
79 
80 
/* Return bit number n of the byte array arr, as 0 or 1.  Bits are
   numbered so that bit 0 is the least significant bit of arr[0] and
   bit 8 is the least significant bit of arr[1], etc. */
static inline UInt read_bit_array ( UChar* arr, UInt n )
{
   UInt byteIx = n / 8;
   UInt bitIx  = n % 8;
   return (arr[byteIx] >> bitIx) & 1;
}
87 
/* Set bit number n of the byte array arr to the lowest bit of b,
   leaving all other bits unchanged.  Bit numbering is as for
   read_bit_array: bit 0 is the least significant bit of arr[0]. */
static inline void write_bit_array ( UChar* arr, UInt n, UInt b )
{
   UInt  byteIx = n >> 3;
   UInt  bitIx  = n & 7;
   UChar mask   = toUChar( 1 << bitIx );

   if (b & 1)
      arr[byteIx] = toUChar( arr[byteIx] | mask );
   else
      arr[byteIx] = toUChar( arr[byteIx] & ~mask );
}
95 
96 /* Convert an IEEE754 double (64-bit) into an x87 extended double
97    (80-bit), mimicing the hardware fairly closely.  Both numbers are
98    stored little-endian.  Limitations, all of which could be fixed,
99    given some level of hassle:
100 
101    * Identity of NaNs is not preserved.
102 
103    See comments in the code for more details.
104 */
convert_f64le_to_f80le(UChar * f64,UChar * f80)105 void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
106 {
107    Bool  mantissaIsZero;
108    Int   bexp, i, j, shift;
109    UChar sign;
110 
111    sign = toUChar( (f64[7] >> 7) & 1 );
112    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
113    bexp &= 0x7FF;
114 
115    mantissaIsZero = False;
116    if (bexp == 0 || bexp == 0x7FF) {
117       /* We'll need to know whether or not the mantissa (bits 51:0) is
118          all zeroes in order to handle these cases.  So figure it
119          out. */
120       mantissaIsZero
121          = toBool(
122               (f64[6] & 0x0F) == 0
123               && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
124               && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
125            );
126    }
127 
128    /* If the exponent is zero, either we have a zero or a denormal.
129       Produce a zero.  This is a hack in that it forces denormals to
130       zero.  Could do better. */
131    if (bexp == 0) {
132       f80[9] = toUChar( sign << 7 );
133       f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
134              = f80[3] = f80[2] = f80[1] = f80[0] = 0;
135 
136       if (mantissaIsZero)
137          /* It really is zero, so that's all we can do. */
138          return;
139 
140       /* There is at least one 1-bit in the mantissa.  So it's a
141          potentially denormalised double -- but we can produce a
142          normalised long double.  Count the leading zeroes in the
143          mantissa so as to decide how much to bump the exponent down
144          by.  Note, this is SLOW. */
145       shift = 0;
146       for (i = 51; i >= 0; i--) {
147         if (read_bit_array(f64, i))
148            break;
149         shift++;
150       }
151 
152       /* and copy into place as many bits as we can get our hands on. */
153       j = 63;
154       for (i = 51 - shift; i >= 0; i--) {
155          write_bit_array( f80, j,
156      	 read_bit_array( f64, i ) );
157          j--;
158       }
159 
160       /* Set the exponent appropriately, and we're done. */
161       bexp -= shift;
162       bexp += (16383 - 1023);
163       f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
164       f80[8] = toUChar( bexp & 0xFF );
165       return;
166    }
167 
168    /* If the exponent is 7FF, this is either an Infinity, a SNaN or
169       QNaN, as determined by examining bits 51:0, thus:
170           0  ... 0    Inf
171           0X ... X    SNaN
172           1X ... X    QNaN
173       where at least one of the Xs is not zero.
174    */
175    if (bexp == 0x7FF) {
176       if (mantissaIsZero) {
177          /* Produce an appropriately signed infinity:
178             S 1--1 (15)  1  0--0 (63)
179          */
180          f80[9] = toUChar( (sign << 7) | 0x7F );
181          f80[8] = 0xFF;
182          f80[7] = 0x80;
183          f80[6] = f80[5] = f80[4] = f80[3]
184                 = f80[2] = f80[1] = f80[0] = 0;
185          return;
186       }
187       /* So it's either a QNaN or SNaN.  Distinguish by considering
188          bit 51.  Note, this destroys all the trailing bits
189          (identity?) of the NaN.  IEEE754 doesn't require preserving
190          these (it only requires that there be one QNaN value and one
191          SNaN value), but x87 does seem to have some ability to
192          preserve them.  Anyway, here, the NaN's identity is
193          destroyed.  Could be improved. */
194       if (f64[6] & 8) {
195          /* QNaN.  Make a canonical QNaN:
196             S 1--1 (15)  1 1  0--0 (62)
197          */
198          f80[9] = toUChar( (sign << 7) | 0x7F );
199          f80[8] = 0xFF;
200          f80[7] = 0xC0;
201          f80[6] = f80[5] = f80[4] = f80[3]
202                 = f80[2] = f80[1] = f80[0] = 0x00;
203       } else {
204          /* SNaN.  Make a SNaN:
205             S 1--1 (15)  1 0  1--1 (62)
206          */
207          f80[9] = toUChar( (sign << 7) | 0x7F );
208          f80[8] = 0xFF;
209          f80[7] = 0xBF;
210          f80[6] = f80[5] = f80[4] = f80[3]
211                 = f80[2] = f80[1] = f80[0] = 0xFF;
212       }
213       return;
214    }
215 
216    /* It's not a zero, denormal, infinity or nan.  So it must be a
217       normalised number.  Rebias the exponent and build the new
218       number.  */
219    bexp += (16383 - 1023);
220 
221    f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
222    f80[8] = toUChar( bexp & 0xFF );
223    f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
224                               | ((f64[5] >> 5) & 7) );
225    f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
226    f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
227    f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
228    f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
229    f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
230    f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
231    f80[0] = toUChar( 0 );
232 }
233 
234 
235 /* Convert an x87 extended double (80-bit) into an IEEE 754 double
236    (64-bit), mimicking the hardware fairly closely.  Both numbers are
237    stored little-endian.  Limitations, both of which could be fixed,
238    given some level of hassle:
239 
240    * Rounding following truncation could be a bit better.
241 
242    * Identity of NaNs is not preserved.
243 
244    See comments in the code for more details.
245 */
convert_f80le_to_f64le(UChar * f80,UChar * f64)246 void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
247 {
248    Bool  isInf;
249    Int   bexp, i, j;
250    UChar sign;
251 
252    sign = toUChar((f80[9] >> 7) & 1);
253    bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
254    bexp &= 0x7FFF;
255 
256    /* If the exponent is zero, either we have a zero or a denormal.
257       But an extended precision denormal becomes a double precision
258       zero, so in either case, just produce the appropriately signed
259       zero. */
260    if (bexp == 0) {
261       f64[7] = toUChar(sign << 7);
262       f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
263       return;
264    }
265 
266    /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
267       QNaN, as determined by examining bits 62:0, thus:
268           10  ... 0    Inf
269           10X ... X    SNaN
270           11X ... X    QNaN
271       where at least one of the Xs is not zero.
272    */
273    if (bexp == 0x7FFF) {
274       isInf = toBool(
275                  (f80[7] & 0x7F) == 0
276                  && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
277                  && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
278                  && f80[0] == 0
279               );
280       if (isInf) {
281          if (0 == (f80[7] & 0x80))
282             goto wierd_NaN;
283          /* Produce an appropriately signed infinity:
284             S 1--1 (11)  0--0 (52)
285          */
286          f64[7] = toUChar((sign << 7) | 0x7F);
287          f64[6] = 0xF0;
288          f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
289          return;
290       }
291       /* So it's either a QNaN or SNaN.  Distinguish by considering
292          bit 61.  Note, this destroys all the trailing bits
293          (identity?) of the NaN.  IEEE754 doesn't require preserving
294          these (it only requires that there be one QNaN value and one
295          SNaN value), but x87 does seem to have some ability to
296          preserve them.  Anyway, here, the NaN's identity is
297          destroyed.  Could be improved. */
298       if (f80[7] & 0x40) {
299          /* QNaN.  Make a canonical QNaN:
300             S 1--1 (11)  1  0--0 (51)
301          */
302          f64[7] = toUChar((sign << 7) | 0x7F);
303          f64[6] = 0xF8;
304          f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0x00;
305       } else {
306          /* SNaN.  Make a SNaN:
307             S 1--1 (11)  0  1--1 (51)
308          */
309          f64[7] = toUChar((sign << 7) | 0x7F);
310          f64[6] = 0xF7;
311          f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
312       }
313       return;
314    }
315 
316    /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
317       zero, the x87 FPU appears to consider the number denormalised
318       and converts it to a QNaN. */
319    if (0 == (f80[7] & 0x80)) {
320       wierd_NaN:
321       /* Strange hardware QNaN:
322          S 1--1 (11)  1  0--0 (51)
323       */
324       /* On a PIII, these QNaNs always appear with sign==1.  I have
325          no idea why. */
326       f64[7] = (1 /*sign*/ << 7) | 0x7F;
327       f64[6] = 0xF8;
328       f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
329       return;
330    }
331 
332    /* It's not a zero, denormal, infinity or nan.  So it must be a
333       normalised number.  Rebias the exponent and consider. */
334    bexp -= (16383 - 1023);
335    if (bexp >= 0x7FF) {
336       /* It's too big for a double.  Construct an infinity. */
337       f64[7] = toUChar((sign << 7) | 0x7F);
338       f64[6] = 0xF0;
339       f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
340       return;
341    }
342 
343    if (bexp <= 0) {
344       /* It's too small for a normalised double.  First construct a
345          zero and then see if it can be improved into a denormal.  */
346       f64[7] = toUChar(sign << 7);
347       f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
348 
349       if (bexp < -52)
350          /* Too small even for a denormal. */
351          return;
352 
353       /* Ok, let's make a denormal.  Note, this is SLOW. */
354       /* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
355          indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
356       /* bexp is in range -52 .. 0 inclusive */
357       for (i = 63; i >= 0; i--) {
358          j = i - 12 + bexp;
359          if (j < 0) break;
360          /* We shouldn't really call vassert from generated code. */
361          vassert(j >= 0 && j < 52);
362          write_bit_array ( f64,
363                            j,
364                            read_bit_array ( f80, i ) );
365       }
366       /* and now we might have to round ... */
367       if (read_bit_array(f80, 10+1 - bexp) == 1)
368          goto do_rounding;
369 
370       return;
371    }
372 
373    /* Ok, it's a normalised number which is representable as a double.
374       Copy the exponent and mantissa into place. */
375    /*
376    for (i = 0; i < 52; i++)
377       write_bit_array ( f64,
378                         i,
379                         read_bit_array ( f80, i+11 ) );
380    */
381    f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
382    f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
383    f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
384    f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
385    f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
386    f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
387 
388    f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
389 
390    f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
391 
392    /* Now consider any rounding that needs to happen as a result of
393       truncating the mantissa. */
394    if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
395 
396       /* If the bottom bits of f80 are "100 0000 0000", then the
397          infinitely precise value is deemed to be mid-way between the
398          two closest representable values.  Since we're doing
399          round-to-nearest (the default mode), in that case it is the
400          bit immediately above which indicates whether we should round
401          upwards or not -- if 0, we don't.  All that is encapsulated
402          in the following simple test. */
403       if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
404          return;
405 
406       do_rounding:
407       /* Round upwards.  This is a kludge.  Once in every 2^24
408          roundings (statistically) the bottom three bytes are all 0xFF
409          and so we don't round at all.  Could be improved. */
410       if (f64[0] != 0xFF) {
411          f64[0]++;
412       }
413       else
414       if (f64[0] == 0xFF && f64[1] != 0xFF) {
415          f64[0] = 0;
416          f64[1]++;
417       }
418       else
419       if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
420          f64[0] = 0;
421          f64[1] = 0;
422          f64[2]++;
423       }
424       /* else we don't round, but we should. */
425    }
426 }
427 
428 
429 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
430 /* Extract the signed significand or exponent component as per
431    fxtract.  Arg and result are doubles travelling under the guise of
432    ULongs.  Returns significand when getExp is zero and exponent
433    otherwise. */
x86amd64g_calculate_FXTRACT(ULong arg,HWord getExp)434 ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp )
435 {
436    ULong  uSig, uExp;
437    /* Long   sSig; */
438    Int    sExp, i;
439    UInt   sign, expExp;
440 
441    /*
442     S  7FF    0------0   infinity
443     S  7FF    0X-----X   snan
444     S  7FF    1X-----X   qnan
445    */
446    const ULong posInf  = 0x7FF0000000000000ULL;
447    const ULong negInf  = 0xFFF0000000000000ULL;
448    const ULong nanMask = 0x7FF0000000000000ULL;
449    const ULong qNan    = 0x7FF8000000000000ULL;
450    const ULong posZero = 0x0000000000000000ULL;
451    const ULong negZero = 0x8000000000000000ULL;
452    const ULong bit51   = 1ULL << 51;
453    const ULong bit52   = 1ULL << 52;
454    const ULong sigMask = bit52 - 1;
455 
456    /* Mimic Core i5 behaviour for special cases. */
457    if (arg == posInf)
458       return getExp ? posInf : posInf;
459    if (arg == negInf)
460       return getExp ? posInf : negInf;
461    if ((arg & nanMask) == nanMask)
462       return qNan | (arg & (1ULL << 63));
463    if (arg == posZero)
464       return getExp ? negInf : posZero;
465    if (arg == negZero)
466       return getExp ? negInf : negZero;
467 
468    /* Split into sign, exponent and significand. */
469    sign = ((UInt)(arg >> 63)) & 1;
470 
471    /* Mask off exponent & sign. uSig is in range 0 .. 2^52-1. */
472    uSig = arg & sigMask;
473 
474    /* Get the exponent. */
475    sExp = ((Int)(arg >> 52)) & 0x7FF;
476 
477    /* Deal with denormals: if the exponent is zero, then the
478       significand cannot possibly be zero (negZero/posZero are handled
479       above).  Shift the significand left until bit 51 of it becomes
480       1, and decrease the exponent accordingly.
481    */
482    if (sExp == 0) {
483       for (i = 0; i < 52; i++) {
484          if (uSig & bit51)
485             break;
486          uSig <<= 1;
487          sExp--;
488       }
489       uSig <<= 1;
490    } else {
491       /* Add the implied leading-1 in the significand. */
492       uSig |= bit52;
493    }
494 
495    /* Roll in the sign. */
496    /* sSig = uSig; */
497    /* if (sign) sSig =- sSig; */
498 
499    /* Convert sig into a double.  This should be an exact conversion.
500       Then divide by 2^52, which should give a value in the range 1.0
501       to 2.0-epsilon, at least for normalised args. */
502    /* dSig = (Double)sSig; */
503    /* dSig /= 67108864.0;  */ /* 2^26 */
504    /* dSig /= 67108864.0;  */ /* 2^26 */
505    uSig &= sigMask;
506    uSig |= 0x3FF0000000000000ULL;
507    if (sign)
508       uSig ^= negZero;
509 
510    /* Convert exp into a double.  Also an exact conversion. */
511    /* dExp = (Double)(sExp - 1023); */
512    sExp -= 1023;
513    if (sExp == 0) {
514       uExp = 0;
515    } else {
516       uExp   = sExp < 0 ? -sExp : sExp;
517       expExp = 0x3FF +52;
518       /* 1 <= uExp <= 1074 */
519       /* Skip first 42 iterations of normalisation loop as we know they
520          will always happen */
521       uExp <<= 42;
522       expExp -= 42;
523       for (i = 0; i < 52-42; i++) {
524          if (uExp & bit52)
525             break;
526          uExp <<= 1;
527          expExp--;
528       }
529       uExp &= sigMask;
530       uExp |= ((ULong)expExp) << 52;
531       if (sExp < 0) uExp ^= negZero;
532    }
533 
534    return getExp ? uExp : uSig;
535 }
536 
537 
538 
539 /*---------------------------------------------------------*/
540 /*--- SSE4.2 PCMP{E,I}STR{I,M} helpers                  ---*/
541 /*---------------------------------------------------------*/
542 
/* Bit positions (SHIFT_x) and single-bit masks (MASK_x) of the
   O, S, Z, A, P and C condition flags within eflags/rflags, listed
   from the highest bit position downwards.

   These duplicate values that also live in guest_{amd64,x86}_defs.h;
   #including those headers here causes chaos, so the required values
   are simply copied.  They are not going to change in the
   foreseeable future :-)
*/

#define SHIFT_O   11
#define MASK_O    (1 << SHIFT_O)

#define SHIFT_S   7
#define MASK_S    (1 << SHIFT_S)

#define SHIFT_Z   6
#define MASK_Z    (1 << SHIFT_Z)

#define SHIFT_A   4
#define MASK_A    (1 << SHIFT_A)

#define SHIFT_P   2
#define MASK_P    (1 << SHIFT_P)

#define SHIFT_C   0
#define MASK_C    (1 << SHIFT_C)

562 
563 
564 /* Count leading zeroes, w/ 0-produces-32 semantics, a la Hacker's
565    Delight. */
clz32(UInt x)566 static UInt clz32 ( UInt x )
567 {
568    Int y, m, n;
569    y = -(x >> 16);
570    m = (y >> 16) & 16;
571    n = 16 - m;
572    x = x >> m;
573    y = x - 0x100;
574    m = (y >> 16) & 8;
575    n = n + m;
576    x = x << m;
577    y = x - 0x1000;
578    m = (y >> 16) & 4;
579    n = n + m;
580    x = x << m;
581    y = x - 0x4000;
582    m = (y >> 16) & 2;
583    n = n + m;
584    x = x << m;
585    y = x >> 14;
586    m = y & ~(y >> 1);
587    return n + 2 - m;
588 }
589 
/* Count trailing zeroes in a 32-bit value, w/ 0-produces-32
   semantics. */
static UInt ctz32 ( UInt x )
{
   /* A mask with a 1 in every position below the lowest set bit of
      x, and zeroes elsewhere (all ones if x is zero).  The number of
      ones in it is the answer, and equals 32 minus its leading-zero
      count. */
   UInt belowLowest = (~x) & (x - 1);
   return 32 - clz32(belowLowest);
}
594 
595 /* Convert a 4-bit value to a 32-bit value by cloning each bit 8
596    times.  There's surely a better way to do this, but I don't know
597    what it is. */
bits4_to_bytes4(UInt bits4)598 static UInt bits4_to_bytes4 ( UInt bits4 )
599 {
600    UInt r = 0;
601    r |= (bits4 & 1) ? 0x000000FF : 0;
602    r |= (bits4 & 2) ? 0x0000FF00 : 0;
603    r |= (bits4 & 4) ? 0x00FF0000 : 0;
604    r |= (bits4 & 8) ? 0xFF000000 : 0;
605    return r;
606 }
607 
608 
609 /* Convert a 2-bit value to a 32-bit value by cloning each bit 16
610    times.  There's surely a better way to do this, but I don't know
611    what it is. */
bits2_to_bytes4(UInt bits2)612 static UInt bits2_to_bytes4 ( UInt bits2 )
613 {
614    UInt r = 0;
615    r |= (bits2 & 1) ? 0x0000FFFF : 0;
616    r |= (bits2 & 2) ? 0xFFFF0000 : 0;
617    return r;
618 }
619 
620 
621 /* Given partial results from a pcmpXstrX operation (intRes1,
622    basically), generate an I- or M-format output value, also the new
623    OSZACP flags.  */
624 static
compute_PCMPxSTRx_gen_output(V128 * resV,UInt * resOSZACP,UInt intRes1,UInt zmaskL,UInt zmaskR,UInt validL,UInt pol,UInt idx,Bool isxSTRM)625 void compute_PCMPxSTRx_gen_output (/*OUT*/V128* resV,
626                                    /*OUT*/UInt* resOSZACP,
627                                    UInt intRes1,
628                                    UInt zmaskL, UInt zmaskR,
629                                    UInt validL,
630                                    UInt pol, UInt idx,
631                                    Bool isxSTRM )
632 {
633    vassert((pol >> 2) == 0);
634    vassert((idx >> 1) == 0);
635 
636    UInt intRes2 = 0;
637    switch (pol) {
638       case 0: intRes2 = intRes1;          break; // pol +
639       case 1: intRes2 = ~intRes1;         break; // pol -
640       case 2: intRes2 = intRes1;          break; // pol m+
641       case 3: intRes2 = intRes1 ^ validL; break; // pol m-
642    }
643    intRes2 &= 0xFFFF;
644 
645    if (isxSTRM) {
646 
647       // generate M-format output (a bit or byte mask in XMM0)
648       if (idx) {
649          resV->w32[0] = bits4_to_bytes4( (intRes2 >>  0) & 0xF );
650          resV->w32[1] = bits4_to_bytes4( (intRes2 >>  4) & 0xF );
651          resV->w32[2] = bits4_to_bytes4( (intRes2 >>  8) & 0xF );
652          resV->w32[3] = bits4_to_bytes4( (intRes2 >> 12) & 0xF );
653       } else {
654          resV->w32[0] = intRes2 & 0xFFFF;
655          resV->w32[1] = 0;
656          resV->w32[2] = 0;
657          resV->w32[3] = 0;
658       }
659 
660    } else {
661 
662       // generate I-format output (an index in ECX)
663       // generate ecx value
664       UInt newECX = 0;
665       if (idx) {
666          // index of ms-1-bit
667          newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
668       } else {
669          // index of ls-1-bit
670          newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
671       }
672 
673       resV->w32[0] = newECX;
674       resV->w32[1] = 0;
675       resV->w32[2] = 0;
676       resV->w32[3] = 0;
677 
678    }
679 
680    // generate new flags, common to all ISTRI and ISTRM cases
681    *resOSZACP    // A, P are zero
682      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
683      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
684      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
685      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
686 }
687 
688 
689 /* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
690    basically), generate an I- or M-format output value, also the new
691    OSZACP flags.  */
692 static
compute_PCMPxSTRx_gen_output_wide(V128 * resV,UInt * resOSZACP,UInt intRes1,UInt zmaskL,UInt zmaskR,UInt validL,UInt pol,UInt idx,Bool isxSTRM)693 void compute_PCMPxSTRx_gen_output_wide (/*OUT*/V128* resV,
694                                         /*OUT*/UInt* resOSZACP,
695                                         UInt intRes1,
696                                         UInt zmaskL, UInt zmaskR,
697                                         UInt validL,
698                                         UInt pol, UInt idx,
699                                         Bool isxSTRM )
700 {
701    vassert((pol >> 2) == 0);
702    vassert((idx >> 1) == 0);
703 
704    UInt intRes2 = 0;
705    switch (pol) {
706       case 0: intRes2 = intRes1;          break; // pol +
707       case 1: intRes2 = ~intRes1;         break; // pol -
708       case 2: intRes2 = intRes1;          break; // pol m+
709       case 3: intRes2 = intRes1 ^ validL; break; // pol m-
710    }
711    intRes2 &= 0xFF;
712 
713    if (isxSTRM) {
714 
715       // generate M-format output (a bit or byte mask in XMM0)
716       if (idx) {
717          resV->w32[0] = bits2_to_bytes4( (intRes2 >> 0) & 0x3 );
718          resV->w32[1] = bits2_to_bytes4( (intRes2 >> 2) & 0x3 );
719          resV->w32[2] = bits2_to_bytes4( (intRes2 >> 4) & 0x3 );
720          resV->w32[3] = bits2_to_bytes4( (intRes2 >> 6) & 0x3 );
721       } else {
722          resV->w32[0] = intRes2 & 0xFF;
723          resV->w32[1] = 0;
724          resV->w32[2] = 0;
725          resV->w32[3] = 0;
726       }
727 
728    } else {
729 
730       // generate I-format output (an index in ECX)
731       // generate ecx value
732       UInt newECX = 0;
733       if (idx) {
734          // index of ms-1-bit
735          newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
736       } else {
737          // index of ls-1-bit
738          newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
739       }
740 
741       resV->w32[0] = newECX;
742       resV->w32[1] = 0;
743       resV->w32[2] = 0;
744       resV->w32[3] = 0;
745 
746    }
747 
748    // generate new flags, common to all ISTRI and ISTRM cases
749    *resOSZACP    // A, P are zero
750      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
751      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
752      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
753      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
754 }
755 
756 
757 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
758    variants on 8-bit data.
759 
760    For xSTRI variants, the new ECX value is placed in the 32 bits
761    pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
762    variants, the result is a 128 bit value and is placed at *resV in
763    the obvious way.
764 
765    For all variants, the new OSZACP value is placed at *resOSZACP.
766 
767    argLV and argRV are the vector args.  The caller must prepare a
768    16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
769    must be 1 for each zero byte of the respective arg.  For ESTRx
770    variants this is derived from the explicit length indication, and
771    must be 0 in all places except at the bit index corresponding to
772    the valid length (0 .. 16).  If the valid length is 16 then the
773    mask must be all zeroes.  In all cases, bits 31:16 must be zero.
774 
775    imm8 is the original immediate from the instruction.  isSTRM
776    indicates whether this is a xSTRM or xSTRI variant, which controls
777    how much of *res is written.
778 
779    If the given imm8 case can be handled, the return value is True.
780    If not, False is returned, and neither *resV nor *resOSZACP are
781    altered.
782 */
783 
compute_PCMPxSTRx(V128 * resV,UInt * resOSZACP,V128 * argLV,V128 * argRV,UInt zmaskL,UInt zmaskR,UInt imm8,Bool isxSTRM)784 Bool compute_PCMPxSTRx ( /*OUT*/V128* resV,
785                          /*OUT*/UInt* resOSZACP,
786                          V128* argLV,  V128* argRV,
787                          UInt zmaskL, UInt zmaskR,
788                          UInt imm8,   Bool isxSTRM )
789 {
790    vassert(imm8 < 0x80);
791    vassert((zmaskL >> 16) == 0);
792    vassert((zmaskR >> 16) == 0);
793 
794    /* Explicitly reject any imm8 values that haven't been validated,
795       even if they would probably work.  Life is too short to have
796       unvalidated cases in the code base. */
797    switch (imm8) {
798       case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
799       case 0x12: case 0x14: case 0x18: case 0x1A:
800       case 0x30: case 0x34: case 0x38: case 0x3A:
801       case 0x40: case 0x42: case 0x44: case 0x46: case 0x4A:
802          break;
803       default:
804          return False;
805    }
806 
807    UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
808    UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
809    UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
810    UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
811 
812    /*----------------------------------------*/
813    /*-- strcmp on byte data                --*/
814    /*----------------------------------------*/
815 
816    if (agg == 2/*equal each, aka strcmp*/
817        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
818       Int    i;
819       UChar* argL = (UChar*)argLV;
820       UChar* argR = (UChar*)argRV;
821       UInt boolResII = 0;
822       for (i = 15; i >= 0; i--) {
823          UChar cL  = argL[i];
824          UChar cR  = argR[i];
825          boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
826       }
827       UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
828       UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
829 
830       // do invalidation, common to all equal-each cases
831       UInt intRes1
832          = (boolResII & validL & validR)  // if both valid, use cmpres
833            | (~ (validL | validR));       // if both invalid, force 1
834                                           // else force 0
835       intRes1 &= 0xFFFF;
836 
837       // generate I-format output
838       compute_PCMPxSTRx_gen_output(
839          resV, resOSZACP,
840          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
841       );
842 
843       return True;
844    }
845 
846    /*----------------------------------------*/
847    /*-- set membership on byte data        --*/
848    /*----------------------------------------*/
849 
850    if (agg == 0/*equal any, aka find chars in a set*/
851        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
852       /* argL: the string,  argR: charset */
853       UInt   si, ci;
854       UChar* argL    = (UChar*)argLV;
855       UChar* argR    = (UChar*)argRV;
856       UInt   boolRes = 0;
857       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
858       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
859 
860       for (si = 0; si < 16; si++) {
861          if ((validL & (1 << si)) == 0)
862             // run off the end of the string.
863             break;
864          UInt m = 0;
865          for (ci = 0; ci < 16; ci++) {
866             if ((validR & (1 << ci)) == 0) break;
867             if (argR[ci] == argL[si]) { m = 1; break; }
868          }
869          boolRes |= (m << si);
870       }
871 
872       // boolRes is "pre-invalidated"
873       UInt intRes1 = boolRes & 0xFFFF;
874 
875       // generate I-format output
876       compute_PCMPxSTRx_gen_output(
877          resV, resOSZACP,
878          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
879       );
880 
881       return True;
882    }
883 
884    /*----------------------------------------*/
885    /*-- substring search on byte data      --*/
886    /*----------------------------------------*/
887 
888    if (agg == 3/*equal ordered, aka substring search*/
889        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
890 
891       /* argL: haystack,  argR: needle */
892       UInt   ni, hi;
893       UChar* argL    = (UChar*)argLV;
894       UChar* argR    = (UChar*)argRV;
895       UInt   boolRes = 0;
896       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
897       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
898       for (hi = 0; hi < 16; hi++) {
899          UInt m = 1;
900          for (ni = 0; ni < 16; ni++) {
901             if ((validR & (1 << ni)) == 0) break;
902             UInt i = ni + hi;
903             if (i >= 16) break;
904             if (argL[i] != argR[ni]) { m = 0; break; }
905          }
906          boolRes |= (m << hi);
907          if ((validL & (1 << hi)) == 0)
908             // run off the end of the haystack
909             break;
910       }
911 
912       // boolRes is "pre-invalidated"
913       UInt intRes1 = boolRes & 0xFFFF;
914 
915       // generate I-format output
916       compute_PCMPxSTRx_gen_output(
917          resV, resOSZACP,
918          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
919       );
920 
921       return True;
922    }
923 
924    /*----------------------------------------*/
925    /*-- ranges, unsigned byte data         --*/
926    /*----------------------------------------*/
927 
928    if (agg == 1/*ranges*/
929        && fmt == 0/*ub*/) {
930 
931       /* argL: string,  argR: range-pairs */
932       UInt   ri, si;
933       UChar* argL    = (UChar*)argLV;
934       UChar* argR    = (UChar*)argRV;
935       UInt   boolRes = 0;
936       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
937       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
938       for (si = 0; si < 16; si++) {
939          if ((validL & (1 << si)) == 0)
940             // run off the end of the string
941             break;
942          UInt m = 0;
943          for (ri = 0; ri < 16; ri += 2) {
944             if ((validR & (3 << ri)) != (3 << ri)) break;
945             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
946                m = 1; break;
947             }
948          }
949          boolRes |= (m << si);
950       }
951 
952       // boolRes is "pre-invalidated"
953       UInt intRes1 = boolRes & 0xFFFF;
954 
955       // generate I-format output
956       compute_PCMPxSTRx_gen_output(
957          resV, resOSZACP,
958          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
959       );
960 
961       return True;
962    }
963 
964    /*----------------------------------------*/
965    /*-- ranges, signed byte data           --*/
966    /*----------------------------------------*/
967 
968    if (agg == 1/*ranges*/
969        && fmt == 2/*sb*/) {
970 
971       /* argL: string,  argR: range-pairs */
972       UInt   ri, si;
973       Char*  argL    = (Char*)argLV;
974       Char*  argR    = (Char*)argRV;
975       UInt   boolRes = 0;
976       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
977       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
978       for (si = 0; si < 16; si++) {
979          if ((validL & (1 << si)) == 0)
980             // run off the end of the string
981             break;
982          UInt m = 0;
983          for (ri = 0; ri < 16; ri += 2) {
984             if ((validR & (3 << ri)) != (3 << ri)) break;
985             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
986                m = 1; break;
987             }
988          }
989          boolRes |= (m << si);
990       }
991 
992       // boolRes is "pre-invalidated"
993       UInt intRes1 = boolRes & 0xFFFF;
994 
995       // generate I-format output
996       compute_PCMPxSTRx_gen_output(
997          resV, resOSZACP,
998          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
999       );
1000 
1001       return True;
1002    }
1003 
1004    return False;
1005 }
1006 
1007 
1008 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
1009    variants on 16-bit characters.
1010 
1011    For xSTRI variants, the new ECX value is placed in the 32 bits
1012    pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
1013    variants, the result is a 128 bit value and is placed at *resV in
1014    the obvious way.
1015 
1016    For all variants, the new OSZACP value is placed at *resOSZACP.
1017 
1018    argLV and argRV are the vector args.  The caller must prepare an
1019    8-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
1020    must be 1 for each zero 16-bit character of the respective arg.  For ESTRx
1021    variants this is derived from the explicit length indication, and
1022    must be 0 in all places except at the bit index corresponding to
1023    the valid length (0 .. 8).  If the valid length is 8 then the
1024    mask must be all zeroes.  In all cases, bits 31:8 must be zero.
1025 
1026    imm8 is the original immediate from the instruction.  isxSTRM
1027    indicates whether this is a xSTRM or xSTRI variant, which controls
1028    how much of *resV is written.
1029 
1030    If the given imm8 case can be handled, the return value is True.
1031    If not, False is returned, and neither *resV nor *resOSZACP are
1032    altered.
1033 */
1034 
Bool compute_PCMPxSTRx_wide ( /*OUT*/V128* resV,
                              /*OUT*/UInt* resOSZACP,
                              V128* argLV,  V128* argRV,
                              UInt zmaskL, UInt zmaskR,
                              UInt imm8,   Bool isxSTRM )
{
   vassert(imm8 < 0x80);
   vassert((zmaskL >> 8) == 0);
   vassert((zmaskR >> 8) == 0);

   /* Whitelist of imm8 encodings that have been validated.  Anything
      else is refused here, before either output is written. */
   switch (imm8) {
      case 0x01: case 0x03:
      case 0x09: case 0x0B: case 0x0D:
      case 0x13: case 0x1B:
      case 0x39: case 0x3B:
      case 0x45: case 0x4B:
         break;
      default:
         return False;
   }

   /* Pull the control fields out of the immediate. */
   UInt fmt = imm8 & 3;         // imm8[1:0]  data format
   UInt agg = (imm8 >> 2) & 3;  // imm8[3:2]  aggregation fn
   UInt pol = (imm8 >> 4) & 3;  // imm8[5:4]  polarity
   UInt idx = (imm8 >> 6) & 1;  // imm8[6]    1==msb/bytemask

   /* This routine only deals with 16-bit data (fmt 1 == uw, 3 == sw),
      and the ranges aggregation is implemented for unsigned words
      only.  Every other combination is declined. */
   if (fmt != 1 && fmt != 3)
      return False;
   if (agg == 1 && fmt != 1)
      return False;

   UShort* wordsL = (UShort*)argLV;
   UShort* wordsR = (UShort*)argRV;

   /* x | -x sets the lowest 1 bit of x and everything above it, so the
      complement has a 1 for each element below the first marked
      position, i.e. for each valid element.  A zero mask makes every
      element valid. */
   UInt okL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
   UInt okR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))

   /* One result bit per element of the left operand; only the low 8
      bits are passed on. */
   UInt hits = 0;

   switch (agg) {

      case 2: {
         /* equal each, aka strcmp: lane-by-lane equality, then the
            common invalidation step. */
         UInt k;
         UInt same = 0;
         for (k = 0; k < 8; k++) {
            if (wordsL[k] == wordsR[k])
               same |= (1u << k);
         }
         hits = (same & okL & okR)   // both valid: use the comparison
                | (~ (okL | okR));   // both invalid: force 1
                                     // otherwise: force 0
         break;
      }

      case 0: {
         /* equal any, aka find chars in a set.
            left operand: the string,  right operand: the charset.
            The result needs no further invalidation. */
         UInt s, c;
         for (s = 0; s < 8 && (okL & (1 << s)) != 0; s++) {
            UInt found = 0;
            for (c = 0; c < 8 && (okR & (1 << c)) != 0; c++) {
               if (wordsR[c] == wordsL[s]) {
                  found = 1;
                  break;
               }
            }
            hits |= (found << s);
         }
         break;
      }

      case 3: {
         /* equal ordered, aka substring search.
            left operand: haystack,  right operand: needle.
            The result needs no further invalidation. */
         UInt h, n;
         for (h = 0; h < 8; h++) {
            UInt match = 1;
            /* Compare the valid needle prefix against the haystack at
               offset h, stopping at the end of the vector. */
            for (n = 0; h + n < 8 && (okR & (1 << n)) != 0; n++) {
               if (wordsL[h + n] != wordsR[n]) {
                  match = 0;
                  break;
               }
            }
            hits |= (match << h);
            /* The first invalid haystack position still gets a result
               bit, but nothing beyond it does. */
            if ((okL & (1 << h)) == 0)
               break;
         }
         break;
      }

      case 1: {
         /* ranges, unsigned wide data.
            left operand: string,  right operand: (lo, hi) pairs.
            The result needs no further invalidation. */
         UInt s, r;
         for (s = 0; s < 8 && (okL & (1 << s)) != 0; s++) {
            UInt inRange = 0;
            for (r = 0; r < 8; r += 2) {
               UInt pairBits = 3 << r;
               /* A pair counts only if both of its ends are valid. */
               if ((okR & pairBits) != pairBits)
                  break;
               if (wordsR[r] <= wordsL[s] && wordsL[s] <= wordsR[r+1]) {
                  inRange = 1;
                  break;
               }
            }
            hits |= (inRange << s);
         }
         break;
      }

      default:
         /* agg is a two-bit field, so this is not reachable. */
         return False;
   }

   UInt intRes1 = hits & 0xFF;

   // generate I-format output
   compute_PCMPxSTRx_gen_output_wide(
      resV, resOSZACP,
      intRes1, zmaskL, zmaskR, okL, pol, idx, isxSTRM
   );

   return True;
}
1217 
1218 
1219 /*---------------------------------------------------------------*/
1220 /*--- end                                 guest_generic_x87.c ---*/
1221 /*---------------------------------------------------------------*/
1222