• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                       libvex_ir.h ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2012 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #ifndef __LIBVEX_IR_H
37 #define __LIBVEX_IR_H
38 
39 #include "libvex_basictypes.h"
40 
41 
42 /*---------------------------------------------------------------*/
43 /*--- High-level IR description                               ---*/
44 /*---------------------------------------------------------------*/
45 
46 /* Vex IR is an architecture-neutral intermediate representation.
47    Unlike some IRs in systems similar to Vex, it is not like assembly
48    language (ie. a list of instructions).  Rather, it is more like the
49    IR that might be used in a compiler.
50 
51    Code blocks
52    ~~~~~~~~~~~
53    The code is broken into small code blocks ("superblocks", type:
54    'IRSB').  Each code block typically represents from 1 to perhaps 50
55    instructions.  IRSBs are single-entry, multiple-exit code blocks.
56    Each IRSB contains three things:
57    - a type environment, which indicates the type of each temporary
58      value present in the IRSB
59    - a list of statements, which represent code
60    - a jump that exits from the end the IRSB
61    Because the blocks are multiple-exit, there can be additional
62    conditional exit statements that cause control to leave the IRSB
63    before the final exit.  Also because of this, IRSBs can cover
64    multiple non-consecutive sequences of code (up to 3).  These are
65    recorded in the type VexGuestExtents (see libvex.h).
66 
67    Statements and expressions
68    ~~~~~~~~~~~~~~~~~~~~~~~~~~
69    Statements (type 'IRStmt') represent operations with side-effects,
70    eg.  guest register writes, stores, and assignments to temporaries.
71    Expressions (type 'IRExpr') represent operations without
72    side-effects, eg. arithmetic operations, loads, constants.
73    Expressions can contain sub-expressions, forming expression trees,
74    eg. (3 + (4 * load(addr1)).
75 
76    Storage of guest state
77    ~~~~~~~~~~~~~~~~~~~~~~
78    The "guest state" contains the guest registers of the guest machine
79    (ie.  the machine that we are simulating).  It is stored by default
80    in a block of memory supplied by the user of the VEX library,
81    generally referred to as the guest state (area).  To operate on
82    these registers, one must first read ("Get") them from the guest
83    state into a temporary value.  Afterwards, one can write ("Put")
84    them back into the guest state.
85 
86    Get and Put are characterised by a byte offset into the guest
87    state, a small integer which effectively gives the identity of the
88    referenced guest register, and a type, which indicates the size of
89    the value to be transferred.
90 
91    The basic "Get" and "Put" operations are sufficient to model normal
92    fixed registers on the guest.  Selected areas of the guest state
93    can be treated as a circular array of registers (type:
94    'IRRegArray'), which can be indexed at run-time.  This is done with
95    the "GetI" and "PutI" primitives.  This is necessary to describe
96    rotating register files, for example the x87 FPU stack, SPARC
97    register windows, and the Itanium register files.
98 
99    Examples, and flattened vs. unflattened code
100    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101    For example, consider this x86 instruction:
102 
103      addl %eax, %ebx
104 
105    One Vex IR translation for this code would be this:
106 
107      ------ IMark(0x24F275, 7, 0) ------
108      t3 = GET:I32(0)             # get %eax, a 32-bit integer
109      t2 = GET:I32(12)            # get %ebx, a 32-bit integer
110      t1 = Add32(t3,t2)           # addl
111      PUT(0) = t1                 # put %eax
112 
113    (For simplicity, this ignores the effects on the condition codes, and
114    the update of the instruction pointer.)
115 
116    The "IMark" is an IR statement that doesn't represent actual code.
117    Instead it indicates the address and length of the original
118    instruction.  The numbers 0 and 12 are offsets into the guest state
119    for %eax and %ebx.  The full list of offsets for an architecture
120    <ARCH> can be found in the type VexGuest<ARCH>State in the file
121    VEX/pub/libvex_guest_<ARCH>.h.
122 
123    The five statements in this example are:
124    - the IMark
125    - three assignments to temporaries
126    - one register write (put)
127 
128    The six expressions in this example are:
129    - two register reads (gets)
130    - one arithmetic (add) operation
131    - three temporaries (two nested within the Add32, one in the PUT)
132 
133    The above IR is "flattened", ie. all sub-expressions are "atoms",
134    either constants or temporaries.  An equivalent, unflattened version
135    would be:
136 
137      PUT(0) = Add32(GET:I32(0), GET:I32(12))
138 
139    IR is guaranteed to be flattened at instrumentation-time.  This makes
140    instrumentation easier.  Equivalent flattened and unflattened IR
141    typically results in the same generated code.
142 
143    Another example, this one showing loads and stores:
144 
145      addl %edx,4(%eax)
146 
147    This becomes (again ignoring condition code and instruction pointer
148    updates):
149 
150      ------ IMark(0x4000ABA, 3, 0) ------
151      t3 = Add32(GET:I32(0),0x4:I32)
152      t2 = LDle:I32(t3)
153      t1 = GET:I32(8)
154      t0 = Add32(t2,t1)
155      STle(t3) = t0
156 
157    The "le" in "LDle" and "STle" is short for "little-endian".
158 
159    No need for deallocations
160    ~~~~~~~~~~~~~~~~~~~~~~~~~
161    Although there are allocation functions for various data structures
162    in this file, there are no deallocation functions.  This is because
163    Vex uses a memory allocation scheme that automatically reclaims the
164    memory used by allocated structures once translation is completed.
165    This makes things easier for tools that instruments/transforms code
166    blocks.
167 
168    SSAness and typing
169    ~~~~~~~~~~~~~~~~~~
170    The IR is fully typed.  For every IRSB (IR block) it is possible to
171    say unambiguously whether or not it is correctly typed.
172    Incorrectly typed IR has no meaning and the VEX will refuse to
173    process it.  At various points during processing VEX typechecks the
174    IR and aborts if any violations are found.  This seems overkill but
175    makes it a great deal easier to build a reliable JIT.
176 
177    IR also has the SSA property.  SSA stands for Static Single
178    Assignment, and what it means is that each IR temporary may be
179    assigned to only once.  This idea became widely used in compiler
180    construction in the mid to late 90s.  It makes many IR-level
181    transformations/code improvements easier, simpler and faster.
182    Whenever it typechecks an IR block, VEX also checks the SSA
183    property holds, and will abort if not so.  So SSAness is
184    mechanically and rigidly enforced.
185 */
186 
187 /*---------------------------------------------------------------*/
188 /*--- Type definitions for the IR                             ---*/
189 /*---------------------------------------------------------------*/
190 
191 /* General comments about naming schemes:
192 
193    All publically visible functions contain the name of the primary
194    type on which they operate (IRFoo, IRBar, etc).  Hence you should
195    be able to identify these functions by grepping for "IR[A-Z]".
196 
197    For some type 'IRFoo':
198 
199    - ppIRFoo is the printing method for IRFoo, printing it to the
200      output channel specified in the LibVEX_Initialise call.
201 
202    - eqIRFoo is a structural equality predicate for IRFoos.
203 
204    - deepCopyIRFoo is a deep copy constructor for IRFoos.
205      It recursively traverses the entire argument tree and
206      produces a complete new tree.  All types have a deep copy
207      constructor.
208 
209    - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
210      It creates a new top-level copy of the supplied object,
211      but does not copy any sub-objects.  Only some types have a
212      shallow copy constructor.
213 */
214 
215 /* ------------------ Types ------------------ */
216 
217 /* A type indicates the size of a value, and whether it's an integer, a
218    float, or a vector (SIMD) value. */
219 typedef
220    enum {
221       Ity_INVALID=0x11000,
222       Ity_I1,
223       Ity_I8,
224       Ity_I16,
225       Ity_I32,
226       Ity_I64,
227       Ity_I128,  /* 128-bit scalar */
228       Ity_F32,   /* IEEE 754 float */
229       Ity_F64,   /* IEEE 754 double */
230       Ity_D32,   /* 32-bit Decimal floating point */
231       Ity_D64,   /* 64-bit Decimal floating point */
232       Ity_D128,  /* 128-bit Decimal floating point */
233       Ity_F128,  /* 128-bit floating point; implementation defined */
234       Ity_V128,  /* 128-bit SIMD */
235       Ity_V256   /* 256-bit SIMD */
236    }
237    IRType;
238 
239 /* Pretty-print an IRType */
240 extern void ppIRType ( IRType );
241 
242 /* Get the size (in bytes) of an IRType */
243 extern Int sizeofIRType ( IRType );
244 
245 
246 /* ------------------ Endianness ------------------ */
247 
248 /* IREndness is used in load IRExprs and store IRStmts. */
249 typedef
250    enum {
251       Iend_LE=0x12000, /* little endian */
252       Iend_BE          /* big endian */
253    }
254    IREndness;
255 
256 
257 /* ------------------ Constants ------------------ */
258 
259 /* IRConsts are used within 'Const' and 'Exit' IRExprs. */
260 
261 /* The various kinds of constant. */
262 typedef
263    enum {
264       Ico_U1=0x13000,
265       Ico_U8,
266       Ico_U16,
267       Ico_U32,
268       Ico_U64,
269       Ico_F32,   /* 32-bit IEEE754 floating */
270       Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
271                     as a IEEE754 single value. */
272       Ico_F64,   /* 64-bit IEEE754 floating */
273       Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
274                     as a IEEE754 double value. */
275       Ico_V128,  /* 128-bit restricted vector constant, with 1 bit
276                     (repeated 8 times) for each of the 16 x 1-byte lanes */
277       Ico_V256   /* 256-bit restricted vector constant, with 1 bit
278                     (repeated 8 times) for each of the 32 x 1-byte lanes */
279    }
280    IRConstTag;
281 
282 /* A constant.  Stored as a tagged union.  'tag' indicates what kind of
283    constant this is.  'Ico' is the union that holds the fields.  If an
284    IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
285    and its value can be accessed with 'c.Ico.U32'. */
286 typedef
287    struct _IRConst {
288       IRConstTag tag;
289       union {
290          Bool   U1;
291          UChar  U8;
292          UShort U16;
293          UInt   U32;
294          ULong  U64;
295          Float  F32;
296          UInt   F32i;
297          Double F64;
298          ULong  F64i;
299          UShort V128;   /* 16-bit value; see Ico_V128 comment above */
300          UInt   V256;   /* 32-bit value; see Ico_V256 comment above */
301       } Ico;
302    }
303    IRConst;
304 
305 /* IRConst constructors */
306 extern IRConst* IRConst_U1   ( Bool );
307 extern IRConst* IRConst_U8   ( UChar );
308 extern IRConst* IRConst_U16  ( UShort );
309 extern IRConst* IRConst_U32  ( UInt );
310 extern IRConst* IRConst_U64  ( ULong );
311 extern IRConst* IRConst_F32  ( Float );
312 extern IRConst* IRConst_F32i ( UInt );
313 extern IRConst* IRConst_F64  ( Double );
314 extern IRConst* IRConst_F64i ( ULong );
315 extern IRConst* IRConst_V128 ( UShort );
316 extern IRConst* IRConst_V256 ( UInt );
317 
318 /* Deep-copy an IRConst */
319 extern IRConst* deepCopyIRConst ( IRConst* );
320 
321 /* Pretty-print an IRConst */
322 extern void ppIRConst ( IRConst* );
323 
324 /* Compare two IRConsts for equality */
325 extern Bool eqIRConst ( IRConst*, IRConst* );
326 
327 
328 /* ------------------ Call targets ------------------ */
329 
330 /* Describes a helper function to call.  The name part is purely for
331    pretty printing and not actually used.  regparms=n tells the back
332    end that the callee has been declared
333    "__attribute__((regparm(n)))", although indirectly using the
334    VEX_REGPARM(n) macro.  On some targets (x86) the back end will need
335    to construct a non-standard sequence to call a function declared
336    like this.
337 
338    mcx_mask is a sop to Memcheck.  It indicates which args should be
339    considered 'always defined' when lazily computing definedness of
340    the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
341    args[1], etc.  If a bit is set, the corresponding arg is excluded
342    (hence "x" in "mcx") from definedness checking.
343 */
344 
345 typedef
346    struct {
347       Int    regparms;
348       HChar* name;
349       void*  addr;
350       UInt   mcx_mask;
351    }
352    IRCallee;
353 
354 /* Create an IRCallee. */
355 extern IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr );
356 
357 /* Deep-copy an IRCallee. */
358 extern IRCallee* deepCopyIRCallee ( IRCallee* );
359 
360 /* Pretty-print an IRCallee. */
361 extern void ppIRCallee ( IRCallee* );
362 
363 
364 /* ------------------ Guest state arrays ------------------ */
365 
366 /* This describes a section of the guest state that we want to
367    be able to index at run time, so as to be able to describe
368    indexed or rotating register files on the guest. */
369 typedef
370    struct {
371       Int    base;   /* guest state offset of start of indexed area */
372       IRType elemTy; /* type of each element in the indexed area */
373       Int    nElems; /* number of elements in the indexed area */
374    }
375    IRRegArray;
376 
377 extern IRRegArray* mkIRRegArray ( Int, IRType, Int );
378 
379 extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );
380 
381 extern void ppIRRegArray ( IRRegArray* );
382 extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
383 
384 
385 /* ------------------ Temporaries ------------------ */
386 
387 /* This represents a temporary, eg. t1.  The IR optimiser relies on the
388    fact that IRTemps are 32-bit ints.  Do not change them to be ints of
389    any other size. */
390 typedef UInt IRTemp;
391 
392 /* Pretty-print an IRTemp. */
393 extern void ppIRTemp ( IRTemp );
394 
395 #define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)
396 
397 
398 /* --------------- Primops (arity 1,2,3 and 4) --------------- */
399 
400 /* Primitive operations that are used in Unop, Binop, Triop and Qop
401    IRExprs.  Once we take into account integer, floating point and SIMD
402    operations of all the different sizes, there are quite a lot of them.
403    Most instructions supported by the architectures that Vex supports
404    (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
405    are not;  they are instead handled with dirty helpers that emulate
406    their functionality.  Such obscure ones are thus not directly visible
407    in the IR, but their effects on guest state (memory and registers)
408    are made visible via the annotations in IRDirty structures.
409 */
410 typedef
411    enum {
412       /* -- Do not change this ordering.  The IR generators rely on
413             (eg) Iop_Add64 == IopAdd8 + 3. -- */
414 
415       Iop_INVALID=0x14000,
416       Iop_Add8,  Iop_Add16,  Iop_Add32,  Iop_Add64,
417       Iop_Sub8,  Iop_Sub16,  Iop_Sub32,  Iop_Sub64,
418       /* Signless mul.  MullS/MullU is elsewhere. */
419       Iop_Mul8,  Iop_Mul16,  Iop_Mul32,  Iop_Mul64,
420       Iop_Or8,   Iop_Or16,   Iop_Or32,   Iop_Or64,
421       Iop_And8,  Iop_And16,  Iop_And32,  Iop_And64,
422       Iop_Xor8,  Iop_Xor16,  Iop_Xor32,  Iop_Xor64,
423       Iop_Shl8,  Iop_Shl16,  Iop_Shl32,  Iop_Shl64,
424       Iop_Shr8,  Iop_Shr16,  Iop_Shr32,  Iop_Shr64,
425       Iop_Sar8,  Iop_Sar16,  Iop_Sar32,  Iop_Sar64,
426       /* Integer comparisons. */
427       Iop_CmpEQ8,  Iop_CmpEQ16,  Iop_CmpEQ32,  Iop_CmpEQ64,
428       Iop_CmpNE8,  Iop_CmpNE16,  Iop_CmpNE32,  Iop_CmpNE64,
429       /* Tags for unary ops */
430       Iop_Not8,  Iop_Not16,  Iop_Not32,  Iop_Not64,
431 
432       /* Exactly like CmpEQ8/16/32/64, but carrying the additional
433          hint that these compute the success/failure of a CAS
434          operation, and hence are almost certainly applied to two
435          copies of the same value, which in turn has implications for
436          Memcheck's instrumentation. */
437       Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
438       Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
439 
440       /* -- Ordering not important after here. -- */
441 
442       /* Widening multiplies */
443       Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
444       Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
445 
446       /* Wierdo integer stuff */
447       Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
448       Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
449       /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
450          zero.  You must ensure they are never given a zero argument.
451       */
452 
453       /* Standard integer comparisons */
454       Iop_CmpLT32S, Iop_CmpLT64S,
455       Iop_CmpLE32S, Iop_CmpLE64S,
456       Iop_CmpLT32U, Iop_CmpLT64U,
457       Iop_CmpLE32U, Iop_CmpLE64U,
458 
459       /* As a sop to Valgrind-Memcheck, the following are useful. */
460       Iop_CmpNEZ8, Iop_CmpNEZ16,  Iop_CmpNEZ32,  Iop_CmpNEZ64,
461       Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-Os; other -> all-1s */
462       Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
463       Iop_Max32U, /* unsigned max */
464 
465       /* PowerPC-style 3-way integer comparisons.  Without them it is
466          difficult to simulate PPC efficiently.
467          op(x,y) | x < y  = 0x8 else
468                  | x > y  = 0x4 else
469                  | x == y = 0x2
470       */
471       Iop_CmpORD32U, Iop_CmpORD64U,
472       Iop_CmpORD32S, Iop_CmpORD64S,
473 
474       /* Division */
475       /* TODO: clarify semantics wrt rounding, negative values, whatever */
476       Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
477       Iop_DivS32,   // ditto, signed
478       Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
479       Iop_DivS64,   // ditto, signed
480       Iop_DivU64E,  // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) concat with 64 0's (low))
481       Iop_DivS64E,  // ditto, signed
482       Iop_DivU32E,  // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) concat with 32 0's (low))
483       Iop_DivS32E,  // ditto, signed
484 
485       Iop_DivModU64to32, // :: I64,I32 -> I64
486                          // of which lo half is div and hi half is mod
487       Iop_DivModS64to32, // ditto, signed
488 
489       Iop_DivModU128to64, // :: V128,I64 -> V128
490                           // of which lo half is div and hi half is mod
491       Iop_DivModS128to64, // ditto, signed
492 
493       Iop_DivModS64to64, // :: I64,I64 -> I128
494                          // of which lo half is div and hi half is mod
495 
496       /* Integer conversions.  Some of these are redundant (eg
497          Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
498          having a complete set reduces the typical dynamic size of IR
499          and makes the instruction selectors easier to write. */
500 
501       /* Widening conversions */
502       Iop_8Uto16, Iop_8Uto32,  Iop_8Uto64,
503                   Iop_16Uto32, Iop_16Uto64,
504                                Iop_32Uto64,
505       Iop_8Sto16, Iop_8Sto32,  Iop_8Sto64,
506                   Iop_16Sto32, Iop_16Sto64,
507                                Iop_32Sto64,
508 
509       /* Narrowing conversions */
510       Iop_64to8, Iop_32to8, Iop_64to16,
511       /* 8 <-> 16 bit conversions */
512       Iop_16to8,      // :: I16 -> I8, low half
513       Iop_16HIto8,    // :: I16 -> I8, high half
514       Iop_8HLto16,    // :: (I8,I8) -> I16
515       /* 16 <-> 32 bit conversions */
516       Iop_32to16,     // :: I32 -> I16, low half
517       Iop_32HIto16,   // :: I32 -> I16, high half
518       Iop_16HLto32,   // :: (I16,I16) -> I32
519       /* 32 <-> 64 bit conversions */
520       Iop_64to32,     // :: I64 -> I32, low half
521       Iop_64HIto32,   // :: I64 -> I32, high half
522       Iop_32HLto64,   // :: (I32,I32) -> I64
523       /* 64 <-> 128 bit conversions */
524       Iop_128to64,    // :: I128 -> I64, low half
525       Iop_128HIto64,  // :: I128 -> I64, high half
526       Iop_64HLto128,  // :: (I64,I64) -> I128
527       /* 1-bit stuff */
528       Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
529       Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
530       Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
531       Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
532       Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
533       Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
534       Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
535       Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
536       Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
537       Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
538 
539       /* ------ Floating point.  We try to be IEEE754 compliant. ------ */
540 
541       /* --- Simple stuff as mandated by 754. --- */
542 
543       /* Binary operations, with rounding. */
544       /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
545       Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
546 
547       /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
548       Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,
549 
550       /* Variants of the above which produce a 64-bit result but which
551          round their result to a IEEE float range first. */
552       /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
553       Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,
554 
555       /* Unary operations, without rounding. */
556       /* :: F64 -> F64 */
557       Iop_NegF64, Iop_AbsF64,
558 
559       /* :: F32 -> F32 */
560       Iop_NegF32, Iop_AbsF32,
561 
562       /* Unary operations, with rounding. */
563       /* :: IRRoundingMode(I32) x F64 -> F64 */
564       Iop_SqrtF64, Iop_SqrtF64r32,
565 
566       /* :: IRRoundingMode(I32) x F32 -> F32 */
567       Iop_SqrtF32,
568 
569       /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
570             0x45 Unordered
571             0x01 LT
572             0x00 GT
573             0x40 EQ
574          This just happens to be the Intel encoding.  The values
575          are recorded in the type IRCmpF64Result.
576       */
577       /* :: F64 x F64 -> IRCmpF64Result(I32) */
578       Iop_CmpF64,
579       Iop_CmpF32,
580       Iop_CmpF128,
581 
582       /* --- Int to/from FP conversions. --- */
583 
584       /* For the most part, these take a first argument :: Ity_I32 (as
585          IRRoundingMode) which is an indication of the rounding mode
586          to use, as per the following encoding ("the standard
587          encoding"):
588             00b  to nearest (the default)
589             01b  to -infinity
590             10b  to +infinity
591             11b  to zero
592          This just happens to be the Intel encoding.  For reference only,
593          the PPC encoding is:
594             00b  to nearest (the default)
595             01b  to zero
596             10b  to +infinity
597             11b  to -infinity
598          Any PPC -> IR front end will have to translate these PPC
599          encodings, as encoded in the guest state, to the standard
600          encodings, to pass to the primops.
601          For reference only, the ARM VFP encoding is:
602             00b  to nearest
603             01b  to +infinity
604             10b  to -infinity
605             11b  to zero
606          Again, this will have to be converted to the standard encoding
607          to pass to primops.
608 
609          If one of these conversions gets an out-of-range condition,
610          or a NaN, as an argument, the result is host-defined.  On x86
611          the "integer indefinite" value 0x80..00 is produced.  On PPC
612          it is either 0x80..00 or 0x7F..FF depending on the sign of
613          the argument.
614 
615          On ARMvfp, when converting to a signed integer result, the
616          overflow result is 0x80..00 for negative args and 0x7F..FF
617          for positive args.  For unsigned integer results it is
618          0x00..00 and 0xFF..FF respectively.
619 
620          Rounding is required whenever the destination type cannot
621          represent exactly all values of the source type.
622       */
623       Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
624       Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
625       Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
626       Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */
627 
628       Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */
629 
630       Iop_I16StoF64, /*                       signed I16 -> F64 */
631       Iop_I32StoF64, /*                       signed I32 -> F64 */
632       Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
633       Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
634       Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */
635 
636       Iop_I32UtoF64, /*                       unsigned I32 -> F64 */
637 
638       Iop_F32toI16S, /* IRRoundingMode(I32) x F32 -> signed I16 */
639       Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
640       Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */
641 
642       Iop_I16StoF32, /*                       signed I16 -> F32 */
643       Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
644       Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */
645 
646       /* Conversion between floating point formats */
647       Iop_F32toF64,  /*                       F32 -> F64 */
648       Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */
649 
650       /* Reinterpretation.  Take an F64 and produce an I64 with
651          the same bit pattern, or vice versa. */
652       Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
653       Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
654 
655       /* Support for 128-bit floating point */
656       Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */
657       Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */
658       Iop_F128LOtoF64,/* F128 -> low  half of F128 into a F64 register */
659 
660       /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
661       Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
662 
663       /* :: F128 -> F128 */
664       Iop_NegF128, Iop_AbsF128,
665 
666       /* :: IRRoundingMode(I32) x F128 -> F128 */
667       Iop_SqrtF128,
668 
669       Iop_I32StoF128, /*                signed I32  -> F128 */
670       Iop_I64StoF128, /*                signed I64  -> F128 */
671       Iop_F32toF128,  /*                       F32  -> F128 */
672       Iop_F64toF128,  /*                       F64  -> F128 */
673 
674       Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32  */
675       Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64  */
676       Iop_F128toF64,  /* IRRoundingMode(I32) x F128 -> F64         */
677       Iop_F128toF32,  /* IRRoundingMode(I32) x F128 -> F32         */
678 
679       /* --- guest x86/amd64 specifics, not mandated by 754. --- */
680 
681       /* Binary ops, with rounding. */
682       /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
683       Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
684       Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
685       Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
686       Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
687       Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
688       Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
689       Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
690       Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
691       /* Note that on x86 guest, PRem1{C3210} has the same behaviour
692          as the IEEE mandated RemF64, except it is limited in the
693          range of its operand.  Hence the partialness. */
694 
695       /* Unary ops, with rounding. */
696       /* :: IRRoundingMode(I32) x F64 -> F64 */
697       Iop_SinF64,    /* FSIN */
698       Iop_CosF64,    /* FCOS */
699       Iop_TanF64,    /* FTAN */
700       Iop_2xm1F64,   /* (2^arg - 1.0) */
701       Iop_RoundF64toInt, /* F64 value to nearest integral value (still
702                             as F64) */
703       Iop_RoundF32toInt, /* F32 value to nearest integral value (still
704                             as F32) */
705 
706       /* --- guest s390 specifics, not mandated by 754. --- */
707 
708       /* Fused multiply-add/sub */
709       /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
710             (computes op3 * op2 +/- op1 */
711       Iop_MAddF32, Iop_MSubF32,
712 
713       /* --- guest ppc32/64 specifics, not mandated by 754. --- */
714 
715       /* Ternary operations, with rounding. */
716       /* Fused multiply-add/sub, with 112-bit intermediate
717          precision for ppc.
718          Also used to implement fused multiply-add/sub for s390. */
719       /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
720             (computes arg2 * arg3 +/- arg4) */
721       Iop_MAddF64, Iop_MSubF64,
722 
723       /* Variants of the above which produce a 64-bit result but which
724          round their result to a IEEE float range first. */
725       /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
726       Iop_MAddF64r32, Iop_MSubF64r32,
727 
728       /* :: F64 -> F64 */
729       Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
730       Iop_RoundF64toF64_NEAREST, /* frin */
731       Iop_RoundF64toF64_NegINF,  /* frim */
732       Iop_RoundF64toF64_PosINF,  /* frip */
733       Iop_RoundF64toF64_ZERO,    /* friz */
734 
735       /* :: F64 -> F32 */
736       Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
737 
738       /* :: IRRoundingMode(I32) x F64 -> F64 */
739       Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
740       /* NB: pretty much the same as Iop_F64toF32, except no change
741          of type. */
742 
743       /* :: F64 -> I32 */
744       Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
745                        from FP result */
746 
747       /* ------------------ 32-bit SIMD Integer ------------------ */
748 
749       /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
750       Iop_QAdd32S,
751       Iop_QSub32S,
752 
753       /* 16x2 add/sub, also signed/unsigned saturating variants */
754       Iop_Add16x2, Iop_Sub16x2,
755       Iop_QAdd16Sx2, Iop_QAdd16Ux2,
756       Iop_QSub16Sx2, Iop_QSub16Ux2,
757 
758       /* 16x2 signed/unsigned halving add/sub.  For each lane, these
759          compute bits 16:1 of (eg) sx(argL) + sx(argR),
760          or zx(argL) - zx(argR) etc. */
761       Iop_HAdd16Ux2, Iop_HAdd16Sx2,
762       Iop_HSub16Ux2, Iop_HSub16Sx2,
763 
764       /* 8x4 add/sub, also signed/unsigned saturating variants */
765       Iop_Add8x4, Iop_Sub8x4,
766       Iop_QAdd8Sx4, Iop_QAdd8Ux4,
767       Iop_QSub8Sx4, Iop_QSub8Ux4,
768 
769       /* 8x4 signed/unsigned halving add/sub.  For each lane, these
770          compute bits 8:1 of (eg) sx(argL) + sx(argR),
771          or zx(argL) - zx(argR) etc. */
772       Iop_HAdd8Ux4, Iop_HAdd8Sx4,
773       Iop_HSub8Ux4, Iop_HSub8Sx4,
774 
775       /* 8x4 sum of absolute unsigned differences. */
776       Iop_Sad8Ux4,
777 
778       /* MISC (vector integer cmp != 0) */
779       Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
780 
781       /* ------------------ 64-bit SIMD FP ------------------------ */
782 
783       /* Convertion to/from int */
784       Iop_I32UtoFx2,  Iop_I32StoFx2,    /* I32x4 -> F32x4 */
785       Iop_FtoI32Ux2_RZ,  Iop_FtoI32Sx2_RZ,    /* F32x4 -> I32x4 */
786       /* Fixed32 format is floating-point number with fixed number of fraction
787          bits. The number of fraction bits is passed as a second argument of
788          type I8. */
789       Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
790       Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
791 
792       /* Binary operations */
793       Iop_Max32Fx2,      Iop_Min32Fx2,
794       /* Pairwise Min and Max. See integer pairwise operations for more
795          details. */
796       Iop_PwMax32Fx2,    Iop_PwMin32Fx2,
797       /* Note: For the following compares, the arm front-end assumes a
798          nan in a lane of either argument returns zero for that lane. */
799       Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
800 
801       /* Vector Reciprocal Estimate finds an approximate reciprocal of each
802       element in the operand vector, and places the results in the destination
803       vector.  */
804       Iop_Recip32Fx2,
805 
806       /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
807          Note, that if one of the arguments is zero and another one is infinity
808          of arbitrary sign the result of the operation is 2.0. */
809       Iop_Recps32Fx2,
810 
811       /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
812          square root of each element in the operand vector. */
813       Iop_Rsqrte32Fx2,
814 
815       /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
816          Note, that of one of the arguments is zero and another one is infiinty
817          of arbitrary sign the result of the operation is 1.5. */
818       Iop_Rsqrts32Fx2,
819 
820       /* Unary */
821       Iop_Neg32Fx2, Iop_Abs32Fx2,
822 
823       /* ------------------ 64-bit SIMD Integer. ------------------ */
824 
825       /* MISC (vector integer cmp != 0) */
826       Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,
827 
828       /* ADDITION (normal / unsigned sat / signed sat) */
829       Iop_Add8x8,   Iop_Add16x4,   Iop_Add32x2,
830       Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
831       Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
832 
833       /* PAIRWISE operations */
834       /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
835             [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
836       Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
837       Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
838       Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
839       Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
840       Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
841       /* Longening variant is unary. The resulting vector contains two times
842          less elements than operand, but they are two times wider.
843          Example:
844             Iop_PAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
845                where a+b and c+d are unsigned 32-bit values. */
846       Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
847       Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
848 
849       /* SUBTRACTION (normal / unsigned sat / signed sat) */
850       Iop_Sub8x8,   Iop_Sub16x4,   Iop_Sub32x2,
851       Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
852       Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
853 
854       /* ABSOLUTE VALUE */
855       Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
856 
857       /* MULTIPLICATION (normal / high half of signed/unsigned / plynomial ) */
858       Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
859       Iop_Mul32Fx2,
860       Iop_MulHi16Ux4,
861       Iop_MulHi16Sx4,
862       /* Plynomial multiplication treats it's arguments as coefficients of
863          polynoms over {0, 1}. */
864       Iop_PolynomialMul8x8,
865 
866       /* Vector Saturating Doubling Multiply Returning High Half and
867          Vector Saturating Rounding Doubling Multiply Returning High Half */
868       /* These IROp's multiply corresponding elements in two vectors, double
869          the results, and place the most significant half of the final results
870          in the destination vector. The results are truncated or rounded. If
871          any of the results overflow, they are saturated. */
872       Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
873       Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
874 
875       /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
876       Iop_Avg8Ux8,
877       Iop_Avg16Ux4,
878 
879       /* MIN/MAX */
880       Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
881       Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
882       Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
883       Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
884 
885       /* COMPARISON */
886       Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
887       Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
888       Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
889 
890       /* COUNT ones / leading zeroes / leading sign bits (not including topmost
891          bit) */
892       Iop_Cnt8x8,
893       Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
894       Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
895 
896       /* VECTOR x VECTOR SHIFT / ROTATE */
897       Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
898       Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
899       Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
900       Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
901 
902       /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
903       Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
904       Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
905       Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
906 
907       /* VECTOR x VECTOR SATURATING SHIFT */
908       Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
909       Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
910       /* VECTOR x INTEGER SATURATING SHIFT */
911       Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
912       Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1,
913       Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
914 
915       /* NARROWING (binary)
916          -- narrow 2xI64 into 1xI64, hi half from left arg */
917       /* For saturated narrowing, I believe there are 4 variants of
918          the basic arithmetic operation, depending on the signedness
919          of argument and result.  Here are examples that exemplify
920          what I mean:
921 
922          QNarrow16Uto8U ( UShort x )  if (x >u 255) x = 255;
923                                       return x[7:0];
924 
925          QNarrow16Sto8S ( Short x )   if (x <s -128) x = -128;
926                                       if (x >s  127) x = 127;
927                                       return x[7:0];
928 
929          QNarrow16Uto8S ( UShort x )  if (x >u 127) x = 127;
930                                       return x[7:0];
931 
932          QNarrow16Sto8U ( Short x )   if (x <s 0)   x = 0;
933                                       if (x >s 255) x = 255;
934                                       return x[7:0];
935       */
936       Iop_QNarrowBin16Sto8Ux8,
937       Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
938       Iop_NarrowBin16to8x8,    Iop_NarrowBin32to16x4,
939 
940       /* INTERLEAVING */
941       /* Interleave lanes from low or high halves of
942          operands.  Most-significant result lane is from the left
943          arg. */
944       Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
945       Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
946       /* Interleave odd/even lanes of operands.  Most-significant result lane
947          is from the left arg.  Note that Interleave{Odd,Even}Lanes32x2 are
948          identical to Interleave{HI,LO}32x2 and so are omitted.*/
949       Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
950       Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
951 
952 
953       /* CONCATENATION -- build a new value by concatenating either
954          the even or odd lanes of both operands.  Note that
955          Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
956          and so are omitted. */
957       Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
958       Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
959 
960       /* GET / SET elements of VECTOR
961          GET is binop (I64, I8) -> I<elem_size>
962          SET is triop (I64, I8, I<elem_size>) -> I64 */
963       /* Note: the arm back-end handles only constant second argument */
964       Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
965       Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
966 
967       /* DUPLICATING -- copy value to all lanes */
968       Iop_Dup8x8,   Iop_Dup16x4,   Iop_Dup32x2,
969 
970       /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes
971          of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
972          result.
973          It is a triop: (I64, I64, I8) -> I64 */
974       /* Note: the arm back-end handles only constant third argumnet. */
975       Iop_Extract64,
976 
977       /* REVERSE the order of elements in each Half-words, Words,
978          Double-words */
979       /* Examples:
980             Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
981             Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
982             Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
983       Iop_Reverse16_8x8,
984       Iop_Reverse32_8x8, Iop_Reverse32_16x4,
985       Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,
986 
987       /* PERMUTING -- copy src bytes to dst,
988          as indexed by control vector bytes:
989             for i in 0 .. 7 . result[i] = argL[ argR[i] ]
990          argR[i] values may only be in the range 0 .. 7, else behaviour
991          is undefined. */
992       Iop_Perm8x8,
993 
994       /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
995          See floating-point equiwalents for details. */
996       Iop_Recip32x2, Iop_Rsqrte32x2,
997 
998       /* ------------------ Decimal Floating Point ------------------ */
999 
1000       /* ARITHMETIC INSTRUCTIONS   64-bit
1001 	 ----------------------------------
1002 	 IRRoundingModeDFP(I32) X D64 X D64 -> D64
1003       */
1004       Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64,
1005 
1006       /* ARITHMETIC INSTRUCTIONS  128-bit
1007 	 ----------------------------------
1008 	 IRRoundingModeDFP(I32) X D128 X D128 -> D128
1009       */
1010       Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128,
1011 
1012       /* SHIFT SIGNIFICAND INSTRUCTIONS
1013        *    The DFP significand is shifted by the number of digits specified
1014        *    by the U8 operand.  Digits shifted out of the leftmost digit are
1015        *    lost. Zeros are supplied to the vacated positions on the right.
1016        *    The sign of the result is the same as the sign of the original
1017        *    operand.
1018        *
1019        * D64 x U8  -> D64    left shift and right shift respectively */
1020       Iop_ShlD64, Iop_ShrD64,
1021 
1022       /* D128 x U8  -> D128  left shift and right shift respectively */
1023       Iop_ShlD128, Iop_ShrD128,
1024 
1025 
1026       /* FORMAT CONVERSION INSTRUCTIONS
1027        *   D32 -> D64
1028        */
1029       Iop_D32toD64,
1030 
1031       /*   D64 -> D128 */
1032       Iop_D64toD128,
1033 
1034       /*   I64S -> D128 */
1035       Iop_I64StoD128,
1036 
1037       /*   IRRoundingModeDFP(I32) x D64 -> D32 */
1038       Iop_D64toD32,
1039 
1040       /*   IRRoundingModeDFP(I32) x D128 -> D64 */
1041       Iop_D128toD64,
1042 
1043       /*   IRRoundingModeDFP(I32) x I64 -> D64 */
1044       Iop_I64StoD64,
1045 
1046       /*   IRRoundingModeDFP(I32) x D64 -> I64 */
1047       Iop_D64toI64S,
1048 
1049       /*   IRRoundingModeDFP(I32) x D128 -> I64 */
1050       Iop_D128toI64S,
1051 
1052       /* ROUNDING INSTRUCTIONS
1053        * IRRoundingMode(I32) x D64 -> D64
1054        * The D64 operand, if a finite number, is rounded to an integer value.
1055        */
1056       Iop_RoundD64toInt,
1057 
1058       /* IRRoundingMode(I32) x D128 -> D128 */
1059       Iop_RoundD128toInt,
1060 
1061       /* COMPARE INSTRUCTIONS
1062        * D64 x D64 -> IRCmpD64Result(I32) */
1063       Iop_CmpD64,
1064 
1065       /* D128 x D128 -> IRCmpD64Result(I32) */
1066       Iop_CmpD128,
1067 
1068       /* QUANTIZE AND ROUND INSTRUCTIONS
1069        * The source operand is converted and rounded to the form with the
1070        * immediate exponent specified by the rounding and exponent parameter.
1071        *
1072        * The second operand is converted and rounded to the form
1073        * of the first operand's exponent and the rounded based on the specified
1074        * rounding mode parameter.
1075        *
1076        * IRRoundingModeDFP(I32) x D64 x D64-> D64 */
1077       Iop_QuantizeD64,
1078 
1079       /* IRRoundingModeDFP(I32) x D128 x D128 -> D128 */
1080       Iop_QuantizeD128,
1081 
1082       /* IRRoundingModeDFP(I32) x I8 x D64 -> D64
1083        *    The Decimal Floating point operand is rounded to the requested
1084        *    significance given by the I8 operand as specified by the rounding
1085        *    mode.
1086        */
1087       Iop_SignificanceRoundD64,
1088 
1089       /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
1090       Iop_SignificanceRoundD128,
1091 
1092       /* EXTRACT AND INSERT INSTRUCTIONS
1093        * D64 -> I64
1094        *    The exponent of the D32 or D64 operand is extracted.  The
1095        *    extracted exponent is converted to a 64-bit signed binary integer.
1096        */
1097       Iop_ExtractExpD64,
1098 
1099       /* D128 -> I64 */
1100       Iop_ExtractExpD128,
1101 
1102       /* I64 x I64  -> D64
1103        *    The exponent is specified by the first I64 operand the signed
1104        *    significand is given by the second I64 value.  The result is a D64
1105        *    value consisting of the specified significand and exponent whose
1106        *    sign is that of the specified significand.
1107        */
1108       Iop_InsertExpD64,
1109 
1110       /* I64 x I128 -> D128 */
1111       Iop_InsertExpD128,
1112 
1113       /* Support for 128-bit DFP type */
1114       Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64,
1115 
1116       /*  I64 -> I64
1117        *     Convert 50-bit densely packed BCD string to 60 bit BCD string
1118        */
1119       Iop_DPBtoBCD,
1120 
1121       /* I64 -> I64
1122        *     Convert 60 bit BCD string to 50-bit densely packed BCD string
1123        */
1124       Iop_BCDtoDPB,
1125 
1126       /* Conversion I64 -> D64 */
1127       Iop_ReinterpI64asD64,
1128 
1129       /* Conversion D64 -> I64 */
1130       Iop_ReinterpD64asI64,
1131 
1132       /* ------------------ 128-bit SIMD FP. ------------------ */
1133 
1134       /* --- 32x4 vector FP --- */
1135 
1136       /* binary */
1137       Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
1138       Iop_Max32Fx4, Iop_Min32Fx4,
1139       Iop_Add32Fx2, Iop_Sub32Fx2,
1140       /* Note: For the following compares, the ppc and arm front-ends assume a
1141          nan in a lane of either argument returns zero for that lane. */
1142       Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
1143       Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
1144 
1145       /* Vector Absolute */
1146       Iop_Abs32Fx4,
1147 
1148       /* Pairwise Max and Min. See integer pairwise operations for details. */
1149       Iop_PwMax32Fx4, Iop_PwMin32Fx4,
1150 
1151       /* unary */
1152       Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
1153       Iop_Neg32Fx4,
1154 
1155       /* Vector Reciprocal Estimate finds an approximate reciprocal of each
1156       element in the operand vector, and places the results in the destination
1157       vector.  */
1158       Iop_Recip32Fx4,
1159 
1160       /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
1161          Note, that if one of the arguments is zero and another one is infinity
1162          of arbitrary sign the result of the operation is 2.0. */
1163       Iop_Recps32Fx4,
1164 
1165       /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
1166          square root of each element in the operand vector. */
1167       Iop_Rsqrte32Fx4,
1168 
1169       /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
1170          Note, that of one of the arguments is zero and another one is infiinty
1171          of arbitrary sign the result of the operation is 1.5. */
1172       Iop_Rsqrts32Fx4,
1173 
1174 
1175       /* --- Int to/from FP conversion --- */
1176       /* Unlike the standard fp conversions, these irops take no
1177          rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
1178          indicate the mode: {-inf, +inf, nearest, zero} respectively. */
1179       Iop_I32UtoFx4,  Iop_I32StoFx4,       /* I32x4 -> F32x4       */
1180       Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,    /* F32x4 -> I32x4       */
1181       Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ,   /* F32x4 -> I32x4 (with saturation) */
1182       Iop_RoundF32x4_RM, Iop_RoundF32x4_RP,   /* round to fp integer  */
1183       Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ,   /* round to fp integer  */
1184       /* Fixed32 format is floating-point number with fixed number of fraction
1185          bits. The number of fraction bits is passed as a second argument of
1186          type I8. */
1187       Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
1188       Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
1189 
1190       /* --- Single to/from half conversion --- */
1191       /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
1192       Iop_F32toF16x4, Iop_F16toF32x4,         /* F32x4 <-> F16x4      */
1193 
1194       /* --- 32x4 lowest-lane-only scalar FP --- */
1195 
1196       /* In binary cases, upper 3/4 is copied from first operand.  In
1197          unary cases, upper 3/4 is copied from the operand. */
1198 
1199       /* binary */
1200       Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
1201       Iop_Max32F0x4, Iop_Min32F0x4,
1202       Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,
1203 
1204       /* unary */
1205       Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,
1206 
1207       /* --- 64x2 vector FP --- */
1208 
1209       /* binary */
1210       Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
1211       Iop_Max64Fx2, Iop_Min64Fx2,
1212       Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
1213 
1214       /* unary */
1215       Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,
1216 
1217       /* --- 64x2 lowest-lane-only scalar FP --- */
1218 
1219       /* In binary cases, upper half is copied from first operand.  In
1220          unary cases, upper half is copied from the operand. */
1221 
1222       /* binary */
1223       Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
1224       Iop_Max64F0x2, Iop_Min64F0x2,
1225       Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,
1226 
1227       /* unary */
1228       Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,
1229 
1230       /* --- pack / unpack --- */
1231 
1232       /* 64 <-> 128 bit vector */
1233       Iop_V128to64,     // :: V128 -> I64, low half
1234       Iop_V128HIto64,   // :: V128 -> I64, high half
1235       Iop_64HLtoV128,   // :: (I64,I64) -> V128
1236 
1237       Iop_64UtoV128,
1238       Iop_SetV128lo64,
1239 
1240       /* 32 <-> 128 bit vector */
1241       Iop_32UtoV128,
1242       Iop_V128to32,     // :: V128 -> I32, lowest lane
1243       Iop_SetV128lo32,  // :: (V128,I32) -> V128
1244 
1245       /* ------------------ 128-bit SIMD Integer. ------------------ */
1246 
1247       /* BITWISE OPS */
1248       Iop_NotV128,
1249       Iop_AndV128, Iop_OrV128, Iop_XorV128,
1250 
1251       /* VECTOR SHIFT (shift amt :: Ity_I8) */
1252       Iop_ShlV128, Iop_ShrV128,
1253 
1254       /* MISC (vector integer cmp != 0) */
1255       Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
1256 
1257       /* ADDITION (normal / unsigned sat / signed sat) */
1258       Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
1259       Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
1260       Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
1261 
1262       /* SUBTRACTION (normal / unsigned sat / signed sat) */
1263       Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
1264       Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
1265       Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
1266 
1267       /* MULTIPLICATION (normal / high half of signed/unsigned) */
1268       Iop_Mul8x16,  Iop_Mul16x8,    Iop_Mul32x4,
1269                     Iop_MulHi16Ux8, Iop_MulHi32Ux4,
1270                     Iop_MulHi16Sx8, Iop_MulHi32Sx4,
1271       /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
1272       Iop_MullEven8Ux16, Iop_MullEven16Ux8,
1273       Iop_MullEven8Sx16, Iop_MullEven16Sx8,
1274       /* FIXME: document these */
1275       Iop_Mull8Ux8, Iop_Mull8Sx8,
1276       Iop_Mull16Ux4, Iop_Mull16Sx4,
1277       Iop_Mull32Ux2, Iop_Mull32Sx2,
1278       /* Vector Saturating Doubling Multiply Returning High Half and
1279          Vector Saturating Rounding Doubling Multiply Returning High Half */
1280       /* These IROp's multiply corresponding elements in two vectors, double
1281          the results, and place the most significant half of the final results
1282          in the destination vector. The results are truncated or rounded. If
1283          any of the results overflow, they are saturated. */
1284       Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4,
1285       Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
1286       /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
1287       Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
1288       /* Plynomial multiplication treats it's arguments as coefficients of
1289          polynoms over {0, 1}. */
1290       Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
1291       Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
1292 
1293       /* PAIRWISE operations */
1294       /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
1295             [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
1296       Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
1297       Iop_PwAdd32Fx2,
1298       /* Longening variant is unary. The resulting vector contains two times
1299          less elements than operand, but they are two times wider.
1300          Example:
1301             Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
1302                where a+b and c+d are unsigned 32-bit values. */
1303       Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
1304       Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
1305 
1306       /* ABSOLUTE VALUE */
1307       Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,
1308 
1309       /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
1310       Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
1311       Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
1312 
1313       /* MIN/MAX */
1314       Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4,
1315       Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4,
1316       Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4,
1317       Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
1318 
1319       /* COMPARISON */
1320       Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2,
1321       Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
1322       Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
1323 
1324       /* COUNT ones / leading zeroes / leading sign bits (not including topmost
1325          bit) */
1326       Iop_Cnt8x16,
1327       Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
1328       Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,
1329 
1330       /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
1331       Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
1332       Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
1333       Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
1334 
1335       /* VECTOR x VECTOR SHIFT / ROTATE */
1336       Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
1337       Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
1338       Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
1339       Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
1340       Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,
1341 
1342       /* VECTOR x VECTOR SATURATING SHIFT */
1343       Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
1344       Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
1345       /* VECTOR x INTEGER SATURATING SHIFT */
1346       Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
1347       Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2,
1348       Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
1349 
1350       /* NARROWING (binary)
1351          -- narrow 2xV128 into 1xV128, hi half from left arg */
1352       /* See comments above w.r.t. U vs S issues in saturated narrowing. */
1353       Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
1354       Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
1355       Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
1356       Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,
1357 
1358       /* NARROWING (unary) -- narrow V128 into I64 */
1359       Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
1360       /* Saturating narrowing from signed source to signed/unsigned destination */
1361       Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
1362       Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
1363       /* Saturating narrowing from unsigned source to unsigned destination */
1364       Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,
1365 
1366       /* WIDENING -- sign or zero extend each element of the argument
1367          vector to the twice original size.  The resulting vector consists of
1368          the same number of elements but each element and the vector itself
1369          are twice as wide.
1370          All operations are I64->V128.
1371          Example
1372             Iop_Widen32Sto64x2( [a, b] ) = [c, d]
1373                where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
1374       Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
1375       Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,
1376 
1377       /* INTERLEAVING */
1378       /* Interleave lanes from low or high halves of
1379          operands.  Most-significant result lane is from the left
1380          arg. */
1381       Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
1382       Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
1383       Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
1384       Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
1385       /* Interleave odd/even lanes of operands.  Most-significant result lane
1386          is from the left arg. */
1387       Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
1388       Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
1389       Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
1390 
1391       /* CONCATENATION -- build a new value by concatenating either
1392          the even or odd lanes of both operands. */
1393       Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
1394       Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
1395 
1396       /* GET elements of VECTOR
1397          GET is binop (V128, I8) -> I<elem_size> */
1398       /* Note: the arm back-end handles only constant second argument. */
1399       Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
1400 
1401       /* DUPLICATING -- copy value to all lanes */
1402       Iop_Dup8x16,   Iop_Dup16x8,   Iop_Dup32x4,
1403 
1404       /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes
1405          of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
1406          result.
1407          It is a triop: (V128, V128, I8) -> V128 */
1408       /* Note: the ARM back end handles only constant arg3 in this operation. */
1409       Iop_ExtractV128,
1410 
1411       /* REVERSE the order of elements in each Half-words, Words,
1412          Double-words */
1413       /* Examples:
1414             Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
1415             Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
1416       Iop_Reverse16_8x16,
1417       Iop_Reverse32_8x16, Iop_Reverse32_16x8,
1418       Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,
1419 
1420       /* PERMUTING -- copy src bytes to dst,
1421          as indexed by control vector bytes:
1422             for i in 0 .. 15 . result[i] = argL[ argR[i] ]
1423          argR[i] values may only be in the range 0 .. 15, else behaviour
1424          is undefined. */
1425       Iop_Perm8x16,
1426       Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */
1427 
1428       /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
1429          See floating-point equiwalents for details. */
1430       Iop_Recip32x4, Iop_Rsqrte32x4,
1431 
1432       /* ------------------ 256-bit SIMD Integer. ------------------ */
1433 
1434       /* Pack/unpack */
1435       Iop_V256to64_0,  // V256 -> I64, extract least significant lane
1436       Iop_V256to64_1,
1437       Iop_V256to64_2,
1438       Iop_V256to64_3,  // V256 -> I64, extract most significant lane
1439 
1440       Iop_64x4toV256,  // (I64,I64,I64,I64)->V256
1441                        // first arg is most significant lane
1442 
1443       Iop_V256toV128_0, // V256 -> V128, less significant lane
1444       Iop_V256toV128_1, // V256 -> V128, more significant lane
1445       Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif
1446 
1447       Iop_AndV256,
1448       Iop_OrV256,
1449       Iop_XorV256,
1450       Iop_NotV256,
1451 
1452       /* MISC (vector integer cmp != 0) */
1453       Iop_CmpNEZ32x8, Iop_CmpNEZ64x4,
1454 
1455       /* ------------------ 256-bit SIMD FP. ------------------ */
1456       Iop_Add64Fx4,
1457       Iop_Sub64Fx4,
1458       Iop_Mul64Fx4,
1459       Iop_Div64Fx4,
1460       Iop_Add32Fx8,
1461       Iop_Sub32Fx8,
1462       Iop_Mul32Fx8,
1463       Iop_Div32Fx8,
1464 
1465       Iop_Sqrt32Fx8,
1466       Iop_Sqrt64Fx4,
1467       Iop_RSqrt32Fx8,
1468       Iop_Recip32Fx8,
1469 
1470       Iop_Max32Fx8, Iop_Min32Fx8,
1471       Iop_Max64Fx4, Iop_Min64Fx4
1472    }
1473    IROp;
1474 
1475 /* Pretty-print an op. */
1476 extern void ppIROp ( IROp );
1477 
1478 
1479 /* Encoding of IEEE754-specified rounding modes.  This is the same as
1480    the encoding used by Intel IA32 to indicate x87 rounding mode.
1481    Note, various front and back ends rely on the actual numerical
1482    values of these, so do not change them. */
1483 typedef
1484    enum {
1485       Irrm_NEAREST = 0,
1486       Irrm_NegINF  = 1,
1487       Irrm_PosINF  = 2,
1488       Irrm_ZERO    = 3
1489    }
1490    IRRoundingMode;
1491 
1492 /* DFP encoding of IEEE754 2008 specified rounding modes extends the two bit
1493  * binary floating point rounding mode (IRRoundingMode) to three bits.  The
1494  * DFP rounding modes are a super set of the binary rounding modes.  The
1495  * encoding was chosen such that the mapping of the least significant two bits
1496  * of the IR to POWER encodings is same.  The upper IR encoding bit is just
1497  * a logical OR of the upper rounding mode bit from the POWER encoding.
1498  */
1499 typedef
1500    enum {
1501       Irrm_DFP_NEAREST              = 0,  // Round to nearest, ties to even
1502       Irrm_DFP_NegINF               = 1,  // Round to negative infinity
1503       Irrm_DFP_PosINF               = 2,  // Round to posative infinity
1504       Irrm_DFP_ZERO                 = 3,  // Round toward zero
1505       Irrm_DFP_NEAREST_TIE_AWAY_0   = 4,  // Round to nearest, ties away from 0
1506       Irrm_DFP_PREPARE_SHORTER      = 5,  // Round to prepare for storter
1507                                           // precision
1508       Irrm_DFP_AWAY_FROM_ZERO       = 6,  // Round to away from 0
1509       Irrm_DFP_NEAREST_TIE_TOWARD_0 = 7   // Round to nearest, ties towards 0
1510    }
1511    IRRoundingModeDFP;
1512 
1513 /* Floating point comparison result values, as created by Iop_CmpF64.
1514    This is also derived from what IA32 does. */
1515 typedef
1516    enum {
1517       Ircr_UN = 0x45,
1518       Ircr_LT = 0x01,
1519       Ircr_GT = 0x00,
1520       Ircr_EQ = 0x40
1521    }
1522    IRCmpF64Result;
1523 
1524 typedef IRCmpF64Result IRCmpF32Result;
1525 typedef IRCmpF64Result IRCmpF128Result;
1526 
1527 /* ------------------ Expressions ------------------ */
1528 
1529 typedef struct _IRQop   IRQop;   /* forward declaration */
1530 typedef struct _IRTriop IRTriop; /* forward declaration */
1531 
1532 
1533 /* The different kinds of expressions.  Their meaning is explained below
1534    in the comments for IRExpr. */
1535 typedef
1536    enum {
1537       Iex_Binder=0x15000,
1538       Iex_Get,
1539       Iex_GetI,
1540       Iex_RdTmp,
1541       Iex_Qop,
1542       Iex_Triop,
1543       Iex_Binop,
1544       Iex_Unop,
1545       Iex_Load,
1546       Iex_Const,
1547       Iex_Mux0X,
1548       Iex_CCall
1549    }
1550    IRExprTag;
1551 
1552 /* An expression.  Stored as a tagged union.  'tag' indicates what kind
1553    of expression this is.  'Iex' is the union that holds the fields.  If
1554    an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
1555    expression, and the fields can be accessed with
1556    'e.Iex.Load.<fieldname>'.
1557 
1558    For each kind of expression, we show what it looks like when
1559    pretty-printed with ppIRExpr().
1560 */
1561 typedef
1562    struct _IRExpr
1563    IRExpr;
1564 
1565 struct _IRExpr {
1566    IRExprTag tag;
1567    union {
1568       /* Used only in pattern matching within Vex.  Should not be seen
1569          outside of Vex. */
1570       struct {
1571          Int binder;
1572       } Binder;
1573 
1574       /* Read a guest register, at a fixed offset in the guest state.
1575          ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
1576       */
1577       struct {
1578          Int    offset;    /* Offset into the guest state */
1579          IRType ty;        /* Type of the value being read */
1580       } Get;
1581 
1582       /* Read a guest register at a non-fixed offset in the guest
1583          state.  This allows circular indexing into parts of the guest
1584          state, which is essential for modelling situations where the
1585          identity of guest registers is not known until run time.  One
1586          example is the x87 FP register stack.
1587 
1588          The part of the guest state to be treated as a circular array
1589          is described in the IRRegArray 'descr' field.  It holds the
1590          offset of the first element in the array, the type of each
1591          element, and the number of elements.
1592 
1593          The array index is indicated rather indirectly, in a way
1594          which makes optimisation easy: as the sum of variable part
1595          (the 'ix' field) and a constant offset (the 'bias' field).
1596 
1597          Since the indexing is circular, the actual array index to use
1598          is computed as (ix + bias) % num-of-elems-in-the-array.
1599 
1600          Here's an example.  The description
1601 
1602             (96:8xF64)[t39,-7]
1603 
1604          describes an array of 8 F64-typed values, the
1605          guest-state-offset of the first being 96.  This array is
1606          being indexed at (t39 - 7) % 8.
1607 
1608          It is important to get the array size/type exactly correct
1609          since IR optimisation looks closely at such info in order to
1610          establish aliasing/non-aliasing between seperate GetI and
1611          PutI events, which is used to establish when they can be
1612          reordered, etc.  Putting incorrect info in will lead to
1613          obscure IR optimisation bugs.
1614 
1615             ppIRExpr output: GETI<descr>[<ix>,<bias]
1616                          eg. GETI(128:8xI8)[t1,0]
1617       */
1618       struct {
1619          IRRegArray* descr; /* Part of guest state treated as circular */
1620          IRExpr*     ix;    /* Variable part of index into array */
1621          Int         bias;  /* Constant offset part of index into array */
1622       } GetI;
1623 
1624       /* The value held by a temporary.
1625          ppIRExpr output: t<tmp>, eg. t1
1626       */
1627       struct {
1628          IRTemp tmp;       /* The temporary number */
1629       } RdTmp;
1630 
1631       /* A quaternary operation.
1632          ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
1633                       eg. MAddF64r32(t1, t2, t3, t4)
1634       */
1635       struct {
1636         IRQop* details;
1637       } Qop;
1638 
1639       /* A ternary operation.
1640          ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
1641                       eg. MulF64(1, 2.0, 3.0)
1642       */
1643       struct {
1644         IRTriop* details;
1645       } Triop;
1646 
1647       /* A binary operation.
1648          ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
1649       */
1650       struct {
1651          IROp op;          /* op-code   */
1652          IRExpr* arg1;     /* operand 1 */
1653          IRExpr* arg2;     /* operand 2 */
1654       } Binop;
1655 
1656       /* A unary operation.
1657          ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
1658       */
1659       struct {
1660          IROp    op;       /* op-code */
1661          IRExpr* arg;      /* operand */
1662       } Unop;
1663 
1664       /* A load from memory -- a normal load, not a load-linked.
1665          Load-Linkeds (and Store-Conditionals) are instead represented
1666          by IRStmt.LLSC since Load-Linkeds have side effects and so
1667          are not semantically valid IRExpr's.
1668          ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
1669       */
1670       struct {
1671          IREndness end;    /* Endian-ness of the load */
1672          IRType    ty;     /* Type of the loaded value */
1673          IRExpr*   addr;   /* Address being loaded from */
1674       } Load;
1675 
1676       /* A constant-valued expression.
1677          ppIRExpr output: <con>, eg. 0x4:I32
1678       */
1679       struct {
1680          IRConst* con;     /* The constant itself */
1681       } Const;
1682 
1683       /* A call to a pure (no side-effects) helper C function.
1684 
1685          With the 'cee' field, 'name' is the function's name.  It is
1686          only used for pretty-printing purposes.  The address to call
1687          (host address, of course) is stored in the 'addr' field
1688          inside 'cee'.
1689 
1690          The 'args' field is a NULL-terminated array of arguments.
1691          The stated return IRType, and the implied argument types,
1692          must match that of the function being called well enough so
1693          that the back end can actually generate correct code for the
1694          call.
1695 
1696          The called function **must** satisfy the following:
1697 
1698          * no side effects -- must be a pure function, the result of
1699            which depends only on the passed parameters.
1700 
1701          * it may not look at, nor modify, any of the guest state
1702            since that would hide guest state transitions from
1703            instrumenters
1704 
1705          * it may not access guest memory, since that would hide
1706            guest memory transactions from the instrumenters
1707 
1708          * it must not assume that arguments are being evaluated in a
1709            particular order. The oder of evaluation is unspecified.
1710 
1711          This is restrictive, but makes the semantics clean, and does
1712          not interfere with IR optimisation.
1713 
1714          If you want to call a helper which can mess with guest state
1715          and/or memory, instead use Ist_Dirty.  This is a lot more
1716          flexible, but you have to give a bunch of details about what
1717          the helper does (and you better be telling the truth,
1718          otherwise any derived instrumentation will be wrong).  Also
1719          Ist_Dirty inhibits various IR optimisations and so can cause
1720          quite poor code to be generated.  Try to avoid it.
1721 
1722          ppIRExpr output: <cee>(<args>):<retty>
1723                       eg. foo{0x80489304}(t1, t2):I32
1724       */
1725       struct {
1726          IRCallee* cee;    /* Function to call. */
1727          IRType    retty;  /* Type of return value. */
1728          IRExpr**  args;   /* Vector of argument expressions. */
1729       }  CCall;
1730 
1731       /* A ternary if-then-else operator.  It returns expr0 if cond is
1732          zero, exprX otherwise.  Note that it is STRICT, ie. both
1733          expr0 and exprX are evaluated in all cases.
1734 
1735          ppIRExpr output: Mux0X(<cond>,<expr0>,<exprX>),
1736                          eg. Mux0X(t6,t7,t8)
1737       */
1738       struct {
1739          IRExpr* cond;     /* Condition */
1740          IRExpr* expr0;    /* True expression */
1741          IRExpr* exprX;    /* False expression */
1742       } Mux0X;
1743    } Iex;
1744 };
1745 
1746 /* ------------------ A ternary expression ---------------------- */
1747 struct _IRTriop {
1748    IROp op;          /* op-code   */
1749    IRExpr* arg1;     /* operand 1 */
1750    IRExpr* arg2;     /* operand 2 */
1751    IRExpr* arg3;     /* operand 3 */
1752 };
1753 
1754 /* ------------------ A quarternary expression ------------------ */
1755 struct _IRQop {
1756    IROp op;          /* op-code   */
1757    IRExpr* arg1;     /* operand 1 */
1758    IRExpr* arg2;     /* operand 2 */
1759    IRExpr* arg3;     /* operand 3 */
1760    IRExpr* arg4;     /* operand 4 */
1761 };
1762 
1763 /* Expression constructors. */
1764 extern IRExpr* IRExpr_Binder ( Int binder );
1765 extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
1766 extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
1767 extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
1768 extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
1769                                         IRExpr* arg3, IRExpr* arg4 );
1770 extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
1771                                         IRExpr* arg2, IRExpr* arg3 );
1772 extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
1773 extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
1774 extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
1775 extern IRExpr* IRExpr_Const  ( IRConst* con );
1776 extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
1777 extern IRExpr* IRExpr_Mux0X  ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );
1778 
1779 /* Deep-copy an IRExpr. */
1780 extern IRExpr* deepCopyIRExpr ( IRExpr* );
1781 
1782 /* Pretty-print an IRExpr. */
1783 extern void ppIRExpr ( IRExpr* );
1784 
1785 /* NULL-terminated IRExpr vector constructors, suitable for
1786    use as arg lists in clean/dirty helper calls. */
1787 extern IRExpr** mkIRExprVec_0 ( void );
1788 extern IRExpr** mkIRExprVec_1 ( IRExpr* );
1789 extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
1790 extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
1791 extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
1792 extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1793                                 IRExpr* );
1794 extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1795                                 IRExpr*, IRExpr* );
1796 extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1797                                 IRExpr*, IRExpr*, IRExpr* );
1798 extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1799                                 IRExpr*, IRExpr*, IRExpr*, IRExpr*);
1800 
1801 /* IRExpr copiers:
1802    - shallowCopy: shallow-copy (ie. create a new vector that shares the
1803      elements with the original).
1804    - deepCopy: deep-copy (ie. create a completely new vector). */
1805 extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
1806 extern IRExpr** deepCopyIRExprVec ( IRExpr** );
1807 
1808 /* Make a constant expression from the given host word taking into
1809    account (of course) the host word size. */
1810 extern IRExpr* mkIRExpr_HWord ( HWord );
1811 
1812 /* Convenience function for constructing clean helper calls. */
1813 extern
1814 IRExpr* mkIRExprCCall ( IRType retty,
1815                         Int regparms, HChar* name, void* addr,
1816                         IRExpr** args );
1817 
1818 
1819 /* Convenience functions for atoms (IRExprs which are either Iex_Tmp or
1820  * Iex_Const). */
isIRAtom(IRExpr * e)1821 static inline Bool isIRAtom ( IRExpr* e ) {
1822    return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
1823 }
1824 
1825 /* Are these two IR atoms identical?  Causes an assertion
1826    failure if they are passed non-atoms. */
1827 extern Bool eqIRAtom ( IRExpr*, IRExpr* );
1828 
1829 
1830 /* ------------------ Jump kinds ------------------ */
1831 
1832 /* This describes hints which can be passed to the dispatcher at guest
1833    control-flow transfer points.
1834 
1835    Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
1836    guest_TISTART and guest_TILEN, which specify the start and length
1837    of the region to be invalidated.  These are both the size of a
1838    guest word.  It is the responsibility of the relevant toIR.c to
1839    ensure that these are filled in with suitable values before issuing
1840    a jump of kind Ijk_TInval.
1841 
1842    Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
1843    pseudo-register guest_EMWARN, which is 32-bits regardless of the
1844    host or guest word size.  That register should be made to hold an
1845    EmWarn_* value to indicate the reason for the exit.
1846 
1847    In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
1848    cannot continue) and so the jump destination can be anything.
1849 
1850    Re Ijk_Sys_ (syscall jumps): the guest state must have a
1851    pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
1852    word.  Front ends should set this to be the IP at the most recently
1853    executed kernel-entering (system call) instruction.  This makes it
1854    very much easier (viz, actually possible at all) to back up the
1855    guest to restart a syscall that has been interrupted by a signal.
1856 */
1857 typedef
1858    enum {
1859       Ijk_INVALID=0x16000,
1860       Ijk_Boring,         /* not interesting; just goto next */
1861       Ijk_Call,           /* guest is doing a call */
1862       Ijk_Ret,            /* guest is doing a return */
1863       Ijk_ClientReq,      /* do guest client req before continuing */
1864       Ijk_Yield,          /* client is yielding to thread scheduler */
1865       Ijk_EmWarn,         /* report emulation warning before continuing */
1866       Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
1867       Ijk_NoDecode,       /* next instruction cannot be decoded */
1868       Ijk_MapFail,        /* Vex-provided address translation failed */
1869       Ijk_TInval,         /* Invalidate translations before continuing. */
1870       Ijk_NoRedir,        /* Jump to un-redirected guest addr */
1871       Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
1872       Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
1873       Ijk_SigBUS,         /* current instruction synths SIGBUS */
1874       /* Unfortunately, various guest-dependent syscall kinds.  They
1875 	 all mean: do a syscall before continuing. */
1876       Ijk_Sys_syscall,    /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
1877       Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
1878       Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
1879       Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
1880       Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
1881       Ijk_Sys_sysenter    /* x86 'sysenter'.  guest_EIP becomes
1882                              invalid at the point this happens. */
1883    }
1884    IRJumpKind;
1885 
1886 extern void ppIRJumpKind ( IRJumpKind );
1887 
1888 
1889 /* ------------------ Dirty helper calls ------------------ */
1890 
1891 /* A dirty call is a flexible mechanism for calling (possibly
1892    conditionally) a helper function or procedure.  The helper function
1893    may read, write or modify client memory, and may read, write or
1894    modify client state.  It can take arguments and optionally return a
1895    value.  It may return different results and/or do different things
1896    when called repeatedly with the same arguments, by means of storing
1897    private state.
1898 
1899    If a value is returned, it is assigned to the nominated return
1900    temporary.
1901 
1902    Dirty calls are statements rather than expressions for obvious
1903    reasons.  If a dirty call is marked as writing guest state, any
1904    values derived from the written parts of the guest state are
1905    invalid.  Similarly, if the dirty call is stated as writing
1906    memory, any loaded values are invalidated by it.
1907 
1908    In order that instrumentation is possible, the call must state, and
1909    state correctly:
1910 
1911    * whether it reads, writes or modifies memory, and if so where
1912      (only one chunk can be stated)
1913 
1914    * whether it reads, writes or modifies guest state, and if so which
1915      pieces (several pieces may be stated, and currently their extents
1916      must be known at translation-time).
1917 
1918    Normally, code is generated to pass just the args to the helper.
1919    However, if .needsBBP is set, then an extra first argument is
1920    passed, which is the baseblock pointer, so that the callee can
1921    access the guest state.  It is invalid for .nFxState to be zero
1922    but .needsBBP to be True, since .nFxState==0 is a claim that the
1923    call does not access guest state.
1924 
1925    IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
1926    arguments are evaluated REGARDLESS of the guard value.  The order of
1927    argument evaluation is unspecified. The guard expression is evaluated
1928    AFTER the arguments have been evaluated.
1929 */
1930 
1931 #define VEX_N_FXSTATE  7   /* enough for FXSAVE/FXRSTOR on x86 */
1932 
1933 /* Effects on resources (eg. registers, memory locations) */
1934 typedef
1935    enum {
1936       Ifx_None = 0x1700,    /* no effect */
1937       Ifx_Read,             /* reads the resource */
1938       Ifx_Write,            /* writes the resource */
1939       Ifx_Modify,           /* modifies the resource */
1940    }
1941    IREffect;
1942 
1943 /* Pretty-print an IREffect */
1944 extern void ppIREffect ( IREffect );
1945 
1946 
1947 typedef
1948    struct _IRDirty {
1949       /* What to call, and details of args/results.  .guard must be
1950          non-NULL.  If .tmp is not IRTemp_INVALID (that is, the call
1951          returns a result) then .guard must be demonstrably (at
1952          JIT-time) always true, that is, the call must be
1953          unconditional.  Conditional calls that assign .tmp are not
1954          allowed. */
1955       IRCallee* cee;    /* where to call */
1956       IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens */
1957       IRExpr**  args;   /* arg list, ends in NULL */
1958       IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */
1959 
1960       /* Mem effects; we allow only one R/W/M region to be stated */
1961       IREffect  mFx;    /* indicates memory effects, if any */
1962       IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
1963       Int       mSize;  /* of access, or zero if mFx==Ifx_None */
1964 
1965       /* Guest state effects; up to N allowed */
1966       Bool needsBBP; /* True => also pass guest state ptr to callee */
1967       Int  nFxState; /* must be 0 .. VEX_N_FXSTATE */
1968       struct {
1969          IREffect fx:16;   /* read, write or modify?  Ifx_None is invalid. */
1970          UShort   offset;
1971          UShort   size;
1972          UChar    nRepeats;
1973          UChar    repeatLen;
1974       } fxState[VEX_N_FXSTATE];
1975       /* The access can be repeated, as specified by nRepeats and
1976          repeatLen.  To describe only a single access, nRepeats and
1977          repeatLen should be zero.  Otherwise, repeatLen must be a
1978          multiple of size and greater than size. */
1979       /* Overall, the parts of the guest state denoted by (offset,
1980          size, nRepeats, repeatLen) is
1981                [offset, +size)
1982             and, if nRepeats > 0,
1983                for (i = 1; i <= nRepeats; i++)
1984                   [offset + i * repeatLen, +size)
1985          A convenient way to enumerate all segments is therefore
1986             for (i = 0; i < 1 + nRepeats; i++)
1987                [offset + i * repeatLen, +size)
1988       */
1989    }
1990    IRDirty;
1991 
1992 /* Pretty-print a dirty call */
1993 extern void     ppIRDirty ( IRDirty* );
1994 
1995 /* Allocate an uninitialised dirty call */
1996 extern IRDirty* emptyIRDirty ( void );
1997 
1998 /* Deep-copy a dirty call */
1999 extern IRDirty* deepCopyIRDirty ( IRDirty* );
2000 
2001 /* A handy function which takes some of the tedium out of constructing
2002    dirty helper calls.  The called function impliedly does not return
2003    any value and has a constant-True guard.  The call is marked as
2004    accessing neither guest state nor memory (hence the "unsafe"
2005    designation) -- you can change this marking later if need be.  A
2006    suitable IRCallee is constructed from the supplied bits. */
2007 extern
2008 IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr,
2009                              IRExpr** args );
2010 
2011 /* Similarly, make a zero-annotation dirty call which returns a value,
2012    and assign that to the given temp. */
2013 extern
2014 IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
2015                              Int regparms, HChar* name, void* addr,
2016                              IRExpr** args );
2017 
2018 
2019 /* --------------- Memory Bus Events --------------- */
2020 
2021 typedef
2022    enum {
2023       Imbe_Fence=0x18000,
2024       /* Needed only on ARM.  It cancels a reservation made by a
2025          preceding Linked-Load, and needs to be handed through to the
2026          back end, just as LL and SC themselves are. */
2027       Imbe_CancelReservation
2028    }
2029    IRMBusEvent;
2030 
2031 extern void ppIRMBusEvent ( IRMBusEvent );
2032 
2033 
2034 /* --------------- Compare and Swap --------------- */
2035 
2036 /* This denotes an atomic compare and swap operation, either
2037    a single-element one or a double-element one.
2038 
2039    In the single-element case:
2040 
2041      .addr is the memory address.
2042      .end  is the endianness with which memory is accessed
2043 
2044      If .addr contains the same value as .expdLo, then .dataLo is
2045      written there, else there is no write.  In both cases, the
2046      original value at .addr is copied into .oldLo.
2047 
2048      Types: .expdLo, .dataLo and .oldLo must all have the same type.
2049      It may be any integral type, viz: I8, I16, I32 or, for 64-bit
2050      guests, I64.
2051 
2052      .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
2053      be NULL.
2054 
2055    In the double-element case:
2056 
2057      .addr is the memory address.
2058      .end  is the endianness with which memory is accessed
2059 
2060      The operation is the same:
2061 
2062      If .addr contains the same value as .expdHi:.expdLo, then
2063      .dataHi:.dataLo is written there, else there is no write.  In
2064      both cases the original value at .addr is copied into
2065      .oldHi:.oldLo.
2066 
2067      Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
2068      all have the same type, which may be any integral type, viz: I8,
2069      I16, I32 or, for 64-bit guests, I64.
2070 
2071      The double-element case is complicated by the issue of
2072      endianness.  In all cases, the two elements are understood to be
2073      located adjacently in memory, starting at the address .addr.
2074 
2075        If .end is Iend_LE, then the .xxxLo component is at the lower
2076        address and the .xxxHi component is at the higher address, and
2077        each component is itself stored little-endianly.
2078 
2079        If .end is Iend_BE, then the .xxxHi component is at the lower
2080        address and the .xxxLo component is at the higher address, and
2081        each component is itself stored big-endianly.
2082 
2083    This allows representing more cases than most architectures can
2084    handle.  For example, x86 cannot do DCAS on 8- or 16-bit elements.
2085 
2086    How to know if the CAS succeeded?
2087 
2088    * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
2089      then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
2090      stored at .addr, and the original value there was .oldLo (resp
2091      .oldHi:.oldLo).
2092 
2093    * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
2094      then the CAS failed, and the original value at .addr was .oldLo
2095      (resp. .oldHi:.oldLo).
2096 
2097    Hence it is easy to know whether or not the CAS succeeded.
2098 */
2099 typedef
2100    struct {
2101       IRTemp    oldHi;  /* old value of *addr is written here */
2102       IRTemp    oldLo;
2103       IREndness end;    /* endianness of the data in memory */
2104       IRExpr*   addr;   /* store address */
2105       IRExpr*   expdHi; /* expected old value at *addr */
2106       IRExpr*   expdLo;
2107       IRExpr*   dataHi; /* new value for *addr */
2108       IRExpr*   dataLo;
2109    }
2110    IRCAS;
2111 
2112 extern void ppIRCAS ( IRCAS* cas );
2113 
2114 extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
2115                         IREndness end, IRExpr* addr,
2116                         IRExpr* expdHi, IRExpr* expdLo,
2117                         IRExpr* dataHi, IRExpr* dataLo );
2118 
2119 extern IRCAS* deepCopyIRCAS ( IRCAS* );
2120 
2121 
2122 /* ------------------ Circular Array Put ------------------ */
2123 typedef
2124    struct {
2125       IRRegArray* descr; /* Part of guest state treated as circular */
2126       IRExpr*     ix;    /* Variable part of index into array */
2127       Int         bias;  /* Constant offset part of index into array */
2128       IRExpr*     data;  /* The value to write */
2129    } IRPutI;
2130 
2131 extern void ppIRPutI ( IRPutI* puti );
2132 
2133 extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix,
2134                           Int bias, IRExpr* data );
2135 
2136 extern IRPutI* deepCopyIRPutI ( IRPutI* );
2137 
2138 
2139 /* ------------------ Statements ------------------ */
2140 
2141 /* The different kinds of statements.  Their meaning is explained
2142    below in the comments for IRStmt.
2143 
2144    Those marked META do not represent code, but rather extra
2145    information about the code.  These statements can be removed
2146    without affecting the functional behaviour of the code, however
2147    they are required by some IR consumers such as tools that
2148    instrument the code.
2149 */
2150 
2151 typedef
2152    enum {
2153       Ist_NoOp=0x19000,
2154       Ist_IMark,     /* META */
2155       Ist_AbiHint,   /* META */
2156       Ist_Put,
2157       Ist_PutI,
2158       Ist_WrTmp,
2159       Ist_Store,
2160       Ist_CAS,
2161       Ist_LLSC,
2162       Ist_Dirty,
2163       Ist_MBE,       /* META (maybe) */
2164       Ist_Exit
2165    }
2166    IRStmtTag;
2167 
2168 /* A statement.  Stored as a tagged union.  'tag' indicates what kind
2169    of expression this is.  'Ist' is the union that holds the fields.
2170    If an IRStmt 'st' has st.tag equal to Iex_Store, then it's a store
2171    statement, and the fields can be accessed with
2172    'st.Ist.Store.<fieldname>'.
2173 
2174    For each kind of statement, we show what it looks like when
2175    pretty-printed with ppIRStmt().
2176 */
2177 typedef
2178    struct _IRStmt {
2179       IRStmtTag tag;
2180       union {
2181          /* A no-op (usually resulting from IR optimisation).  Can be
2182             omitted without any effect.
2183 
2184             ppIRStmt output: IR-NoOp
2185          */
2186          struct {
2187 	 } NoOp;
2188 
2189          /* META: instruction mark.  Marks the start of the statements
2190             that represent a single machine instruction (the end of
2191             those statements is marked by the next IMark or the end of
2192             the IRSB).  Contains the address and length of the
2193             instruction.
2194 
2195             It also contains a delta value.  The delta must be
2196             subtracted from a guest program counter value before
2197             attempting to establish, by comparison with the address
2198             and length values, whether or not that program counter
2199             value refers to this instruction.  For x86, amd64, ppc32,
2200             ppc64 and arm, the delta value is zero.  For Thumb
2201             instructions, the delta value is one.  This is because, on
2202             Thumb, guest PC values (guest_R15T) are encoded using the
2203             top 31 bits of the instruction address and a 1 in the lsb;
2204             hence they appear to be (numerically) 1 past the start of
2205             the instruction they refer to.  IOW, guest_R15T on ARM
2206             holds a standard ARM interworking address.
2207 
2208             ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
2209                          eg. ------ IMark(0x4000792, 5, 0) ------,
2210          */
2211          struct {
2212             Addr64 addr;   /* instruction address */
2213             Int    len;    /* instruction length */
2214             UChar  delta;  /* addr = program counter as encoded in guest state
2215                                      - delta */
2216          } IMark;
2217 
2218          /* META: An ABI hint, which says something about this
2219             platform's ABI.
2220 
2221             At the moment, the only AbiHint is one which indicates
2222             that a given chunk of address space, [base .. base+len-1],
2223             has become undefined.  This is used on amd64-linux and
2224             some ppc variants to pass stack-redzoning hints to whoever
2225             wants to see them.  It also indicates the address of the
2226             next (dynamic) instruction that will be executed.  This is
2227             to help Memcheck to origin tracking.
2228 
2229             ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
2230                          eg. ====== AbiHint(t1, 16, t2) ======
2231          */
2232          struct {
2233             IRExpr* base;     /* Start  of undefined chunk */
2234             Int     len;      /* Length of undefined chunk */
2235             IRExpr* nia;      /* Address of next (guest) insn */
2236          } AbiHint;
2237 
2238          /* Write a guest register, at a fixed offset in the guest state.
2239             ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
2240          */
2241          struct {
2242             Int     offset;   /* Offset into the guest state */
2243             IRExpr* data;     /* The value to write */
2244          } Put;
2245 
2246          /* Write a guest register, at a non-fixed offset in the guest
2247             state.  See the comment for GetI expressions for more
2248             information.
2249 
2250             ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
2251                          eg. PUTI(64:8xF64)[t5,0] = t1
2252          */
2253          struct {
2254             IRPutI* details;
2255          } PutI;
2256 
2257          /* Assign a value to a temporary.  Note that SSA rules require
2258             each tmp is only assigned to once.  IR sanity checking will
2259             reject any block containing a temporary which is not assigned
2260             to exactly once.
2261 
2262             ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
2263          */
2264          struct {
2265             IRTemp  tmp;   /* Temporary  (LHS of assignment) */
2266             IRExpr* data;  /* Expression (RHS of assignment) */
2267          } WrTmp;
2268 
2269          /* Write a value to memory.  This is a normal store, not a
2270             Store-Conditional.  To represent a Store-Conditional,
2271             instead use IRStmt.LLSC.
2272             ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
2273          */
2274          struct {
2275             IREndness end;    /* Endianness of the store */
2276             IRExpr*   addr;   /* store address */
2277             IRExpr*   data;   /* value to write */
2278          } Store;
2279 
2280          /* Do an atomic compare-and-swap operation.  Semantics are
2281             described above on a comment at the definition of IRCAS.
2282 
2283             ppIRStmt output:
2284                t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
2285             eg
2286                t1 = CASle(t2 :: t3->Add32(t3,1))
2287                which denotes a 32-bit atomic increment
2288                of a value at address t2
2289 
2290             A double-element CAS may also be denoted, in which case <tmp>,
2291             <expected> and <new> are all pairs of items, separated by
2292             commas.
2293          */
2294          struct {
2295             IRCAS* details;
2296          } CAS;
2297 
2298          /* Either Load-Linked or Store-Conditional, depending on
2299             STOREDATA.
2300 
2301             If STOREDATA is NULL then this is a Load-Linked, meaning
2302             that data is loaded from memory as normal, but a
2303             'reservation' for the address is also lodged in the
2304             hardware.
2305 
2306                result = Load-Linked(addr, end)
2307 
2308             The data transfer type is the type of RESULT (I32, I64,
2309             etc).  ppIRStmt output:
2310 
2311                result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)
2312 
2313             If STOREDATA is not NULL then this is a Store-Conditional,
2314             hence:
2315 
2316                result = Store-Conditional(addr, storedata, end)
2317 
2318             The data transfer type is the type of STOREDATA and RESULT
2319             has type Ity_I1. The store may fail or succeed depending
2320             on the state of a previously lodged reservation on this
2321             address.  RESULT is written 1 if the store succeeds and 0
2322             if it fails.  eg ppIRStmt output:
2323 
2324                result = ( ST<end>-Cond(<addr>) = <storedata> )
2325                eg t3 = ( STbe-Cond(t1, t2) )
2326 
2327             In all cases, the address must be naturally aligned for
2328             the transfer type -- any misaligned addresses should be
2329             caught by a dominating IR check and side exit.  This
2330             alignment restriction exists because on at least some
2331             LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
2332             misaligned addresses, and we have to actually generate
2333             stwcx. on the host, and we don't want it trapping on the
2334             host.
2335 
2336             Summary of rules for transfer type:
2337               STOREDATA == NULL (LL):
2338                 transfer type = type of RESULT
2339               STOREDATA != NULL (SC):
2340                 transfer type = type of STOREDATA, and RESULT :: Ity_I1
2341          */
2342          struct {
2343             IREndness end;
2344             IRTemp    result;
2345             IRExpr*   addr;
2346             IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
2347          } LLSC;
2348 
2349          /* Call (possibly conditionally) a C function that has side
2350             effects (ie. is "dirty").  See the comments above the
2351             IRDirty type declaration for more information.
2352 
2353             ppIRStmt output:
2354                t<tmp> = DIRTY <guard> <effects>
2355                   ::: <callee>(<args>)
2356             eg.
2357                t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
2358                      ::: foo{0x380035f4}(t2)
2359          */
2360          struct {
2361             IRDirty* details;
2362          } Dirty;
2363 
2364          /* A memory bus event - a fence, or acquisition/release of the
2365             hardware bus lock.  IR optimisation treats all these as fences
2366             across which no memory references may be moved.
2367             ppIRStmt output: MBusEvent-Fence,
2368                              MBusEvent-BusLock, MBusEvent-BusUnlock.
2369          */
2370          struct {
2371             IRMBusEvent event;
2372          } MBE;
2373 
2374          /* Conditional exit from the middle of an IRSB.
2375             ppIRStmt output: if (<guard>) goto {<jk>} <dst>
2376                          eg. if (t69) goto {Boring} 0x4000AAA:I32
2377             If <guard> is true, the guest state is also updated by
2378             PUT-ing <dst> at <offsIP>.  This is done because a
2379             taken exit must update the guest program counter.
2380          */
2381          struct {
2382             IRExpr*    guard;    /* Conditional expression */
2383             IRConst*   dst;      /* Jump target (constant only) */
2384             IRJumpKind jk;       /* Jump kind */
2385             Int        offsIP;   /* Guest state offset for IP */
2386          } Exit;
2387       } Ist;
2388    }
2389    IRStmt;
2390 
2391 /* Statement constructors. */
2392 extern IRStmt* IRStmt_NoOp    ( void );
2393 extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len, UChar delta );
2394 extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
2395 extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
2396 extern IRStmt* IRStmt_PutI    ( IRPutI* details );
2397 extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
2398 extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
2399 extern IRStmt* IRStmt_CAS     ( IRCAS* details );
2400 extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
2401                                 IRExpr* addr, IRExpr* storedata );
2402 extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
2403 extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
2404 extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
2405                                 Int offsIP );
2406 
2407 /* Deep-copy an IRStmt. */
2408 extern IRStmt* deepCopyIRStmt ( IRStmt* );
2409 
2410 /* Pretty-print an IRStmt. */
2411 extern void ppIRStmt ( IRStmt* );
2412 
2413 
2414 /* ------------------ Basic Blocks ------------------ */
2415 
2416 /* Type environments: a bunch of statements, expressions, etc, are
2417    incomplete without an environment indicating the type of each
2418    IRTemp.  So this provides one.  IR temporaries are really just
2419    unsigned ints and so this provides an array, 0 .. n_types_used-1 of
2420    them.
2421 */
2422 typedef
2423    struct {
2424       IRType* types;
2425       Int     types_size;
2426       Int     types_used;
2427    }
2428    IRTypeEnv;
2429 
2430 /* Obtain a new IRTemp */
2431 extern IRTemp newIRTemp ( IRTypeEnv*, IRType );
2432 
2433 /* Deep-copy a type environment */
2434 extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );
2435 
2436 /* Pretty-print a type environment */
2437 extern void ppIRTypeEnv ( IRTypeEnv* );
2438 
2439 
2440 /* Code blocks, which in proper compiler terminology are superblocks
2441    (single entry, multiple exit code sequences) contain:
2442 
2443    - A table giving a type for each temp (the "type environment")
2444    - An expandable array of statements
2445    - An expression of type 32 or 64 bits, depending on the
2446      guest's word size, indicating the next destination if the block
2447      executes all the way to the end, without a side exit
2448    - An indication of any special actions (JumpKind) needed
2449      for this final jump.
2450    - Offset of the IP field in the guest state.  This will be
2451      updated before the final jump is done.
2452 
2453    "IRSB" stands for "IR Super Block".
2454 */
2455 typedef
2456    struct {
2457       IRTypeEnv* tyenv;
2458       IRStmt**   stmts;
2459       Int        stmts_size;
2460       Int        stmts_used;
2461       IRExpr*    next;
2462       IRJumpKind jumpkind;
2463       Int        offsIP;
2464    }
2465    IRSB;
2466 
2467 /* Allocate a new, uninitialised IRSB */
2468 extern IRSB* emptyIRSB ( void );
2469 
2470 /* Deep-copy an IRSB */
2471 extern IRSB* deepCopyIRSB ( IRSB* );
2472 
2473 /* Deep-copy an IRSB, except for the statements list, which set to be
2474    a new, empty, list of statements. */
2475 extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );
2476 
2477 /* Pretty-print an IRSB */
2478 extern void ppIRSB ( IRSB* );
2479 
2480 /* Append an IRStmt to an IRSB */
2481 extern void addStmtToIRSB ( IRSB*, IRStmt* );
2482 
2483 
2484 /*---------------------------------------------------------------*/
2485 /*--- Helper functions for the IR                             ---*/
2486 /*---------------------------------------------------------------*/
2487 
2488 /* For messing with IR type environments */
2489 extern IRTypeEnv* emptyIRTypeEnv  ( void );
2490 
2491 /* What is the type of this expression? */
2492 extern IRType typeOfIRConst ( IRConst* );
2493 extern IRType typeOfIRTemp  ( IRTypeEnv*, IRTemp );
2494 extern IRType typeOfIRExpr  ( IRTypeEnv*, IRExpr* );
2495 
2496 /* Sanity check a BB of IR */
2497 extern void sanityCheckIRSB ( IRSB*  bb,
2498                               HChar* caller,
2499                               Bool   require_flatness,
2500                               IRType guest_word_size );
2501 extern Bool isFlatIRStmt ( IRStmt* );
2502 
2503 /* Is this any value actually in the enumeration 'IRType' ? */
2504 extern Bool isPlausibleIRType ( IRType ty );
2505 
2506 #endif /* ndef __LIBVEX_IR_H */
2507 
2508 
2509 /*---------------------------------------------------------------*/
2510 /*---                                             libvex_ir.h ---*/
2511 /*---------------------------------------------------------------*/
2512