/*---------------------------------------------------------------*/
/*--- begin                                       libvex_ir.h ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#ifndef __LIBVEX_IR_H
#define __LIBVEX_IR_H

#include "libvex_basictypes.h"


/*---------------------------------------------------------------*/
/*--- High-level IR description                               ---*/
/*---------------------------------------------------------------*/

/* Vex IR is an architecture-neutral intermediate representation.
   Unlike some IRs in systems similar to Vex, it is not like assembly
   language (ie. a list of instructions).  Rather, it is more like the
   IR that might be used in a compiler.

   Code blocks
   ~~~~~~~~~~~
   The code is broken into small code blocks ("superblocks", type:
   'IRSB').  Each code block typically represents from 1 to perhaps 50
   instructions.  IRSBs are single-entry, multiple-exit code blocks.
   Each IRSB contains three things:
   - a type environment, which indicates the type of each temporary
     value present in the IRSB
   - a list of statements, which represent code
   - a jump that exits from the end of the IRSB
   Because the blocks are multiple-exit, there can be additional
   conditional exit statements that cause control to leave the IRSB
   before the final exit.  Also because of this, IRSBs can cover
   multiple non-consecutive sequences of code (up to 3).  These are
   recorded in the type VexGuestExtents (see libvex.h).

   Statements and expressions
   ~~~~~~~~~~~~~~~~~~~~~~~~~~
   Statements (type 'IRStmt') represent operations with side-effects,
   eg. guest register writes, stores, and assignments to temporaries.
   Expressions (type 'IRExpr') represent operations without
   side-effects, eg. arithmetic operations, loads, constants.
   Expressions can contain sub-expressions, forming expression trees,
   eg. (3 + (4 * load(addr1))).

   Storage of guest state
   ~~~~~~~~~~~~~~~~~~~~~~
   The "guest state" contains the guest registers of the guest machine
   (ie. the machine that we are simulating).  It is stored by default
   in a block of memory supplied by the user of the VEX library,
   generally referred to as the guest state (area).  To operate on
   these registers, one must first read ("Get") them from the guest
   state into a temporary value.  Afterwards, one can write ("Put")
   them back into the guest state.

   Get and Put are characterised by a byte offset into the guest
   state, a small integer which effectively gives the identity of the
   referenced guest register, and a type, which indicates the size of
   the value to be transferred.

   The basic "Get" and "Put" operations are sufficient to model normal
   fixed registers on the guest.  Selected areas of the guest state
   can be treated as a circular array of registers (type:
   'IRRegArray'), which can be indexed at run-time.  This is done with
   the "GetI" and "PutI" primitives, which are necessary to describe
   rotating register files, for example the x87 FPU stack, SPARC
   register windows, and the Itanium register files.
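
   For instance (an illustrative sketch; the offset 72 is hypothetical
   and depends on the guest architecture), a read of an x87 stack
   register whose identity is computed at run time might pretty-print
   as

     t2 = GETI(72:8xF64)[t1,0]

   meaning: treat the 8 F64 values starting at guest state offset 72
   as a circular array, and read element number (t1 + 0) % 8.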

   Examples, and flattened vs. unflattened code
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   For example, consider this x86 instruction:

     addl %ebx, %eax

   One Vex IR translation for this code would be this:

     ------ IMark(0x24F275, 7, 0) ------
     t3 = GET:I32(0)     # get %eax, a 32-bit integer
     t2 = GET:I32(12)    # get %ebx, a 32-bit integer
     t1 = Add32(t3,t2)   # addl
     PUT(0) = t1         # put %eax

   (For simplicity, this ignores the effects on the condition codes, and
   the update of the instruction pointer.)

   The "IMark" is an IR statement that doesn't represent actual code.
   Instead it indicates the address and length of the original
   instruction.  The numbers 0 and 12 are offsets into the guest state
   for %eax and %ebx.  The full list of offsets for an architecture
   <ARCH> can be found in the type VexGuest<ARCH>State in the file
   VEX/pub/libvex_guest_<ARCH>.h.

   The five statements in this example are:
   - the IMark
   - three assignments to temporaries
   - one register write (put)

   The six expressions in this example are:
   - two register reads (gets)
   - one arithmetic (add) operation
   - three temporaries (two nested within the Add32, one in the PUT)

   The above IR is "flattened", ie. all sub-expressions are "atoms",
   either constants or temporaries.  An equivalent, unflattened version
   would be:

     PUT(0) = Add32(GET:I32(0), GET:I32(12))

   IR is guaranteed to be flattened at instrumentation time.  This makes
   instrumentation easier.  Equivalent flattened and unflattened IR
   typically results in the same generated code.

   Another example, this one showing loads and stores:

     addl %edx,4(%eax)

   This becomes (again ignoring condition code and instruction pointer
   updates):

     ------ IMark(0x4000ABA, 3, 0) ------
     t3 = Add32(GET:I32(0),0x4:I32)
     t2 = LDle:I32(t3)
     t1 = GET:I32(8)
     t0 = Add32(t2,t1)
     STle(t3) = t0

   The "le" in "LDle" and "STle" is short for "little-endian".

   No need for deallocations
   ~~~~~~~~~~~~~~~~~~~~~~~~~
   Although there are allocation functions for various data structures
   in this file, there are no deallocation functions.  This is because
   Vex uses a memory allocation scheme that automatically reclaims the
   memory used by allocated structures once translation is completed.
   This makes things easier for tools that instrument/transform code
   blocks.

   SSAness and typing
   ~~~~~~~~~~~~~~~~~~
   The IR is fully typed.  For every IRSB (IR block) it is possible to
   say unambiguously whether or not it is correctly typed.
   Incorrectly typed IR has no meaning and VEX will refuse to process
   it.  At various points during processing VEX typechecks the IR and
   aborts if any violations are found.  This seems overkill but makes
   it a great deal easier to build a reliable JIT.

   IR also has the SSA property.  SSA stands for Static Single
   Assignment, and what it means is that each IR temporary may be
   assigned to only once.  This idea became widely used in compiler
   construction in the mid to late 90s.  It makes many IR-level
   transformations/code improvements easier, simpler and faster.
   Whenever it typechecks an IR block, VEX also checks that the SSA
   property holds, and will abort if it does not.  So SSAness is
   mechanically and rigidly enforced.
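
   For example (illustrative), a block containing

     t5 = GET:I32(0)
     t5 = Add32(t5,t5)

   violates SSA because t5 is assigned twice, and would be rejected by
   the sanity checker even though each statement is well-typed in
   isolation.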
*/

/*---------------------------------------------------------------*/
/*--- Type definitions for the IR                             ---*/
/*---------------------------------------------------------------*/

/* General comments about naming schemes:

   All publicly visible functions contain the name of the primary
   type on which they operate (IRFoo, IRBar, etc).  Hence you should
   be able to identify these functions by grepping for "IR[A-Z]".

   For some type 'IRFoo':

   - ppIRFoo is the printing method for IRFoo, printing it to the
     output channel specified in the LibVEX_Initialise call.

   - eqIRFoo is a structural equality predicate for IRFoos.

   - deepCopyIRFoo is a deep copy constructor for IRFoos.
     It recursively traverses the entire argument tree and
     produces a complete new tree.  All types have a deep copy
     constructor.

   - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
     It creates a new top-level copy of the supplied object,
     but does not copy any sub-objects.  Only some types have a
     shallow copy constructor.
*/

/* ------------------ Types ------------------ */

/* A type indicates the size of a value, and whether it's an integer, a
   float, or a vector (SIMD) value. */
typedef
   enum {
      Ity_INVALID=0x11000,
      Ity_I1,
      Ity_I8,
      Ity_I16,
      Ity_I32,
      Ity_I64,
      Ity_I128,  /* 128-bit scalar */
      Ity_F32,   /* IEEE 754 float */
      Ity_F64,   /* IEEE 754 double */
      Ity_D32,   /* 32-bit Decimal floating point */
      Ity_D64,   /* 64-bit Decimal floating point */
      Ity_D128,  /* 128-bit Decimal floating point */
      Ity_F128,  /* 128-bit floating point; implementation defined */
      Ity_V128,  /* 128-bit SIMD */
      Ity_V256   /* 256-bit SIMD */
   }
   IRType;

/* Pretty-print an IRType */
extern void ppIRType ( IRType );

/* Get the size (in bytes) of an IRType */
extern Int sizeofIRType ( IRType );
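
/* For example (an illustrative sketch, not part of the API contract):
   sizeofIRType(Ity_I32) returns 4 and sizeofIRType(Ity_V128) returns
   16, so code scanning the guest state in register-sized steps might
   advance its offset like this:

      Int    off = 0;
      IRType ty  = Ity_I64;
      off += sizeofIRType(ty);   // off is now 8
*/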


/* ------------------ Endianness ------------------ */

/* IREndness is used in load IRExprs and store IRStmts. */
typedef
   enum {
      Iend_LE=0x12000, /* little endian */
      Iend_BE          /* big endian */
   }
   IREndness;


/* ------------------ Constants ------------------ */

/* IRConsts are used within 'Const' and 'Exit' IRExprs. */

/* The various kinds of constant. */
typedef
   enum {
      Ico_U1=0x13000,
      Ico_U8,
      Ico_U16,
      Ico_U32,
      Ico_U64,
      Ico_F32,   /* 32-bit IEEE754 floating */
      Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
                    as an IEEE754 single value. */
      Ico_F64,   /* 64-bit IEEE754 floating */
      Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
                    as an IEEE754 double value. */
      Ico_V128,  /* 128-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 16 x 1-byte lanes */
      Ico_V256   /* 256-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 32 x 1-byte lanes */
   }
   IRConstTag;

/* A constant.  Stored as a tagged union.  'tag' indicates what kind of
   constant this is.  'Ico' is the union that holds the fields.  If an
   IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
   and its value can be accessed with 'c.Ico.U32'. */
typedef
   struct _IRConst {
      IRConstTag tag;
      union {
         Bool   U1;
         UChar  U8;
         UShort U16;
         UInt   U32;
         ULong  U64;
         Float  F32;
         UInt   F32i;
         Double F64;
         ULong  F64i;
         UShort V128;   /* 16-bit value; see Ico_V128 comment above */
         UInt   V256;   /* 32-bit value; see Ico_V256 comment above */
      } Ico;
   }
   IRConst;

/* IRConst constructors */
extern IRConst* IRConst_U1   ( Bool );
extern IRConst* IRConst_U8   ( UChar );
extern IRConst* IRConst_U16  ( UShort );
extern IRConst* IRConst_U32  ( UInt );
extern IRConst* IRConst_U64  ( ULong );
extern IRConst* IRConst_F32  ( Float );
extern IRConst* IRConst_F32i ( UInt );
extern IRConst* IRConst_F64  ( Double );
extern IRConst* IRConst_F64i ( ULong );
extern IRConst* IRConst_V128 ( UShort );
extern IRConst* IRConst_V256 ( UInt );

/* Deep-copy an IRConst */
extern IRConst* deepCopyIRConst ( IRConst* );

/* Pretty-print an IRConst */
extern void ppIRConst ( IRConst* );

/* Compare two IRConsts for equality */
extern Bool eqIRConst ( IRConst*, IRConst* );
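
/* For example (illustrative): building and inspecting a 32-bit
   constant.  Note there is no need to free it; see "No need for
   deallocations" above.

      IRConst* c = IRConst_U32(42);
      // c->tag == Ico_U32, c->Ico.U32 == 42
      // eqIRConst(c, IRConst_U32(42)) == True
      ppIRConst(c);   // prints something like 0x2A:I32
*/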


/* ------------------ Call targets ------------------ */

/* Describes a helper function to call.  The name part is purely for
   pretty printing and not actually used.  regparms=n tells the back
   end that the callee has been declared
   "__attribute__((regparm(n)))", although indirectly, via the
   VEX_REGPARM(n) macro.  On some targets (x86) the back end will need
   to construct a non-standard sequence to call a function declared
   like this.

   mcx_mask is a sop to Memcheck.  It indicates which args should be
   considered 'always defined' when lazily computing definedness of
   the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
   args[1], etc.  If a bit is set, the corresponding arg is excluded
   (hence "x" in "mcx") from definedness checking.
*/

typedef
   struct {
      Int    regparms;
      HChar* name;
      void*  addr;
      UInt   mcx_mask;
   }
   IRCallee;

/* Create an IRCallee. */
extern IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr );

/* Deep-copy an IRCallee. */
extern IRCallee* deepCopyIRCallee ( IRCallee* );

/* Pretty-print an IRCallee. */
extern void ppIRCallee ( IRCallee* );
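
/* For example (an illustrative sketch; 'calc_flags' is a hypothetical
   helper function, not part of VEX):

      IRCallee* cee = mkIRCallee(0, "calc_flags", (void*)calc_flags);
      cee->mcx_mask = 1;   // treat args[0] as always defined

   mkIRCallee initialises mcx_mask to zero, so callers that want
   Memcheck exclusions set the mask afterwards, as above.
*/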


/* ------------------ Guest state arrays ------------------ */

/* This describes a section of the guest state that we want to
   be able to index at run time, so as to be able to describe
   indexed or rotating register files on the guest. */
typedef
   struct {
      Int    base;   /* guest state offset of start of indexed area */
      IRType elemTy; /* type of each element in the indexed area */
      Int    nElems; /* number of elements in the indexed area */
   }
   IRRegArray;

extern IRRegArray* mkIRRegArray ( Int, IRType, Int );

extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );

extern void ppIRRegArray ( IRRegArray* );
extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
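
/* For example (illustrative; the offset 72 is hypothetical), the x87
   register stack could be described as 8 F64 elements starting at
   guest state offset 72:

      IRRegArray* descr = mkIRRegArray(72, Ity_F64, 8);
      // ppIRRegArray(descr) prints (72:8xF64)

   GetI/PutI expressions and statements then index into this array
   modulo nElems; see the IRExpr comments below.
*/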


/* ------------------ Temporaries ------------------ */

/* This represents a temporary, eg. t1.  The IR optimiser relies on the
   fact that IRTemps are 32-bit ints.  Do not change them to be ints of
   any other size. */
typedef UInt IRTemp;

/* Pretty-print an IRTemp. */
extern void ppIRTemp ( IRTemp );

#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)


/* --------------- Primops (arity 1,2,3 and 4) --------------- */

/* Primitive operations that are used in Unop, Binop, Triop and Qop
   IRExprs.  Once we take into account integer, floating point and SIMD
   operations of all the different sizes, there are quite a lot of them.
   Most instructions supported by the architectures that Vex supports
   (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
   are not; they are instead handled with dirty helpers that emulate
   their functionality.  Such obscure ones are thus not directly visible
   in the IR, but their effects on guest state (memory and registers)
   are made visible via the annotations in IRDirty structures.
*/
typedef
   enum {
      /* -- Do not change this ordering.  The IR generators rely on
            (eg) Iop_Add64 == Iop_Add8 + 3. -- */

      Iop_INVALID=0x14000,
      Iop_Add8,  Iop_Add16,  Iop_Add32,  Iop_Add64,
      Iop_Sub8,  Iop_Sub16,  Iop_Sub32,  Iop_Sub64,
      /* Signless mul.  MullS/MullU is elsewhere. */
      Iop_Mul8,  Iop_Mul16,  Iop_Mul32,  Iop_Mul64,
      Iop_Or8,   Iop_Or16,   Iop_Or32,   Iop_Or64,
      Iop_And8,  Iop_And16,  Iop_And32,  Iop_And64,
      Iop_Xor8,  Iop_Xor16,  Iop_Xor32,  Iop_Xor64,
      Iop_Shl8,  Iop_Shl16,  Iop_Shl32,  Iop_Shl64,
      Iop_Shr8,  Iop_Shr16,  Iop_Shr32,  Iop_Shr64,
      Iop_Sar8,  Iop_Sar16,  Iop_Sar32,  Iop_Sar64,
      /* Integer comparisons. */
      Iop_CmpEQ8,  Iop_CmpEQ16,  Iop_CmpEQ32,  Iop_CmpEQ64,
      Iop_CmpNE8,  Iop_CmpNE16,  Iop_CmpNE32,  Iop_CmpNE64,
      /* Tags for unary ops */
      Iop_Not8,  Iop_Not16,  Iop_Not32,  Iop_Not64,

      /* Exactly like CmpEQ8/16/32/64, but carrying the additional
         hint that these compute the success/failure of a CAS
         operation, and hence are almost certainly applied to two
         copies of the same value, which in turn has implications for
         Memcheck's instrumentation. */
      Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
      Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,

      /* -- Ordering not important after here. -- */

      /* Widening multiplies */
      Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
      Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,

      /* Weirdo integer stuff */
      Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
      Iop_Ctz64, Iop_Ctz32,   /* count trailing zeroes */
      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
         zero.  You must ensure they are never given a zero argument.
      */
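
      /* (An illustrative sketch, not a rule imposed by the IR:) a
         front end can guard the zero case with Mux0X, eg. for a
         64-bit count-trailing-zeroes that must yield 64 for a zero
         input:

            t2 = Mux0X( 1Uto8(CmpNEZ64(t1)), 0x40:I64, Ctz64(t1) )

         Mux0X returns its second operand when the condition is zero
         and its third otherwise, so the undefined Ctz64(0) result is
         never selected. */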

      /* Standard integer comparisons */
      Iop_CmpLT32S, Iop_CmpLT64S,
      Iop_CmpLE32S, Iop_CmpLE64S,
      Iop_CmpLT32U, Iop_CmpLT64U,
      Iop_CmpLE32U, Iop_CmpLE64U,

      /* As a sop to Valgrind-Memcheck, the following are useful. */
      Iop_CmpNEZ8, Iop_CmpNEZ16, Iop_CmpNEZ32, Iop_CmpNEZ64,
      Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-0s; other -> all-1s */
      Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
      Iop_Max32U, /* unsigned max */

      /* PowerPC-style 3-way integer comparisons.  Without them it is
         difficult to simulate PPC efficiently.
         op(x,y) | x < y  = 0x8 else
                 | x > y  = 0x4 else
                 | x == y = 0x2
      */
      Iop_CmpORD32U, Iop_CmpORD64U,
      Iop_CmpORD32S, Iop_CmpORD64S,

      /* Division */
      /* TODO: clarify semantics wrt rounding, negative values, whatever */
      Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
      Iop_DivS32,   // ditto, signed
      Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
      Iop_DivS64,   // ditto, signed
      Iop_DivU64E,  // :: I64,I64 -> I64 (dividend is 64-bit arg (hi)
                    //                    concat with 64 0's (low))
      Iop_DivS64E,  // ditto, signed
      Iop_DivU32E,  // :: I32,I32 -> I32 (dividend is 32-bit arg (hi)
                    //                    concat with 32 0's (low))
      Iop_DivS32E,  // ditto, signed

      Iop_DivModU64to32, // :: I64,I32 -> I64
                         // of which lo half is div and hi half is mod
      Iop_DivModS64to32, // ditto, signed

      Iop_DivModU128to64, // :: V128,I64 -> V128
                          // of which lo half is div and hi half is mod
      Iop_DivModS128to64, // ditto, signed

      Iop_DivModS64to64, // :: I64,I64 -> I128
                         // of which lo half is div and hi half is mod
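
      /* For example (illustrative): DivModU64to32(7, 3) divides the
         64-bit value 7 by the 32-bit value 3, giving quotient 2 and
         remainder 1, packed as the I64 value 0x0000000100000002
         (mod in the high half, div in the low half). */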

      /* Integer conversions.  Some of these are redundant (eg
         Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
         having a complete set reduces the typical dynamic size of IR
         and makes the instruction selectors easier to write. */

      /* Widening conversions */
      Iop_8Uto16, Iop_8Uto32,  Iop_8Uto64,
                  Iop_16Uto32, Iop_16Uto64,
                               Iop_32Uto64,
      Iop_8Sto16, Iop_8Sto32,  Iop_8Sto64,
                  Iop_16Sto32, Iop_16Sto64,
                               Iop_32Sto64,

      /* Narrowing conversions */
      Iop_64to8, Iop_32to8, Iop_64to16,
      /* 8 <-> 16 bit conversions */
      Iop_16to8,      // :: I16 -> I8, low half
      Iop_16HIto8,    // :: I16 -> I8, high half
      Iop_8HLto16,    // :: (I8,I8) -> I16
      /* 16 <-> 32 bit conversions */
      Iop_32to16,     // :: I32 -> I16, low half
      Iop_32HIto16,   // :: I32 -> I16, high half
      Iop_16HLto32,   // :: (I16,I16) -> I32
      /* 32 <-> 64 bit conversions */
      Iop_64to32,     // :: I64 -> I32, low half
      Iop_64HIto32,   // :: I64 -> I32, high half
      Iop_32HLto64,   // :: (I32,I32) -> I64
      /* 64 <-> 128 bit conversions */
      Iop_128to64,    // :: I128 -> I64, low half
      Iop_128HIto64,  // :: I128 -> I64, high half
      Iop_64HLto128,  // :: (I64,I64) -> I128
      /* 1-bit stuff */
      Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
      Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
      Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
      Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
      Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
      Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
      Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
      Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
      Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
      Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
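
      /* For example (illustrative): 32HLto64(0xDEADBEEF:I32,
         0xCAFEBABE:I32) produces 0xDEADBEEFCAFEBABE:I64, and
         applying 64HIto32 and 64to32 to that value recovers the two
         original halves. */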

      /* ------ Floating point.  We try to be IEEE754 compliant. ------ */

      /* --- Simple stuff as mandated by 754. --- */

      /* Binary operations, with rounding. */
      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
      Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,

      /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
      Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,

      /* Variants of the above which produce a 64-bit result but which
         round their result to an IEEE float range first. */
      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
      Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,

      /* Unary operations, without rounding. */
      /* :: F64 -> F64 */
      Iop_NegF64, Iop_AbsF64,

      /* :: F32 -> F32 */
      Iop_NegF32, Iop_AbsF32,

      /* Unary operations, with rounding. */
      /* :: IRRoundingMode(I32) x F64 -> F64 */
      Iop_SqrtF64, Iop_SqrtF64r32,

      /* :: IRRoundingMode(I32) x F32 -> F32 */
      Iop_SqrtF32,

      /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
            0x45 Unordered
            0x01 LT
            0x00 GT
            0x40 EQ
         This just happens to be the Intel encoding.  The values
         are recorded in the type IRCmpF64Result.
      */
      /* :: F64 x F64 -> IRCmpF64Result(I32) */
      Iop_CmpF64,
      Iop_CmpF32,
      Iop_CmpF128,

      /* --- Int to/from FP conversions. --- */

      /* For the most part, these take a first argument :: Ity_I32 (as
         IRRoundingMode) which is an indication of the rounding mode
         to use, as per the following encoding ("the standard
         encoding"):
            00b  to nearest (the default)
            01b  to -infinity
            10b  to +infinity
            11b  to zero
         This just happens to be the Intel encoding.  For reference only,
         the PPC encoding is:
            00b  to nearest (the default)
            01b  to zero
            10b  to +infinity
            11b  to -infinity
         Any PPC -> IR front end will have to translate these PPC
         encodings, as encoded in the guest state, to the standard
         encodings, to pass to the primops.
         For reference only, the ARM VFP encoding is:
            00b  to nearest
            01b  to +infinity
            10b  to -infinity
            11b  to zero
         Again, this will have to be converted to the standard encoding
         to pass to primops.

         If one of these conversions gets an out-of-range condition,
         or a NaN, as an argument, the result is host-defined.  On x86
         the "integer indefinite" value 0x80..00 is produced.  On PPC
         it is either 0x80..00 or 0x7F..FF depending on the sign of
         the argument.

         On ARMvfp, when converting to a signed integer result, the
         overflow result is 0x80..00 for negative args and 0x7F..FF
         for positive args.  For unsigned integer results it is
         0x00..00 and 0xFF..FF respectively.

         Rounding is required whenever the destination type cannot
         represent exactly all values of the source type.
      */
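      /* For example (illustrative): a C-style truncating cast (Int)d
         of an F64 value t5 can be expressed by passing the constant
         rounding mode 3 (round towards zero, Irrm_ZERO below):

            t6 = F64toI32S(0x3:I32,t5)
      */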
      Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
      Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
      Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
      Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */

      Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */

      Iop_I16StoF64, /*                       signed I16 -> F64 */
      Iop_I32StoF64, /*                       signed I32 -> F64 */
      Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
      Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
      Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */

      Iop_I32UtoF64, /*                       unsigned I32 -> F64 */

      Iop_F32toI16S, /* IRRoundingMode(I32) x F32 -> signed I16 */
      Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
      Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */

      Iop_I16StoF32, /*                       signed I16 -> F32 */
      Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
      Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */

      /* Conversion between floating point formats */
      Iop_F32toF64,  /*                       F32 -> F64 */
      Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */

      /* Reinterpretation.  Take an F64 and produce an I64 with
         the same bit pattern, or vice versa. */
      Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
      Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,

      /* Support for 128-bit floating point */
      Iop_F64HLtoF128, /* (high half of F128,low half of F128) -> F128 */
      Iop_F128HItoF64, /* F128 -> high half of F128 into a F64 register */
      Iop_F128LOtoF64, /* F128 -> low  half of F128 into a F64 register */

      /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
      Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,

      /* :: F128 -> F128 */
      Iop_NegF128, Iop_AbsF128,

      /* :: IRRoundingMode(I32) x F128 -> F128 */
      Iop_SqrtF128,

      Iop_I32StoF128, /*                signed I32  -> F128 */
      Iop_I64StoF128, /*                signed I64  -> F128 */
      Iop_F32toF128,  /*                       F32  -> F128 */
      Iop_F64toF128,  /*                       F64  -> F128 */

      Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32 */
      Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64 */
      Iop_F128toF64,  /* IRRoundingMode(I32) x F128 -> F64 */
      Iop_F128toF32,  /* IRRoundingMode(I32) x F128 -> F32 */

      /* --- guest x86/amd64 specifics, not mandated by 754. --- */

      /* Binary ops, with rounding. */
      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
      Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
      Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
      Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
      Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
      Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
      Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
      Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
      Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
      /* Note that on x86 guest, PRem1{C3210} has the same behaviour
         as the IEEE mandated RemF64, except it is limited in the
         range of its operand.  Hence the partialness. */

      /* Unary ops, with rounding. */
      /* :: IRRoundingMode(I32) x F64 -> F64 */
      Iop_SinF64,    /* FSIN */
      Iop_CosF64,    /* FCOS */
      Iop_TanF64,    /* FTAN */
      Iop_2xm1F64,   /* (2^arg - 1.0) */
      Iop_RoundF64toInt, /* F64 value to nearest integral value (still
                            as F64) */
      Iop_RoundF32toInt, /* F32 value to nearest integral value (still
                            as F32) */

      /* --- guest s390 specifics, not mandated by 754. --- */

      /* Fused multiply-add/sub */
      /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
            (computes op3 * op2 +/- op1) */
      Iop_MAddF32, Iop_MSubF32,

      /* --- guest ppc32/64 specifics, not mandated by 754. --- */

      /* Ternary operations, with rounding. */
      /* Fused multiply-add/sub, with 112-bit intermediate
         precision for ppc.
         Also used to implement fused multiply-add/sub for s390. */
      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
            (computes arg2 * arg3 +/- arg4) */
      Iop_MAddF64, Iop_MSubF64,

      /* Variants of the above which produce a 64-bit result but which
         round their result to an IEEE float range first. */
      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
      Iop_MAddF64r32, Iop_MSubF64r32,

      /* :: F64 -> F64 */
      Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
      Iop_RoundF64toF64_NEAREST, /* frin */
      Iop_RoundF64toF64_NegINF,  /* frim */
      Iop_RoundF64toF64_PosINF,  /* frip */
      Iop_RoundF64toF64_ZERO,    /* friz */

      /* :: F64 -> F32 */
      Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */

      /* :: IRRoundingMode(I32) x F64 -> F64 */
      Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
      /* NB: pretty much the same as Iop_F64toF32, except no change
         of type. */

      /* :: F64 -> I32 */
      Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
                       from FP result */

      /* ------------------ 32-bit SIMD Integer ------------------ */

      /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
      Iop_QAdd32S,
      Iop_QSub32S,

      /* 16x2 add/sub, also signed/unsigned saturating variants */
      Iop_Add16x2, Iop_Sub16x2,
      Iop_QAdd16Sx2, Iop_QAdd16Ux2,
      Iop_QSub16Sx2, Iop_QSub16Ux2,

      /* 16x2 signed/unsigned halving add/sub.  For each lane, these
         compute bits 16:1 of (eg) sx(argL) + sx(argR),
         or zx(argL) - zx(argR) etc. */
      Iop_HAdd16Ux2, Iop_HAdd16Sx2,
      Iop_HSub16Ux2, Iop_HSub16Sx2,
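
      /* For example (illustrative): in one lane of HAdd16Ux2, 0xFFFF
         and 0x0001 zero-extend and sum to 0x10000, of which bits 16:1
         are kept, giving 0x8000.  The halving preserves the carry-out
         that a plain Add16x2 would discard. */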

      /* 8x4 add/sub, also signed/unsigned saturating variants */
      Iop_Add8x4, Iop_Sub8x4,
      Iop_QAdd8Sx4, Iop_QAdd8Ux4,
      Iop_QSub8Sx4, Iop_QSub8Ux4,

      /* 8x4 signed/unsigned halving add/sub.  For each lane, these
         compute bits 8:1 of (eg) sx(argL) + sx(argR),
         or zx(argL) - zx(argR) etc. */
      Iop_HAdd8Ux4, Iop_HAdd8Sx4,
      Iop_HSub8Ux4, Iop_HSub8Sx4,

      /* 8x4 sum of absolute unsigned differences. */
      Iop_Sad8Ux4,

      /* MISC (vector integer cmp != 0) */
      Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,

      /* ------------------ 64-bit SIMD FP ------------------------ */

      /* Conversion to/from int */
      Iop_I32UtoFx2, Iop_I32StoFx2,         /* I32x2 -> F32x2 */
      Iop_FtoI32Ux2_RZ, Iop_FtoI32Sx2_RZ,   /* F32x2 -> I32x2 */
      /* Fixed32 format is a floating-point number with a fixed number
         of fraction bits.  The number of fraction bits is passed as a
         second argument of type I8. */
      Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
      Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */

      /* Binary operations */
      Iop_Max32Fx2, Iop_Min32Fx2,
      /* Pairwise Min and Max.  See integer pairwise operations for more
         details. */
      Iop_PwMax32Fx2, Iop_PwMin32Fx2,
      /* Note: For the following compares, the arm front-end assumes a
         NaN in a lane of either argument returns zero for that lane. */
      Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,

      /* Vector Reciprocal Estimate finds an approximate reciprocal of
         each element in the operand vector, and places the results in
         the destination vector. */
      Iop_Recip32Fx2,

      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
         Note that if one of the arguments is zero and the other is an
         infinity of either sign, the result of the operation is 2.0. */
      Iop_Recps32Fx2,

      /* Vector Reciprocal Square Root Estimate finds an approximate
         reciprocal square root of each element in the operand
         vector. */
      Iop_Rsqrte32Fx2,

      /* Vector Reciprocal Square Root Step computes
         (3.0 - arg1 * arg2) / 2.0.
         Note that if one of the arguments is zero and the other is an
         infinity of either sign, the result of the operation is 1.5. */
      Iop_Rsqrts32Fx2,

      /* Unary */
      Iop_Neg32Fx2, Iop_Abs32Fx2,

      /* ------------------ 64-bit SIMD Integer. ------------------ */

      /* MISC (vector integer cmp != 0) */
      Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,

      /* ADDITION (normal / unsigned sat / signed sat) */
      Iop_Add8x8, Iop_Add16x4, Iop_Add32x2,
      Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
      Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,

      /* PAIRWISE operations */
      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
      Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
      Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
      Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
      Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
      Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
      /* The lengthening variant is unary.  The resulting vector
         contains half as many elements as the operand, but each is
         twice as wide.  Example:
            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
         where a+b and c+d are unsigned 32-bit values. */
      Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
      Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,

      /* SUBTRACTION (normal / unsigned sat / signed sat) */
      Iop_Sub8x8, Iop_Sub16x4, Iop_Sub32x2,
      Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
      Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,

      /* ABSOLUTE VALUE */
      Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,

      /* MULTIPLICATION (normal / high half of signed/unsigned /
         polynomial) */
      Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
      Iop_Mul32Fx2,
      Iop_MulHi16Ux4,
      Iop_MulHi16Sx4,
      /* Polynomial multiplication treats its arguments as coefficients
         of polynomials over {0, 1}. */
      Iop_PolynomialMul8x8,

      /* Vector Saturating Doubling Multiply Returning High Half and
         Vector Saturating Rounding Doubling Multiply Returning High Half */
      /* These IROps multiply corresponding elements in two vectors, double
         the results, and place the most significant half of the final results
         in the destination vector.  The results are truncated or rounded.  If
         any of the results overflow, they are saturated. */
      Iop_QDMulHi16Sx4,  Iop_QDMulHi32Sx2,
      Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,

      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
      Iop_Avg8Ux8,
      Iop_Avg16Ux4,

      /* MIN/MAX */
      Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
      Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
      Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
      Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,

      /* COMPARISON */
      Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
      Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
      Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,

      /* COUNT ones / leading zeroes / leading sign bits (not including
         topmost bit) */
      Iop_Cnt8x8,
      Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
      Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,

      /* VECTOR x VECTOR SHIFT / ROTATE */
      Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
      Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
      Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
      Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,

      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
      Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
      Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
      Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,

      /* VECTOR x VECTOR SATURATING SHIFT */
      Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
      Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
      /* VECTOR x INTEGER SATURATING SHIFT */
      Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
      Iop_QShlN8x8,  Iop_QShlN16x4,  Iop_QShlN32x2,  Iop_QShlN64x1,
      Iop_QSalN8x8,  Iop_QSalN16x4,  Iop_QSalN32x2,  Iop_QSalN64x1,

      /* NARROWING (binary)
         -- narrow 2xI64 into 1xI64, hi half from left arg */
      /* For saturated narrowing, I believe there are 4 variants of
         the basic arithmetic operation, depending on the signedness
         of argument and result.  Here are examples that exemplify
         what I mean:

         QNarrow16Uto8U ( UShort x )  if (x >u 255) x = 255;
                                      return x[7:0];

         QNarrow16Sto8S ( Short x )   if (x <s -128) x = -128;
                                      if (x >s  127) x = 127;
                                      return x[7:0];

         QNarrow16Uto8S ( UShort x )  if (x >u 127) x = 127;
                                      return x[7:0];

         QNarrow16Sto8U ( Short x )   if (x <s 0)   x = 0;
                                      if (x >s 255) x = 255;
                                      return x[7:0];
      */
      Iop_QNarrowBin16Sto8Ux8,
      Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
      Iop_NarrowBin16to8x8,    Iop_NarrowBin32to16x4,

      /* INTERLEAVING */
      /* Interleave lanes from low or high halves of
         operands.  Most-significant result lane is from the left
         arg. */
      Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
      Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
      /* Interleave odd/even lanes of operands.  Most-significant result lane
         is from the left arg.  Note that Interleave{Odd,Even}Lanes32x2 are
         identical to Interleave{HI,LO}32x2 and so are omitted. */
      Iop_InterleaveOddLanes8x8,  Iop_InterleaveEvenLanes8x8,
      Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,


      /* CONCATENATION -- build a new value by concatenating either
         the even or odd lanes of both operands.  Note that
         Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
         and so are omitted. */
      Iop_CatOddLanes8x8,  Iop_CatOddLanes16x4,
      Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,

      /* GET / SET elements of VECTOR
         GET is binop (I64, I8) -> I<elem_size>
         SET is triop (I64, I8, I<elem_size>) -> I64 */
      /* Note: the arm back-end handles only a constant second argument */
      Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
      Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,

      /* DUPLICATING -- copy value to all lanes */
      Iop_Dup8x8, Iop_Dup16x4, Iop_Dup32x2,

      /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest
         bytes of result and arg3 lowest bytes of arg2 to arg3 highest
         bytes of result.
         It is a triop: (I64, I64, I8) -> I64 */
      /* Note: the arm back-end handles only a constant third argument. */
      Iop_Extract64,

      /* REVERSE the order of elements within each half-word, word,
         or double-word */
      /* Examples:
            Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
            Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
            Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
      Iop_Reverse16_8x8,
      Iop_Reverse32_8x8, Iop_Reverse32_16x4,
      Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,

      /* PERMUTING -- copy src bytes to dst,
         as indexed by control vector bytes:
            for i in 0 .. 7 . result[i] = argL[ argR[i] ]
         argR[i] values may only be in the range 0 .. 7, else behaviour
         is undefined. */
      Iop_Perm8x8,

      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root
         Estimate.  See floating-point equivalents for details. */
      Iop_Recip32x2, Iop_Rsqrte32x2,

      /* ------------------ Decimal Floating Point ------------------ */

      /* ARITHMETIC INSTRUCTIONS   64-bit
         ----------------------------------
         IRRoundingModeDFP(I32) X D64 X D64 -> D64
      */
      Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64,

      /* ARITHMETIC INSTRUCTIONS  128-bit
         ----------------------------------
         IRRoundingModeDFP(I32) X D128 X D128 -> D128
      */
      Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128,

      /* SHIFT SIGNIFICAND INSTRUCTIONS
       *    The DFP significand is shifted by the number of digits specified
       *    by the U8 operand.  Digits shifted out of the leftmost digit are
       *    lost.  Zeros are supplied to the vacated positions on the right.
       *    The sign of the result is the same as the sign of the original
       *    operand.
       *
       * D64 x U8 -> D64    left shift and right shift respectively */
      Iop_ShlD64, Iop_ShrD64,

      /* D128 x U8 -> D128  left shift and right shift respectively */
      Iop_ShlD128, Iop_ShrD128,


      /* FORMAT CONVERSION INSTRUCTIONS
       *   D32 -> D64
       */
      Iop_D32toD64,

      /*   D64 -> D128 */
      Iop_D64toD128,

      /*   signed I64 -> D128 */
      Iop_I64StoD128,

      /*   IRRoundingModeDFP(I32) x D64 -> D32 */
      Iop_D64toD32,

      /*   IRRoundingModeDFP(I32) x D128 -> D64 */
      Iop_D128toD64,

      /*   IRRoundingModeDFP(I32) x I64 -> D64 */
      Iop_I64StoD64,

      /*   IRRoundingModeDFP(I32) x D64 -> I64 */
      Iop_D64toI64S,

      /*   IRRoundingModeDFP(I32) x D128 -> I64 */
      Iop_D128toI64S,

      /* ROUNDING INSTRUCTIONS
       * IRRoundingMode(I32) x D64 -> D64
       * The D64 operand, if a finite number, is rounded to an integer value.
       */
      Iop_RoundD64toInt,

      /* IRRoundingMode(I32) x D128 -> D128 */
      Iop_RoundD128toInt,

      /* COMPARE INSTRUCTIONS
       * D64 x D64 -> IRCmpD64Result(I32) */
      Iop_CmpD64,

      /* D128 x D128 -> IRCmpD64Result(I32) */
      Iop_CmpD128,

      /* QUANTIZE AND ROUND INSTRUCTIONS
       * The second operand is converted and rounded to the form of the
       * first operand's exponent, and then rounded based on the
       * specified rounding mode parameter.
       *
       * IRRoundingModeDFP(I32) x D64 x D64 -> D64 */
      Iop_QuantizeD64,

      /* IRRoundingModeDFP(I32) x D128 x D128 -> D128 */
      Iop_QuantizeD128,

      /* IRRoundingModeDFP(I32) x I8 x D64 -> D64
       * The Decimal Floating point operand is rounded to the requested
       * significance given by the I8 operand as specified by the rounding
       * mode.
       */
      Iop_SignificanceRoundD64,

      /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
      Iop_SignificanceRoundD128,

      /* EXTRACT AND INSERT INSTRUCTIONS
       * D64 -> I64
       * The exponent of the D64 operand is extracted.  The
       * extracted exponent is converted to a 64-bit signed binary integer.
       */
      Iop_ExtractExpD64,

      /* D128 -> I64 */
      Iop_ExtractExpD128,

      /* I64 x I64 -> D64
       * The exponent is specified by the first I64 operand and the signed
       * significand is given by the second I64 value.  The result is a D64
       * value consisting of the specified significand and exponent whose
       * sign is that of the specified significand.
       */
      Iop_InsertExpD64,

      /* I64 x I128 -> D128 */
      Iop_InsertExpD128,

      /* Support for 128-bit DFP type */
      Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64,

      /* I64 -> I64
       * Convert a 50-bit densely packed BCD string to a 60-bit BCD string
       */
      Iop_DPBtoBCD,

      /* I64 -> I64
       * Convert a 60-bit BCD string to a 50-bit densely packed BCD string
       */
      Iop_BCDtoDPB,

      /* Conversion I64 -> D64 */
      Iop_ReinterpI64asD64,

      /* Conversion D64 -> I64 */
      Iop_ReinterpD64asI64,

      /* ------------------ 128-bit SIMD FP. ------------------ */

      /* --- 32x4 vector FP --- */

      /* binary */
      Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
      Iop_Max32Fx4, Iop_Min32Fx4,
      Iop_Add32Fx2, Iop_Sub32Fx2,
      /* Note: For the following compares, the ppc and arm front-ends
         assume a NaN in a lane of either argument returns zero for
         that lane. */
      Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
      Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,

      /* Vector Absolute */
      Iop_Abs32Fx4,

      /* Pairwise Max and Min.  See integer pairwise operations for
         details. */
      Iop_PwMax32Fx4, Iop_PwMin32Fx4,

      /* unary */
      Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
      Iop_Neg32Fx4,

      /* Vector Reciprocal Estimate finds an approximate reciprocal of
         each element in the operand vector, and places the results in
         the destination vector. */
      Iop_Recip32Fx4,

      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
         Note that if one of the arguments is zero and the other is an
         infinity of either sign, the result of the operation is 2.0. */
      Iop_Recps32Fx4,

      /* Vector Reciprocal Square Root Estimate finds an approximate
         reciprocal square root of each element in the operand
         vector. */
      Iop_Rsqrte32Fx4,

      /* Vector Reciprocal Square Root Step computes
         (3.0 - arg1 * arg2) / 2.0.
         Note that if one of the arguments is zero and the other is an
         infinity of either sign, the result of the operation is 1.5. */
      Iop_Rsqrts32Fx4,


      /* --- Int to/from FP conversion --- */
      /* Unlike the standard fp conversions, these irops take no
         rounding mode argument.  Instead the irop trailers _R{M,P,N,Z}
         indicate the mode: {-inf, +inf, nearest, zero} respectively. */
      Iop_I32UtoFx4,     Iop_I32StoFx4,     /* I32x4 -> F32x4 */
      Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,  /* F32x4 -> I32x4 */
      Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 (with
                                               saturation) */
      Iop_RoundF32x4_RM, Iop_RoundF32x4_RP, /* round to fp integer */
      Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ, /* round to fp integer */
      /* Fixed32 format is a floating-point number with a fixed number
         of fraction bits.  The number of fraction bits is passed as a
         second argument of type I8. */
      Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
      Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */

      /* --- Single to/from half conversion --- */
      /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
      Iop_F32toF16x4, Iop_F16toF32x4,       /* F32x4 <-> F16x4 */

      /* --- 32x4 lowest-lane-only scalar FP --- */

      /* In binary cases, upper 3/4 is copied from first operand.  In
         unary cases, upper 3/4 is copied from the operand. */

      /* binary */
      Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
      Iop_Max32F0x4, Iop_Min32F0x4,
      Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,

      /* unary */
      Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,

      /* --- 64x2 vector FP --- */

      /* binary */
      Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
      Iop_Max64Fx2, Iop_Min64Fx2,
      Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,

      /* unary */
      Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,

      /* --- 64x2 lowest-lane-only scalar FP --- */

      /* In binary cases, upper half is copied from first operand.  In
         unary cases, upper half is copied from the operand. */

      /* binary */
      Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
      Iop_Max64F0x2, Iop_Min64F0x2,
      Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,

      /* unary */
      Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,

      /* --- pack / unpack --- */

      /* 64 <-> 128 bit vector */
      Iop_V128to64,     // :: V128 -> I64, low half
      Iop_V128HIto64,   // :: V128 -> I64, high half
      Iop_64HLtoV128,   // :: (I64,I64) -> V128

      Iop_64UtoV128,
      Iop_SetV128lo64,

      /* 32 <-> 128 bit vector */
      Iop_32UtoV128,
      Iop_V128to32,     // :: V128 -> I32, lowest lane
      Iop_SetV128lo32,  // :: (V128,I32) -> V128

      /* ------------------ 128-bit SIMD Integer. ------------------ */

      /* BITWISE OPS */
      Iop_NotV128,
      Iop_AndV128, Iop_OrV128, Iop_XorV128,

      /* VECTOR SHIFT (shift amt :: Ity_I8) */
      Iop_ShlV128, Iop_ShrV128,

      /* MISC (vector integer cmp != 0) */
      Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,

      /* ADDITION (normal / unsigned sat / signed sat) */
      Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
      Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
      Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,

      /* SUBTRACTION (normal / unsigned sat / signed sat) */
      Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
      Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
      Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,

      /* MULTIPLICATION (normal / high half of signed/unsigned) */
      Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4,
      Iop_MulHi16Ux8, Iop_MulHi32Ux4,
      Iop_MulHi16Sx8, Iop_MulHi32Sx4,
      /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
      Iop_MullEven8Ux16, Iop_MullEven16Ux8,
      Iop_MullEven8Sx16, Iop_MullEven16Sx8,
      /* FIXME: document these */
      Iop_Mull8Ux8, Iop_Mull8Sx8,
      Iop_Mull16Ux4, Iop_Mull16Sx4,
      Iop_Mull32Ux2, Iop_Mull32Sx2,
      /* Vector Saturating Doubling Multiply Returning High Half and
         Vector Saturating Rounding Doubling Multiply Returning High Half */
      /* These IROps multiply corresponding elements in two vectors, double
         the results, and place the most significant half of the final results
         in the destination vector.  The results are truncated or rounded.  If
         any of the results overflow, they are saturated. */
      Iop_QDMulHi16Sx8,  Iop_QDMulHi32Sx4,
      Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
      /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
      Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
      /* Polynomial multiplication treats its arguments as coefficients
         of polynomials over {0, 1}. */
      Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
      Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */

      /* PAIRWISE operations */
      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
      Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
      Iop_PwAdd32Fx2,
      /* The lengthening variant is unary.  The resulting vector
         contains half as many elements as the operand, but each is
         twice as wide.  Example:
            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
         where a+b and c+d are unsigned 32-bit values. */
      Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
      Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,

      /* ABSOLUTE VALUE */
      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,

      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
      Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
      Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,

      /* MIN/MAX */
      Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4,
      Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4,
      Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4,
      Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,

      /* COMPARISON */
      Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2,
      Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
      Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,

      /* COUNT ones / leading zeroes / leading sign bits (not including
         topmost bit) */
      Iop_Cnt8x16,
      Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
      Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,

      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
      Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
      Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
      Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,

      /* VECTOR x VECTOR SHIFT / ROTATE */
      Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
      Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
      Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
      Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
      Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,

      /* VECTOR x VECTOR SATURATING SHIFT */
      Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
      Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
      /* VECTOR x INTEGER SATURATING SHIFT */
      Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
      Iop_QShlN8x16,  Iop_QShlN16x8,  Iop_QShlN32x4,  Iop_QShlN64x2,
      Iop_QSalN8x16,  Iop_QSalN16x8,  Iop_QSalN32x4,  Iop_QSalN64x2,

      /* NARROWING (binary)
         -- narrow 2xV128 into 1xV128, hi half from left arg */
      /* See comments above w.r.t. U vs S issues in saturated narrowing. */
      Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
      Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
      Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
      Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,

      /* NARROWING (unary) -- narrow V128 into I64 */
      Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
      /* Saturating narrowing from signed source to signed/unsigned
         destination */
      Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
      Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
      /* Saturating narrowing from unsigned source to unsigned destination */
      Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,

      /* WIDENING -- sign or zero extend each element of the argument
         vector to twice its original size.  The resulting vector consists of
         the same number of elements but each element and the vector itself
         are twice as wide.
         All operations are I64->V128.
         Example
            Iop_Widen32Sto64x2( [a, b] ) = [c, d]
               where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
      Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
      Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,

      /* INTERLEAVING */
      /* Interleave lanes from low or high halves of
         operands.  Most-significant result lane is from the left
         arg. */
      Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
      Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
      Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
      Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
      /* Interleave odd/even lanes of operands.  Most-significant result lane
         is from the left arg. */
      Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
      Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
      Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,

      /* CONCATENATION -- build a new value by concatenating either
         the even or odd lanes of both operands. */
      Iop_CatOddLanes8x16,  Iop_CatOddLanes16x8,  Iop_CatOddLanes32x4,
      Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,

      /* GET elements of VECTOR
         GET is binop (V128, I8) -> I<elem_size> */
      /* Note: the arm back-end handles only a constant second argument. */
      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,

      /* DUPLICATING -- copy value to all lanes */
      Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4,

      /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest
         bytes of result and arg3 lowest bytes of arg2 to arg3 highest
         bytes of result.
         It is a triop: (V128, V128, I8) -> V128 */
      /* Note: the ARM back end handles only a constant arg3 in this
         operation. */
      Iop_ExtractV128,

      /* REVERSE the order of elements within each half-word, word,
         or double-word */
      /* Examples:
            Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
            Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
      Iop_Reverse16_8x16,
      Iop_Reverse32_8x16, Iop_Reverse32_16x8,
      Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,

      /* PERMUTING -- copy src bytes to dst,
         as indexed by control vector bytes:
            for i in 0 .. 15 . result[i] = argL[ argR[i] ]
         argR[i] values may only be in the range 0 .. 15, else behaviour
         is undefined. */
      Iop_Perm8x16,
      Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */

      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root
         Estimate.  See floating-point equivalents for details. */
      Iop_Recip32x4, Iop_Rsqrte32x4,

      /* ------------------ 256-bit SIMD Integer. ------------------ */

      /* Pack/unpack */
      Iop_V256to64_0,  // V256 -> I64, extract least significant lane
      Iop_V256to64_1,
      Iop_V256to64_2,
      Iop_V256to64_3,  // V256 -> I64, extract most significant lane

      Iop_64x4toV256,  // (I64,I64,I64,I64)->V256
                       // first arg is most significant lane

      Iop_V256toV128_0, // V256 -> V128, less significant lane
      Iop_V256toV128_1, // V256 -> V128, more significant lane
      Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif

      Iop_AndV256,
      Iop_OrV256,
      Iop_XorV256,
      Iop_NotV256,

      /* MISC (vector integer cmp != 0) */
      Iop_CmpNEZ32x8, Iop_CmpNEZ64x4,

      /* ------------------ 256-bit SIMD FP. ------------------ */
      Iop_Add64Fx4,
      Iop_Sub64Fx4,
      Iop_Mul64Fx4,
      Iop_Div64Fx4,
      Iop_Add32Fx8,
      Iop_Sub32Fx8,
      Iop_Mul32Fx8,
      Iop_Div32Fx8,

      Iop_Sqrt32Fx8,
      Iop_Sqrt64Fx4,
      Iop_RSqrt32Fx8,
      Iop_Recip32Fx8,

      Iop_Max32Fx8, Iop_Min32Fx8,
      Iop_Max64Fx4, Iop_Min64Fx4
   }
   IROp;

/* Pretty-print an op. */
extern void ppIROp ( IROp );


/* Encoding of IEEE754-specified rounding modes.  This is the same as
   the encoding used by Intel IA32 to indicate x87 rounding mode.
   Note, various front and back ends rely on the actual numerical
   values of these, so do not change them. */
typedef
   enum {
      Irrm_NEAREST = 0,
      Irrm_NegINF  = 1,
      Irrm_PosINF  = 2,
      Irrm_ZERO    = 3
   }
   IRRoundingMode;
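
/* For example (an illustrative sketch; IRExpr_Binop and IRExpr_Const
   are declared later in this file): front ends pass a rounding mode to
   a rounding primop as an ordinary I32 argument, so rounding an F64
   towards zero builds something like

      IRExpr_Binop(Iop_F64toI32S,
                   IRExpr_Const(IRConst_U32(Irrm_ZERO)),
                   <the F64 argument>)
*/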
1491
1492 /* The DFP encoding of the IEEE754-2008 rounding modes extends the
1493  * two-bit binary floating point rounding mode (IRRoundingMode) to three
1494  * bits. The DFP rounding modes are a superset of the binary rounding
1495  * modes. The encoding was chosen such that the mapping of the least
1496  * significant two bits of the IR encoding to the POWER encoding is the
1497  * same. The upper IR encoding bit is just a logical OR of the upper
1498  * rounding mode bit from the POWER encoding. */
1499 typedef
1500 enum {
1501 Irrm_DFP_NEAREST = 0, // Round to nearest, ties to even
1502 Irrm_DFP_NegINF = 1, // Round to negative infinity
1503 Irrm_DFP_PosINF = 2, // Round to positive infinity
1504 Irrm_DFP_ZERO = 3, // Round toward zero
1505 Irrm_DFP_NEAREST_TIE_AWAY_0 = 4, // Round to nearest, ties away from 0
1506 Irrm_DFP_PREPARE_SHORTER = 5, // Round to prepare for shorter
1507 // precision
1508 Irrm_DFP_AWAY_FROM_ZERO = 6, // Round away from 0
1509 Irrm_DFP_NEAREST_TIE_TOWARD_0 = 7 // Round to nearest, ties towards 0
1510 }
1511 IRRoundingModeDFP;
1512
1513 /* Floating point comparison result values, as created by Iop_CmpF64.
1514 This is also derived from what IA32 does. */
1515 typedef
1516 enum {
1517 Ircr_UN = 0x45,
1518 Ircr_LT = 0x01,
1519 Ircr_GT = 0x00,
1520 Ircr_EQ = 0x40
1521 }
1522 IRCmpF64Result;
1523
1524 typedef IRCmpF64Result IRCmpF32Result;
1525 typedef IRCmpF64Result IRCmpF128Result;
1526
1527 /* ------------------ Expressions ------------------ */
1528
1529 typedef struct _IRQop IRQop; /* forward declaration */
1530 typedef struct _IRTriop IRTriop; /* forward declaration */
1531
1532
1533 /* The different kinds of expressions. Their meaning is explained below
1534 in the comments for IRExpr. */
1535 typedef
1536 enum {
1537 Iex_Binder=0x15000,
1538 Iex_Get,
1539 Iex_GetI,
1540 Iex_RdTmp,
1541 Iex_Qop,
1542 Iex_Triop,
1543 Iex_Binop,
1544 Iex_Unop,
1545 Iex_Load,
1546 Iex_Const,
1547 Iex_Mux0X,
1548 Iex_CCall
1549 }
1550 IRExprTag;
1551
1552 /* An expression. Stored as a tagged union. 'tag' indicates what kind
1553 of expression this is. 'Iex' is the union that holds the fields. If
1554 an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
1555 expression, and the fields can be accessed with
1556 'e.Iex.Load.<fieldname>'.
1557
1558 For each kind of expression, we show what it looks like when
1559 pretty-printed with ppIRExpr().
1560 */
1561 typedef
1562 struct _IRExpr
1563 IRExpr;
1564
1565 struct _IRExpr {
1566 IRExprTag tag;
1567 union {
1568 /* Used only in pattern matching within Vex. Should not be seen
1569 outside of Vex. */
1570 struct {
1571 Int binder;
1572 } Binder;
1573
1574 /* Read a guest register, at a fixed offset in the guest state.
1575 ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
1576 */
1577 struct {
1578 Int offset; /* Offset into the guest state */
1579 IRType ty; /* Type of the value being read */
1580 } Get;
1581
1582 /* Read a guest register at a non-fixed offset in the guest
1583 state. This allows circular indexing into parts of the guest
1584 state, which is essential for modelling situations where the
1585 identity of guest registers is not known until run time. One
1586 example is the x87 FP register stack.
1587
1588 The part of the guest state to be treated as a circular array
1589 is described in the IRRegArray 'descr' field. It holds the
1590 offset of the first element in the array, the type of each
1591 element, and the number of elements.
1592
1593 The array index is indicated rather indirectly, in a way
1594 which makes optimisation easy: as the sum of variable part
1595 (the 'ix' field) and a constant offset (the 'bias' field).
1596
1597 Since the indexing is circular, the actual array index to use
1598 is computed as (ix + bias) % num-of-elems-in-the-array.
1599
1600 Here's an example. The description
1601
1602 (96:8xF64)[t39,-7]
1603
1604 describes an array of 8 F64-typed values, the
1605 guest-state-offset of the first being 96. This array is
1606 being indexed at (t39 - 7) % 8.
1607
1608 It is important to get the array size/type exactly correct
1609 since IR optimisation looks closely at such info in order to
1610 establish aliasing/non-aliasing between separate GetI and
1611 PutI events, which is used to establish when they can be
1612 reordered, etc. Putting incorrect info in will lead to
1613 obscure IR optimisation bugs.
1614
1615 ppIRExpr output: GETI<descr>[<ix>,<bias>]
1616 eg. GETI(128:8xI8)[t1,0]
1617 */
1618 struct {
1619 IRRegArray* descr; /* Part of guest state treated as circular */
1620 IRExpr* ix; /* Variable part of index into array */
1621 Int bias; /* Constant offset part of index into array */
1622 } GetI;
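 /* For instance, the (96:8xF64)[t39,-7] example above could be built
    with the constructors declared further below (a sketch; t39 is an
    I32-typed temporary obtained elsewhere):

       IRRegArray* descr = mkIRRegArray( 96, Ity_F64, 8 );
       IRExpr*     elem  = IRExpr_GetI( descr, IRExpr_RdTmp(t39), -7 );

    which reads the F64 element at index (t39 - 7) % 8. */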
1623
1624 /* The value held by a temporary.
1625 ppIRExpr output: t<tmp>, eg. t1
1626 */
1627 struct {
1628 IRTemp tmp; /* The temporary number */
1629 } RdTmp;
1630
1631 /* A quaternary operation.
1632 ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
1633 eg. MAddF64r32(t1, t2, t3, t4)
1634 */
1635 struct {
1636 IRQop* details;
1637 } Qop;
1638
1639 /* A ternary operation.
1640 ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
1641 eg. MulF64(1, 2.0, 3.0)
1642 */
1643 struct {
1644 IRTriop* details;
1645 } Triop;
1646
1647 /* A binary operation.
1648 ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
1649 */
1650 struct {
1651 IROp op; /* op-code */
1652 IRExpr* arg1; /* operand 1 */
1653 IRExpr* arg2; /* operand 2 */
1654 } Binop;
1655
1656 /* A unary operation.
1657 ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
1658 */
1659 struct {
1660 IROp op; /* op-code */
1661 IRExpr* arg; /* operand */
1662 } Unop;
1663
1664 /* A load from memory -- a normal load, not a load-linked.
1665 Load-Linkeds (and Store-Conditionals) are instead represented
1666 by IRStmt.LLSC since Load-Linkeds have side effects and so
1667 are not semantically valid IRExpr's.
1668 ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
1669 */
1670 struct {
1671 IREndness end; /* Endian-ness of the load */
1672 IRType ty; /* Type of the loaded value */
1673 IRExpr* addr; /* Address being loaded from */
1674 } Load;
1675
1676 /* A constant-valued expression.
1677 ppIRExpr output: <con>, eg. 0x4:I32
1678 */
1679 struct {
1680 IRConst* con; /* The constant itself */
1681 } Const;
1682
1683 /* A call to a pure (no side-effects) helper C function.
1684
1685 With the 'cee' field, 'name' is the function's name. It is
1686 only used for pretty-printing purposes. The address to call
1687 (host address, of course) is stored in the 'addr' field
1688 inside 'cee'.
1689
1690 The 'args' field is a NULL-terminated array of arguments.
1691 The stated return IRType, and the implied argument types,
1692 must match that of the function being called well enough so
1693 that the back end can actually generate correct code for the
1694 call.
1695
1696 The called function **must** satisfy the following:
1697
1698 * no side effects -- must be a pure function, the result of
1699 which depends only on the passed parameters.
1700
1701 * it may not look at, nor modify, any of the guest state
1702 since that would hide guest state transitions from
1703 instrumenters
1704
1705 * it may not access guest memory, since that would hide
1706 guest memory transactions from the instrumenters
1707
1708 * it must not assume that arguments are being evaluated in a
1709 particular order. The order of evaluation is unspecified.
1710
1711 This is restrictive, but makes the semantics clean, and does
1712 not interfere with IR optimisation.
1713
1714 If you want to call a helper which can mess with guest state
1715 and/or memory, instead use Ist_Dirty. This is a lot more
1716 flexible, but you have to give a bunch of details about what
1717 the helper does (and you better be telling the truth,
1718 otherwise any derived instrumentation will be wrong). Also
1719 Ist_Dirty inhibits various IR optimisations and so can cause
1720 quite poor code to be generated. Try to avoid it.
1721
1722 ppIRExpr output: <cee>(<args>):<retty>
1723 eg. foo{0x80489304}(t1, t2):I32
1724 */
1725 struct {
1726 IRCallee* cee; /* Function to call. */
1727 IRType retty; /* Type of return value. */
1728 IRExpr** args; /* Vector of argument expressions. */
1729 } CCall;
1730
1731 /* A ternary if-then-else operator. It returns expr0 if cond is
1732 zero, exprX otherwise. Note that it is STRICT, ie. both
1733 expr0 and exprX are evaluated in all cases.
1734
1735 ppIRExpr output: Mux0X(<cond>,<expr0>,<exprX>),
1736 eg. Mux0X(t6,t7,t8)
1737 */
1738 struct {
1739 IRExpr* cond; /* Condition */
1740 IRExpr* expr0; /* Value when cond is zero */
1741 IRExpr* exprX; /* Value when cond is nonzero */
1742 } Mux0X;
1743 } Iex;
1744 };
1745
1746 /* ------------------ A ternary expression ---------------------- */
1747 struct _IRTriop {
1748 IROp op; /* op-code */
1749 IRExpr* arg1; /* operand 1 */
1750 IRExpr* arg2; /* operand 2 */
1751 IRExpr* arg3; /* operand 3 */
1752 };
1753
1754 /* ------------------ A quaternary expression ------------------- */
1755 struct _IRQop {
1756 IROp op; /* op-code */
1757 IRExpr* arg1; /* operand 1 */
1758 IRExpr* arg2; /* operand 2 */
1759 IRExpr* arg3; /* operand 3 */
1760 IRExpr* arg4; /* operand 4 */
1761 };
1762
1763 /* Expression constructors. */
1764 extern IRExpr* IRExpr_Binder ( Int binder );
1765 extern IRExpr* IRExpr_Get ( Int off, IRType ty );
1766 extern IRExpr* IRExpr_GetI ( IRRegArray* descr, IRExpr* ix, Int bias );
1767 extern IRExpr* IRExpr_RdTmp ( IRTemp tmp );
1768 extern IRExpr* IRExpr_Qop ( IROp op, IRExpr* arg1, IRExpr* arg2,
1769 IRExpr* arg3, IRExpr* arg4 );
1770 extern IRExpr* IRExpr_Triop ( IROp op, IRExpr* arg1,
1771 IRExpr* arg2, IRExpr* arg3 );
1772 extern IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 );
1773 extern IRExpr* IRExpr_Unop ( IROp op, IRExpr* arg );
1774 extern IRExpr* IRExpr_Load ( IREndness end, IRType ty, IRExpr* addr );
1775 extern IRExpr* IRExpr_Const ( IRConst* con );
1776 extern IRExpr* IRExpr_CCall ( IRCallee* cee, IRType retty, IRExpr** args );
1777 extern IRExpr* IRExpr_Mux0X ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );
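/* For example, the expression tree Add32(t1,0x4:I32) -- the value of
   temporary t1 plus 4 -- can be built as (a sketch; t1 is assumed to
   have been allocated elsewhere with type Ity_I32):

      IRExpr* e = IRExpr_Binop( Iop_Add32,
                                IRExpr_RdTmp(t1),
                                IRExpr_Const(IRConst_U32(4)) );
*/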
1778
1779 /* Deep-copy an IRExpr. */
1780 extern IRExpr* deepCopyIRExpr ( IRExpr* );
1781
1782 /* Pretty-print an IRExpr. */
1783 extern void ppIRExpr ( IRExpr* );
1784
1785 /* NULL-terminated IRExpr vector constructors, suitable for
1786 use as arg lists in clean/dirty helper calls. */
1787 extern IRExpr** mkIRExprVec_0 ( void );
1788 extern IRExpr** mkIRExprVec_1 ( IRExpr* );
1789 extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
1790 extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
1791 extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
1792 extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1793 IRExpr* );
1794 extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1795 IRExpr*, IRExpr* );
1796 extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1797 IRExpr*, IRExpr*, IRExpr* );
1798 extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1799 IRExpr*, IRExpr*, IRExpr*, IRExpr*);
1800
1801 /* IRExpr copiers:
1802 - shallowCopy: shallow-copy (ie. create a new vector that shares the
1803 elements with the original).
1804 - deepCopy: deep-copy (ie. create a completely new vector). */
1805 extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
1806 extern IRExpr** deepCopyIRExprVec ( IRExpr** );
1807
1808 /* Make a constant expression from the given host word taking into
1809 account (of course) the host word size. */
1810 extern IRExpr* mkIRExpr_HWord ( HWord );
1811
1812 /* Convenience function for constructing clean helper calls. */
1813 extern
1814 IRExpr* mkIRExprCCall ( IRType retty,
1815 Int regparms, HChar* name, void* addr,
1816 IRExpr** args );
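/* Example (a sketch): a clean call to a hypothetical pure helper
   'pure_helper', taking two I32 arguments and returning an I32;
   'helper_addr' stands for its host address:

      IRExpr* call
         = mkIRExprCCall( Ity_I32, 0/*regparms*/,
                          "pure_helper", helper_addr,
                          mkIRExprVec_2( IRExpr_RdTmp(t1),
                                         IRExpr_RdTmp(t2) ) );
*/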
1817
1818
1819 /* Convenience functions for atoms (IRExprs which are either Iex_RdTmp
1820 * or Iex_Const). */
1821 static inline Bool isIRAtom ( IRExpr* e ) {
1822 return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
1823 }
1824
1825 /* Are these two IR atoms identical? Causes an assertion
1826 failure if they are passed non-atoms. */
1827 extern Bool eqIRAtom ( IRExpr*, IRExpr* );
1828
1829
1830 /* ------------------ Jump kinds ------------------ */
1831
1832 /* This describes hints which can be passed to the dispatcher at guest
1833 control-flow transfer points.
1834
1835 Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
1836 guest_TISTART and guest_TILEN, which specify the start and length
1837 of the region to be invalidated. These are both the size of a
1838 guest word. It is the responsibility of the relevant toIR.c to
1839 ensure that these are filled in with suitable values before issuing
1840 a jump of kind Ijk_TInval.
1841
1842 Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
1843 pseudo-register guest_EMWARN, which is 32-bits regardless of the
1844 host or guest word size. That register should be made to hold an
1845 EmWarn_* value to indicate the reason for the exit.
1846
1847 In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
1848 cannot continue) and so the jump destination can be anything.
1849
1850 Re Ijk_Sys_ (syscall jumps): the guest state must have a
1851 pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
1852 word. Front ends should set this to be the IP at the most recently
1853 executed kernel-entering (system call) instruction. This makes it
1854 very much easier (viz, actually possible at all) to back up the
1855 guest to restart a syscall that has been interrupted by a signal.
1856 */
1857 typedef
1858 enum {
1859 Ijk_INVALID=0x16000,
1860 Ijk_Boring, /* not interesting; just goto next */
1861 Ijk_Call, /* guest is doing a call */
1862 Ijk_Ret, /* guest is doing a return */
1863 Ijk_ClientReq, /* do guest client req before continuing */
1864 Ijk_Yield, /* client is yielding to thread scheduler */
1865 Ijk_EmWarn, /* report emulation warning before continuing */
1866 Ijk_EmFail, /* emulation critical (FATAL) error; give up */
1867 Ijk_NoDecode, /* next instruction cannot be decoded */
1868 Ijk_MapFail, /* Vex-provided address translation failed */
1869 Ijk_TInval, /* Invalidate translations before continuing. */
1870 Ijk_NoRedir, /* Jump to un-redirected guest addr */
1871 Ijk_SigTRAP, /* current instruction synths SIGTRAP */
1872 Ijk_SigSEGV, /* current instruction synths SIGSEGV */
1873 Ijk_SigBUS, /* current instruction synths SIGBUS */
1874 /* Unfortunately, various guest-dependent syscall kinds. They
1875 all mean: do a syscall before continuing. */
1876 Ijk_Sys_syscall, /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
1877 Ijk_Sys_int32, /* amd64/x86 'int $0x20' */
1878 Ijk_Sys_int128, /* amd64/x86 'int $0x80' */
1879 Ijk_Sys_int129, /* amd64/x86 'int $0x81' */
1880 Ijk_Sys_int130, /* amd64/x86 'int $0x82' */
1881 Ijk_Sys_sysenter /* x86 'sysenter'. guest_EIP becomes
1882 invalid at the point this happens. */
1883 }
1884 IRJumpKind;
1885
1886 extern void ppIRJumpKind ( IRJumpKind );
1887
1888
1889 /* ------------------ Dirty helper calls ------------------ */
1890
1891 /* A dirty call is a flexible mechanism for calling (possibly
1892 conditionally) a helper function or procedure. The helper function
1893 may read, write or modify client memory, and may read, write or
1894 modify client state. It can take arguments and optionally return a
1895 value. It may return different results and/or do different things
1896 when called repeatedly with the same arguments, by means of storing
1897 private state.
1898
1899 If a value is returned, it is assigned to the nominated return
1900 temporary.
1901
1902 Dirty calls are statements rather than expressions for obvious
1903 reasons. If a dirty call is marked as writing guest state, any
1904 values derived from the written parts of the guest state are
1905 invalid. Similarly, if the dirty call is stated as writing
1906 memory, any loaded values are invalidated by it.
1907
1908 In order that instrumentation is possible, the call must state, and
1909 state correctly:
1910
1911 * whether it reads, writes or modifies memory, and if so where
1912 (only one chunk can be stated)
1913
1914 * whether it reads, writes or modifies guest state, and if so which
1915 pieces (several pieces may be stated, and currently their extents
1916 must be known at translation-time).
1917
1918 Normally, code is generated to pass just the args to the helper.
1919 However, if .needsBBP is set, then an extra first argument is
1920 passed, which is the baseblock pointer, so that the callee can
1921 access the guest state. It is invalid for .nFxState to be zero
1922 but .needsBBP to be True, since .nFxState==0 is a claim that the
1923 call does not access guest state.
1924
1925 IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict. The
1926 arguments are evaluated REGARDLESS of the guard value. The order of
1927 argument evaluation is unspecified. The guard expression is evaluated
1928 AFTER the arguments have been evaluated.
1929 */
1930
1931 #define VEX_N_FXSTATE 7 /* enough for FXSAVE/FXRSTOR on x86 */
1932
1933 /* Effects on resources (eg. registers, memory locations) */
1934 typedef
1935 enum {
1936 Ifx_None = 0x1700, /* no effect */
1937 Ifx_Read, /* reads the resource */
1938 Ifx_Write, /* writes the resource */
1939 Ifx_Modify /* modifies the resource */
1940 }
1941 IREffect;
1942
1943 /* Pretty-print an IREffect */
1944 extern void ppIREffect ( IREffect );
1945
1946
1947 typedef
1948 struct _IRDirty {
1949 /* What to call, and details of args/results. .guard must be
1950 non-NULL. If .tmp is not IRTemp_INVALID (that is, the call
1951 returns a result) then .guard must be demonstrably (at
1952 JIT-time) always true, that is, the call must be
1953 unconditional. Conditional calls that assign .tmp are not
1954 allowed. */
1955 IRCallee* cee; /* where to call */
1956 IRExpr* guard; /* :: Ity_Bit. Controls whether call happens */
1957 IRExpr** args; /* arg list, ends in NULL */
1958 IRTemp tmp; /* to assign result to, or IRTemp_INVALID if none */
1959
1960 /* Mem effects; we allow only one R/W/M region to be stated */
1961 IREffect mFx; /* indicates memory effects, if any */
1962 IRExpr* mAddr; /* of access, or NULL if mFx==Ifx_None */
1963 Int mSize; /* of access, or zero if mFx==Ifx_None */
1964
1965 /* Guest state effects; up to N allowed */
1966 Bool needsBBP; /* True => also pass guest state ptr to callee */
1967 Int nFxState; /* must be 0 .. VEX_N_FXSTATE */
1968 struct {
1969 IREffect fx:16; /* read, write or modify? Ifx_None is invalid. */
1970 UShort offset;
1971 UShort size;
1972 UChar nRepeats;
1973 UChar repeatLen;
1974 } fxState[VEX_N_FXSTATE];
1975 /* The access can be repeated, as specified by nRepeats and
1976 repeatLen. To describe only a single access, nRepeats and
1977 repeatLen should be zero. Otherwise, repeatLen must be a
1978 multiple of size and greater than size. */
1979 /* Overall, the parts of the guest state denoted by (offset,
1980 size, nRepeats, repeatLen) are
1981 [offset, +size)
1982 and, if nRepeats > 0,
1983 for (i = 1; i <= nRepeats; i++)
1984 [offset + i * repeatLen, +size)
1985 A convenient way to enumerate all segments is therefore
1986 for (i = 0; i < 1 + nRepeats; i++)
1987 [offset + i * repeatLen, +size)
1988 */
1989 }
1990 IRDirty;
1991
1992 /* Pretty-print a dirty call */
1993 extern void ppIRDirty ( IRDirty* );
1994
1995 /* Allocate an uninitialised dirty call */
1996 extern IRDirty* emptyIRDirty ( void );
1997
1998 /* Deep-copy a dirty call */
1999 extern IRDirty* deepCopyIRDirty ( IRDirty* );
2000
2001 /* A handy function which takes some of the tedium out of constructing
2002 dirty helper calls. The called function is assumed not to return
2003 any value and to have a constant-True guard. The call is marked as
2004 accessing neither guest state nor memory (hence the "unsafe"
2005 designation) -- you can change this marking later if need be. A
2006 suitable IRCallee is constructed from the supplied bits. */
2007 extern
2008 IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr,
2009 IRExpr** args );
2010
2011 /* Similarly, make a zero-annotation dirty call which returns a value,
2012 and assign that to the given temp. */
2013 extern
2014 IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
2015 Int regparms, HChar* name, void* addr,
2016 IRExpr** args );
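/* Example (a sketch): a dirty call to a hypothetical helper
   'read_mem_helper' which reads 8 bytes of guest memory at the
   address in t_addr and assigns the result to t_res.  Since
   unsafeIRDirty_1_N marks the call as accessing nothing, the memory
   effect must be annotated by hand afterwards:

      IRDirty* d = unsafeIRDirty_1_N(
                      t_res, 0/*regparms*/, "read_mem_helper",
                      helper_addr, mkIRExprVec_1(IRExpr_RdTmp(t_addr)) );
      d->mFx   = Ifx_Read;
      d->mAddr = IRExpr_RdTmp(t_addr);
      d->mSize = 8;
      addStmtToIRSB( sb, IRStmt_Dirty(d) );
*/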
2017
2018
2019 /* --------------- Memory Bus Events --------------- */
2020
2021 typedef
2022 enum {
2023 Imbe_Fence=0x18000,
2024 /* Needed only on ARM. It cancels a reservation made by a
2025 preceding Load-Linked, and needs to be handed through to the
2026 back end, just as LL and SC themselves are. */
2027 Imbe_CancelReservation
2028 }
2029 IRMBusEvent;
2030
2031 extern void ppIRMBusEvent ( IRMBusEvent );
2032
2033
2034 /* --------------- Compare and Swap --------------- */
2035
2036 /* This denotes an atomic compare and swap operation, either
2037 a single-element one or a double-element one.
2038
2039 In the single-element case:
2040
2041 .addr is the memory address.
2042 .end is the endianness with which memory is accessed
2043
2044 If memory at .addr contains the same value as .expdLo, then .dataLo is
2045 written there, else there is no write. In both cases, the
2046 original value at .addr is copied into .oldLo.
2047
2048 Types: .expdLo, .dataLo and .oldLo must all have the same type.
2049 It may be any integral type, viz: I8, I16, I32 or, for 64-bit
2050 guests, I64.
2051
2052 .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
2053 be NULL.
2054
2055 In the double-element case:
2056
2057 .addr is the memory address.
2058 .end is the endianness with which memory is accessed
2059
2060 The operation is the same:
2061
2062 If memory at .addr contains the same value as .expdHi:.expdLo, then
2063 .dataHi:.dataLo is written there, else there is no write. In
2064 both cases the original value at .addr is copied into
2065 .oldHi:.oldLo.
2066
2067 Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
2068 all have the same type, which may be any integral type, viz: I8,
2069 I16, I32 or, for 64-bit guests, I64.
2070
2071 The double-element case is complicated by the issue of
2072 endianness. In all cases, the two elements are understood to be
2073 located adjacently in memory, starting at the address .addr.
2074
2075 If .end is Iend_LE, then the .xxxLo component is at the lower
2076 address and the .xxxHi component is at the higher address, and
2077 each component is itself stored little-endianly.
2078
2079 If .end is Iend_BE, then the .xxxHi component is at the lower
2080 address and the .xxxLo component is at the higher address, and
2081 each component is itself stored big-endianly.
2082
2083 This allows representing more cases than most architectures can
2084 handle. For example, x86 cannot do DCAS on 8- or 16-bit elements.
2085
2086 How to know if the CAS succeeded?
2087
2088 * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
2089 then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
2090 stored at .addr, and the original value there was .oldLo (resp
2091 .oldHi:.oldLo).
2092
2093 * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
2094 then the CAS failed, and the original value at .addr was .oldLo
2095 (resp. .oldHi:.oldLo).
2096
2097 Hence it is easy to know whether or not the CAS succeeded.
2098 */
2099 typedef
2100 struct {
2101 IRTemp oldHi; /* old value of *addr is written here */
2102 IRTemp oldLo;
2103 IREndness end; /* endianness of the data in memory */
2104 IRExpr* addr; /* store address */
2105 IRExpr* expdHi; /* expected old value at *addr */
2106 IRExpr* expdLo;
2107 IRExpr* dataHi; /* new value for *addr */
2108 IRExpr* dataLo;
2109 }
2110 IRCAS;
2111
2112 extern void ppIRCAS ( IRCAS* cas );
2113
2114 extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
2115 IREndness end, IRExpr* addr,
2116 IRExpr* expdHi, IRExpr* expdLo,
2117 IRExpr* dataHi, IRExpr* dataLo );
2118
2119 extern IRCAS* deepCopyIRCAS ( IRCAS* );
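/* Example (a sketch; the temporaries and 'sb' are assumed to have
   been set up elsewhere): a single-element little-endian 32-bit CAS
   which tries to replace the value t_expd at address t_addr with
   t_data, leaving the original memory contents in t_old:

      IRCAS* cas = mkIRCAS( IRTemp_INVALID, t_old,
                            Iend_LE, IRExpr_RdTmp(t_addr),
                            NULL, IRExpr_RdTmp(t_expd),
                            NULL, IRExpr_RdTmp(t_data) );
      addStmtToIRSB( sb, IRStmt_CAS(cas) );

   Afterwards, the CAS succeeded iff t_old == t_expd. */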
2120
2121
2122 /* ------------------ Circular Array Put ------------------ */
2123 typedef
2124 struct {
2125 IRRegArray* descr; /* Part of guest state treated as circular */
2126 IRExpr* ix; /* Variable part of index into array */
2127 Int bias; /* Constant offset part of index into array */
2128 IRExpr* data; /* The value to write */
2129 } IRPutI;
2130
2131 extern void ppIRPutI ( IRPutI* puti );
2132
2133 extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix,
2134 Int bias, IRExpr* data );
2135
2136 extern IRPutI* deepCopyIRPutI ( IRPutI* );
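/* Example (a sketch), the store-side counterpart of the GetI example
   given earlier: write an F64-typed t_val at index (t39 - 7) % 8 of
   the 8-element F64 array at guest state offset 96:

      IRRegArray* descr = mkIRRegArray( 96, Ity_F64, 8 );
      IRPutI* puti = mkIRPutI( descr, IRExpr_RdTmp(t39), -7,
                               IRExpr_RdTmp(t_val) );
      addStmtToIRSB( sb, IRStmt_PutI(puti) );
*/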
2137
2138
2139 /* ------------------ Statements ------------------ */
2140
2141 /* The different kinds of statements. Their meaning is explained
2142 below in the comments for IRStmt.
2143
2144 Those marked META do not represent code, but rather extra
2145 information about the code. These statements can be removed
2146 without affecting the functional behaviour of the code; however,
2147 they are required by some IR consumers, such as tools that
2148 instrument the code.
2149 */
2150
2151 typedef
2152 enum {
2153 Ist_NoOp=0x19000,
2154 Ist_IMark, /* META */
2155 Ist_AbiHint, /* META */
2156 Ist_Put,
2157 Ist_PutI,
2158 Ist_WrTmp,
2159 Ist_Store,
2160 Ist_CAS,
2161 Ist_LLSC,
2162 Ist_Dirty,
2163 Ist_MBE, /* META (maybe) */
2164 Ist_Exit
2165 }
2166 IRStmtTag;
2167
2168 /* A statement. Stored as a tagged union. 'tag' indicates what kind
2169 of expression this is. 'Ist' is the union that holds the fields.
2170 If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
2171 statement, and the fields can be accessed with
2172 'st.Ist.Store.<fieldname>'.
2173
2174 For each kind of statement, we show what it looks like when
2175 pretty-printed with ppIRStmt().
2176 */
2177 typedef
2178 struct _IRStmt {
2179 IRStmtTag tag;
2180 union {
2181 /* A no-op (usually resulting from IR optimisation). Can be
2182 omitted without any effect.
2183
2184 ppIRStmt output: IR-NoOp
2185 */
2186 struct {
2187 } NoOp;
2188
2189 /* META: instruction mark. Marks the start of the statements
2190 that represent a single machine instruction (the end of
2191 those statements is marked by the next IMark or the end of
2192 the IRSB). Contains the address and length of the
2193 instruction.
2194
2195 It also contains a delta value. The delta must be
2196 subtracted from a guest program counter value before
2197 attempting to establish, by comparison with the address
2198 and length values, whether or not that program counter
2199 value refers to this instruction. For x86, amd64, ppc32,
2200 ppc64 and arm, the delta value is zero. For Thumb
2201 instructions, the delta value is one. This is because, on
2202 Thumb, guest PC values (guest_R15T) are encoded using the
2203 top 31 bits of the instruction address and a 1 in the lsb;
2204 hence they appear to be (numerically) 1 past the start of
2205 the instruction they refer to. IOW, guest_R15T on ARM
2206 holds a standard ARM interworking address.
2207
2208 ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
2209 eg. ------ IMark(0x4000792, 5, 0) ------,
2210 */
2211 struct {
2212 Addr64 addr; /* instruction address */
2213 Int len; /* instruction length */
2214 UChar delta; /* addr = program counter as encoded in guest state
2215 - delta */
2216 } IMark;
2217
2218 /* META: An ABI hint, which says something about this
2219 platform's ABI.
2220
2221 At the moment, the only AbiHint is one which indicates
2222 that a given chunk of address space, [base .. base+len-1],
2223 has become undefined. This is used on amd64-linux and
2224 some ppc variants to pass stack-redzoning hints to whoever
2225 wants to see them. It also indicates the address of the
2226 next (dynamic) instruction that will be executed. This is
2227 to help Memcheck do origin tracking.
2228
2229 ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
2230 eg. ====== AbiHint(t1, 16, t2) ======
2231 */
2232 struct {
2233 IRExpr* base; /* Start of undefined chunk */
2234 Int len; /* Length of undefined chunk */
2235 IRExpr* nia; /* Address of next (guest) insn */
2236 } AbiHint;
2237
2238 /* Write a guest register, at a fixed offset in the guest state.
2239 ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
2240 */
2241 struct {
2242 Int offset; /* Offset into the guest state */
2243 IRExpr* data; /* The value to write */
2244 } Put;
2245
2246 /* Write a guest register, at a non-fixed offset in the guest
2247 state. See the comment for GetI expressions for more
2248 information.
2249
2250 ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
2251 eg. PUTI(64:8xF64)[t5,0] = t1
2252 */
2253 struct {
2254 IRPutI* details;
2255 } PutI;
2256
2257 /* Assign a value to a temporary. Note that SSA rules require
2258 that each tmp be assigned to only once. IR sanity checking will
2259 reject any block containing a temporary which is not assigned
2260 to exactly once.
2261
2262 ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
2263 */
2264 struct {
2265 IRTemp tmp; /* Temporary (LHS of assignment) */
2266 IRExpr* data; /* Expression (RHS of assignment) */
2267 } WrTmp;
2268
2269 /* Write a value to memory. This is a normal store, not a
2270 Store-Conditional. To represent a Store-Conditional,
2271 instead use IRStmt.LLSC.
2272 ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
2273 */
2274 struct {
2275 IREndness end; /* Endianness of the store */
2276 IRExpr* addr; /* store address */
2277 IRExpr* data; /* value to write */
2278 } Store;
2279
2280 /* Do an atomic compare-and-swap operation. Semantics are
2281 described above on a comment at the definition of IRCAS.
2282
2283 ppIRStmt output:
2284 t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
2285 eg
2286 t1 = CASle(t2 :: t3->Add32(t3,1))
2287 which denotes a 32-bit atomic increment
2288 of a value at address t2
2289
2290 A double-element CAS may also be denoted, in which case <tmp>,
2291 <expected> and <new> are all pairs of items, separated by
2292 commas.
2293 */
2294 struct {
2295 IRCAS* details;
2296 } CAS;
2297
2298 /* Either Load-Linked or Store-Conditional, depending on
2299 STOREDATA.
2300
2301 If STOREDATA is NULL then this is a Load-Linked, meaning
2302 that data is loaded from memory as normal, but a
2303 'reservation' for the address is also lodged in the
2304 hardware.
2305
2306 result = Load-Linked(addr, end)
2307
2308 The data transfer type is the type of RESULT (I32, I64,
2309 etc). ppIRStmt output:
2310
2311 result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)
2312
2313 If STOREDATA is not NULL then this is a Store-Conditional,
2314 hence:
2315
2316 result = Store-Conditional(addr, storedata, end)
2317
2318 The data transfer type is the type of STOREDATA and RESULT
2319 has type Ity_I1. The store may fail or succeed depending
2320 on the state of a previously lodged reservation on this
2321 address. RESULT is written 1 if the store succeeds and 0
2322 if it fails. eg ppIRStmt output:
2323
2324 result = ( ST<end>-Cond(<addr>) = <storedata> )
2325 eg. t3 = ( STbe-Cond(t1) = t2 )
2326
2327 In all cases, the address must be naturally aligned for
2328 the transfer type -- any misaligned addresses should be
2329 caught by a dominating IR check and side exit. This
2330 alignment restriction exists because on at least some
2331 LL/SC platforms (ppc), stwcx. etc will trap with SIGBUS on
2332 misaligned addresses, and we have to actually generate
2333 stwcx. on the host, and we don't want it trapping on the
2334 host.
2335
2336 Summary of rules for transfer type:
2337 STOREDATA == NULL (LL):
2338 transfer type = type of RESULT
2339 STOREDATA != NULL (SC):
2340 transfer type = type of STOREDATA, and RESULT :: Ity_I1
2341 */
2342 struct {
2343 IREndness end;
2344 IRTemp result;
2345 IRExpr* addr;
2346 IRExpr* storedata; /* NULL => LL, non-NULL => SC */
2347 } LLSC;
2348
2349 /* Call (possibly conditionally) a C function that has side
2350 effects (ie. is "dirty"). See the comments above the
2351 IRDirty type declaration for more information.
2352
2353 ppIRStmt output:
2354 t<tmp> = DIRTY <guard> <effects>
2355 ::: <callee>(<args>)
2356 eg.
2357 t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
2358 ::: foo{0x380035f4}(t2)
2359 */
2360 struct {
2361 IRDirty* details;
2362 } Dirty;
2363
2364 /* A memory bus event - a fence, or acquisition/release of the
2365 hardware bus lock. IR optimisation treats all these as fences
2366 across which no memory references may be moved.
2367 ppIRStmt output: MBusEvent-Fence,
2368 MBusEvent-BusLock, MBusEvent-BusUnlock.
2369 */
2370 struct {
2371 IRMBusEvent event;
2372 } MBE;
2373
2374 /* Conditional exit from the middle of an IRSB.
2375 ppIRStmt output: if (<guard>) goto {<jk>} <dst>
2376 eg. if (t69) goto {Boring} 0x4000AAA:I32
2377 If <guard> is true, the guest state is also updated by
2378 PUT-ing <dst> at <offsIP>. This is done because a
2379 taken exit must update the guest program counter.
2380 */
2381 struct {
2382 IRExpr* guard; /* Conditional expression */
2383 IRConst* dst; /* Jump target (constant only) */
2384 IRJumpKind jk; /* Jump kind */
2385 Int offsIP; /* Guest state offset for IP */
2386 } Exit;
2387 } Ist;
2388 }
2389 IRStmt;
2390
2391 /* Statement constructors. */
2392 extern IRStmt* IRStmt_NoOp ( void );
2393 extern IRStmt* IRStmt_IMark ( Addr64 addr, Int len, UChar delta );
2394 extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
2395 extern IRStmt* IRStmt_Put ( Int off, IRExpr* data );
2396 extern IRStmt* IRStmt_PutI ( IRPutI* details );
2397 extern IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data );
2398 extern IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data );
2399 extern IRStmt* IRStmt_CAS ( IRCAS* details );
2400 extern IRStmt* IRStmt_LLSC ( IREndness end, IRTemp result,
2401 IRExpr* addr, IRExpr* storedata );
2402 extern IRStmt* IRStmt_Dirty ( IRDirty* details );
2403 extern IRStmt* IRStmt_MBE ( IRMBusEvent event );
2404 extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
2405 Int offsIP );
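/* For example (a sketch; 'sb' and the I32-typed temporaries are
   assumed to have been set up elsewhere), "load an I32 from the
   address in t1, add 1, and store it back" becomes three flat
   statements:

      addStmtToIRSB( sb, IRStmt_WrTmp( t2,
                         IRExpr_Load(Iend_LE, Ity_I32,
                                     IRExpr_RdTmp(t1)) ) );
      addStmtToIRSB( sb, IRStmt_WrTmp( t3,
                         IRExpr_Binop(Iop_Add32, IRExpr_RdTmp(t2),
                                      IRExpr_Const(IRConst_U32(1))) ) );
      addStmtToIRSB( sb, IRStmt_Store( Iend_LE, IRExpr_RdTmp(t1),
                                       IRExpr_RdTmp(t3) ) );
*/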
2406
2407 /* Deep-copy an IRStmt. */
2408 extern IRStmt* deepCopyIRStmt ( IRStmt* );
2409
2410 /* Pretty-print an IRStmt. */
2411 extern void ppIRStmt ( IRStmt* );
2412
2413
2414 /* ------------------ Basic Blocks ------------------ */
2415
2416 /* Type environments: statements, expressions, etc, are incomplete
2417 without an environment indicating the type of each IRTemp. So this
2418 provides one. IR temporaries are really just unsigned ints, so the
2419 environment is simply an array of types, indexed
2420 0 .. types_used-1.
2421 */
2422 typedef
2423 struct {
2424 IRType* types;
2425 Int types_size;
2426 Int types_used;
2427 }
2428 IRTypeEnv;
2429
2430 /* Obtain a new IRTemp */
2431 extern IRTemp newIRTemp ( IRTypeEnv*, IRType );
2432
2433 /* Deep-copy a type environment */
2434 extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );
2435
2436 /* Pretty-print a type environment */
2437 extern void ppIRTypeEnv ( IRTypeEnv* );
2438
2439
2440 /* Code blocks, which in proper compiler terminology are superblocks
2441 (single-entry, multiple-exit code sequences), contain:
2442
2443 - A table giving a type for each temp (the "type environment")
2444 - An expandable array of statements
2445 - An expression of type 32 or 64 bits, depending on the
2446 guest's word size, indicating the next destination if the block
2447 executes all the way to the end, without a side exit
2448 - An indication of any special actions (JumpKind) needed
2449 for this final jump.
2450 - Offset of the IP field in the guest state. This will be
2451 updated before the final jump is done.
2452
2453 "IRSB" stands for "IR Super Block".
2454 */
2455 typedef
2456 struct {
2457 IRTypeEnv* tyenv;
2458 IRStmt** stmts;
2459 Int stmts_size;
2460 Int stmts_used;
2461 IRExpr* next;
2462 IRJumpKind jumpkind;
2463 Int offsIP;
2464 }
2465 IRSB;
2466
2467 /* Allocate a new, uninitialised IRSB */
2468 extern IRSB* emptyIRSB ( void );
2469
2470 /* Deep-copy an IRSB */
2471 extern IRSB* deepCopyIRSB ( IRSB* );
2472
2473 /* Deep-copy an IRSB, except for the statements list, which is set to
2474 be a new, empty list of statements. */
2475 extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );
2476
2477 /* Pretty-print an IRSB */
2478 extern void ppIRSB ( IRSB* );
2479
2480 /* Append an IRStmt to an IRSB */
2481 extern void addStmtToIRSB ( IRSB*, IRStmt* );
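/* Example (a sketch, assuming emptyIRSB yields a block ready to
   accept statements): assemble a minimal IRSB which writes the
   constant 0 to guest state offset 0 and then jumps to guest address
   0x1000.  The offsets are illustrative only; offsIP in particular is
   guest-architecture-dependent:

      IRSB* sb = emptyIRSB();
      addStmtToIRSB( sb, IRStmt_Put(0, IRExpr_Const(IRConst_U32(0))) );
      sb->next     = IRExpr_Const(IRConst_U32(0x1000));
      sb->jumpkind = Ijk_Boring;
      sb->offsIP   = 0;  /* offset of the guest IP; illustrative */
*/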
2482
2483
2484 /*---------------------------------------------------------------*/
2485 /*--- Helper functions for the IR ---*/
2486 /*---------------------------------------------------------------*/
2487
2488 /* For messing with IR type environments */
2489 extern IRTypeEnv* emptyIRTypeEnv ( void );
2490
2491 /* What is the type of this expression? */
2492 extern IRType typeOfIRConst ( IRConst* );
2493 extern IRType typeOfIRTemp ( IRTypeEnv*, IRTemp );
2494 extern IRType typeOfIRExpr ( IRTypeEnv*, IRExpr* );
2495
2496 /* Sanity check a BB of IR */
2497 extern void sanityCheckIRSB ( IRSB* bb,
2498 HChar* caller,
2499 Bool require_flatness,
2500 IRType guest_word_size );
2501 extern Bool isFlatIRStmt ( IRStmt* );
2502
2503 /* Is this value actually in the enumeration 'IRType'? */
2504 extern Bool isPlausibleIRType ( IRType ty );
2505
2506 #endif /* ndef __LIBVEX_IR_H */
2507
2508
2509 /*---------------------------------------------------------------*/
2510 /*--- libvex_ir.h ---*/
2511 /*---------------------------------------------------------------*/
2512