1
2 /*---------------------------------------------------------------*/
3 /*--- begin libvex_ir.h ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2011 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #ifndef __LIBVEX_IR_H
37 #define __LIBVEX_IR_H
38
39 #include "libvex_basictypes.h"
40
41
42 /*---------------------------------------------------------------*/
43 /*--- High-level IR description ---*/
44 /*---------------------------------------------------------------*/
45
46 /* Vex IR is an architecture-neutral intermediate representation.
47 Unlike some IRs in systems similar to Vex, it is not like assembly
48 language (ie. a list of instructions). Rather, it is more like the
49 IR that might be used in a compiler.
50
51 Code blocks
52 ~~~~~~~~~~~
53 The code is broken into small code blocks ("superblocks", type:
54 'IRSB'). Each code block typically represents from 1 to perhaps 50
55 instructions. IRSBs are single-entry, multiple-exit code blocks.
56 Each IRSB contains three things:
57 - a type environment, which indicates the type of each temporary
58 value present in the IRSB
59 - a list of statements, which represent code
   - a jump that exits from the end of the IRSB
61 Because the blocks are multiple-exit, there can be additional
62 conditional exit statements that cause control to leave the IRSB
63 before the final exit. Also because of this, IRSBs can cover
64 multiple non-consecutive sequences of code (up to 3). These are
65 recorded in the type VexGuestExtents (see libvex.h).
66
67 Statements and expressions
68 ~~~~~~~~~~~~~~~~~~~~~~~~~~
69 Statements (type 'IRStmt') represent operations with side-effects,
70 eg. guest register writes, stores, and assignments to temporaries.
71 Expressions (type 'IRExpr') represent operations without
72 side-effects, eg. arithmetic operations, loads, constants.
73 Expressions can contain sub-expressions, forming expression trees,
   eg. (3 + (4 * load(addr1))).
75
76 Storage of guest state
77 ~~~~~~~~~~~~~~~~~~~~~~
78 The "guest state" contains the guest registers of the guest machine
79 (ie. the machine that we are simulating). It is stored by default
80 in a block of memory supplied by the user of the VEX library,
81 generally referred to as the guest state (area). To operate on
82 these registers, one must first read ("Get") them from the guest
83 state into a temporary value. Afterwards, one can write ("Put")
84 them back into the guest state.
85
86 Get and Put are characterised by a byte offset into the guest
87 state, a small integer which effectively gives the identity of the
88 referenced guest register, and a type, which indicates the size of
89 the value to be transferred.
90
91 The basic "Get" and "Put" operations are sufficient to model normal
92 fixed registers on the guest. Selected areas of the guest state
93 can be treated as a circular array of registers (type:
94 'IRRegArray'), which can be indexed at run-time. This is done with
95 the "GetI" and "PutI" primitives. This is necessary to describe
96 rotating register files, for example the x87 FPU stack, SPARC
97 register windows, and the Itanium register files.
98
99 Examples, and flattened vs. unflattened code
100 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101 For example, consider this x86 instruction:
102
103 addl %eax, %ebx
104
105 One Vex IR translation for this code would be this:
106
107 ------ IMark(0x24F275, 7, 0) ------
108 t3 = GET:I32(0) # get %eax, a 32-bit integer
109 t2 = GET:I32(12) # get %ebx, a 32-bit integer
110 t1 = Add32(t3,t2) # addl
     PUT(12) = t1                # put %ebx
112
113 (For simplicity, this ignores the effects on the condition codes, and
114 the update of the instruction pointer.)
115
116 The "IMark" is an IR statement that doesn't represent actual code.
117 Instead it indicates the address and length of the original
118 instruction. The numbers 0 and 12 are offsets into the guest state
119 for %eax and %ebx. The full list of offsets for an architecture
120 <ARCH> can be found in the type VexGuest<ARCH>State in the file
121 VEX/pub/libvex_guest_<ARCH>.h.
122
123 The five statements in this example are:
124 - the IMark
125 - three assignments to temporaries
126 - one register write (put)
127
128 The six expressions in this example are:
129 - two register reads (gets)
130 - one arithmetic (add) operation
131 - three temporaries (two nested within the Add32, one in the PUT)
132
133 The above IR is "flattened", ie. all sub-expressions are "atoms",
134 either constants or temporaries. An equivalent, unflattened version
135 would be:
136
137 PUT(0) = Add32(GET:I32(0), GET:I32(12))
138
139 IR is guaranteed to be flattened at instrumentation-time. This makes
140 instrumentation easier. Equivalent flattened and unflattened IR
141 typically results in the same generated code.
142
143 Another example, this one showing loads and stores:
144
145 addl %edx,4(%eax)
146
147 This becomes (again ignoring condition code and instruction pointer
148 updates):
149
150 ------ IMark(0x4000ABA, 3, 0) ------
151 t3 = Add32(GET:I32(0),0x4:I32)
152 t2 = LDle:I32(t3)
153 t1 = GET:I32(8)
154 t0 = Add32(t2,t1)
155 STle(t3) = t0
156
157 The "le" in "LDle" and "STle" is short for "little-endian".
158
159 No need for deallocations
160 ~~~~~~~~~~~~~~~~~~~~~~~~~
161 Although there are allocation functions for various data structures
162 in this file, there are no deallocation functions. This is because
163 Vex uses a memory allocation scheme that automatically reclaims the
164 memory used by allocated structures once translation is completed.
   This makes things easier for tools that instrument/transform code
166 blocks.
167
168 SSAness and typing
169 ~~~~~~~~~~~~~~~~~~
170 The IR is fully typed. For every IRSB (IR block) it is possible to
171 say unambiguously whether or not it is correctly typed.
   Incorrectly typed IR has no meaning and VEX will refuse to
173 process it. At various points during processing VEX typechecks the
174 IR and aborts if any violations are found. This seems overkill but
175 makes it a great deal easier to build a reliable JIT.
176
177 IR also has the SSA property. SSA stands for Static Single
178 Assignment, and what it means is that each IR temporary may be
179 assigned to only once. This idea became widely used in compiler
180 construction in the mid to late 90s. It makes many IR-level
181 transformations/code improvements easier, simpler and faster.
182 Whenever it typechecks an IR block, VEX also checks the SSA
183 property holds, and will abort if not so. So SSAness is
184 mechanically and rigidly enforced.
185 */
186
187 /*---------------------------------------------------------------*/
188 /*--- Type definitions for the IR ---*/
189 /*---------------------------------------------------------------*/
190
191 /* General comments about naming schemes:
192
193 All publically visible functions contain the name of the primary
194 type on which they operate (IRFoo, IRBar, etc). Hence you should
195 be able to identify these functions by grepping for "IR[A-Z]".
196
197 For some type 'IRFoo':
198
199 - ppIRFoo is the printing method for IRFoo, printing it to the
200 output channel specified in the LibVEX_Initialise call.
201
202 - eqIRFoo is a structural equality predicate for IRFoos.
203
204 - deepCopyIRFoo is a deep copy constructor for IRFoos.
205 It recursively traverses the entire argument tree and
206 produces a complete new tree. All types have a deep copy
207 constructor.
208
209 - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
210 It creates a new top-level copy of the supplied object,
211 but does not copy any sub-objects. Only some types have a
212 shallow copy constructor.
213 */
214
215 /* ------------------ Types ------------------ */
216
/* A type indicates the size of a value, and whether it's an integer, a
   float, or a vector (SIMD) value.
   (NOTE(review): each IR enum family in this file starts at a distinct
   base value -- 0x11000 here, 0x12000/0x13000/0x14000 below --
   presumably so stray tags are easy to recognise in a debugger;
   confirm the rationale.) */
typedef
   enum {
      Ity_INVALID=0x11000, /* sentinel: denotes "no valid type" */
      Ity_I1,     /* 1-bit value */
      Ity_I8,     /* 8-bit integer */
      Ity_I16,    /* 16-bit integer */
      Ity_I32,    /* 32-bit integer */
      Ity_I64,    /* 64-bit integer */
      Ity_I128,   /* 128-bit scalar */
      Ity_F32,    /* IEEE 754 float */
      Ity_F64,    /* IEEE 754 double */
      Ity_F128,   /* 128-bit floating point; implementation defined */
      Ity_V128    /* 128-bit SIMD */
   }
   IRType;

/* Pretty-print an IRType. */
extern void ppIRType ( IRType );

/* Get the size (in bytes) of an IRType.  NOTE(review): Ity_I1 has no
   whole-byte size -- confirm how sizeofIRType handles it before
   relying on the result. */
extern Int sizeofIRType ( IRType );
240
241
242 /* ------------------ Endianness ------------------ */
243
/* IREndness is used in load IRExprs and store IRStmts: it gives the
   byte order of the memory access. */
typedef
   enum {
      Iend_LE=0x12000, /* little endian */
      Iend_BE          /* big endian */
   }
   IREndness;
251
252
253 /* ------------------ Constants ------------------ */
254
255 /* IRConsts are used within 'Const' and 'Exit' IRExprs. */
256
/* The various kinds of constant.  Each tag Ico_X selects the
   correspondingly-named field X of the 'Ico' union in IRConst
   (defined below). */
typedef
   enum {
      Ico_U1=0x13000, /* 1-bit constant, carried as a Bool */
      Ico_U8,         /* 8-bit unsigned constant */
      Ico_U16,        /* 16-bit unsigned constant */
      Ico_U32,        /* 32-bit unsigned constant */
      Ico_U64,        /* 64-bit unsigned constant */
      Ico_F32,   /* 32-bit IEEE754 floating */
      Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
                    as a IEEE754 single value. */
      Ico_F64,   /* 64-bit IEEE754 floating */
      Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
                    as a IEEE754 double value. */
      Ico_V128   /* 128-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 16 x 1-byte lanes */
   }
   IRConstTag;
275
/* A constant.  Stored as a tagged union.  'tag' indicates what kind of
   constant this is.  'Ico' is the union that holds the fields.  If an
   IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
   and its value can be accessed with 'c.Ico.U32'. */
typedef
   struct _IRConst {
      IRConstTag tag;    /* discriminant: selects the valid union field */
      union {
         Bool   U1;      /* Ico_U1 */
         UChar  U8;      /* Ico_U8 */
         UShort U16;     /* Ico_U16 */
         UInt   U32;     /* Ico_U32 */
         ULong  U64;     /* Ico_U64 */
         Float  F32;     /* Ico_F32 */
         UInt   F32i;    /* Ico_F32i: raw bits of an IEEE754 single */
         Double F64;     /* Ico_F64 */
         ULong  F64i;    /* Ico_F64i: raw bits of an IEEE754 double */
         UShort V128;    /* Ico_V128: 16-bit value; see Ico_V128 comment above */
      } Ico;
   }
   IRConst;
297
/* IRConst constructors.  Each returns a freshly allocated IRConst
   carrying the corresponding tag and value.  There are no matching
   deallocators: all IR structures are reclaimed automatically once
   translation completes (see "No need for deallocations" above). */
extern IRConst* IRConst_U1   ( Bool );
extern IRConst* IRConst_U8   ( UChar );
extern IRConst* IRConst_U16  ( UShort );
extern IRConst* IRConst_U32  ( UInt );
extern IRConst* IRConst_U64  ( ULong );
extern IRConst* IRConst_F32  ( Float );
extern IRConst* IRConst_F32i ( UInt );
extern IRConst* IRConst_F64  ( Double );
extern IRConst* IRConst_F64i ( ULong );
extern IRConst* IRConst_V128 ( UShort );

/* Deep-copy an IRConst */
extern IRConst* deepCopyIRConst ( IRConst* );

/* Pretty-print an IRConst */
extern void ppIRConst ( IRConst* );

/* Compare two IRConsts for (structural) equality */
extern Bool eqIRConst ( IRConst*, IRConst* );
318
319
320 /* ------------------ Call targets ------------------ */
321
322 /* Describes a helper function to call. The name part is purely for
323 pretty printing and not actually used. regparms=n tells the back
324 end that the callee has been declared
325 "__attribute__((regparm(n)))", although indirectly using the
326 VEX_REGPARM(n) macro. On some targets (x86) the back end will need
327 to construct a non-standard sequence to call a function declared
328 like this.
329
330 mcx_mask is a sop to Memcheck. It indicates which args should be
331 considered 'always defined' when lazily computing definedness of
332 the result. Bit 0 of mcx_mask corresponds to args[0], bit 1 to
333 args[1], etc. If a bit is set, the corresponding arg is excluded
334 (hence "x" in "mcx") from definedness checking.
335 */
336
typedef
   struct {
      Int    regparms;  /* n such that the callee is declared
                           VEX_REGPARM(n); see comment above */
      HChar* name;      /* callee's name; used only for pretty-printing */
      void*  addr;      /* address of the helper function to call */
      UInt   mcx_mask;  /* bit i set => args[i] is excluded from
                           Memcheck's definedness checking; see above */
   }
   IRCallee;
345
/* Create an IRCallee.  NOTE(review): mcx_mask is not a parameter here;
   presumably it is initialised to zero (no args excluded) -- confirm
   against the definition in ir_defs.c. */
extern IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr );

/* Deep-copy an IRCallee. */
extern IRCallee* deepCopyIRCallee ( IRCallee* );

/* Pretty-print an IRCallee. */
extern void ppIRCallee ( IRCallee* );
354
355
356 /* ------------------ Guest state arrays ------------------ */
357
/* This describes a section of the guest state that we want to
   be able to index at run time, so as to be able to describe
   indexed or rotating register files on the guest (accessed via the
   GetI/PutI primitives; see the overview comment above).  The
   described area holds nElems elements, each of type elemTy,
   starting at guest state byte offset 'base'. */
typedef
   struct {
      Int    base;   /* guest state offset of start of indexed area */
      IRType elemTy; /* type of each element in the indexed area */
      Int    nElems; /* number of elements in the indexed area */
   }
   IRRegArray;
368
/* Allocate and initialise an IRRegArray.  NOTE(review): argument order
   is presumably (base, elemTy, nElems), matching the struct fields --
   confirm against ir_defs.c. */
extern IRRegArray* mkIRRegArray ( Int, IRType, Int );

/* Deep-copy an IRRegArray. */
extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );

/* Pretty-print an IRRegArray. */
extern void ppIRRegArray ( IRRegArray* );
/* Compare two IRRegArrays for (structural) equality. */
extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
375
376
377 /* ------------------ Temporaries ------------------ */
378
/* This represents a temporary, eg. t1.  The IR optimiser relies on the
   fact that IRTemps are 32-bit ints.  Do not change them to be ints of
   any other size. */
typedef UInt IRTemp;

/* Pretty-print an IRTemp. */
extern void ppIRTemp ( IRTemp );

/* Sentinel denoting "no temporary". */
#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)
388
389
390 /* --------------- Primops (arity 1,2,3 and 4) --------------- */
391
392 /* Primitive operations that are used in Unop, Binop, Triop and Qop
393 IRExprs. Once we take into account integer, floating point and SIMD
394 operations of all the different sizes, there are quite a lot of them.
395 Most instructions supported by the architectures that Vex supports
396 (x86, PPC, etc) are represented. Some more obscure ones (eg. cpuid)
397 are not; they are instead handled with dirty helpers that emulate
398 their functionality. Such obscure ones are thus not directly visible
399 in the IR, but their effects on guest state (memory and registers)
400 are made visible via the annotations in IRDirty structures.
401 */
402 typedef
403 enum {
404 /* -- Do not change this ordering. The IR generators rely on
      (eg) Iop_Add64 == Iop_Add8 + 3. -- */
406
407 Iop_INVALID=0x14000,
408 Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64,
409 Iop_Sub8, Iop_Sub16, Iop_Sub32, Iop_Sub64,
410 /* Signless mul. MullS/MullU is elsewhere. */
411 Iop_Mul8, Iop_Mul16, Iop_Mul32, Iop_Mul64,
412 Iop_Or8, Iop_Or16, Iop_Or32, Iop_Or64,
413 Iop_And8, Iop_And16, Iop_And32, Iop_And64,
414 Iop_Xor8, Iop_Xor16, Iop_Xor32, Iop_Xor64,
415 Iop_Shl8, Iop_Shl16, Iop_Shl32, Iop_Shl64,
416 Iop_Shr8, Iop_Shr16, Iop_Shr32, Iop_Shr64,
417 Iop_Sar8, Iop_Sar16, Iop_Sar32, Iop_Sar64,
418 /* Integer comparisons. */
419 Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32, Iop_CmpEQ64,
420 Iop_CmpNE8, Iop_CmpNE16, Iop_CmpNE32, Iop_CmpNE64,
421 /* Tags for unary ops */
422 Iop_Not8, Iop_Not16, Iop_Not32, Iop_Not64,
423
424 /* Exactly like CmpEQ8/16/32/64, but carrying the additional
425 hint that these compute the success/failure of a CAS
426 operation, and hence are almost certainly applied to two
427 copies of the same value, which in turn has implications for
428 Memcheck's instrumentation. */
429 Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
430 Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
431
432 /* -- Ordering not important after here. -- */
433
434 /* Widening multiplies */
435 Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
436 Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
437
      /* Weirdo integer stuff */
439 Iop_Clz64, Iop_Clz32, /* count leading zeroes */
440 Iop_Ctz64, Iop_Ctz32, /* count trailing zeros */
441 /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
442 zero. You must ensure they are never given a zero argument.
443 */
444
445 /* Standard integer comparisons */
446 Iop_CmpLT32S, Iop_CmpLT64S,
447 Iop_CmpLE32S, Iop_CmpLE64S,
448 Iop_CmpLT32U, Iop_CmpLT64U,
449 Iop_CmpLE32U, Iop_CmpLE64U,
450
451 /* As a sop to Valgrind-Memcheck, the following are useful. */
452 Iop_CmpNEZ8, Iop_CmpNEZ16, Iop_CmpNEZ32, Iop_CmpNEZ64,
453 Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-Os; other -> all-1s */
454 Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /* \x -> x | -x */
455 Iop_Max32U, /* unsigned max */
456
457 /* PowerPC-style 3-way integer comparisons. Without them it is
458 difficult to simulate PPC efficiently.
459 op(x,y) | x < y = 0x8 else
460 | x > y = 0x4 else
461 | x == y = 0x2
462 */
463 Iop_CmpORD32U, Iop_CmpORD64U,
464 Iop_CmpORD32S, Iop_CmpORD64S,
465
466 /* Division */
467 /* TODO: clarify semantics wrt rounding, negative values, whatever */
468 Iop_DivU32, // :: I32,I32 -> I32 (simple div, no mod)
469 Iop_DivS32, // ditto, signed
470 Iop_DivU64, // :: I64,I64 -> I64 (simple div, no mod)
471 Iop_DivS64, // ditto, signed
472 Iop_DivU64E, // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) concat with 64 0's (low))
473 Iop_DivS64E, // ditto, signed
474 Iop_DivU32E, // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) concat with 32 0's (low))
475 Iop_DivS32E, // ditto, signed
476
477 Iop_DivModU64to32, // :: I64,I32 -> I64
478 // of which lo half is div and hi half is mod
479 Iop_DivModS64to32, // ditto, signed
480
481 Iop_DivModU128to64, // :: V128,I64 -> V128
482 // of which lo half is div and hi half is mod
483 Iop_DivModS128to64, // ditto, signed
484
485 Iop_DivModS64to64, // :: I64,I64 -> I128
486 // of which lo half is div and hi half is mod
487
488 /* Integer conversions. Some of these are redundant (eg
489 Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
490 having a complete set reduces the typical dynamic size of IR
491 and makes the instruction selectors easier to write. */
492
493 /* Widening conversions */
494 Iop_8Uto16, Iop_8Uto32, Iop_8Uto64,
495 Iop_16Uto32, Iop_16Uto64,
496 Iop_32Uto64,
497 Iop_8Sto16, Iop_8Sto32, Iop_8Sto64,
498 Iop_16Sto32, Iop_16Sto64,
499 Iop_32Sto64,
500
501 /* Narrowing conversions */
502 Iop_64to8, Iop_32to8, Iop_64to16,
503 /* 8 <-> 16 bit conversions */
504 Iop_16to8, // :: I16 -> I8, low half
505 Iop_16HIto8, // :: I16 -> I8, high half
506 Iop_8HLto16, // :: (I8,I8) -> I16
507 /* 16 <-> 32 bit conversions */
508 Iop_32to16, // :: I32 -> I16, low half
509 Iop_32HIto16, // :: I32 -> I16, high half
510 Iop_16HLto32, // :: (I16,I16) -> I32
511 /* 32 <-> 64 bit conversions */
512 Iop_64to32, // :: I64 -> I32, low half
513 Iop_64HIto32, // :: I64 -> I32, high half
514 Iop_32HLto64, // :: (I32,I32) -> I64
515 /* 64 <-> 128 bit conversions */
516 Iop_128to64, // :: I128 -> I64, low half
517 Iop_128HIto64, // :: I128 -> I64, high half
518 Iop_64HLto128, // :: (I64,I64) -> I128
519 /* 1-bit stuff */
520 Iop_Not1, /* :: Ity_Bit -> Ity_Bit */
521 Iop_32to1, /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
522 Iop_64to1, /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
523 Iop_1Uto8, /* :: Ity_Bit -> Ity_I8, unsigned widen */
524 Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
525 Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
526 Iop_1Sto8, /* :: Ity_Bit -> Ity_I8, signed widen */
527 Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
528 Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
529 Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
530
531 /* ------ Floating point. We try to be IEEE754 compliant. ------ */
532
533 /* --- Simple stuff as mandated by 754. --- */
534
535 /* Binary operations, with rounding. */
536 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
537 Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
538
539 /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
540 Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,
541
542 /* Variants of the above which produce a 64-bit result but which
543 round their result to a IEEE float range first. */
544 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
545 Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,
546
547 /* Unary operations, without rounding. */
548 /* :: F64 -> F64 */
549 Iop_NegF64, Iop_AbsF64,
550
551 /* :: F32 -> F32 */
552 Iop_NegF32, Iop_AbsF32,
553
554 /* Unary operations, with rounding. */
555 /* :: IRRoundingMode(I32) x F64 -> F64 */
556 Iop_SqrtF64, Iop_SqrtF64r32,
557
558 /* :: IRRoundingMode(I32) x F32 -> F32 */
559 Iop_SqrtF32,
560
561 /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
562 0x45 Unordered
563 0x01 LT
564 0x00 GT
565 0x40 EQ
566 This just happens to be the Intel encoding. The values
567 are recorded in the type IRCmpF64Result.
568 */
569 /* :: F64 x F64 -> IRCmpF64Result(I32) */
570 Iop_CmpF64,
571 Iop_CmpF32,
572 Iop_CmpF128,
573
574 /* --- Int to/from FP conversions. --- */
575
576 /* For the most part, these take a first argument :: Ity_I32 (as
577 IRRoundingMode) which is an indication of the rounding mode
578 to use, as per the following encoding ("the standard
579 encoding"):
580 00b to nearest (the default)
581 01b to -infinity
582 10b to +infinity
583 11b to zero
584 This just happens to be the Intel encoding. For reference only,
585 the PPC encoding is:
586 00b to nearest (the default)
587 01b to zero
588 10b to +infinity
589 11b to -infinity
590 Any PPC -> IR front end will have to translate these PPC
591 encodings, as encoded in the guest state, to the standard
592 encodings, to pass to the primops.
593 For reference only, the ARM VFP encoding is:
594 00b to nearest
595 01b to +infinity
596 10b to -infinity
597 11b to zero
598 Again, this will have to be converted to the standard encoding
599 to pass to primops.
600
601 If one of these conversions gets an out-of-range condition,
602 or a NaN, as an argument, the result is host-defined. On x86
603 the "integer indefinite" value 0x80..00 is produced. On PPC
604 it is either 0x80..00 or 0x7F..FF depending on the sign of
605 the argument.
606
607 On ARMvfp, when converting to a signed integer result, the
608 overflow result is 0x80..00 for negative args and 0x7F..FF
609 for positive args. For unsigned integer results it is
610 0x00..00 and 0xFF..FF respectively.
611
612 Rounding is required whenever the destination type cannot
613 represent exactly all values of the source type.
614 */
615 Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
616 Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
617 Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
618 Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */
619
620 Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */
621
622 Iop_I16StoF64, /* signed I16 -> F64 */
623 Iop_I32StoF64, /* signed I32 -> F64 */
624 Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
625 Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
626 Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */
627
628 Iop_I32UtoF64, /* unsigned I32 -> F64 */
629
630 Iop_F32toI16S, /* IRRoundingMode(I32) x F32 -> signed I16 */
631 Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
632 Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */
633
634 Iop_I16StoF32, /* signed I16 -> F32 */
635 Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
636 Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */
637
638 /* Conversion between floating point formats */
639 Iop_F32toF64, /* F32 -> F64 */
640 Iop_F64toF32, /* IRRoundingMode(I32) x F64 -> F32 */
641
642 /* Reinterpretation. Take an F64 and produce an I64 with
643 the same bit pattern, or vice versa. */
644 Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
645 Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
646
647 /* Support for 128-bit floating point */
648 Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */
649 Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */
650 Iop_F128LOtoF64,/* F128 -> low half of F128 into a F64 register */
651
652 /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
653 Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
654
655 /* :: F128 -> F128 */
656 Iop_NegF128, Iop_AbsF128,
657
658 /* :: IRRoundingMode(I32) x F128 -> F128 */
659 Iop_SqrtF128,
660
661 Iop_I32StoF128, /* signed I32 -> F128 */
662 Iop_I64StoF128, /* signed I64 -> F128 */
663 Iop_F32toF128, /* F32 -> F128 */
664 Iop_F64toF128, /* F64 -> F128 */
665
666 Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32 */
667 Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64 */
668 Iop_F128toF64, /* IRRoundingMode(I32) x F128 -> F64 */
669 Iop_F128toF32, /* IRRoundingMode(I32) x F128 -> F32 */
670
671 /* --- guest x86/amd64 specifics, not mandated by 754. --- */
672
673 /* Binary ops, with rounding. */
674 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
675 Iop_AtanF64, /* FPATAN, arctan(arg1/arg2) */
676 Iop_Yl2xF64, /* FYL2X, arg1 * log2(arg2) */
677 Iop_Yl2xp1F64, /* FYL2XP1, arg1 * log2(arg2+1.0) */
678 Iop_PRemF64, /* FPREM, non-IEEE remainder(arg1/arg2) */
679 Iop_PRemC3210F64, /* C3210 flags resulting from FPREM, :: I32 */
680 Iop_PRem1F64, /* FPREM1, IEEE remainder(arg1/arg2) */
681 Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
682 Iop_ScaleF64, /* FSCALE, arg1 * (2^RoundTowardsZero(arg2)) */
683 /* Note that on x86 guest, PRem1{C3210} has the same behaviour
684 as the IEEE mandated RemF64, except it is limited in the
685 range of its operand. Hence the partialness. */
686
687 /* Unary ops, with rounding. */
688 /* :: IRRoundingMode(I32) x F64 -> F64 */
689 Iop_SinF64, /* FSIN */
690 Iop_CosF64, /* FCOS */
691 Iop_TanF64, /* FTAN */
692 Iop_2xm1F64, /* (2^arg - 1.0) */
693 Iop_RoundF64toInt, /* F64 value to nearest integral value (still
694 as F64) */
695 Iop_RoundF32toInt, /* F32 value to nearest integral value (still
696 as F32) */
697
698 /* --- guest s390 specifics, not mandated by 754. --- */
699
700 /* Fused multiply-add/sub */
701 /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
702 (computes op3 * op2 +/- op1 */
703 Iop_MAddF32, Iop_MSubF32,
704
705 /* --- guest ppc32/64 specifics, not mandated by 754. --- */
706
707 /* Ternary operations, with rounding. */
708 /* Fused multiply-add/sub, with 112-bit intermediate
709 precision for ppc.
710 Also used to implement fused multiply-add/sub for s390. */
711 /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
712 (computes arg2 * arg3 +/- arg4) */
713 Iop_MAddF64, Iop_MSubF64,
714
715 /* Variants of the above which produce a 64-bit result but which
716 round their result to a IEEE float range first. */
717 /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
718 Iop_MAddF64r32, Iop_MSubF64r32,
719
720 /* :: F64 -> F64 */
721 Iop_Est5FRSqrt, /* reciprocal square root estimate, 5 good bits */
722 Iop_RoundF64toF64_NEAREST, /* frin */
723 Iop_RoundF64toF64_NegINF, /* frim */
724 Iop_RoundF64toF64_PosINF, /* frip */
725 Iop_RoundF64toF64_ZERO, /* friz */
726
727 /* :: F64 -> F32 */
728 Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
729
730 /* :: IRRoundingMode(I32) x F64 -> F64 */
731 Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
732 /* NB: pretty much the same as Iop_F64toF32, except no change
733 of type. */
734
735 /* :: F64 -> I32 */
736 Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
737 from FP result */
738
739 /* ------------------ 32-bit SIMD Integer ------------------ */
740
741 /* 16x2 add/sub, also signed/unsigned saturating variants */
742 Iop_Add16x2, Iop_Sub16x2,
743 Iop_QAdd16Sx2, Iop_QAdd16Ux2,
744 Iop_QSub16Sx2, Iop_QSub16Ux2,
745
746 /* 16x2 signed/unsigned halving add/sub. For each lane, these
747 compute bits 16:1 of (eg) sx(argL) + sx(argR),
748 or zx(argL) - zx(argR) etc. */
749 Iop_HAdd16Ux2, Iop_HAdd16Sx2,
750 Iop_HSub16Ux2, Iop_HSub16Sx2,
751
752 /* 8x4 add/sub, also signed/unsigned saturating variants */
753 Iop_Add8x4, Iop_Sub8x4,
754 Iop_QAdd8Sx4, Iop_QAdd8Ux4,
755 Iop_QSub8Sx4, Iop_QSub8Ux4,
756
757 /* 8x4 signed/unsigned halving add/sub. For each lane, these
758 compute bits 8:1 of (eg) sx(argL) + sx(argR),
759 or zx(argL) - zx(argR) etc. */
760 Iop_HAdd8Ux4, Iop_HAdd8Sx4,
761 Iop_HSub8Ux4, Iop_HSub8Sx4,
762
763 /* 8x4 sum of absolute unsigned differences. */
764 Iop_Sad8Ux4,
765
766 /* MISC (vector integer cmp != 0) */
767 Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
768
769 /* ------------------ 64-bit SIMD FP ------------------------ */
770
      /* Conversion to/from int */
772 Iop_I32UtoFx2, Iop_I32StoFx2, /* I32x4 -> F32x4 */
773 Iop_FtoI32Ux2_RZ, Iop_FtoI32Sx2_RZ, /* F32x4 -> I32x4 */
774 /* Fixed32 format is floating-point number with fixed number of fraction
775 bits. The number of fraction bits is passed as a second argument of
776 type I8. */
777 Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
778 Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
779
780 /* Binary operations */
781 Iop_Max32Fx2, Iop_Min32Fx2,
782 /* Pairwise Min and Max. See integer pairwise operations for more
783 details. */
784 Iop_PwMax32Fx2, Iop_PwMin32Fx2,
785 /* Note: For the following compares, the arm front-end assumes a
786 nan in a lane of either argument returns zero for that lane. */
787 Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
788
789 /* Vector Reciprocal Estimate finds an approximate reciprocal of each
790 element in the operand vector, and places the results in the destination
791 vector. */
792 Iop_Recip32Fx2,
793
794 /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
795 Note, that if one of the arguments is zero and another one is infinity
796 of arbitrary sign the result of the operation is 2.0. */
797 Iop_Recps32Fx2,
798
799 /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
800 square root of each element in the operand vector. */
801 Iop_Rsqrte32Fx2,
802
      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
         Note that if one of the arguments is zero and the other one is
         infinity of arbitrary sign the result of the operation is 1.5. */
806 Iop_Rsqrts32Fx2,
807
808 /* Unary */
809 Iop_Neg32Fx2, Iop_Abs32Fx2,
810
811 /* ------------------ 64-bit SIMD Integer. ------------------ */
812
813 /* MISC (vector integer cmp != 0) */
814 Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,
815
816 /* ADDITION (normal / unsigned sat / signed sat) */
817 Iop_Add8x8, Iop_Add16x4, Iop_Add32x2,
818 Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
819 Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
820
821 /* PAIRWISE operations */
822 /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
823 [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
824 Iop_PwAdd8x8, Iop_PwAdd16x4, Iop_PwAdd32x2,
825 Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
826 Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
827 Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
828 Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
829 /* Longening variant is unary. The resulting vector contains two times
830 less elements than operand, but they are two times wider.
831 Example:
832 Iop_PAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
833 where a+b and c+d are unsigned 32-bit values. */
834 Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
835 Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
836
837 /* SUBTRACTION (normal / unsigned sat / signed sat) */
838 Iop_Sub8x8, Iop_Sub16x4, Iop_Sub32x2,
839 Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
840 Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
841
842 /* ABSOLUTE VALUE */
843 Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
844
      /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial ) */
846 Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
847 Iop_Mul32Fx2,
848 Iop_MulHi16Ux4,
849 Iop_MulHi16Sx4,
      /* Polynomial multiplication treats its arguments as coefficients of
         polynomials over {0, 1}. */
852 Iop_PolynomialMul8x8,
853
854 /* Vector Saturating Doubling Multiply Returning High Half and
855 Vector Saturating Rounding Doubling Multiply Returning High Half */
856 /* These IROp's multiply corresponding elements in two vectors, double
857 the results, and place the most significant half of the final results
858 in the destination vector. The results are truncated or rounded. If
859 any of the results overflow, they are saturated. */
860 Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
861 Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
862
863 /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
864 Iop_Avg8Ux8,
865 Iop_Avg16Ux4,
866
867 /* MIN/MAX */
868 Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
869 Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
870 Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
871 Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
872
873 /* COMPARISON */
874 Iop_CmpEQ8x8, Iop_CmpEQ16x4, Iop_CmpEQ32x2,
875 Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
876 Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
877
878 /* COUNT ones / leading zeroes / leading sign bits (not including topmost
879 bit) */
880 Iop_Cnt8x8,
881 Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
882 Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
883
884 /* VECTOR x VECTOR SHIFT / ROTATE */
885 Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
886 Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
887 Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
888 Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
889
890 /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
891 Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
892 Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
893 Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
894
895 /* VECTOR x VECTOR SATURATING SHIFT */
896 Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
897 Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
898 /* VECTOR x INTEGER SATURATING SHIFT */
899 Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
900 Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1,
901 Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
902
903 /* NARROWING (binary)
904 -- narrow 2xI64 into 1xI64, hi half from left arg */
905 /* For saturated narrowing, I believe there are 4 variants of
906 the basic arithmetic operation, depending on the signedness
907 of argument and result. Here are examples that exemplify
908 what I mean:
909
910 QNarrow16Uto8U ( UShort x ) if (x >u 255) x = 255;
911 return x[7:0];
912
913 QNarrow16Sto8S ( Short x ) if (x <s -128) x = -128;
914 if (x >s 127) x = 127;
915 return x[7:0];
916
917 QNarrow16Uto8S ( UShort x ) if (x >u 127) x = 127;
918 return x[7:0];
919
920 QNarrow16Sto8U ( Short x ) if (x <s 0) x = 0;
921 if (x >s 255) x = 255;
922 return x[7:0];
923 */
924 Iop_QNarrowBin16Sto8Ux8,
925 Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
926 Iop_NarrowBin16to8x8, Iop_NarrowBin32to16x4,
927
928 /* INTERLEAVING */
929 /* Interleave lanes from low or high halves of
930 operands. Most-significant result lane is from the left
931 arg. */
932 Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
933 Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
934 /* Interleave odd/even lanes of operands. Most-significant result lane
935 is from the left arg. Note that Interleave{Odd,Even}Lanes32x2 are
936 identical to Interleave{HI,LO}32x2 and so are omitted.*/
937 Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
938 Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
939
940
941 /* CONCATENATION -- build a new value by concatenating either
942 the even or odd lanes of both operands. Note that
943 Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
944 and so are omitted. */
945 Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
946 Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
947
948 /* GET / SET elements of VECTOR
949 GET is binop (I64, I8) -> I<elem_size>
950 SET is triop (I64, I8, I<elem_size>) -> I64 */
951 /* Note: the arm back-end handles only constant second argument */
952 Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
953 Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
954
955 /* DUPLICATING -- copy value to all lanes */
956 Iop_Dup8x8, Iop_Dup16x4, Iop_Dup32x2,
957
958 /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes
959 of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
960 result.
961 It is a triop: (I64, I64, I8) -> I64 */
      /* Note: the arm back-end handles only constant third argument. */
963 Iop_Extract64,
964
965 /* REVERSE the order of elements in each Half-words, Words,
966 Double-words */
967 /* Examples:
968 Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
969 Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
970 Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
971 Iop_Reverse16_8x8,
972 Iop_Reverse32_8x8, Iop_Reverse32_16x4,
973 Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,
974
975 /* PERMUTING -- copy src bytes to dst,
976 as indexed by control vector bytes:
977 for i in 0 .. 7 . result[i] = argL[ argR[i] ]
978 argR[i] values may only be in the range 0 .. 7, else behaviour
979 is undefined. */
980 Iop_Perm8x8,
981
982 /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
         See floating-point equivalents for details. */
984 Iop_Recip32x2, Iop_Rsqrte32x2,
985
986 /* ------------------ 128-bit SIMD FP. ------------------ */
987
988 /* --- 32x4 vector FP --- */
989
990 /* binary */
991 Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
992 Iop_Max32Fx4, Iop_Min32Fx4,
993 Iop_Add32Fx2, Iop_Sub32Fx2,
994 /* Note: For the following compares, the ppc and arm front-ends assume a
995 nan in a lane of either argument returns zero for that lane. */
996 Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
997 Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
998
999 /* Vector Absolute */
1000 Iop_Abs32Fx4,
1001
1002 /* Pairwise Max and Min. See integer pairwise operations for details. */
1003 Iop_PwMax32Fx4, Iop_PwMin32Fx4,
1004
1005 /* unary */
1006 Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
1007 Iop_Neg32Fx4,
1008
1009 /* Vector Reciprocal Estimate finds an approximate reciprocal of each
1010 element in the operand vector, and places the results in the destination
1011 vector. */
1012 Iop_Recip32Fx4,
1013
1014 /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
1015 Note, that if one of the arguments is zero and another one is infinity
1016 of arbitrary sign the result of the operation is 2.0. */
1017 Iop_Recps32Fx4,
1018
1019 /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
1020 square root of each element in the operand vector. */
1021 Iop_Rsqrte32Fx4,
1022
      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
         Note that if one of the arguments is zero and the other one is infinity
         of arbitrary sign the result of the operation is 1.5. */
1026 Iop_Rsqrts32Fx4,
1027
1028
1029 /* --- Int to/from FP conversion --- */
1030 /* Unlike the standard fp conversions, these irops take no
1031 rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
1032 indicate the mode: {-inf, +inf, nearest, zero} respectively. */
1033 Iop_I32UtoFx4, Iop_I32StoFx4, /* I32x4 -> F32x4 */
1034 Iop_FtoI32Ux4_RZ, Iop_FtoI32Sx4_RZ, /* F32x4 -> I32x4 */
1035 Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 (with saturation) */
1036 Iop_RoundF32x4_RM, Iop_RoundF32x4_RP, /* round to fp integer */
1037 Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ, /* round to fp integer */
1038 /* Fixed32 format is floating-point number with fixed number of fraction
1039 bits. The number of fraction bits is passed as a second argument of
1040 type I8. */
1041 Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
1042 Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
1043
1044 /* --- Single to/from half conversion --- */
1045 /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
1046 Iop_F32toF16x4, Iop_F16toF32x4, /* F32x4 <-> F16x4 */
1047
1048 /* --- 32x4 lowest-lane-only scalar FP --- */
1049
1050 /* In binary cases, upper 3/4 is copied from first operand. In
1051 unary cases, upper 3/4 is copied from the operand. */
1052
1053 /* binary */
1054 Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
1055 Iop_Max32F0x4, Iop_Min32F0x4,
1056 Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,
1057
1058 /* unary */
1059 Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,
1060
1061 /* --- 64x2 vector FP --- */
1062
1063 /* binary */
1064 Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
1065 Iop_Max64Fx2, Iop_Min64Fx2,
1066 Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
1067
1068 /* unary */
1069 Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,
1070
1071 /* --- 64x2 lowest-lane-only scalar FP --- */
1072
1073 /* In binary cases, upper half is copied from first operand. In
1074 unary cases, upper half is copied from the operand. */
1075
1076 /* binary */
1077 Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
1078 Iop_Max64F0x2, Iop_Min64F0x2,
1079 Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,
1080
1081 /* unary */
1082 Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,
1083
1084 /* --- pack / unpack --- */
1085
1086 /* 64 <-> 128 bit vector */
1087 Iop_V128to64, // :: V128 -> I64, low half
1088 Iop_V128HIto64, // :: V128 -> I64, high half
1089 Iop_64HLtoV128, // :: (I64,I64) -> V128
1090
1091 Iop_64UtoV128,
1092 Iop_SetV128lo64,
1093
1094 /* 32 <-> 128 bit vector */
1095 Iop_32UtoV128,
1096 Iop_V128to32, // :: V128 -> I32, lowest lane
1097 Iop_SetV128lo32, // :: (V128,I32) -> V128
1098
1099 /* ------------------ 128-bit SIMD Integer. ------------------ */
1100
1101 /* BITWISE OPS */
1102 Iop_NotV128,
1103 Iop_AndV128, Iop_OrV128, Iop_XorV128,
1104
1105 /* VECTOR SHIFT (shift amt :: Ity_I8) */
1106 Iop_ShlV128, Iop_ShrV128,
1107
1108 /* MISC (vector integer cmp != 0) */
1109 Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
1110
1111 /* ADDITION (normal / unsigned sat / signed sat) */
1112 Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2,
1113 Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
1114 Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
1115
1116 /* SUBTRACTION (normal / unsigned sat / signed sat) */
1117 Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2,
1118 Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
1119 Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
1120
1121 /* MULTIPLICATION (normal / high half of signed/unsigned) */
1122 Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4,
1123 Iop_MulHi16Ux8, Iop_MulHi32Ux4,
1124 Iop_MulHi16Sx8, Iop_MulHi32Sx4,
1125 /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
1126 Iop_MullEven8Ux16, Iop_MullEven16Ux8,
1127 Iop_MullEven8Sx16, Iop_MullEven16Sx8,
1128 /* FIXME: document these */
1129 Iop_Mull8Ux8, Iop_Mull8Sx8,
1130 Iop_Mull16Ux4, Iop_Mull16Sx4,
1131 Iop_Mull32Ux2, Iop_Mull32Sx2,
1132 /* Vector Saturating Doubling Multiply Returning High Half and
1133 Vector Saturating Rounding Doubling Multiply Returning High Half */
1134 /* These IROp's multiply corresponding elements in two vectors, double
1135 the results, and place the most significant half of the final results
1136 in the destination vector. The results are truncated or rounded. If
1137 any of the results overflow, they are saturated. */
1138 Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4,
1139 Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
1140 /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
1141 Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
      /* Polynomial multiplication treats its arguments as coefficients of
         polynomials over {0, 1}. */
1144 Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
1145 Iop_PolynomialMull8x8, /* (I64, I64) -> V128 */
1146
1147 /* PAIRWISE operations */
1148 /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
1149 [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
1150 Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
1151 Iop_PwAdd32Fx2,
1152 /* Longening variant is unary. The resulting vector contains two times
1153 less elements than operand, but they are two times wider.
1154 Example:
1155 Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
1156 where a+b and c+d are unsigned 32-bit values. */
1157 Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
1158 Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
1159
1160 /* ABSOLUTE VALUE */
1161 Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,
1162
1163 /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
1164 Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
1165 Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
1166
1167 /* MIN/MAX */
1168 Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4,
1169 Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4,
1170 Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4,
1171 Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
1172
1173 /* COMPARISON */
1174 Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2,
1175 Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
1176 Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
1177
1178 /* COUNT ones / leading zeroes / leading sign bits (not including topmost
1179 bit) */
1180 Iop_Cnt8x16,
1181 Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
1182 Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,
1183
1184 /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
1185 Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
1186 Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
1187 Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
1188
1189 /* VECTOR x VECTOR SHIFT / ROTATE */
1190 Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
1191 Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
1192 Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
1193 Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
1194 Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,
1195
1196 /* VECTOR x VECTOR SATURATING SHIFT */
1197 Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
1198 Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
1199 /* VECTOR x INTEGER SATURATING SHIFT */
1200 Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
1201 Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2,
1202 Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
1203
1204 /* NARROWING (binary)
1205 -- narrow 2xV128 into 1xV128, hi half from left arg */
1206 /* See comments above w.r.t. U vs S issues in saturated narrowing. */
1207 Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
1208 Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
1209 Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
1210 Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,
1211
1212 /* NARROWING (unary) -- narrow V128 into I64 */
1213 Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
1214 /* Saturating narrowing from signed source to signed/unsigned destination */
1215 Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
1216 Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
1217 /* Saturating narrowing from unsigned source to unsigned destination */
1218 Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,
1219
1220 /* WIDENING -- sign or zero extend each element of the argument
1221 vector to the twice original size. The resulting vector consists of
1222 the same number of elements but each element and the vector itself
1223 are twice as wide.
1224 All operations are I64->V128.
1225 Example
1226 Iop_Widen32Sto64x2( [a, b] ) = [c, d]
1227 where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
1228 Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
1229 Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,
1230
1231 /* INTERLEAVING */
1232 /* Interleave lanes from low or high halves of
1233 operands. Most-significant result lane is from the left
1234 arg. */
1235 Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
1236 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
1237 Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
1238 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
1239 /* Interleave odd/even lanes of operands. Most-significant result lane
1240 is from the left arg. */
1241 Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
1242 Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
1243 Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
1244
1245 /* CONCATENATION -- build a new value by concatenating either
1246 the even or odd lanes of both operands. */
1247 Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
1248 Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
1249
1250 /* GET elements of VECTOR
1251 GET is binop (V128, I8) -> I<elem_size> */
1252 /* Note: the arm back-end handles only constant second argument. */
1253 Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
1254
1255 /* DUPLICATING -- copy value to all lanes */
1256 Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4,
1257
1258 /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes
1259 of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
1260 result.
1261 It is a triop: (V128, V128, I8) -> V128 */
1262 /* Note: the ARM back end handles only constant arg3 in this operation. */
1263 Iop_ExtractV128,
1264
1265 /* REVERSE the order of elements in each Half-words, Words,
1266 Double-words */
1267 /* Examples:
1268 Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
1269 Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
1270 Iop_Reverse16_8x16,
1271 Iop_Reverse32_8x16, Iop_Reverse32_16x8,
1272 Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,
1273
1274 /* PERMUTING -- copy src bytes to dst,
1275 as indexed by control vector bytes:
1276 for i in 0 .. 15 . result[i] = argL[ argR[i] ]
1277 argR[i] values may only be in the range 0 .. 15, else behaviour
1278 is undefined. */
1279 Iop_Perm8x16,
1280
1281 /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
         See floating-point equivalents for details. */
1283 Iop_Recip32x4, Iop_Rsqrte32x4
1284 }
1285 IROp;
1286
/* Pretty-print an op. */
extern void ppIROp ( IROp );


/* Encoding of IEEE754-specified rounding modes.  This is the same as
   the encoding used by Intel IA32 to indicate x87 rounding mode.
   Note, various front and back ends rely on the actual numerical
   values of these, so do not change them. */
typedef
   enum { 
      Irrm_NEAREST = 0,  /* round to nearest */
      Irrm_NegINF  = 1,  /* round towards minus infinity */
      Irrm_PosINF  = 2,  /* round towards plus infinity */
      Irrm_ZERO    = 3   /* round towards zero (truncate) */
   }
   IRRoundingMode;
1303
/* Floating point comparison result values, as created by Iop_CmpF64.
   This is also derived from what IA32 does.  Do not change the
   values; front/back ends depend on this exact encoding. */
typedef
   enum {
      Ircr_UN = 0x45,  /* unordered */
      Ircr_LT = 0x01,  /* less than */
      Ircr_GT = 0x00,  /* greater than */
      Ircr_EQ = 0x40   /* equal */
   }
   IRCmpF64Result;

/* F32 and F128 comparisons produce results with the same encoding. */
typedef IRCmpF64Result IRCmpF32Result;
typedef IRCmpF64Result IRCmpF128Result;
1317
/* ------------------ Expressions ------------------ */

/* The different kinds of expressions.  Their meaning is explained below
   in the comments for IRExpr. */
typedef
   enum { 
      Iex_Binder=0x15000, /* used only in pattern matching inside Vex */
      Iex_Get,    /* read guest register at a fixed offset */
      Iex_GetI,   /* read guest register at a non-fixed (computed) offset */
      Iex_RdTmp,  /* the value held by a temporary */
      Iex_Qop,    /* quaternary (4-operand) operation */
      Iex_Triop,  /* ternary operation */
      Iex_Binop,  /* binary operation */
      Iex_Unop,   /* unary operation */
      Iex_Load,   /* load from memory (normal load, not load-linked) */
      Iex_Const,  /* constant-valued expression */
      Iex_Mux0X,  /* strict if-then-else: expr0 if cond is zero, else exprX */
      Iex_CCall   /* call to a pure (no side-effects) helper C function */
   }
   IRExprTag;
1338
/* An expression.  Stored as a tagged union.  'tag' indicates what kind
   of expression this is.  'Iex' is the union that holds the fields.  If
   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
   expression, and the fields can be accessed with
   'e.Iex.Load.<fieldname>'.

   For each kind of expression, we show what it looks like when
   pretty-printed with ppIRExpr().
*/
typedef
   struct _IRExpr
   IRExpr;

struct _IRExpr {
   IRExprTag tag;
   union {
      /* Used only in pattern matching within Vex.  Should not be seen
         outside of Vex. */
      struct {
         Int binder;
      } Binder;

      /* Read a guest register, at a fixed offset in the guest state.
         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
      */
      struct {
         Int offset;    /* Offset into the guest state */
         IRType ty;     /* Type of the value being read */
      } Get;

      /* Read a guest register at a non-fixed offset in the guest
         state.  This allows circular indexing into parts of the guest
         state, which is essential for modelling situations where the
         identity of guest registers is not known until run time.  One
         example is the x87 FP register stack.

         The part of the guest state to be treated as a circular array
         is described in the IRRegArray 'descr' field.  It holds the
         offset of the first element in the array, the type of each
         element, and the number of elements.

         The array index is indicated rather indirectly, in a way
         which makes optimisation easy: as the sum of variable part
         (the 'ix' field) and a constant offset (the 'bias' field).

         Since the indexing is circular, the actual array index to use
         is computed as (ix + bias) % num-of-elems-in-the-array.

         Here's an example.  The description

            (96:8xF64)[t39,-7]

         describes an array of 8 F64-typed values, the
         guest-state-offset of the first being 96.  This array is
         being indexed at (t39 - 7) % 8.

         It is important to get the array size/type exactly correct
         since IR optimisation looks closely at such info in order to
         establish aliasing/non-aliasing between separate GetI and
         PutI events, which is used to establish when they can be
         reordered, etc.  Putting incorrect info in will lead to
         obscure IR optimisation bugs.

            ppIRExpr output: GETI<descr>[<ix>,<bias>]
                         eg. GETI(128:8xI8)[t1,0]
      */
      struct {
         IRRegArray* descr; /* Part of guest state treated as circular */
         IRExpr* ix;        /* Variable part of index into array */
         Int bias;          /* Constant offset part of index into array */
      } GetI;

      /* The value held by a temporary.
         ppIRExpr output: t<tmp>, eg. t1
      */
      struct {
         IRTemp tmp;       /* The temporary number */
      } RdTmp;

      /* A quaternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
                      eg. MAddF64r32(t1, t2, t3, t4)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
         IRExpr* arg3;     /* operand 3 */
         IRExpr* arg4;     /* operand 4 */
      } Qop;

      /* A ternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
                      eg. MulF64(1, 2.0, 3.0)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
         IRExpr* arg3;     /* operand 3 */
      } Triop;

      /* A binary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
      } Binop;

      /* A unary operation.
         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
      */
      struct {
         IROp op;          /* op-code */
         IRExpr* arg;      /* operand */
      } Unop;

      /* A load from memory -- a normal load, not a load-linked.
         Load-Linkeds (and Store-Conditionals) are instead represented
         by IRStmt.LLSC since Load-Linkeds have side effects and so
         are not semantically valid IRExpr's.
         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
      */
      struct {
         IREndness end;    /* Endian-ness of the load */
         IRType ty;        /* Type of the loaded value */
         IRExpr* addr;     /* Address being loaded from */
      } Load;

      /* A constant-valued expression.
         ppIRExpr output: <con>, eg. 0x4:I32
      */
      struct {
         IRConst* con;     /* The constant itself */
      } Const;

      /* A call to a pure (no side-effects) helper C function.

         With the 'cee' field, 'name' is the function's name.  It is
         only used for pretty-printing purposes.  The address to call
         (host address, of course) is stored in the 'addr' field
         inside 'cee'.

         The 'args' field is a NULL-terminated array of arguments.
         The stated return IRType, and the implied argument types,
         must match that of the function being called well enough so
         that the back end can actually generate correct code for the
         call.

         The called function **must** satisfy the following:

         * no side effects -- must be a pure function, the result of
           which depends only on the passed parameters.

         * it may not look at, nor modify, any of the guest state
           since that would hide guest state transitions from
           instrumenters

         * it may not access guest memory, since that would hide
           guest memory transactions from the instrumenters

         This is restrictive, but makes the semantics clean, and does
         not interfere with IR optimisation.

         If you want to call a helper which can mess with guest state
         and/or memory, instead use Ist_Dirty.  This is a lot more
         flexible, but you have to give a bunch of details about what
         the helper does (and you better be telling the truth,
         otherwise any derived instrumentation will be wrong).  Also
         Ist_Dirty inhibits various IR optimisations and so can cause
         quite poor code to be generated.  Try to avoid it.

         ppIRExpr output: <cee>(<args>):<retty>
                      eg. foo{0x80489304}(t1, t2):I32
      */
      struct {
         IRCallee* cee;    /* Function to call. */
         IRType retty;     /* Type of return value. */
         IRExpr** args;    /* Vector of argument expressions. */
      }  CCall;

      /* A ternary if-then-else operator.  It returns expr0 if cond is
         zero, exprX otherwise.  Note that it is STRICT, ie. both
         expr0 and exprX are evaluated in all cases.

         ppIRExpr output: Mux0X(<cond>,<expr0>,<exprX>),
                         eg. Mux0X(t6,t7,t8)
      */
      struct {
         IRExpr* cond;     /* Condition */
         IRExpr* expr0;    /* Result when cond is zero */
         IRExpr* exprX;    /* Result when cond is nonzero */
      } Mux0X;
   } Iex;
};
1536
/* Expression constructors.  Each builds an IRExpr with the
   corresponding tag (Iex_Binder, Iex_Get, etc.). */
extern IRExpr* IRExpr_Binder ( Int binder );
extern IRExpr* IRExpr_Get ( Int off, IRType ty );
extern IRExpr* IRExpr_GetI ( IRRegArray* descr, IRExpr* ix, Int bias );
extern IRExpr* IRExpr_RdTmp ( IRTemp tmp );
extern IRExpr* IRExpr_Qop ( IROp op, IRExpr* arg1, IRExpr* arg2,
                            IRExpr* arg3, IRExpr* arg4 );
extern IRExpr* IRExpr_Triop ( IROp op, IRExpr* arg1,
                              IRExpr* arg2, IRExpr* arg3 );
extern IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 );
extern IRExpr* IRExpr_Unop ( IROp op, IRExpr* arg );
extern IRExpr* IRExpr_Load ( IREndness end, IRType ty, IRExpr* addr );
extern IRExpr* IRExpr_Const ( IRConst* con );
extern IRExpr* IRExpr_CCall ( IRCallee* cee, IRType retty, IRExpr** args );
extern IRExpr* IRExpr_Mux0X ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );

/* Deep-copy an IRExpr. */
extern IRExpr* deepCopyIRExpr ( IRExpr* );

/* Pretty-print an IRExpr. */
extern void ppIRExpr ( IRExpr* );

/* NULL-terminated IRExpr vector constructors, suitable for
   use as arg lists in clean/dirty helper calls. */
extern IRExpr** mkIRExprVec_0 ( void );
extern IRExpr** mkIRExprVec_1 ( IRExpr* );
extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr* );
extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr*, IRExpr*);

/* IRExpr vector copiers:
   - shallowCopy: shallow-copy (ie. create a new vector that shares the
     elements with the original).
   - deepCopy: deep-copy (ie. create a completely new vector). */
extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
extern IRExpr** deepCopyIRExprVec ( IRExpr** );

/* Make a constant expression from the given host word taking into
   account (of course) the host word size. */
extern IRExpr* mkIRExpr_HWord ( HWord );

/* Convenience function for constructing clean helper calls
   (an Iex_CCall with the given return type, regparm hint,
   printable name, host address and argument vector). */
extern
IRExpr* mkIRExprCCall ( IRType retty,
                        Int regparms, HChar* name, void* addr,
                        IRExpr** args );
1591
1592
1593 /* Convenience functions for atoms (IRExprs which are either Iex_Tmp or
1594 * Iex_Const). */
isIRAtom(IRExpr * e)1595 static inline Bool isIRAtom ( IRExpr* e ) {
1596 return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
1597 }
1598
1599 /* Are these two IR atoms identical? Causes an assertion
1600 failure if they are passed non-atoms. */
1601 extern Bool eqIRAtom ( IRExpr*, IRExpr* );
1602
1603
/* ------------------ Jump kinds ------------------ */

/* This describes hints which can be passed to the dispatcher at guest
   control-flow transfer points.

   Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
   guest_TISTART and guest_TILEN, which specify the start and length
   of the region to be invalidated.  These are both the size of a
   guest word.  It is the responsibility of the relevant toIR.c to
   ensure that these are filled in with suitable values before issuing
   a jump of kind Ijk_TInval.

   Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
   pseudo-register guest_EMWARN, which is 32-bits regardless of the
   host or guest word size.  That register should be made to hold an
   EmWarn_* value to indicate the reason for the exit.

   In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
   cannot continue) and so the jump destination can be anything.

   Re Ijk_Sys_ (syscall jumps): the guest state must have a
   pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
   word.  Front ends should set this to be the IP at the most recently
   executed kernel-entering (system call) instruction.  This makes it
   very much easier (viz, actually possible at all) to back up the
   guest to restart a syscall that has been interrupted by a signal.
*/
typedef
   enum {
      Ijk_Boring=0x16000, /* not interesting; just goto next */
      Ijk_Call,           /* guest is doing a call */
      Ijk_Ret,            /* guest is doing a return */
      Ijk_ClientReq,      /* do guest client req before continuing */
      Ijk_Yield,          /* client is yielding to thread scheduler */
      Ijk_YieldNoRedir,   /* client is yielding to thread scheduler AND jump to
                             un-redirected guest addr */
      Ijk_EmWarn,         /* report emulation warning (via guest_EMWARN,
                             see comment above) before continuing */
      Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
      Ijk_NoDecode,       /* next instruction cannot be decoded */
      Ijk_MapFail,        /* Vex-provided address translation failed */
      Ijk_TInval,         /* Invalidate translations (region given by
                             guest_TISTART/guest_TILEN, see comment above)
                             before continuing. */
      Ijk_NoRedir,        /* Jump to un-redirected guest addr */
      Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
      Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
      Ijk_SigBUS,         /* current instruction synths SIGBUS */
      /* Unfortunately, various guest-dependent syscall kinds.  They
	 all mean: do a syscall before continuing. */
      Ijk_Sys_syscall,    /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
      Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
      Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
      Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
      Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
      Ijk_Sys_sysenter    /* x86 'sysenter'.  guest_EIP becomes
                             invalid at the point this happens. */
   }
   IRJumpKind;

/* Pretty-print a jump kind. */
extern void ppIRJumpKind ( IRJumpKind );
1662
1663
1664 /* ------------------ Dirty helper calls ------------------ */
1665
1666 /* A dirty call is a flexible mechanism for calling (possibly
1667 conditionally) a helper function or procedure. The helper function
1668 may read, write or modify client memory, and may read, write or
1669 modify client state. It can take arguments and optionally return a
1670 value. It may return different results and/or do different things
1671 when called repeatedly with the same arguments, by means of storing
1672 private state.
1673
1674 If a value is returned, it is assigned to the nominated return
1675 temporary.
1676
1677 Dirty calls are statements rather than expressions for obvious
1678 reasons. If a dirty call is marked as writing guest state, any
1679 values derived from the written parts of the guest state are
1680 invalid. Similarly, if the dirty call is stated as writing
1681 memory, any loaded values are invalidated by it.
1682
1683 In order that instrumentation is possible, the call must state, and
1684 state correctly:
1685
1686 * whether it reads, writes or modifies memory, and if so where
1687 (only one chunk can be stated)
1688
1689 * whether it reads, writes or modifies guest state, and if so which
1690 pieces (several pieces may be stated, and currently their extents
1691 must be known at translation-time).
1692
1693 Normally, code is generated to pass just the args to the helper.
1694 However, if .needsBBP is set, then an extra first argument is
1695 passed, which is the baseblock pointer, so that the callee can
1696 access the guest state. It is invalid for .nFxState to be zero
1697 but .needsBBP to be True, since .nFxState==0 is a claim that the
1698 call does not access guest state.
1699
   IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
   arguments are evaluated REGARDLESS of the guard value.  The
   relative order of arg evaluation and guard evaluation is
   unspecified.
1704 */
1705
#define VEX_N_FXSTATE  7   /* enough for FXSAVE/FXRSTOR on x86 */

/* Effects on resources (eg. registers, memory locations).  Ifx_None
   is the "no effect / not stated" marker (eg. IRDirty.mFx when there
   is no memory effect); a stated effect must be Read, Write or
   Modify. */
typedef
   enum {
      Ifx_None = 0x17000, /* no effect */
      Ifx_Read, /* reads the resource */
      Ifx_Write, /* writes the resource */
      Ifx_Modify /* modifies the resource */
   }
   IREffect;

/* Pretty-print an IREffect */
extern void ppIREffect ( IREffect );
1720
1721
/* A dirty helper call.  See the long comment above for the rules a
   correct IRDirty must obey. */
typedef
   struct {
      /* What to call, and details of args/results */
      IRCallee* cee; /* where to call */
      IRExpr* guard; /* :: Ity_Bit.  Controls whether call happens */
      IRExpr** args; /* arg list, ends in NULL */
      IRTemp tmp; /* to assign result to, or IRTemp_INVALID if none */

      /* Mem effects; we allow only one R/W/M region to be stated */
      IREffect mFx; /* indicates memory effects, if any */
      IRExpr* mAddr; /* of access, or NULL if mFx==Ifx_None */
      Int mSize; /* of access, or zero if mFx==Ifx_None */

      /* Guest state effects; up to N allowed */
      Bool needsBBP; /* True => also pass guest state ptr to callee */
      Int nFxState; /* must be 0 .. VEX_N_FXSTATE */
      struct {
         IREffect fx; /* read, write or modify?  Ifx_None is invalid. */
         Int offset; /* offset into the guest state of the affected piece */
         Int size; /* size, in bytes, of the affected piece */
      } fxState[VEX_N_FXSTATE];
   }
   IRDirty;

/* Pretty-print a dirty call */
extern void ppIRDirty ( IRDirty* );

/* Allocate an uninitialised dirty call */
extern IRDirty* emptyIRDirty ( void );

/* Deep-copy a dirty call */
extern IRDirty* deepCopyIRDirty ( IRDirty* );
1754
/* A handy function which takes some of the tedium out of constructing
   dirty helper calls.  The called function impliedly does not return
   any value and has a constant-True guard.  The call is marked as
   accessing neither guest state nor memory (hence the "unsafe"
   designation) -- you can change this marking later if need be.  A
   suitable IRCallee is constructed from the supplied bits.
   NOTE(review): presumably 'name' is the printable name of the
   function at 'addr', used only for pretty-printing -- verify against
   the IRCallee definition. */
extern
IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr,
                             IRExpr** args );

/* Similarly, make a zero-annotation dirty call which returns a value,
   and assign that to the given temp. */
extern
IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
                             Int regparms, HChar* name, void* addr,
                             IRExpr** args );
1771
1772
1773 /* --------------- Memory Bus Events --------------- */
1774
/* The kinds of memory bus events. */
typedef
   enum {
      Imbe_Fence=0x18000, /* memory fence */
      /* Needed only on ARM.  It cancels a reservation made by a
         preceding Linked-Load, and needs to be handed through to the
         back end, just as LL and SC themselves are. */
      Imbe_CancelReservation
   }
   IRMBusEvent;

/* Pretty-print an IRMBusEvent */
extern void ppIRMBusEvent ( IRMBusEvent );
1786
1787
1788 /* --------------- Compare and Swap --------------- */
1789
1790 /* This denotes an atomic compare and swap operation, either
1791 a single-element one or a double-element one.
1792
1793 In the single-element case:
1794
1795 .addr is the memory address.
1796 .end is the endianness with which memory is accessed
1797
1798 If .addr contains the same value as .expdLo, then .dataLo is
1799 written there, else there is no write. In both cases, the
1800 original value at .addr is copied into .oldLo.
1801
1802 Types: .expdLo, .dataLo and .oldLo must all have the same type.
1803 It may be any integral type, viz: I8, I16, I32 or, for 64-bit
1804 guests, I64.
1805
1806 .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
1807 be NULL.
1808
1809 In the double-element case:
1810
1811 .addr is the memory address.
1812 .end is the endianness with which memory is accessed
1813
1814 The operation is the same:
1815
1816 If .addr contains the same value as .expdHi:.expdLo, then
1817 .dataHi:.dataLo is written there, else there is no write. In
1818 both cases the original value at .addr is copied into
1819 .oldHi:.oldLo.
1820
1821 Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
1822 all have the same type, which may be any integral type, viz: I8,
1823 I16, I32 or, for 64-bit guests, I64.
1824
1825 The double-element case is complicated by the issue of
1826 endianness. In all cases, the two elements are understood to be
1827 located adjacently in memory, starting at the address .addr.
1828
1829 If .end is Iend_LE, then the .xxxLo component is at the lower
1830 address and the .xxxHi component is at the higher address, and
1831 each component is itself stored little-endianly.
1832
1833 If .end is Iend_BE, then the .xxxHi component is at the lower
1834 address and the .xxxLo component is at the higher address, and
1835 each component is itself stored big-endianly.
1836
1837 This allows representing more cases than most architectures can
1838 handle. For example, x86 cannot do DCAS on 8- or 16-bit elements.
1839
1840 How to know if the CAS succeeded?
1841
1842 * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
1843 then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
1844 stored at .addr, and the original value there was .oldLo (resp
1845 .oldHi:.oldLo).
1846
1847 * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
1848 then the CAS failed, and the original value at .addr was .oldLo
1849 (resp. .oldHi:.oldLo).
1850
1851 Hence it is easy to know whether or not the CAS succeeded.
1852 */
/* An atomic compare-and-swap.  Full semantics, including the
   double-element case and endianness rules, are in the long comment
   above. */
typedef
   struct {
      IRTemp oldHi; /* old value of *addr is written here */
      IRTemp oldLo;
      IREndness end; /* endianness of the data in memory */
      IRExpr* addr; /* store address */
      IRExpr* expdHi; /* expected old value at *addr */
      IRExpr* expdLo;
      IRExpr* dataHi; /* new value for *addr */
      IRExpr* dataLo;
   }
   IRCAS;

/* Pretty-print an IRCAS */
extern void ppIRCAS ( IRCAS* cas );

/* Build an IRCAS from the given pieces.  For a single-element CAS,
   oldHi must be IRTemp_INVALID and expdHi/dataHi must be NULL, per
   the semantics described above. */
extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
                        IREndness end, IRExpr* addr,
                        IRExpr* expdHi, IRExpr* expdLo,
                        IRExpr* dataHi, IRExpr* dataLo );

/* Deep-copy an IRCAS */
extern IRCAS* deepCopyIRCAS ( IRCAS* );
1874
1875 /* ------------------ Statements ------------------ */
1876
1877 /* The different kinds of statements. Their meaning is explained
1878 below in the comments for IRStmt.
1879
1880 Those marked META do not represent code, but rather extra
1881 information about the code. These statements can be removed
1882 without affecting the functional behaviour of the code, however
1883 they are required by some IR consumers such as tools that
1884 instrument the code.
1885 */
1886
typedef
   enum {
      Ist_NoOp=0x19000, /* no-op; may be freely omitted */
      Ist_IMark, /* META: instruction mark */
      Ist_AbiHint, /* META: platform ABI hint */
      Ist_Put, /* write guest register at a fixed offset */
      Ist_PutI, /* write guest register at a non-fixed offset */
      Ist_WrTmp, /* assign to a temporary */
      Ist_Store, /* normal (non-conditional) store */
      Ist_CAS, /* atomic compare-and-swap */
      Ist_LLSC, /* load-linked / store-conditional */
      Ist_Dirty, /* call a dirty (side-effecting) helper */
      Ist_MBE, /* META (maybe): memory bus event */
      Ist_Exit /* conditional side exit from the block */
   }
   IRStmtTag;
1903
/* A statement.  Stored as a tagged union.  'tag' indicates what kind
   of statement this is.  'Ist' is the union that holds the fields.
   If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
   statement, and the fields can be accessed with
   'st.Ist.Store.<fieldname>'.
1909
1910 For each kind of statement, we show what it looks like when
1911 pretty-printed with ppIRStmt().
1912 */
typedef
   struct _IRStmt {
      IRStmtTag tag;
      union {
         /* A no-op (usually resulting from IR optimisation).  Can be
            omitted without any effect.

            ppIRStmt output: IR-NoOp
         */
         struct {
         } NoOp;

         /* META: instruction mark.  Marks the start of the statements
            that represent a single machine instruction (the end of
            those statements is marked by the next IMark or the end of
            the IRSB).  Contains the address and length of the
            instruction.

            It also contains a delta value.  The delta must be
            subtracted from a guest program counter value before
            attempting to establish, by comparison with the address
            and length values, whether or not that program counter
            value refers to this instruction.  For x86, amd64, ppc32,
            ppc64 and arm, the delta value is zero.  For Thumb
            instructions, the delta value is one.  This is because, on
            Thumb, guest PC values (guest_R15T) are encoded using the
            top 31 bits of the instruction address and a 1 in the lsb;
            hence they appear to be (numerically) 1 past the start of
            the instruction they refer to.  IOW, guest_R15T on ARM
            holds a standard ARM interworking address.

            ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
                         eg. ------ IMark(0x4000792, 5, 0) ------,
         */
         struct {
            Addr64 addr; /* instruction address */
            Int len; /* instruction length */
            UChar delta; /* addr = program counter as encoded in guest state
                            - delta */
         } IMark;

         /* META: An ABI hint, which says something about this
            platform's ABI.

            At the moment, the only AbiHint is one which indicates
            that a given chunk of address space, [base .. base+len-1],
            has become undefined.  This is used on amd64-linux and
            some ppc variants to pass stack-redzoning hints to whoever
            wants to see them.  It also indicates the address of the
            next (dynamic) instruction that will be executed.  This is
            to help Memcheck with origin tracking.

            ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
                         eg. ====== AbiHint(t1, 16, t2) ======
         */
         struct {
            IRExpr* base; /* Start of undefined chunk */
            Int len; /* Length of undefined chunk */
            IRExpr* nia; /* Address of next (guest) insn */
         } AbiHint;

         /* Write a guest register, at a fixed offset in the guest state.
            ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
         */
         struct {
            Int offset; /* Offset into the guest state */
            IRExpr* data; /* The value to write */
         } Put;

         /* Write a guest register, at a non-fixed offset in the guest
            state.  See the comment for GetI expressions for more
            information.

            ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
                         eg. PUTI(64:8xF64)[t5,0] = t1
         */
         struct {
            IRRegArray* descr; /* Part of guest state treated as circular */
            IRExpr* ix; /* Variable part of index into array */
            Int bias; /* Constant offset part of index into array */
            IRExpr* data; /* The value to write */
         } PutI;

         /* Assign a value to a temporary.  Note that SSA rules require
            each tmp is only assigned to once.  IR sanity checking will
            reject any block containing a temporary which is not assigned
            to exactly once.

            ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
         */
         struct {
            IRTemp tmp; /* Temporary (LHS of assignment) */
            IRExpr* data; /* Expression (RHS of assignment) */
         } WrTmp;

         /* Write a value to memory.  This is a normal store, not a
            Store-Conditional.  To represent a Store-Conditional,
            instead use IRStmt.LLSC.
            ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
         */
         struct {
            IREndness end; /* Endianness of the store */
            IRExpr* addr; /* store address */
            IRExpr* data; /* value to write */
         } Store;

         /* Do an atomic compare-and-swap operation.  Semantics are
            described above on a comment at the definition of IRCAS.

            ppIRStmt output:
               t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
            eg
               t1 = CASle(t2 :: t3->Add32(t3,1))
               which denotes a 32-bit atomic increment
               of a value at address t2

            A double-element CAS may also be denoted, in which case <tmp>,
            <expected> and <new> are all pairs of items, separated by
            commas.
         */
         struct {
            IRCAS* details;
         } CAS;

         /* Either Load-Linked or Store-Conditional, depending on
            STOREDATA.

            If STOREDATA is NULL then this is a Load-Linked, meaning
            that data is loaded from memory as normal, but a
            'reservation' for the address is also lodged in the
            hardware.

               result = Load-Linked(addr, end)

            The data transfer type is the type of RESULT (I32, I64,
            etc).  ppIRStmt output:

               result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)

            If STOREDATA is not NULL then this is a Store-Conditional,
            hence:

               result = Store-Conditional(addr, storedata, end)

            The data transfer type is the type of STOREDATA and RESULT
            has type Ity_I1. The store may fail or succeed depending
            on the state of a previously lodged reservation on this
            address.  RESULT is written 1 if the store succeeds and 0
            if it fails.  eg ppIRStmt output:

               result = ( ST<end>-Cond(<addr>) = <storedata> )
               eg t3 = ( STbe-Cond(t1) = t2 )

            In all cases, the address must be naturally aligned for
            the transfer type -- any misaligned addresses should be
            caught by a dominating IR check and side exit.  This
            alignment restriction exists because on at least some
            LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
            misaligned addresses, and we have to actually generate
            stwcx. on the host, and we don't want it trapping on the
            host.

            Summary of rules for transfer type:
              STOREDATA == NULL (LL):
                transfer type = type of RESULT
              STOREDATA != NULL (SC):
                transfer type = type of STOREDATA, and RESULT :: Ity_I1
         */
         struct {
            IREndness end;
            IRTemp result;
            IRExpr* addr;
            IRExpr* storedata; /* NULL => LL, non-NULL => SC */
         } LLSC;

         /* Call (possibly conditionally) a C function that has side
            effects (ie. is "dirty").  See the comments above the
            IRDirty type declaration for more information.

            ppIRStmt output:
               t<tmp> = DIRTY <guard> <effects>
                  ::: <callee>(<args>)
            eg.
               t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
                     ::: foo{0x380035f4}(t2)
         */
         struct {
            IRDirty* details;
         } Dirty;

         /* A memory bus event - a fence, or cancellation of an LL
            reservation (see IRMBusEvent above).  IR optimisation
            treats all these as fences across which no memory
            references may be moved.
            ppIRStmt output: MBusEvent-Fence,
                             MBusEvent-CancelReservation.
         */
         struct {
            IRMBusEvent event;
         } MBE;

         /* Conditional exit from the middle of an IRSB.
            ppIRStmt output: if (<guard>) goto {<jk>} <dst>
                         eg. if (t69) goto {Boring} 0x4000AAA:I32
         */
         struct {
            IRExpr* guard; /* Conditional expression */
            IRJumpKind jk; /* Jump kind */
            IRConst* dst; /* Jump target (constant only) */
         } Exit;
      } Ist;
   }
   IRStmt;
2125
2126 /* Statement constructors. */
2127 extern IRStmt* IRStmt_NoOp ( void );
2128 extern IRStmt* IRStmt_IMark ( Addr64 addr, Int len, UChar delta );
2129 extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
2130 extern IRStmt* IRStmt_Put ( Int off, IRExpr* data );
2131 extern IRStmt* IRStmt_PutI ( IRRegArray* descr, IRExpr* ix, Int bias,
2132 IRExpr* data );
2133 extern IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data );
2134 extern IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data );
2135 extern IRStmt* IRStmt_CAS ( IRCAS* details );
2136 extern IRStmt* IRStmt_LLSC ( IREndness end, IRTemp result,
2137 IRExpr* addr, IRExpr* storedata );
2138 extern IRStmt* IRStmt_Dirty ( IRDirty* details );
2139 extern IRStmt* IRStmt_MBE ( IRMBusEvent event );
2140 extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
2141
2142 /* Deep-copy an IRStmt. */
2143 extern IRStmt* deepCopyIRStmt ( IRStmt* );
2144
2145 /* Pretty-print an IRStmt. */
2146 extern void ppIRStmt ( IRStmt* );
2147
2148
2149 /* ------------------ Basic Blocks ------------------ */
2150
2151 /* Type environments: a bunch of statements, expressions, etc, are
2152 incomplete without an environment indicating the type of each
2153 IRTemp. So this provides one. IR temporaries are really just
2154 unsigned ints and so this provides an array, 0 .. n_types_used-1 of
2155 them.
2156 */
typedef
   struct {
      IRType* types; /* array of types, indexed by IRTemp (0 .. types_used-1) */
      Int types_size; /* allocated length of 'types' */
      Int types_used; /* number of entries currently in use */
   }
   IRTypeEnv;

/* Obtain a new IRTemp */
extern IRTemp newIRTemp ( IRTypeEnv*, IRType );

/* Deep-copy a type environment */
extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );

/* Pretty-print a type environment */
extern void ppIRTypeEnv ( IRTypeEnv* );
2173
2174
2175 /* Code blocks, which in proper compiler terminology are superblocks
2176 (single entry, multiple exit code sequences) contain:
2177
2178 - A table giving a type for each temp (the "type environment")
2179 - An expandable array of statements
2180 - An expression of type 32 or 64 bits, depending on the
2181 guest's word size, indicating the next destination if the block
2182 executes all the way to the end, without a side exit
2183 - An indication of any special actions (JumpKind) needed
2184 for this final jump.
2185
2186 "IRSB" stands for "IR Super Block".
2187 */
typedef
   struct {
      IRTypeEnv* tyenv; /* type of each temp used in this block */
      IRStmt** stmts; /* expandable array of statements */
      Int stmts_size; /* allocated length of 'stmts' */
      Int stmts_used; /* number of entries currently in use */
      IRExpr* next; /* destination if the block runs off the end
                       (guest-word-sized; see comment above) */
      IRJumpKind jumpkind; /* special action (if any) for the final jump */
   }
   IRSB;

/* Allocate a new, uninitialised IRSB */
extern IRSB* emptyIRSB ( void );

/* Deep-copy an IRSB */
extern IRSB* deepCopyIRSB ( IRSB* );

/* Deep-copy an IRSB, except for the statements list, which is set to
   be a new, empty, list of statements. */
extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );

/* Pretty-print an IRSB */
extern void ppIRSB ( IRSB* );

/* Append an IRStmt to an IRSB */
extern void addStmtToIRSB ( IRSB*, IRStmt* );
2214
2215
2216 /*---------------------------------------------------------------*/
2217 /*--- Helper functions for the IR ---*/
2218 /*---------------------------------------------------------------*/
2219
2220 /* For messing with IR type environments */
/* For messing with IR type environments */
extern IRTypeEnv* emptyIRTypeEnv ( void );

/* What is the type of this expression? */
extern IRType typeOfIRConst ( IRConst* );
extern IRType typeOfIRTemp ( IRTypeEnv*, IRTemp );
extern IRType typeOfIRExpr ( IRTypeEnv*, IRExpr* );

/* Sanity check a BB of IR */
extern void sanityCheckIRSB ( IRSB* bb,
                              HChar* caller,
                              Bool require_flatness,
                              IRType guest_word_size );
/* Check a single statement for flatness */
extern Bool isFlatIRStmt ( IRStmt* );

/* Is this any value actually in the enumeration 'IRType' ? */
extern Bool isPlausibleIRType ( IRType ty );
2237
2238 #endif /* ndef __LIBVEX_IR_H */
2239
2240
2241 /*---------------------------------------------------------------*/
2242 /*--- libvex_ir.h ---*/
2243 /*---------------------------------------------------------------*/
2244