/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"

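/* (Explanatory comment, not in the original file.)  VG_INSTR_PTR,
   VG_STACK_PTR and VG_FRAME_PTR name the architecture-specific guest
   state fields holding the instruction, stack and frame pointers
   (e.g. guest_EIP/guest_ESP/guest_EBP on x86, as can be seen in
   VG_(get_UnwindStartRegs) below); they are defined per-architecture
   elsewhere in the core headers. */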
#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
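/* Illustrative use only (added comment, not in the original file):
   given a valid tid, a tool could snapshot a thread's position with

      Addr ip = VG_(get_IP)(tid);   // current guest instruction pointer
      Addr sp = VG_(get_SP)(tid);   // current guest stack pointer

   and the setters are the mirror image, e.g. VG_(set_IP)(tid, new_ip). */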
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_tilegx)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_pc;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r54;
   regs->misc.TILEGX.r52
      = VG_(threads)[tid].arch.vex.guest_r52;
   regs->misc.TILEGX.r55
      = VG_(threads)[tid].arch.vex.guest_r55;
#  else
#    error "Unknown arch"
#  endif
}

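/* Background for the two functions below (added comment, not in the
   original file): each thread carries three copies of the guest
   register state -- the real state (shadowNo 0, tst->arch.vex) plus
   two shadow copies (vex_shadow1 and vex_shadow2) that tools can use
   to track metadata about register contents.  The offset/size pair
   addresses a byte range within the chosen VexGuestArchState. */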
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}

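/* A hedged sketch of how the callback machinery below might be used
   (added for illustration; print_gp is a hypothetical name, not from
   this file):

      static void print_gp(ThreadId tid, const HChar* name, UWord val) {
         VG_(printf)("tid %u: %s = 0x%lx\n", tid, name, val);
      }
      ...
      VG_(apply_to_GP_regs)(print_gp);

   apply_to_GPs_of_tid then invokes the callback once per
   general-purpose register of the given thread. */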
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#elif defined(VGA_tilegx)
   (*f)(tid, "r0",  vex->guest_r0 );
   (*f)(tid, "r1",  vex->guest_r1 );
   (*f)(tid, "r2",  vex->guest_r2 );
   (*f)(tid, "r3",  vex->guest_r3 );
   (*f)(tid, "r4",  vex->guest_r4 );
   (*f)(tid, "r5",  vex->guest_r5 );
   (*f)(tid, "r6",  vex->guest_r6 );
   (*f)(tid, "r7",  vex->guest_r7 );
   (*f)(tid, "r8",  vex->guest_r8 );
   (*f)(tid, "r9",  vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
   (*f)(tid, "r32", vex->guest_r32);
   (*f)(tid, "r33", vex->guest_r33);
   (*f)(tid, "r34", vex->guest_r34);
   (*f)(tid, "r35", vex->guest_r35);
   (*f)(tid, "r36", vex->guest_r36);
   (*f)(tid, "r37", vex->guest_r37);
   (*f)(tid, "r38", vex->guest_r38);
   (*f)(tid, "r39", vex->guest_r39);
   (*f)(tid, "r40", vex->guest_r40);
   (*f)(tid, "r41", vex->guest_r41);
   (*f)(tid, "r42", vex->guest_r42);
   (*f)(tid, "r43", vex->guest_r43);
   (*f)(tid, "r44", vex->guest_r44);
   (*f)(tid, "r45", vex->guest_r45);
   (*f)(tid, "r46", vex->guest_r46);
   (*f)(tid, "r47", vex->guest_r47);
   (*f)(tid, "r48", vex->guest_r48);
   (*f)(tid, "r49", vex->guest_r49);
   (*f)(tid, "r50", vex->guest_r50);
   (*f)(tid, "r51", vex->guest_r51);
   (*f)(tid, "r52", vex->guest_r52);
   (*f)(tid, "r53", vex->guest_r53);
   (*f)(tid, "r54", vex->guest_r54);
   (*f)(tid, "r55", vex->guest_r55);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}

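/* Illustrative iteration over all live thread stacks using the pair
   above (added comment, not in the original file):

      ThreadId tid; Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while (VG_(thread_stack_next)(&tid, &stack_min, &stack_max)) {
         // [stack_min, stack_max] brackets tid's client stack:
         // current SP up to the highest client stack byte.
      }
*/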
Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* For hwcaps detection on ppc32/64, s390x, arm, and mips32 we'll need
   to do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
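/* How the SIGILL probing below works (explanatory comment, not in the
   original file): each candidate instruction is executed under the
   handler installed above, bracketed by a minimal setjmp:

      have_feature = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_feature = False;        // insn trapped: not supported
      } else {
         __asm__ __volatile__(...);   // candidate insn; may raise SIGILL
      }

   If the CPU rejects the instruction, handler_unsup_insn longjmps
   back and VG_MINIMAL_SETJMP returns nonzero. */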

/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
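/* (Added note, not in the original file.)  The alignment trick above,
   (p + N) & ~(N - 1), rounds p up to the next N-byte boundary and
   requires N to be a power of two; e.g. with N = 128, p = 0x1005
   rounds to 0x1080. */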

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str = "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
       return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;
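     /* (Added note) CPUID leaf 0 returns the 12-byte vendor string in
        EBX:EDX:ECX order, which is why the copies above (here and in
        the amd64 case below) interleave the registers that way;
        typical values are "GenuineIntel" and "AuthenticAMD". */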

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
       vai.hwcaps = 0; /*baseline - no sse at all*/
       VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* Processor supports AVX instructions and XGETBV is enabled
           by OS and AVX instructions are enabled by the OS. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
           }
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2.  These are only usable if we also have
        AVX1 (plus OS support). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as, or
        later than, STFLE, so the absence of STFLE implies the absence of
        the facility we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };

     /* Set hwcaps according to the detected facilities */
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }
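     /* Facility bits are numbered from the leftmost (most significant)
        bit of the facility list outward, so facility bit n of doubleword
        0 corresponds to the mask 1ULL << (63 - n); facility bit 0 is the
        MSB of hoststfle[0]. */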

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) + //  %s %d
                   7 + 1 + 4 + 2  // machine %4d
                   + 1];  // \0
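     /* Size check: each entry prints as " %s %1u", needing at most
        name-length + 3 bytes, so with the 10 facilities above this
        works out to 10*9 + 14 + 1 = 105 bytes. */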
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: the signal handler does not return (from the kernel's
        point of view), so if it is to successfully catch a signal more
        than once, we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
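     /* Probe order: first try the v7 witness insn (PLI); if it traps,
        archlevel falls back to 5 and the v6 witness (PKHBT) is tried
        next.  If the v7 probe succeeds, the v6 block below is skipped
        entirely. */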
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
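     /* CTR_EL0.DminLine is bits [19:16] and CTR_EL0.IminLine is bits
        [3:0].  Each field holds log2 of the smallest cache line size in
        4-byte words, hence the +2 below to convert to log2 of the size
        in bytes. */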
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);

     return True;
   }

#elif defined(VGA_mips32)
   {
     /* Position of the F64 bit in the FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;
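     /* Note: model is a UInt, so the -1 failure value returned by
        VG_(get_machine_model) only matches here via the usual arithmetic
        conversion of -1 to UINT_MAX. */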

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: the signal handler does not return (from the kernel's
        point of view), so if it is to successfully catch a signal more
        than once, we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (model == VEX_PRID_COMP_MIPS) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

     /* Check whether the CPU has an FPU and 32 double-precision FP
        registers. */
     int FIR = 0;
     __asm__ __volatile__(
        "cfc1 %0, $0"  "\n\t"
        : "=r" (FIR)
     );
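     /* cfc1 $0 copies coprocessor 1's control register 0, the Floating
        Point Implementation Register (FIR), into a GPR.  Its F64 bit
        (bit 22, tested below) is set when the FPU has 64-bit registers,
        and hence 32 double-precision registers are available. */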
     if (FIR & (1 << FP64)) {
        vai.hwcaps |= VEX_PRID_CPU_32FPR;
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_tilegx)
   {
     va = VexArchTILEGX;
     vai.hwcaps = VEX_HWCAPS_TILEGX_BASE;
     vai.endness = VexEndnessLE;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (e.g. AVX or non-AVX, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   the minimum cache line size they are prepared to simulate.
   Basically we require that the minimum cache line size is at least
   as large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most two cache lines.
*/
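/* Worked example: with a 32-byte register and a simulated line size of
   at least 32 bytes, a misaligned 32-byte transfer touches bytes in at
   most two consecutive lines.  With a 16-byte line the same transfer
   could touch three lines, which the cache simulators do not handle. */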
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  elif defined(VGA_tilegx)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux) || defined(VGO_darwin)          \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
      || defined(VGP_tilegx_linux) || defined(VGP_x86_solaris) \
      || defined(VGP_amd64_solaris)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
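   /* In effect the descriptor is
         struct { void* entry; void* toc_ptr; void* env_ptr; };
      and only the entry word matters here. */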
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
