/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
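
/* A minimal usage sketch (not part of the original file; the names
   below are illustrative and amd64-specific): a caller could read the
   shadow-1 copy of a single guest register by addressing it via its
   offset in the guest state, e.g.

      ULong shadow_rax;
      VG_(get_shadow_regs_area)( tid, (UChar*)&shadow_rax, 1,
                                 offsetof(VexGuestAMD64State, guest_RAX),
                                 sizeof(shadow_rax) );
*/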


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
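
/* Usage sketch, with a hypothetical callback (illustrative only, and
   assuming the usual VG_(printf) formatting; not part of this file):

      static void print_gp ( ThreadId tid, const HChar* name, UWord val ) {
         VG_(printf)("tid %u: %s = 0x%lx\n", tid, name, val);
      }

   Calling VG_(apply_to_GP_regs)(print_gp) then visits every
   general-purpose register of every live (or exiting) thread. */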

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
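
/* The two functions above form a simple iterator over all non-empty
   thread stacks.  A typical loop, sketched from their signatures:

      ThreadId tid;
      Addr     stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         // [stack_min, stack_max] spans tid's client stack
      }
*/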

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* For hwcaps detection on ppc32/64, s390x, arm, and mips32 we'll need
   to do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
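
/* All the instruction probes below follow the same pattern (have_FOO
   stands for whichever capability flag is being tested):

      have_FOO = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FOO = False;             // insn trapped: not supported
      } else {
         __asm__ __volatile__(...);    // try one representative insn
      }

   If the instruction raises SIGILL (or SIGFPE on ppc), the handler
   longjmps back and the flag is cleared. */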


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
      { "2965", VEX_S390X_MODEL_Z13S },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* defined(VGA_s390x) */

#if defined(VGA_mips32) || defined(VGA_mips64)

/*
 * Initialize hwcaps by parsing /proc/cpuinfo. Returns False if it cannot
 * determine what CPU it is (it searches only for the models that are or may be
 * supported by Valgrind).
 */
static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
   const char *search_Cavium_str = "cpu model\t\t: Cavium";
   const char *search_Ingenic_str = "cpu model\t\t: Ingenic";
   const char *search_Loongson_str = "cpu model\t\t: ICT Loongson";
   const char *search_MIPS_str = "cpu model\t\t: MIPS";
   const char *search_Netlogic_str = "cpu model\t\t: Netlogic";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf, *isa;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_BROADCOM;
   else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
   else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_CAVIUM;
   else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_MIPS;
   else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
   else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
       vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
   else {
       /* Did not find string in the proc file. */
       vai.hwcaps = 0;
       VG_(free)(file_buf);
       return False;
   }

   isa = VG_(strstr)(file_buf, "isa\t\t\t: ");

   if (NULL != isa) {
      if (VG_(strstr) (isa, "mips32r1") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
      if (VG_(strstr) (isa, "mips32r2") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
      if (VG_(strstr) (isa, "mips32r6") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
      if (VG_(strstr) (isa, "mips64r1") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
      if (VG_(strstr) (isa, "mips64r2") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
      if (VG_(strstr) (isa, "mips64r6") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;

      /*
       * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
       * decide to change incorrect settings in
       * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
       * The current settings show mips32r1, mips32r2 and mips64r1 as
       * unsupported ISAs by Cavium MIPS CPUs.
       */
      if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
                       VEX_MIPS_CPU_ISA_M64R1;
      }
   } else {
      /*
       * Kernel does not provide information about supported ISAs.
       * Populate the isa level flags based on the CPU model. That is our
       * best guess.
       */
       switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
          case VEX_PRID_COMP_CAVIUM:
          case VEX_PRID_COMP_NETLOGIC:
             vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
             /* fall through */
          case VEX_PRID_COMP_INGENIC_E1:
          case VEX_PRID_COMP_MIPS:
             vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
             /* fall through */
          case VEX_PRID_COMP_BROADCOM:
             vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
             break;
          case VEX_PRID_COMP_LEGACY:
             if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
                vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
                              VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
             break;
          default:
             break;
       }
   }
   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGA_mips32) || defined(VGA_mips64) */

#if defined(VGP_arm64_linux)

/* Check to see whether we are running on a Cavium core, and if so auto-enable
   the fallback LLSC implementation.  See #369459. */

static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Cavium_str = "CPU implementer\t: 0x43";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.arm64_requires_fallback_LLSC = True;

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGP_arm64_linux) */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
       vai.hwcaps = 0; /*baseline - no sse at all*/
       VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* Processor supports AVX instructions and XGETBV is enabled
           by OS and AVX instructions are enabled by the OS. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
           }
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS
        support) in the first place. */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

1382 #elif defined(VGA_s390x)
1383 
1384 #  include "libvex_s390x_common.h"
1385 
1386    {
1387      /* Instruction set detection code borrowed from ppc above. */
1388      vki_sigset_t          saved_set, tmp_set;
1389      vki_sigaction_fromK_t saved_sigill_act;
1390      vki_sigaction_toK_t     tmp_sigill_act;
1391 
1392      volatile Bool have_LDISP, have_STFLE;
1393      Int i, r, model;
1394 
1395      /* If the model is "unknown" don't treat this as an error. Assume
1396         this is a brand-new machine model for which we don't have the
1397         identification yet. Keeping fingers crossed. */
1398      model = VG_(get_machine_model)();
1399 
1400      /* Unblock SIGILL and stash away the old action for that signal */
1401      VG_(sigemptyset)(&tmp_set);
1402      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1403 
1404      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1405      vg_assert(r == 0);
1406 
1407      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1408      vg_assert(r == 0);
1409      tmp_sigill_act = saved_sigill_act;
1410 
1411      /* NODEFER: signal handler does not return (from the kernel's point of
1412         view), hence if it is to successfully catch a signal more than once,
1413         we need the NODEFER flag. */
1414      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1415      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1416      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
1417      tmp_sigill_act.ksa_handler = handler_unsup_insn;
1418      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1. Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
     }
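
     /* Note on the STFLE probe above: the insn is emitted via ".insn"
        with its opcode (0xb2b0) rather than by mnemonic, presumably so
        it also assembles with toolchains that predate the instruction.
        On input, the low byte of r0 holds the number of doublewords
        available at the store location minus one; here that is
        S390_NUM_FACILITY_DW - 1. */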

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as,
        or later than, STFLE itself, so the absence of STFLE implies the
        absence of the facility we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };

     /* Set hwcaps according to the detected facilities */
     for (i = 0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }
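
     /* Facility numbering follows the z/Architecture convention: bit 0 is
        the most significant bit of the first doubleword, hence the
        (63 - facility_bit) shift above.  For example, assuming the
        extended-immediate facility sits at facility bit 21, its test
        reduces to hoststfle[0] & (1ULL << 42). */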

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) + //  %s %d
                   7 + 1 + 4 + 2  // machine %4d
                   + 1];  // \0
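     /* Worked size check: with 10 table entries and 6-byte names, that is
        10 * (6 + 3) + 14 + 1 = 105 bytes.  Each " %s %1u" item needs at
        most 1 + 5 + 1 + 1 = 8 bytes, and "machine %4d  " needs 14, so the
        buffer cannot overflow. */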
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i = 0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all platforms so far supported, it's
        not worth the typing effort.  At least include the most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they differ
        very little, so for now we do not distinguish between them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     /* ARMv8 insns */
     have_V8 = True;
     if (archlevel == 7) {
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_V8 = False;
        } else {
           __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
        }
        if (have_V8 && have_NEON && have_VFP3) {
           archlevel = 8;
        }
     }
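
     /* Summary of the cascade above: start from a v5 baseline, then try a
        v7-only insn (PLI); if that traps, retry with a v6-only insn
        (PKHBT); finally, after a successful v7 probe, try an ARMv8 insn
        (VMAXNM) and claim v8 only if NEON and VFP3 are also present. */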

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* Check whether we need to use the fallback LLSC implementation.
        If the check fails, give up. */
     if (! VG_(parse_cpuinfo)())
        return False;

     /* 0 denotes 'not set'.  The range of legitimate values here,
        once set, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
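     /* CTR_EL0 reports minimum line sizes as log2(words), so the +2
        converts words (4 bytes) to a log2 byte size.  For example, a
        DminLine field (bits 19:16) of 4 gives lg2_szB = 6, i.e. 64-byte
        D-cache lines. */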
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);
     VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
                   vai.arm64_requires_fallback_LLSC ? "yes" : "no");

     return True;
   }

#elif defined(VGA_mips32)
   {
     /* Define the position of the F64 bit in the FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     if (!VG_(parse_cpuinfo)())
        return False;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }
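
     /* Note: the MIPS hwcaps word carries a processor-ID guess rather than
        individual feature bits, so the probes above encode their result by
        picking an implementation code (74K for DSPr2, 34K for DSP) that
        implies the detected extension. */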

#    if defined(VGP_mips32_linux)
     Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
#    else
     Int fpmode = -1;
#    endif
     if (fpmode < 0) {
        /* prctl(PR_GET_FP_MODE) is not supported by the kernel, so use
           an alternative way to determine the FP mode. */
        ULong result = 0;

        if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
           __asm__ volatile (
              ".set push\n\t"
              ".set noreorder\n\t"
              ".set oddspreg\n\t"
              ".set hardfloat\n\t"
              "lui $t0, 0x3FF0\n\t"
              "ldc1 $f0, %0\n\t"
              "mtc1 $t0, $f1\n\t"
              "sdc1 $f0, %0\n\t"
              ".set pop\n\t"
              : "+m"(result)
              :
              : "t0", "$f0", "$f1", "memory");

           fpmode = (result != 0x3FF0000000000000ull);
        }
     }
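
     /* How the fallback probe works: 0x3FF0000000000000 is the IEEE-754
        bit pattern of the double 1.0, whose high word is 0x3FF00000.  In
        FR=0 mode $f0/$f1 form one 64-bit pair, so writing 0x3FF00000 to
        $f1 sets the high word of $f0 and the stored result reads back as
        1.0; in FR=1 mode $f1 is an independent register and result stays
        0.  Hence result != 1.0 means FR=1. */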

     if (fpmode != 0)
        vai.hwcaps |= VEX_MIPS_HOST_FR;

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     if (!VG_(parse_cpuinfo)())
        return False;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps |= VEX_MIPS_HOST_FR;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
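/* For example: an amd64 guest with AVX moves 32-byte YMM values, so with
   a simulated line size of at least 32 bytes a misaligned 32-byte access
   touches at most 2 lines, whereas a 16-byte line could make it touch 3. */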
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)    \
      || defined(VGP_arm_linux) || defined(VGO_darwin)      \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)  \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)  \
      || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
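   /* Schematically (the big-endian ELF v1 ABI):
         descr[0] = entry point address
         descr[1] = TOC (r2) value for the callee
         descr[2] = environment pointer
      so dereferencing the first word yields the callable address. */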
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/