/* (Code-browser navigation chrome removed from scraped source.) */
1 /*--------------------------------------------------------------------*/
2 /*--- Machine-related stuff.                           m_machine.c ---*/
3 /*--------------------------------------------------------------------*/
4 
5 /*
6    This file is part of Valgrind, a dynamic binary instrumentation
7    framework.
8 
9    Copyright (C) 2000-2012 Julian Seward
10       jseward@acm.org
11 
12    This program is free software; you can redistribute it and/or
13    modify it under the terms of the GNU General Public License as
14    published by the Free Software Foundation; either version 2 of the
15    License, or (at your option) any later version.
16 
17    This program is distributed in the hope that it will be useful, but
18    WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20    General Public License for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with this program; if not, write to the Free Software
24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25    02111-1307, USA.
26 
27    The GNU General Public License is contained in the file COPYING.
28 */
29 
30 #include "pub_core_basics.h"
31 #include "pub_core_vki.h"
32 #include "pub_core_libcsetjmp.h"   // setjmp facilities
33 #include "pub_core_threadstate.h"
34 #include "pub_core_libcassert.h"
35 #include "pub_core_libcbase.h"
36 #include "pub_core_libcfile.h"
37 #include "pub_core_mallocfree.h"
38 #include "pub_core_machine.h"
39 #include "pub_core_cpuid.h"
40 #include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
41 #include "pub_core_debuglog.h"
42 
43 
44 #define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
45 #define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
46 #define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)
47 
VG_(get_IP)48 Addr VG_(get_IP) ( ThreadId tid ) {
49    return INSTR_PTR( VG_(threads)[tid].arch );
50 }
VG_(get_SP)51 Addr VG_(get_SP) ( ThreadId tid ) {
52    return STACK_PTR( VG_(threads)[tid].arch );
53 }
VG_(get_FP)54 Addr VG_(get_FP) ( ThreadId tid ) {
55    return FRAME_PTR( VG_(threads)[tid].arch );
56 }
57 
VG_(set_IP)58 void VG_(set_IP) ( ThreadId tid, Addr ip ) {
59    INSTR_PTR( VG_(threads)[tid].arch ) = ip;
60 }
VG_(set_SP)61 void VG_(set_SP) ( ThreadId tid, Addr sp ) {
62    STACK_PTR( VG_(threads)[tid].arch ) = sp;
63 }
64 
VG_(get_UnwindStartRegs)65 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
66                                 ThreadId tid )
67 {
68 #  if defined(VGA_x86)
69    regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
70    regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
71    regs->misc.X86.r_ebp
72       = VG_(threads)[tid].arch.vex.guest_EBP;
73 #  elif defined(VGA_amd64)
74    regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
75    regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
76    regs->misc.AMD64.r_rbp
77       = VG_(threads)[tid].arch.vex.guest_RBP;
78 #  elif defined(VGA_ppc32)
79    regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
80    regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
81    regs->misc.PPC32.r_lr
82       = VG_(threads)[tid].arch.vex.guest_LR;
83 #  elif defined(VGA_ppc64)
84    regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
85    regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
86    regs->misc.PPC64.r_lr
87       = VG_(threads)[tid].arch.vex.guest_LR;
88 #  elif defined(VGA_arm)
89    regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
90    regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
91    regs->misc.ARM.r14
92       = VG_(threads)[tid].arch.vex.guest_R14;
93    regs->misc.ARM.r12
94       = VG_(threads)[tid].arch.vex.guest_R12;
95    regs->misc.ARM.r11
96       = VG_(threads)[tid].arch.vex.guest_R11;
97    regs->misc.ARM.r7
98       = VG_(threads)[tid].arch.vex.guest_R7;
99 #  elif defined(VGA_s390x)
100    regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
101    regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
102    regs->misc.S390X.r_fp
103       = VG_(threads)[tid].arch.vex.guest_r11;
104    regs->misc.S390X.r_lr
105       = VG_(threads)[tid].arch.vex.guest_r14;
106 #  elif defined(VGA_mips32)
107    regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
108    regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
109    regs->misc.MIPS32.r30
110       = VG_(threads)[tid].arch.vex.guest_r30;
111    regs->misc.MIPS32.r31
112       = VG_(threads)[tid].arch.vex.guest_r31;
113    regs->misc.MIPS32.r28
114       = VG_(threads)[tid].arch.vex.guest_r28;
115 #  else
116 #    error "Unknown arch"
117 #  endif
118 }
119 
120 
VG_(set_syscall_return_shadows)121 void VG_(set_syscall_return_shadows) ( ThreadId tid,
122                                        /* shadow vals for the result */
123                                        UWord s1res, UWord s2res,
124                                        /* shadow vals for the error val */
125                                        UWord s1err, UWord s2err )
126 {
127 #  if defined(VGP_x86_linux)
128    VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
129    VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
130 #  elif defined(VGP_amd64_linux)
131    VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
132    VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
133 #  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
134    VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
135    VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
136 #  elif defined(VGP_arm_linux)
137    VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
138    VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
139 #  elif defined(VGO_darwin)
140    // GrP fixme darwin syscalls may return more values (2 registers plus error)
141 #  elif defined(VGP_s390x_linux)
142    VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
143    VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
144 #  elif defined(VGP_mips32_linux)
145    VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
146    VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
147 #  else
148 #    error "Unknown plat"
149 #  endif
150 }
151 
152 void
VG_(get_shadow_regs_area)153 VG_(get_shadow_regs_area) ( ThreadId tid,
154                             /*DST*/UChar* dst,
155                             /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
156 {
157    void*        src;
158    ThreadState* tst;
159    vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
160    vg_assert(VG_(is_valid_tid)(tid));
161    // Bounds check
162    vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
163    vg_assert(offset + size <= sizeof(VexGuestArchState));
164    // Copy
165    tst = & VG_(threads)[tid];
166    src = NULL;
167    switch (shadowNo) {
168       case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
169       case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
170       case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
171    }
172    tl_assert(src != NULL);
173    VG_(memcpy)( dst, src, size);
174 }
175 
176 void
VG_(set_shadow_regs_area)177 VG_(set_shadow_regs_area) ( ThreadId tid,
178                             /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
179                             /*SRC*/const UChar* src )
180 {
181    void*        dst;
182    ThreadState* tst;
183    vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
184    vg_assert(VG_(is_valid_tid)(tid));
185    // Bounds check
186    vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
187    vg_assert(offset + size <= sizeof(VexGuestArchState));
188    // Copy
189    tst = & VG_(threads)[tid];
190    dst = NULL;
191    switch (shadowNo) {
192       case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
193       case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
194       case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
195    }
196    tl_assert(dst != NULL);
197    VG_(memcpy)( dst, src, size);
198 }
199 
200 
apply_to_GPs_of_tid(ThreadId tid,void (* f)(ThreadId,HChar *,Addr))201 static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId, HChar*, Addr))
202 {
203    VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
204 #if defined(VGA_x86)
205    (*f)(tid, "EAX", vex->guest_EAX);
206    (*f)(tid, "ECX", vex->guest_ECX);
207    (*f)(tid, "EDX", vex->guest_EDX);
208    (*f)(tid, "EBX", vex->guest_EBX);
209    (*f)(tid, "ESI", vex->guest_ESI);
210    (*f)(tid, "EDI", vex->guest_EDI);
211    (*f)(tid, "ESP", vex->guest_ESP);
212    (*f)(tid, "EBP", vex->guest_EBP);
213 #elif defined(VGA_amd64)
214    (*f)(tid, "RAX", vex->guest_RAX);
215    (*f)(tid, "RCX", vex->guest_RCX);
216    (*f)(tid, "RDX", vex->guest_RDX);
217    (*f)(tid, "RBX", vex->guest_RBX);
218    (*f)(tid, "RSI", vex->guest_RSI);
219    (*f)(tid, "RDI", vex->guest_RDI);
220    (*f)(tid, "RSP", vex->guest_RSP);
221    (*f)(tid, "RBP", vex->guest_RBP);
222    (*f)(tid, "R8" , vex->guest_R8 );
223    (*f)(tid, "R9" , vex->guest_R9 );
224    (*f)(tid, "R10", vex->guest_R10);
225    (*f)(tid, "R11", vex->guest_R11);
226    (*f)(tid, "R12", vex->guest_R12);
227    (*f)(tid, "R13", vex->guest_R13);
228    (*f)(tid, "R14", vex->guest_R14);
229    (*f)(tid, "R15", vex->guest_R15);
230 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
231    (*f)(tid, "GPR0" , vex->guest_GPR0 );
232    (*f)(tid, "GPR1" , vex->guest_GPR1 );
233    (*f)(tid, "GPR2" , vex->guest_GPR2 );
234    (*f)(tid, "GPR3" , vex->guest_GPR3 );
235    (*f)(tid, "GPR4" , vex->guest_GPR4 );
236    (*f)(tid, "GPR5" , vex->guest_GPR5 );
237    (*f)(tid, "GPR6" , vex->guest_GPR6 );
238    (*f)(tid, "GPR7" , vex->guest_GPR7 );
239    (*f)(tid, "GPR8" , vex->guest_GPR8 );
240    (*f)(tid, "GPR9" , vex->guest_GPR9 );
241    (*f)(tid, "GPR10", vex->guest_GPR10);
242    (*f)(tid, "GPR11", vex->guest_GPR11);
243    (*f)(tid, "GPR12", vex->guest_GPR12);
244    (*f)(tid, "GPR13", vex->guest_GPR13);
245    (*f)(tid, "GPR14", vex->guest_GPR14);
246    (*f)(tid, "GPR15", vex->guest_GPR15);
247    (*f)(tid, "GPR16", vex->guest_GPR16);
248    (*f)(tid, "GPR17", vex->guest_GPR17);
249    (*f)(tid, "GPR18", vex->guest_GPR18);
250    (*f)(tid, "GPR19", vex->guest_GPR19);
251    (*f)(tid, "GPR20", vex->guest_GPR20);
252    (*f)(tid, "GPR21", vex->guest_GPR21);
253    (*f)(tid, "GPR22", vex->guest_GPR22);
254    (*f)(tid, "GPR23", vex->guest_GPR23);
255    (*f)(tid, "GPR24", vex->guest_GPR24);
256    (*f)(tid, "GPR25", vex->guest_GPR25);
257    (*f)(tid, "GPR26", vex->guest_GPR26);
258    (*f)(tid, "GPR27", vex->guest_GPR27);
259    (*f)(tid, "GPR28", vex->guest_GPR28);
260    (*f)(tid, "GPR29", vex->guest_GPR29);
261    (*f)(tid, "GPR30", vex->guest_GPR30);
262    (*f)(tid, "GPR31", vex->guest_GPR31);
263    (*f)(tid, "CTR"  , vex->guest_CTR  );
264    (*f)(tid, "LR"   , vex->guest_LR   );
265 #elif defined(VGA_arm)
266    (*f)(tid, "R0" , vex->guest_R0 );
267    (*f)(tid, "R1" , vex->guest_R1 );
268    (*f)(tid, "R2" , vex->guest_R2 );
269    (*f)(tid, "R3" , vex->guest_R3 );
270    (*f)(tid, "R4" , vex->guest_R4 );
271    (*f)(tid, "R5" , vex->guest_R5 );
272    (*f)(tid, "R6" , vex->guest_R6 );
273    (*f)(tid, "R8" , vex->guest_R8 );
274    (*f)(tid, "R9" , vex->guest_R9 );
275    (*f)(tid, "R10", vex->guest_R10);
276    (*f)(tid, "R11", vex->guest_R11);
277    (*f)(tid, "R12", vex->guest_R12);
278    (*f)(tid, "R13", vex->guest_R13);
279    (*f)(tid, "R14", vex->guest_R14);
280 #elif defined(VGA_s390x)
281    (*f)(tid, "r0" , vex->guest_r0 );
282    (*f)(tid, "r1" , vex->guest_r1 );
283    (*f)(tid, "r2" , vex->guest_r2 );
284    (*f)(tid, "r3" , vex->guest_r3 );
285    (*f)(tid, "r4" , vex->guest_r4 );
286    (*f)(tid, "r5" , vex->guest_r5 );
287    (*f)(tid, "r6" , vex->guest_r6 );
288    (*f)(tid, "r7" , vex->guest_r7 );
289    (*f)(tid, "r8" , vex->guest_r8 );
290    (*f)(tid, "r9" , vex->guest_r9 );
291    (*f)(tid, "r10", vex->guest_r10);
292    (*f)(tid, "r11", vex->guest_r11);
293    (*f)(tid, "r12", vex->guest_r12);
294    (*f)(tid, "r13", vex->guest_r13);
295    (*f)(tid, "r14", vex->guest_r14);
296    (*f)(tid, "r15", vex->guest_r15);
297 #elif defined(VGA_mips32)
298    (*f)(tid, "r0" , vex->guest_r0 );
299    (*f)(tid, "r1" , vex->guest_r1 );
300    (*f)(tid, "r2" , vex->guest_r2 );
301    (*f)(tid, "r3" , vex->guest_r3 );
302    (*f)(tid, "r4" , vex->guest_r4 );
303    (*f)(tid, "r5" , vex->guest_r5 );
304    (*f)(tid, "r6" , vex->guest_r6 );
305    (*f)(tid, "r7" , vex->guest_r7 );
306    (*f)(tid, "r8" , vex->guest_r8 );
307    (*f)(tid, "r9" , vex->guest_r9 );
308    (*f)(tid, "r10", vex->guest_r10);
309    (*f)(tid, "r11", vex->guest_r11);
310    (*f)(tid, "r12", vex->guest_r12);
311    (*f)(tid, "r13", vex->guest_r13);
312    (*f)(tid, "r14", vex->guest_r14);
313    (*f)(tid, "r15", vex->guest_r15);
314    (*f)(tid, "r16", vex->guest_r16);
315    (*f)(tid, "r17", vex->guest_r17);
316    (*f)(tid, "r18", vex->guest_r18);
317    (*f)(tid, "r19", vex->guest_r19);
318    (*f)(tid, "r20", vex->guest_r20);
319    (*f)(tid, "r21", vex->guest_r21);
320    (*f)(tid, "r22", vex->guest_r22);
321    (*f)(tid, "r23", vex->guest_r23);
322    (*f)(tid, "r24", vex->guest_r24);
323    (*f)(tid, "r25", vex->guest_r25);
324    (*f)(tid, "r26", vex->guest_r26);
325    (*f)(tid, "r27", vex->guest_r27);
326    (*f)(tid, "r28", vex->guest_r28);
327    (*f)(tid, "r29", vex->guest_r29);
328    (*f)(tid, "r30", vex->guest_r30);
329    (*f)(tid, "r31", vex->guest_r31);
330 #else
331 #  error Unknown arch
332 #endif
333 }
334 
335 
VG_(apply_to_GP_regs)336 void VG_(apply_to_GP_regs)(void (*f)(ThreadId, HChar*, UWord))
337 {
338    ThreadId tid;
339 
340    for (tid = 1; tid < VG_N_THREADS; tid++) {
341       if (VG_(is_valid_tid)(tid)) {
342          apply_to_GPs_of_tid(tid, f);
343       }
344    }
345 }
346 
VG_(thread_stack_reset_iter)347 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
348 {
349    *tid = (ThreadId)(-1);
350 }
351 
VG_(thread_stack_next)352 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
353                             /*OUT*/Addr* stack_min,
354                             /*OUT*/Addr* stack_max)
355 {
356    ThreadId i;
357    for (i = (*tid)+1; i < VG_N_THREADS; i++) {
358       if (i == VG_INVALID_THREADID)
359          continue;
360       if (VG_(threads)[i].status != VgTs_Empty) {
361          *tid       = i;
362          *stack_min = VG_(get_SP)(i);
363          *stack_max = VG_(threads)[i].client_stack_highest_word;
364          return True;
365       }
366    }
367    return False;
368 }
369 
VG_(thread_get_stack_max)370 Addr VG_(thread_get_stack_max)(ThreadId tid)
371 {
372    vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
373    vg_assert(VG_(threads)[tid].status != VgTs_Empty);
374    return VG_(threads)[tid].client_stack_highest_word;
375 }
376 
VG_(thread_get_stack_size)377 SizeT VG_(thread_get_stack_size)(ThreadId tid)
378 {
379    vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
380    vg_assert(VG_(threads)[tid].status != VgTs_Empty);
381    return VG_(threads)[tid].client_stack_szB;
382 }
383 
VG_(thread_get_altstack_min)384 Addr VG_(thread_get_altstack_min)(ThreadId tid)
385 {
386    vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
387    vg_assert(VG_(threads)[tid].status != VgTs_Empty);
388    return (Addr)VG_(threads)[tid].altstack.ss_sp;
389 }
390 
VG_(thread_get_altstack_size)391 SizeT VG_(thread_get_altstack_size)(ThreadId tid)
392 {
393    vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
394    vg_assert(VG_(threads)[tid].status != VgTs_Empty);
395    return VG_(threads)[tid].altstack.ss_size;
396 }
397 
398 //-------------------------------------------------------------
399 /* Details about the capabilities of the underlying (host) CPU.  These
400    details are acquired by (1) enquiring with the CPU at startup, or
401    (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
402    line size).  It's a bit nasty in the sense that there's no obvious
403    way to stop uses of some of this info before it's ready to go.
404    See pub_core_machine.h for more information about that.
405 
406    VG_(machine_get_hwcaps) may use signals (although it attempts to
407    leave signal state unchanged) and therefore should only be
408    called before m_main sets up the client's signal state.
409 */
410 
411 /* --------- State --------- */
412 static Bool hwcaps_done = False;
413 
414 /* --- all archs --- */
415 static VexArch     va = VexArch_INVALID;
416 static VexArchInfo vai;
417 
418 #if defined(VGA_x86)
419 UInt VG_(machine_x86_have_mxcsr) = 0;
420 #endif
421 #if defined(VGA_ppc32)
422 UInt VG_(machine_ppc32_has_FP)  = 0;
423 UInt VG_(machine_ppc32_has_VMX) = 0;
424 #endif
425 #if defined(VGA_ppc64)
426 ULong VG_(machine_ppc64_has_VMX) = 0;
427 #endif
428 #if defined(VGA_arm)
429 Int VG_(machine_arm_archlevel) = 4;
430 #endif
431 
432 /* fixs390: anything for s390x here ? */
433 
434 /* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
435    testing, so we need a VG_MINIMAL_JMP_BUF. */
436 #if defined(VGA_ppc32) || defined(VGA_ppc64) \
437     || defined(VGA_arm) || defined(VGA_s390x)
438 #include "pub_tool_libcsetjmp.h"
439 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
/* SIGILL/SIGFPE handler used while probing for instruction-set
   support: abandon the probe by longjmp-ing back to the matching
   VG_MINIMAL_SETJMP point.  The signal number X is ignored. */
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
443 #endif
444 
445 
446 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
447  * handlers are installed.  Determines the the sizes affected by dcbz
448  * and dcbzl instructions and updates the given VexArchInfo structure
449  * accordingly.
450  *
451  * Not very defensive: assumes that as long as the dcbz/dcbzl
452  * instructions don't raise a SIGILL, that they will zero an aligned,
453  * contiguous block of memory of a sensible size. */
454 #if defined(VGA_ppc32) || defined(VGA_ppc64)
/* Probe the effective block sizes of dcbz and dcbzl by executing each
   instruction on an aligned scratch buffer and counting how many bytes
   it zeroed.  Results are written into ARCH_INFO.  Assumes the
   SIGILL handler (handler_unsup_insn) is already installed, so an
   unsupported dcbzl is caught via the setjmp below. */
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   /* Count the zeroed bytes; the whole buffer was 0xff beforehand, so
      the count equals the dcbz block size. */
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      /* Reached via longjmp from the SIGILL handler. */
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      /* As above: count zeroed bytes to obtain the dcbzl block size. */
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
507 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */
508 
509 #ifdef VGA_s390x
510 
511 /* Read /proc/cpuinfo. Look for lines like these
512 
513    processor 0: version = FF,  identification = 0117C9,  machine = 2064
514 
515    and return the machine model. If the machine model could not be determined
516    or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
517 
VG_(get_machine_model)518 static UInt VG_(get_machine_model)(void)
519 {
520    static struct model_map {
521       HChar name[5];
522       UInt  id;
523    } model_map[] = {
524       { "2064", VEX_S390X_MODEL_Z900 },
525       { "2066", VEX_S390X_MODEL_Z800 },
526       { "2084", VEX_S390X_MODEL_Z990 },
527       { "2086", VEX_S390X_MODEL_Z890 },
528       { "2094", VEX_S390X_MODEL_Z9_EC },
529       { "2096", VEX_S390X_MODEL_Z9_BC },
530       { "2097", VEX_S390X_MODEL_Z10_EC },
531       { "2098", VEX_S390X_MODEL_Z10_BC },
532       { "2817", VEX_S390X_MODEL_Z196 },
533       { "2818", VEX_S390X_MODEL_Z114 },
534    };
535 
536    Int    model, n, fh;
537    SysRes fd;
538    SizeT  num_bytes, file_buf_size;
539    HChar *p, *m, *model_name, *file_buf;
540 
541    /* Slurp contents of /proc/cpuinfo into FILE_BUF */
542    fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
543    if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;
544 
545    fh  = sr_Res(fd);
546 
547    /* Determine the size of /proc/cpuinfo.
548       Work around broken-ness in /proc file system implementation.
549       fstat returns a zero size for /proc/cpuinfo although it is
550       claimed to be a regular file. */
551    num_bytes = 0;
552    file_buf_size = 1000;
553    file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
554    while (42) {
555       n = VG_(read)(fh, file_buf, file_buf_size);
556       if (n < 0) break;
557 
558       num_bytes += n;
559       if (n < file_buf_size) break;  /* reached EOF */
560    }
561 
562    if (n < 0) num_bytes = 0;   /* read error; ignore contents */
563 
564    if (num_bytes > file_buf_size) {
565       VG_(free)( file_buf );
566       VG_(lseek)( fh, 0, VKI_SEEK_SET );
567       file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
568       n = VG_(read)( fh, file_buf, num_bytes );
569       if (n < 0) num_bytes = 0;
570    }
571 
572    file_buf[num_bytes] = '\0';
573    VG_(close)(fh);
574 
575    /* Parse file */
576    model = VEX_S390X_MODEL_UNKNOWN;
577    for (p = file_buf; *p; ++p) {
578       /* Beginning of line */
579      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;
580 
581      m = VG_(strstr)( p, "machine" );
582      if (m == NULL) continue;
583 
584      p = m + sizeof "machine" - 1;
585      while ( VG_(isspace)( *p ) || *p == '=') {
586        if (*p == '\n') goto next_line;
587        ++p;
588      }
589 
590      model_name = p;
591      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
592        struct model_map *mm = model_map + n;
593        SizeT len = VG_(strlen)( mm->name );
594        if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
595             VG_(isspace)( model_name[len] )) {
596          if (mm->id < model) model = mm->id;
597          p = model_name + len;
598          break;
599        }
600      }
601      /* Skip until end-of-line */
602      while (*p != '\n')
603        ++p;
604    next_line: ;
605    }
606 
607    VG_(free)( file_buf );
608    VG_(debugLog)(1, "machine", "model = %s\n",
609                  model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
610                                                   : model_map[model].name);
611    return model;
612 }
613 
614 #endif /* VGA_s390x */
615 
616 #ifdef VGA_mips32
617 
618 /* Read /proc/cpuinfo and return the machine model. */
VG_(get_machine_model)619 static UInt VG_(get_machine_model)(void)
620 {
621    char *search_MIPS_str = "MIPS";
622    char *search_Broadcom_str = "Broadcom";
623    Int    n, fh;
624    SysRes fd;
625    SizeT  num_bytes, file_buf_size;
626    HChar  *file_buf;
627 
628    /* Slurp contents of /proc/cpuinfo into FILE_BUF */
629    fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
630    if ( sr_isError(fd) ) return -1;
631 
632    fh  = sr_Res(fd);
633 
634    /* Determine the size of /proc/cpuinfo.
635       Work around broken-ness in /proc file system implementation.
636       fstat returns a zero size for /proc/cpuinfo although it is
637       claimed to be a regular file. */
638    num_bytes = 0;
639    file_buf_size = 1000;
640    file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
641    while (42) {
642       n = VG_(read)(fh, file_buf, file_buf_size);
643       if (n < 0) break;
644 
645       num_bytes += n;
646       if (n < file_buf_size) break;  /* reached EOF */
647    }
648 
649    if (n < 0) num_bytes = 0;   /* read error; ignore contents */
650 
651    if (num_bytes > file_buf_size) {
652       VG_(free)( file_buf );
653       VG_(lseek)( fh, 0, VKI_SEEK_SET );
654       file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
655       n = VG_(read)( fh, file_buf, num_bytes );
656       if (n < 0) num_bytes = 0;
657    }
658 
659    file_buf[num_bytes] = '\0';
660    VG_(close)(fh);
661 
662    /* Parse file */
663    if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
664        return VEX_PRID_COMP_BROADCOM;
665    if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
666        return VEX_PRID_COMP_MIPS;
667 
668    /* Did not find string in the proc file. */
669    return -1;
670 }
671 
672 #endif
673 
674 /* Determine what insn set and insn set variant the host has, and
675    record it.  To be called once at system startup.  Returns False if
676    this a CPU incapable of running Valgrind. */
677 
VG_(machine_get_hwcaps)678 Bool VG_(machine_get_hwcaps)( void )
679 {
680    vg_assert(hwcaps_done == False);
681    hwcaps_done = True;
682 
683    // Whack default settings into vai, so that we only need to fill in
684    // any interesting bits.
685    LibVEX_default_VexArchInfo(&vai);
686 
687 #if defined(VGA_x86)
688    { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
689      UInt eax, ebx, ecx, edx, max_extended;
690      UChar vstr[13];
691      vstr[0] = 0;
692 
693      if (!VG_(has_cpuid)())
694         /* we can't do cpuid at all.  Give up. */
695         return False;
696 
697      VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
698      if (eax < 1)
699         /* we can't ask for cpuid(x) for x > 0.  Give up. */
700         return False;
701 
702      /* Get processor ID string, and max basic/extended index
703         values. */
704      VG_(memcpy)(&vstr[0], &ebx, 4);
705      VG_(memcpy)(&vstr[4], &edx, 4);
706      VG_(memcpy)(&vstr[8], &ecx, 4);
707      vstr[12] = 0;
708 
709      VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
710      max_extended = eax;
711 
712      /* get capabilities bits into edx */
713      VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
714 
715      have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
716      have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
717 
718      /* cmpxchg8b is a minimum requirement now; if we don't have it we
719         must simply give up.  But all CPUs since Pentium-I have it, so
720         that doesn't seem like much of a restriction. */
721      have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
722      if (!have_cx8)
723         return False;
724 
725      /* Figure out if this is an AMD that can do LZCNT. */
726      have_lzcnt = False;
727      if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
728          && max_extended >= 0x80000001) {
729         VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
730         have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
731      }
732 
733      if (have_sse2 && have_sse1) {
734         va          = VexArchX86;
735         vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
736         vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
737         if (have_lzcnt)
738            vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
739         VG_(machine_x86_have_mxcsr) = 1;
740         return True;
741      }
742 
743      if (have_sse1) {
744         va          = VexArchX86;
745         vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
746         VG_(machine_x86_have_mxcsr) = 1;
747         return True;
748      }
749 
750      va         = VexArchX86;
751      vai.hwcaps = 0; /*baseline - no sse at all*/
752      VG_(machine_x86_have_mxcsr) = 0;
753      return True;
754    }
755 
756 #elif defined(VGA_amd64)
757    { Bool have_sse3, have_cx8, have_cx16;
758      Bool have_lzcnt, have_avx /*, have_fma*/;
759      UInt eax, ebx, ecx, edx, max_extended;
760      UChar vstr[13];
761      vstr[0] = 0;
762 
763      if (!VG_(has_cpuid)())
764         /* we can't do cpuid at all.  Give up. */
765         return False;
766 
767      VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
768      if (eax < 1)
769         /* we can't ask for cpuid(x) for x > 0.  Give up. */
770         return False;
771 
772      /* Get processor ID string, and max basic/extended index
773         values. */
774      VG_(memcpy)(&vstr[0], &ebx, 4);
775      VG_(memcpy)(&vstr[4], &edx, 4);
776      VG_(memcpy)(&vstr[8], &ecx, 4);
777      vstr[12] = 0;
778 
779      VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
780      max_extended = eax;
781 
782      /* get capabilities bits into edx */
783      VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
784 
785      // we assume that SSE1 and SSE2 are available by default
786      have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
787      // ssse3   is ecx:9
788      // sse41   is ecx:19
789      // sse42   is ecx:20
790 
791      // osxsave is ecx:27
792      // avx     is ecx:28
793      // fma     is ecx:12
794      have_avx = False;
795      /* have_fma = False; */
796      if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
797         /* processor supports AVX instructions and XGETBV is enabled
798            by OS */
799         ULong w;
800         __asm__ __volatile__("movq $0,%%rcx ; "
801                              ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
802                              "movq %%rax,%0"
803                              :/*OUT*/"=r"(w) :/*IN*/
804                              :/*TRASH*/"rdx","rcx");
805         if ((w & 6) == 6) {
806            /* OS has enabled both XMM and YMM state support */
807            have_avx = True;
808            /* have_fma = (ecx & (1<<12)) != 0; */
809            /* have_fma: Probably correct, but gcc complains due to
810               unusedness. &*/
811         }
812      }
813 
814 
815      /* cmpxchg8b is a minimum requirement now; if we don't have it we
816         must simply give up.  But all CPUs since Pentium-I have it, so
817         that doesn't seem like much of a restriction. */
818      have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
819      if (!have_cx8)
820         return False;
821 
822      /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
823      have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
824 
825      /* Figure out if this is an AMD that can do LZCNT. */
826      have_lzcnt = False;
827      if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
828          && max_extended >= 0x80000001) {
829         VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
830         have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
831      }
832 
833      va         = VexArchAMD64;
834      vai.hwcaps = (have_sse3  ? VEX_HWCAPS_AMD64_SSE3  : 0)
835                 | (have_cx16  ? VEX_HWCAPS_AMD64_CX16  : 0)
836                 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
837                 | (have_avx   ? VEX_HWCAPS_AMD64_AVX   : 0);
838      return True;
839    }
840 
841 #elif defined(VGA_ppc32)
842    {
843      /* Find out which subset of the ppc32 instruction set is supported by
844         verifying whether various ppc32 instructions generate a SIGILL
845         or a SIGFPE. An alternative approach is to check the AT_HWCAP and
846         AT_PLATFORM entries in the ELF auxiliary table -- see also
847         the_iifii.client_auxv in m_main.c.
848       */
849      vki_sigset_t          saved_set, tmp_set;
850      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
851      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
852 
853      volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
854      Int r;
855 
856      /* This is a kludge.  Really we ought to back-convert saved_act
857         into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
858         since that's a no-op on all ppc32 platforms so far supported,
859         it's not worth the typing effort.  At least include most basic
860         sanity check: */
861      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
862 
863      VG_(sigemptyset)(&tmp_set);
864      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
865      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
866 
867      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
868      vg_assert(r == 0);
869 
870      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
871      vg_assert(r == 0);
872      tmp_sigill_act = saved_sigill_act;
873 
874      r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
875      vg_assert(r == 0);
876      tmp_sigfpe_act = saved_sigfpe_act;
877 
878      /* NODEFER: signal handler does not return (from the kernel's point of
879         view), hence if it is to successfully catch a signal more than once,
880         we need the NODEFER flag. */
881      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
882      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
883      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
884      tmp_sigill_act.ksa_handler = handler_unsup_insn;
885      r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
886      vg_assert(r == 0);
887 
888      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
889      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
890      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
891      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
892      r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
893      vg_assert(r == 0);
894 
895      /* standard FP insns */
896      have_F = True;
897      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
898         have_F = False;
899      } else {
900         __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
901      }
902 
903      /* Altivec insns */
904      have_V = True;
905      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
906         have_V = False;
907      } else {
908         /* Unfortunately some older assemblers don't speak Altivec (or
909            choose not to), so to be safe we directly emit the 32-bit
910            word corresponding to "vor 0,0,0".  This fixes a build
911            problem that happens on Debian 3.1 (ppc32), and probably
912            various other places. */
913         __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
914      }
915 
916      /* General-Purpose optional (fsqrt, fsqrts) */
917      have_FX = True;
918      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
919         have_FX = False;
920      } else {
921         __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
922      }
923 
924      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
925      have_GX = True;
926      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
927         have_GX = False;
928      } else {
929         __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
930      }
931 
932      /* VSX support implies Power ISA 2.06 */
933      have_VX = True;
934      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
935         have_VX = False;
936      } else {
937         __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
938      }
939 
940      /* Check for Decimal Floating Point (DFP) support. */
941      have_DFP = True;
942      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
943         have_DFP = False;
944      } else {
945         __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
946      }
947 
948      /* determine dcbz/dcbzl sizes while we still have the signal
949       * handlers registered */
950      find_ppc_dcbz_sz(&vai);
951 
952      r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
953      vg_assert(r == 0);
954      r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
955      vg_assert(r == 0);
956      r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
957      vg_assert(r == 0);
958      VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
959                     (Int)have_F, (Int)have_V, (Int)have_FX,
960                     (Int)have_GX, (Int)have_VX, (Int)have_DFP);
961      /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
962      if (have_V && !have_F)
963         have_V = False;
964      if (have_FX && !have_F)
965         have_FX = False;
966      if (have_GX && !have_F)
967         have_GX = False;
968 
969      VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
970      VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
971 
972      va = VexArchPPC32;
973 
974      vai.hwcaps = 0;
975      if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
976      if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
977      if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
978      if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
979      if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
980      if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
981 
982 
983      /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
984         called before we're ready to go. */
985      return True;
986    }
987 
988 #elif defined(VGA_ppc64)
989    {
990      /* Same instruction set detection algorithm as for ppc32. */
991      vki_sigset_t          saved_set, tmp_set;
992      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
993      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
994 
995      volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
996      Int r;
997 
998      /* This is a kludge.  Really we ought to back-convert saved_act
999         into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1000         since that's a no-op on all ppc64 platforms so far supported,
1001         it's not worth the typing effort.  At least include most basic
1002         sanity check: */
1003      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1004 
1005      VG_(sigemptyset)(&tmp_set);
1006      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1007      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1008 
1009      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1010      vg_assert(r == 0);
1011 
1012      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1013      vg_assert(r == 0);
1014      tmp_sigill_act = saved_sigill_act;
1015 
1016      VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1017      tmp_sigfpe_act = saved_sigfpe_act;
1018 
1019      /* NODEFER: signal handler does not return (from the kernel's point of
1020         view), hence if it is to successfully catch a signal more than once,
1021         we need the NODEFER flag. */
1022      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1023      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1024      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
1025      tmp_sigill_act.ksa_handler = handler_unsup_insn;
1026      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1027 
1028      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1029      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1030      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
1031      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1032      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1033 
1034      /* standard FP insns */
1035      have_F = True;
1036      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1037         have_F = False;
1038      } else {
1039         __asm__ __volatile__("fmr 0,0");
1040      }
1041 
1042      /* Altivec insns */
1043      have_V = True;
1044      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1045         have_V = False;
1046      } else {
1047         __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1048      }
1049 
1050      /* General-Purpose optional (fsqrt, fsqrts) */
1051      have_FX = True;
1052      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1053         have_FX = False;
1054      } else {
1055         __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
1056      }
1057 
1058      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1059      have_GX = True;
1060      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1061         have_GX = False;
1062      } else {
1063         __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
1064      }
1065 
1066      /* VSX support implies Power ISA 2.06 */
1067      have_VX = True;
1068      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1069         have_VX = False;
1070      } else {
1071         __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1072      }
1073 
1074      /* Check for Decimal Floating Point (DFP) support. */
1075      have_DFP = True;
1076      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1077         have_DFP = False;
1078      } else {
1079         __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
1080      }
1081 
1082      /* determine dcbz/dcbzl sizes while we still have the signal
1083       * handlers registered */
1084      find_ppc_dcbz_sz(&vai);
1085 
1086      VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1087      VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1088      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1089      VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
1090                     (Int)have_F, (Int)have_V, (Int)have_FX,
1091                     (Int)have_GX, (Int)have_VX, (Int)have_DFP);
1092      /* on ppc64, if we don't even have FP, just give up. */
1093      if (!have_F)
1094         return False;
1095 
1096      VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
1097 
1098      va = VexArchPPC64;
1099 
1100      vai.hwcaps = 0;
1101      if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
1102      if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
1103      if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
1104      if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
1105      if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
1106 
1107      /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
1108         called before we're ready to go. */
1109      return True;
1110    }
1111 
1112 #elif defined(VGA_s390x)
1113    {
1114      /* Instruction set detection code borrowed from ppc above. */
1115      vki_sigset_t          saved_set, tmp_set;
1116      vki_sigaction_fromK_t saved_sigill_act;
1117      vki_sigaction_toK_t     tmp_sigill_act;
1118 
1119      volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
1120      volatile Bool have_STFLE, have_ETF2, have_ETF3;
1121      Int r, model;
1122 
1123      /* Unblock SIGILL and stash away the old action for that signal */
1124      VG_(sigemptyset)(&tmp_set);
1125      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1126 
1127      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1128      vg_assert(r == 0);
1129 
1130      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1131      vg_assert(r == 0);
1132      tmp_sigill_act = saved_sigill_act;
1133 
1134      /* NODEFER: signal handler does not return (from the kernel's point of
1135         view), hence if it is to successfully catch a signal more than once,
1136         we need the NODEFER flag. */
1137      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1138      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1139      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
1140      tmp_sigill_act.ksa_handler = handler_unsup_insn;
1141      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1142 
1143      /* Determine hwcaps. Note, we cannot use the stfle insn because it
1144         is not supported on z900. */
1145 
1146      have_LDISP = True;
1147      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1148         have_LDISP = False;
1149      } else {
1150        /* BASR loads the address of the next insn into r1. Needed to avoid
1151           a segfault in XY. */
1152         __asm__ __volatile__("basr %%r1,%%r0\n\t"
1153                              ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
1154                              ".short 0x0057" : : : "r0", "r1", "cc", "memory");
1155      }
1156 
1157      have_EIMM = True;
1158      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1159         have_EIMM = False;
1160      } else {
1161         __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
1162                              ".short 0x0000" : : : "r0", "memory");
1163      }
1164 
1165      have_GIE = True;
1166      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1167         have_GIE = False;
1168      } else {
1169         __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
1170                              ".short 0x0000" : : : "r0", "memory");
1171      }
1172 
1173      have_DFP = True;
1174      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1175         have_DFP = False;
1176      } else {
1177         __asm__ __volatile__(".long 0xb3d20000"
1178                                : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
1179      }
1180 
1181      have_FGX = True;
1182      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1183         have_FGX = False;
1184      } else {
1185         __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
1186      }
1187 
1188      /* Detect presence of the ETF2-enhancement facility using the
1189         STFLE insn. Note, that STFLE and ETF2 were introduced at the same
1190         time, so the absence of STLFE implies the absence of ETF2. */
1191      have_STFLE = True;
1192      have_ETF2 = False;
1193      have_ETF3 = False;
1194      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1195         have_STFLE = False;
1196      } else {
1197          ULong hoststfle[1];
1198          register ULong reg0 asm("0") = 0; /* one double word available */
1199 
1200          __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
1201                               : "=m" (hoststfle), "+d"(reg0)
1202                               : : "cc", "memory");
1203          if (hoststfle[0] & (1ULL << (63 - 24)))
1204              have_ETF2 = True;
1205          if (hoststfle[0] & (1ULL << (63 - 30)))
1206              have_ETF3 = True;
1207      }
1208 
1209      /* Restore signals */
1210      r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1211      vg_assert(r == 0);
1212      r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1213      vg_assert(r == 0);
1214      va = VexArchS390X;
1215 
1216      model = VG_(get_machine_model)();
1217 
1218      /* If the model is "unknown" don't treat this as an error. Assume
1219         this is a brand-new machine model for which we don't have the
1220         identification yet. Keeping fingers crossed. */
1221 
1222      VG_(debugLog)(1, "machine", "machine %d  LDISP %d EIMM %d GIE %d DFP %d "
1223                    "FGX %d STFLE %d ETF2 %d ETF3 %d\n", model, have_LDISP, have_EIMM,
1224                    have_GIE, have_DFP, have_FGX, have_STFLE, have_ETF2, have_ETF3);
1225 
1226      vai.hwcaps = model;
1227      if (have_LDISP) {
1228         /* Use long displacement only on machines >= z990. For all other machines
1229            it is millicoded and therefore slow. */
1230         if (model >= VEX_S390X_MODEL_Z990)
1231            vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
1232      }
1233      if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
1234      if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
1235      if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
1236      if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;
1237      if (have_ETF2)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF2;
1238      if (have_ETF3)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF3;
1239      if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
1240 
1241      VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1242 
1243      return True;
1244    }
1245 
1246 #elif defined(VGA_arm)
1247    {
1248      /* Same instruction set detection algorithm as for ppc32. */
1249      vki_sigset_t          saved_set, tmp_set;
1250      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1251      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
1252 
1253      volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
1254      volatile Int archlevel;
1255      Int r;
1256 
1257      /* This is a kludge.  Really we ought to back-convert saved_act
1258         into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1259         since that's a no-op on all ppc64 platforms so far supported,
1260         it's not worth the typing effort.  At least include most basic
1261         sanity check: */
1262      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1263 
1264      VG_(sigemptyset)(&tmp_set);
1265      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1266      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1267 
1268      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1269      vg_assert(r == 0);
1270 
1271      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1272      vg_assert(r == 0);
1273      tmp_sigill_act = saved_sigill_act;
1274 
1275      VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1276      tmp_sigfpe_act = saved_sigfpe_act;
1277 
1278      /* NODEFER: signal handler does not return (from the kernel's point of
1279         view), hence if it is to successfully catch a signal more than once,
1280         we need the NODEFER flag. */
1281      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1282      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1283      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
1284      tmp_sigill_act.ksa_handler = handler_unsup_insn;
1285      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1286 
1287      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1288      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1289      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
1290      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1291      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1292 
1293      /* VFP insns */
1294      have_VFP = True;
1295      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1296         have_VFP = False;
1297      } else {
1298         __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
1299      }
1300      /* There are several generation of VFP extension but they differs very
1301         little so for now we will not distinguish them. */
1302      have_VFP2 = have_VFP;
1303      have_VFP3 = have_VFP;
1304 
1305      /* NEON insns */
1306      have_NEON = True;
1307      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1308         have_NEON = False;
1309      } else {
1310         __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
1311      }
1312 
1313      /* ARM architecture level */
1314      archlevel = 5; /* v5 will be base level */
1315      if (archlevel < 7) {
1316         archlevel = 7;
1317         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1318            archlevel = 5;
1319         } else {
1320            __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1321         }
1322      }
1323      if (archlevel < 6) {
1324         archlevel = 6;
1325         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1326            archlevel = 5;
1327         } else {
1328            __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
1329         }
1330      }
1331 
1332      VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1333      VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
1334      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1335      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1336      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1337 
1338      VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
1339            archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
1340            (Int)have_NEON);
1341 
1342      VG_(machine_arm_archlevel) = archlevel;
1343 
1344      va = VexArchARM;
1345 
1346      vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
1347      if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
1348      if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
1349      if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
1350      if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1351 
1352      return True;
1353    }
1354 
1355 #elif defined(VGA_mips32)
1356    {
1357      va = VexArchMIPS32;
1358      UInt model = VG_(get_machine_model)();
1359      if (model== -1)
1360          return False;
1361 
1362      vai.hwcaps = model;
1363      return True;
1364    }
1365 
1366 #else
1367 #  error "Unknown arch"
1368 #endif
1369 }
1370 
1371 /* Notify host cpu cache line size. */
1372 #if defined(VGA_ppc32)
VG_(machine_ppc32_set_clszB)1373 void VG_(machine_ppc32_set_clszB)( Int szB )
1374 {
1375    vg_assert(hwcaps_done);
1376 
1377    /* Either the value must not have been set yet (zero) or we can
1378       tolerate it being set to the same value multiple times, as the
1379       stack scanning logic in m_main is a bit stupid. */
1380    vg_assert(vai.ppc_cache_line_szB == 0
1381              || vai.ppc_cache_line_szB == szB);
1382 
1383    vg_assert(szB == 32 || szB == 64 || szB == 128);
1384    vai.ppc_cache_line_szB = szB;
1385 }
1386 #endif
1387 
1388 
1389 /* Notify host cpu cache line size. */
1390 #if defined(VGA_ppc64)
VG_(machine_ppc64_set_clszB)1391 void VG_(machine_ppc64_set_clszB)( Int szB )
1392 {
1393    vg_assert(hwcaps_done);
1394 
1395    /* Either the value must not have been set yet (zero) or we can
1396       tolerate it being set to the same value multiple times, as the
1397       stack scanning logic in m_main is a bit stupid. */
1398    vg_assert(vai.ppc_cache_line_szB == 0
1399              || vai.ppc_cache_line_szB == szB);
1400 
1401    vg_assert(szB == 32 || szB == 64 || szB == 128);
1402    vai.ppc_cache_line_szB = szB;
1403 }
1404 #endif
1405 
1406 
1407 /* Notify host's ability to handle NEON instructions. */
1408 #if defined(VGA_arm)
VG_(machine_arm_set_has_NEON)1409 void VG_(machine_arm_set_has_NEON)( Bool has_neon )
1410 {
1411    vg_assert(hwcaps_done);
1412    /* There's nothing else we can sanity check. */
1413 
1414    if (has_neon) {
1415       vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1416    } else {
1417       vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
1418    }
1419 }
1420 #endif
1421 
1422 
1423 /* Fetch host cpu info, once established. */
VG_(machine_get_VexArchInfo)1424 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
1425                                    /*OUT*/VexArchInfo* pVai )
1426 {
1427    vg_assert(hwcaps_done);
1428    if (pVa)  *pVa  = va;
1429    if (pVai) *pVai = vai;
1430 }
1431 
1432 
1433 /* Returns the size of the largest guest register that we will
1434    simulate in this run.  This depends on both the guest architecture
1435    and on the specific capabilities we are simulating for that guest
1436    (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
1437    or 32.  General rule: if in doubt, return a value larger than
1438    reality.
1439 
1440    This information is needed by Cachegrind and Callgrind to decide
1441    what the minimum cache line size they are prepared to simulate is.
1442    Basically require that the minimum cache line size is at least as
1443    large as the largest register that might get transferred to/from
1444    memory, so as to guarantee that any such transaction can straddle
1445    at most 2 cache lines.
1446 */
VG_(machine_get_size_of_largest_guest_register)1447 Int VG_(machine_get_size_of_largest_guest_register) ( void )
1448 {
1449    vg_assert(hwcaps_done);
1450    /* Once hwcaps_done is True, we can fish around inside va/vai to
1451       find the information we need. */
1452 
1453 #  if defined(VGA_x86)
1454    vg_assert(va == VexArchX86);
1455    /* We don't support AVX, so 32 is out.  At the other end, even if
1456       we don't support any SSE, the X87 can generate 10 byte
1457       transfers, so let's say 16 to be on the safe side.  Hence the
1458       answer is always 16. */
1459    return 16;
1460 
1461 #  elif defined(VGA_amd64)
1462    /* if AVX then 32 else 16 */
1463    return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
1464 
1465 #  elif defined(VGA_ppc32)
1466    /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
1467    if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
1468    if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
1469    if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
1470    return 8;
1471 
1472 #  elif defined(VGA_ppc64)
1473    /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
1474    if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
1475    if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
1476    if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
1477    return 8;
1478 
1479 #  elif defined(VGA_s390x)
1480    return 8;
1481 
1482 #  elif defined(VGA_arm)
1483    /* Really it depends whether or not we have NEON, but let's just
1484       assume we always do. */
1485    return 16;
1486 
1487 #  elif defined(VGA_mips32)
1488    /* The guest state implies 4, but that can't really be true, can
1489       it? */
1490    return 8;
1491 
1492 #  else
1493 #    error "Unknown arch"
1494 #  endif
1495 }
1496 
1497 
1498 // Given a pointer to a function as obtained by "& functionname" in C,
1499 // produce a pointer to the actual entry point for the function.
VG_(fnptr_to_fnentry)1500 void* VG_(fnptr_to_fnentry)( void* f )
1501 {
1502 #  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
1503       || defined(VGP_arm_linux)                           \
1504       || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
1505       || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)
1506    return f;
1507 #  elif defined(VGP_ppc64_linux)
1508    /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
1509       3-word function descriptor, of which the first word is the entry
1510       address. */
1511    UWord* descr = (UWord*)f;
1512    return (void*)(descr[0]);
1513 #  else
1514 #    error "Unknown platform"
1515 #  endif
1516 }
1517 
1518 /*--------------------------------------------------------------------*/
1519 /*--- end                                                          ---*/
1520 /*--------------------------------------------------------------------*/
1521