
/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2010 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  else
#    error "Unknown arch"
#  endif
}
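
/* Illustrative sketch (disabled): how a caller such as the stack
   unwinder might seed an unwind from a thread's current state.  Here
   'tid' stands in for a valid ThreadId in the surrounding context. */
#if 0
   UnwindStartRegs startRegs;
   VG_(memset)(&startRegs, 0, sizeof(startRegs));
   VG_(get_UnwindStartRegs)( &startRegs, tid );
   /* startRegs.r_pc/r_sp now hold the guest PC and SP for 'tid'; the
      misc field carries the extra per-arch registers (e.g. EBP on
      x86) needed to begin unwinding. */
#endif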


void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR4 = s1err;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR4 = s2err;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  else
#    error "Unknown plat"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
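
/* Illustrative sketch (disabled): how a tool might read back a shadow
   register through the accessor above.  The x86-only guard and the use
   of offsetof on the VEX guest state are assumptions for the example;
   'tid' stands in for a valid ThreadId. */
#if 0
#  if defined(VGA_x86)
   UWord shadowEAX = 0;
   VG_(get_shadow_regs_area)( tid, (UChar*)&shadowEAX, /*shadowNo*/1,
                              offsetof(VexGuestX86State, guest_EAX),
                              sizeof(shadowEAX) );
   /* shadowEAX now holds shadow-set-1's value for the guest EAX. */
#  endif
#endif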


static void apply_to_GPs_of_tid(VexGuestArchState* vex, void (*f)(Addr))
{
#if defined(VGA_x86)
   (*f)(vex->guest_EAX);
   (*f)(vex->guest_ECX);
   (*f)(vex->guest_EDX);
   (*f)(vex->guest_EBX);
   (*f)(vex->guest_ESI);
   (*f)(vex->guest_EDI);
   (*f)(vex->guest_ESP);
   (*f)(vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(vex->guest_RAX);
   (*f)(vex->guest_RCX);
   (*f)(vex->guest_RDX);
   (*f)(vex->guest_RBX);
   (*f)(vex->guest_RSI);
   (*f)(vex->guest_RDI);
   (*f)(vex->guest_RSP);
   (*f)(vex->guest_RBP);
   (*f)(vex->guest_R8);
   (*f)(vex->guest_R9);
   (*f)(vex->guest_R10);
   (*f)(vex->guest_R11);
   (*f)(vex->guest_R12);
   (*f)(vex->guest_R13);
   (*f)(vex->guest_R14);
   (*f)(vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(vex->guest_GPR0);
   (*f)(vex->guest_GPR1);
   (*f)(vex->guest_GPR2);
   (*f)(vex->guest_GPR3);
   (*f)(vex->guest_GPR4);
   (*f)(vex->guest_GPR5);
   (*f)(vex->guest_GPR6);
   (*f)(vex->guest_GPR7);
   (*f)(vex->guest_GPR8);
   (*f)(vex->guest_GPR9);
   (*f)(vex->guest_GPR10);
   (*f)(vex->guest_GPR11);
   (*f)(vex->guest_GPR12);
   (*f)(vex->guest_GPR13);
   (*f)(vex->guest_GPR14);
   (*f)(vex->guest_GPR15);
   (*f)(vex->guest_GPR16);
   (*f)(vex->guest_GPR17);
   (*f)(vex->guest_GPR18);
   (*f)(vex->guest_GPR19);
   (*f)(vex->guest_GPR20);
   (*f)(vex->guest_GPR21);
   (*f)(vex->guest_GPR22);
   (*f)(vex->guest_GPR23);
   (*f)(vex->guest_GPR24);
   (*f)(vex->guest_GPR25);
   (*f)(vex->guest_GPR26);
   (*f)(vex->guest_GPR27);
   (*f)(vex->guest_GPR28);
   (*f)(vex->guest_GPR29);
   (*f)(vex->guest_GPR30);
   (*f)(vex->guest_GPR31);
   (*f)(vex->guest_CTR);
   (*f)(vex->guest_LR);
#elif defined(VGA_arm)
   (*f)(vex->guest_R0);
   (*f)(vex->guest_R1);
   (*f)(vex->guest_R2);
   (*f)(vex->guest_R3);
   (*f)(vex->guest_R4);
   (*f)(vex->guest_R5);
   (*f)(vex->guest_R6);
   (*f)(vex->guest_R7);
   (*f)(vex->guest_R8);
   (*f)(vex->guest_R9);
   (*f)(vex->guest_R10);
   (*f)(vex->guest_R11);
   (*f)(vex->guest_R12);
   (*f)(vex->guest_R13);
   (*f)(vex->guest_R14);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         ThreadState* tst = VG_(get_ThreadState)(tid);
         apply_to_GPs_of_tid(&(tst->arch.vex), f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) reading the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.

   Current dependencies are:

   x86:   initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_x86_have_mxcsr)
   -------------
   amd64: initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
   -------------
   ppc32: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc32_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc32_has_FP)
                       and VG_(machine_ppc32_has_VMX)
   -------------
   ppc64: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc64_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc64_has_VMX)

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
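
/* Illustrative sketch (disabled): the startup ordering described
   above, roughly as m_main might perform it.  The error path and the
   'clszB_from_auxv' value are assumptions for the example. */
#if 0
   if (!VG_(machine_get_hwcaps)())
      VG_(exit)(1);   /* CPU lacks some required feature */
#  if defined(VGA_ppc32)
   /* cache line size comes from the auxv data, later on */
   VG_(machine_ppc32_set_clszB)( clszB_from_auxv );  /* hypothetical value */
#  endif
   /* only now is it safe to ask for the results */
   { VexArch     hostArch;
     VexArchInfo hostArchInfo;
     VG_(machine_get_VexArchInfo)( &hostArch, &hostArchInfo ); }
#endif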

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64 and arm we'll need to do SIGILL
   testing, so we need a jmp_buf. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm)
#include <setjmp.h> // For jmp_buf
static jmp_buf env_unsup_insn;
static void handler_unsup_insn ( Int x ) { __builtin_longjmp(env_unsup_insn,1); }
#endif


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by the dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next max block size; assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (__builtin_setjmp(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */



/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     max_basic = eax;
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     if (have_sse2 && have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     if (have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     va         = VexArchX86;
     vai.hwcaps = 0; /*baseline - no sse at all*/
     VG_(machine_x86_have_mxcsr) = 0;
     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     max_basic = eax;
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3  is ecx:9
     // sse41  is ecx:19
     // sse42  is ecx:20

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
                  | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
                  | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0);
     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX, (Int)have_GX);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX, (Int)have_GX);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish between
        them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (__builtin_setjmp(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (__builtin_setjmp(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
    || defined(VGP_arm_linux)                           \
    || defined(VGP_ppc32_linux) || defined(VGO_darwin)
   return f;
#elif defined(VGP_ppc64_linux) || defined(VGP_ppc32_aix5) \
                               || defined(VGP_ppc64_aix5)
   /* All other ppc variants use the AIX scheme, in which f is a
      pointer to a 3-word function descriptor, of which the first word
      is the entry address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#else
#  error "Unknown platform"
#endif
}
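
/* Illustrative sketch (disabled): why the conversion matters when a
   code address is needed.  'some_fn' is a hypothetical function. */
#if 0
   extern void some_fn ( void );   /* hypothetical */
   Addr entry = (Addr) VG_(fnptr_to_fnentry)( (void*)&some_fn );
   /* On ppc64-linux and the AIX platforms, '&some_fn' points at a
      function descriptor and 'entry' is the actual first instruction;
      on the other platforms the two addresses are identical. */
#endif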

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/