/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2010 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"

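/* Accessors for the guest program counter, stack pointer and frame
   pointer.  VG_INSTR_PTR, VG_STACK_PTR and VG_FRAME_PTR are the
   arch-specific guest register field names, defined per-architecture
   elsewhere in the core headers. */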
#define INSTR_PTR(regs)   ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)   ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)   ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

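/* Fill *regs with the values needed to start a stack unwind for
   thread 'tid': the program counter, the stack pointer, and whatever
   extra registers the architecture's unwinder wants (frame pointer,
   link register, etc). */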
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  else
#    error "Unknown arch"
#  endif
}


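/* Record the tool's shadow values for a syscall's result register (and,
   on platforms which return a separate error value, for that register
   too) in the shadow guest states of thread 'tid'. */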
void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR4 = s1err;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR4 = s2err;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  else
#    error "Unknown plat"
#  endif
}

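/* Copy 'size' bytes starting at 'offset' out of one of thread tid's
   guest register areas into 'dst'.  shadowNo selects the area:
   0 = the real guest state, 1 = shadow area 1, 2 = shadow area 2.
   VG_(set_shadow_regs_area) below is the corresponding writer. */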
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


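/* Apply 'f' to the value of each general-purpose (integer) register in
   the given guest state.  This is the per-thread worker for
   VG_(apply_to_GP_regs) below; callers typically treat the register
   values as potential pointers (e.g. when scanning for roots). */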
static void apply_to_GPs_of_tid(VexGuestArchState* vex, void (*f)(Addr))
{
#if defined(VGA_x86)
   (*f)(vex->guest_EAX);
   (*f)(vex->guest_ECX);
   (*f)(vex->guest_EDX);
   (*f)(vex->guest_EBX);
   (*f)(vex->guest_ESI);
   (*f)(vex->guest_EDI);
   (*f)(vex->guest_ESP);
   (*f)(vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(vex->guest_RAX);
   (*f)(vex->guest_RCX);
   (*f)(vex->guest_RDX);
   (*f)(vex->guest_RBX);
   (*f)(vex->guest_RSI);
   (*f)(vex->guest_RDI);
   (*f)(vex->guest_RSP);
   (*f)(vex->guest_RBP);
   (*f)(vex->guest_R8);
   (*f)(vex->guest_R9);
   (*f)(vex->guest_R10);
   (*f)(vex->guest_R11);
   (*f)(vex->guest_R12);
   (*f)(vex->guest_R13);
   (*f)(vex->guest_R14);
   (*f)(vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(vex->guest_GPR0);
   (*f)(vex->guest_GPR1);
   (*f)(vex->guest_GPR2);
   (*f)(vex->guest_GPR3);
   (*f)(vex->guest_GPR4);
   (*f)(vex->guest_GPR5);
   (*f)(vex->guest_GPR6);
   (*f)(vex->guest_GPR7);
   (*f)(vex->guest_GPR8);
   (*f)(vex->guest_GPR9);
   (*f)(vex->guest_GPR10);
   (*f)(vex->guest_GPR11);
   (*f)(vex->guest_GPR12);
   (*f)(vex->guest_GPR13);
   (*f)(vex->guest_GPR14);
   (*f)(vex->guest_GPR15);
   (*f)(vex->guest_GPR16);
   (*f)(vex->guest_GPR17);
   (*f)(vex->guest_GPR18);
   (*f)(vex->guest_GPR19);
   (*f)(vex->guest_GPR20);
   (*f)(vex->guest_GPR21);
   (*f)(vex->guest_GPR22);
   (*f)(vex->guest_GPR23);
   (*f)(vex->guest_GPR24);
   (*f)(vex->guest_GPR25);
   (*f)(vex->guest_GPR26);
   (*f)(vex->guest_GPR27);
   (*f)(vex->guest_GPR28);
   (*f)(vex->guest_GPR29);
   (*f)(vex->guest_GPR30);
   (*f)(vex->guest_GPR31);
   (*f)(vex->guest_CTR);
   (*f)(vex->guest_LR);
#elif defined(VGA_arm)
   (*f)(vex->guest_R0);
   (*f)(vex->guest_R1);
   (*f)(vex->guest_R2);
   (*f)(vex->guest_R3);
   (*f)(vex->guest_R4);
   (*f)(vex->guest_R5);
   (*f)(vex->guest_R6);
   (*f)(vex->guest_R8);
   (*f)(vex->guest_R9);
   (*f)(vex->guest_R10);
   (*f)(vex->guest_R11);
   (*f)(vex->guest_R12);
   (*f)(vex->guest_R13);
   (*f)(vex->guest_R14);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         ThreadState* tst = VG_(get_ThreadState)(tid);
         apply_to_GPs_of_tid(&(tst->arch.vex), f);
      }
   }
}

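/* Iterate over the client stacks of all live threads: reset the
   iterator first, then repeatedly ask for the next stack until False
   is returned.  A minimal usage sketch (the loop body is hypothetical):

      ThreadId tid;
      Addr     stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         // examine [stack_min, stack_max] for thread tid
      }
*/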
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.

   Current dependencies are:

   x86:   initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_x86_have_mxcsr)
   -------------
   amd64: initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
   -------------
   ppc32: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc32_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc32_has_FP)
                       and VG_(machine_ppc32_has_VMX)
   -------------
   ppc64: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc64_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc64_has_VMX)

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64 and arm we'll need to do SIGILL
   testing, so we need a jmp_buf. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm)
#include <setjmp.h> // For jmp_buf
static jmp_buf env_unsup_insn;
static void handler_unsup_insn ( Int x ) { __builtin_longjmp(env_unsup_insn,1); }
#endif
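/* The probe idiom used below on ppc32/64 and arm, sketched with a
   hypothetical feature flag 'have_FOO': each candidate instruction is
   executed inside a __builtin_setjmp guard; if the CPU does not
   implement it, the SIGILL/SIGFPE handler above longjmps back and the
   feature is recorded as absent.

      have_FOO = True;
      if (__builtin_setjmp(env_unsup_insn)) {
         have_FOO = False;                  // trapped: not supported
      } else {
         __asm__ __volatile__(...);         // candidate instruction
      }
*/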


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by the dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (__builtin_setjmp(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */


/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind. */

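/* Detection strategy: on x86 and amd64 the CPUID instruction is queried
   directly; on ppc32/ppc64 and arm, candidate instructions are executed
   under temporary SIGILL/SIGFPE handlers (the probe idiom sketched
   above), and on ppc the dcbz/dcbzl clearing sizes are measured too. */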
Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     max_basic = eax;
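     /* The 12-character vendor string comes back in EBX, EDX, ECX
        (in that order), e.g. "GenuineIntel" or "AuthenticAMD". */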
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     if (have_sse2 && have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     if (have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     va         = VexArchX86;
     vai.hwcaps = 0; /*baseline - no sse at all*/
     VG_(machine_x86_have_mxcsr) = 0;
     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     max_basic = eax;
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0))  != 0; /* True => have sse3 insns */
     // ssse3 is ecx:9
     // sse41 is ecx:19
     // sse42 is ecx:20

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
                  | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
                  | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0);
     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX, (Int)have_GX);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX, (Int)have_GX);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish between
        them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (__builtin_setjmp(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (__builtin_setjmp(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
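// A minimal usage sketch (the helper name is hypothetical): a caller that
// needs the real entry address of a helper before handing it to generated
// code would write
//    void* entry = VG_(fnptr_to_fnentry)( (void*)&some_helper );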
void* VG_(fnptr_to_fnentry)( void* f )
{
#if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
    || defined(VGP_arm_linux)                           \
    || defined(VGP_ppc32_linux) || defined(VGO_darwin)
   return f;
#elif defined(VGP_ppc64_linux) || defined(VGP_ppc32_aix5) \
      || defined(VGP_ppc64_aix5)
   /* All other ppc variants use the AIX scheme, in which f is a
      pointer to a 3-word function descriptor, of which the first word
      is the entry address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#else
#  error "Unknown platform"
#endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/