/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)   ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)   ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)   ((regs).vex.VG_FRAME_PTR)
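
/* VG_INSTR_PTR, VG_STACK_PTR and VG_FRAME_PTR are per-arch guest-state
   field names supplied by pub_core_machine.h, so on x86, for example,
   INSTR_PTR(regs) expands to (regs).vex.guest_EIP.  The per-arch
   spellings can be seen written out in VG_(get_UnwindStartRegs)
   below. */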

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}
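
/* Illustrative sketch (an assumption, not code from this module): a
   typical caller-side use of VG_(get_UnwindStartRegs), seeding a
   stack unwind for thread 'tid'.

      UnwindStartRegs srs;
      VG_(get_UnwindStartRegs)( &srs, tid );
      // srs.r_pc and srs.r_sp now hold the thread's program counter
      // and stack pointer, widened to ULong, and srs.misc carries the
      // per-arch extras set above (eg srs.misc.X86.r_ebp on x86).
*/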


void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  elif defined(VGP_mips32_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
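
/* Illustrative sketch (hypothetical usage, not part of this module):
   reading and writing the shadow1 state of one guest register.  The
   offsetof expression is the usual way to name a guest register's
   offset; VexGuestX86State and guest_EAX come from VEX's
   libvex_guest_x86.h.

      UWord v;
      VG_(get_shadow_regs_area)( tid, (UChar*)&v, 1,
                                 offsetof(VexGuestX86State, guest_EAX),
                                 sizeof(v) );
      v |= 1;   // adjust the shadow value in some tool-specific way
      VG_(set_shadow_regs_area)( tid, 1,
                                 offsetof(VexGuestX86State, guest_EAX),
                                 sizeof(v), (const UChar*)&v );
*/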


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId, HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}
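
/* Illustrative sketch (hypothetical usage): the reset/next pair above
   is intended to be driven as an iterator over all live thread stacks.

      ThreadId tid;
      Addr     stack_min, stack_max;
      VG_(thread_stack_reset_iter)( &tid );
      while ( VG_(thread_stack_next)( &tid, &stack_min, &stack_max ) ) {
         // [stack_min .. stack_max] brackets tid's current stack:
         // stack_min is the thread's SP and stack_max the highest
         // client stack word recorded for it.
      }
*/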

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* fixs390: anything for s390x here ? */

/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x)
#include "pub_tool_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
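
/* The SIGILL-probe idiom used throughout the hwcaps detection below,
   restated in outline (a summary of the existing pattern, not new
   mechanism):

      have_feature = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_feature = False;   // insn trapped; handler longjmp'd here
      } else {
         __asm__ __volatile__( ... candidate instruction ... );
      }

   VG_MINIMAL_SETJMP returns zero when first called; if the probed
   instruction raises SIGILL (or SIGFPE), handler_unsup_insn longjmps
   back and the setjmp returns nonzero, rather than the handler ever
   returning to the faulting instruction. */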


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by the dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int  dcbz_szB = 0;
   Int  dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int  i;

   /* round up to the next max block size; assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF, identification = 0117C9, machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#ifdef VGA_mips32

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   char *search_MIPS_str = "MIPS";
   char *search_Broadcom_str = "Broadcom";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
      return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
      return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     if (have_sse2 && have_sse1) {
        va = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     if (have_sse1) {
        va = VexArchX86;
        vai.hwcaps = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     va = VexArchX86;
     vai.hwcaps = 0; /*baseline - no sse at all*/
     VG_(machine_x86_have_mxcsr) = 0;
     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx /*, have_fma*/;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3 is ecx:9
     // sse41 is ecx:19
     // sse42 is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }


     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va = VexArchAMD64;
     vai.hwcaps = (have_sse3  ? VEX_HWCAPS_AMD64_SSE3  : 0)
                | (have_cx16  ? VEX_HWCAPS_AMD64_CX16  : 0)
                | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
                | (have_avx   ? VEX_HWCAPS_AMD64_AVX   : 0);
     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)   vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)   vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX)  vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX)  vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX)  vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;


     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)   vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX)  vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX)  vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX)  vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)
   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
     volatile Bool have_STFLE, have_ETF2, have_ETF3;
     Int r, model;

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1.  Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long 0xe3001000\n\t"  /* XY 0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     have_EIMM = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_EIMM = False;
     } else {
        __asm__ __volatile__(".long 0xc0090000\n\t"  /* iilf r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_GIE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GIE = False;
     } else {
        __asm__ __volatile__(".long 0xc2010000\n\t"  /* msfi r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xb3d20000"
                             : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
     }

     have_FGX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FGX = False;
     } else {
        __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
     }

     /* Detect presence of the ETF2-enhancement facility using the
        STFLE insn.  Note that STFLE and ETF2 were introduced at the
        same time, so the absence of STFLE implies the absence of
        ETF2. */
     have_STFLE = True;
     have_ETF2 = False;
     have_ETF3 = False;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        ULong hoststfle[1];
        register ULong reg0 asm("0") = 0;  /* one double word available */

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"  /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
        if (hoststfle[0] & (1ULL << (63 - 24)))
           have_ETF2 = True;
        if (hoststfle[0] & (1ULL << (63 - 30)))
           have_ETF3 = True;
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     model = VG_(get_machine_model)();

     /* If the model is "unknown" don't treat this as an error.  Assume
        this is a brand-new machine model for which we don't have the
        identification yet.  Keeping fingers crossed. */

     VG_(debugLog)(1, "machine", "machine %d LDISP %d EIMM %d GIE %d DFP %d "
                   "FGX %d STFLE %d ETF2 %d ETF3 %d\n", model, have_LDISP,
                   have_EIMM, have_GIE, have_DFP, have_FGX, have_STFLE,
                   have_ETF2, have_ETF3);

     vai.hwcaps = model;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990.  For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }
     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
     if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;
     if (have_ETF2)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF2;
     if (have_ETF3)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF3;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish between
        them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     return True;
   }

#elif defined(VGA_mips32)
   {
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;
     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux)                           \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}
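
/* Illustrative sketch (hypothetical usage): why the ppc64 case above
   matters.  On ppc64-linux a C function pointer is the address of a
   3-word function descriptor rather than of the code itself, so code
   that wants the address of the first instruction must translate:

      void (*fp)(void) = &some_fn;       // some_fn is hypothetical
      void* entry = VG_(fnptr_to_fnentry)( (void*)fp );
      // on ppc64-linux, entry == descriptor word 0 (the code address);
      // on the other supported platforms, entry == fp unchanged.
*/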

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/