/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"

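/* Accessors for the guest instruction pointer, stack pointer and
   frame pointer.  VG_INSTR_PTR, VG_STACK_PTR and VG_FRAME_PTR are
   arch-specific guest-state field names supplied by the core headers
   (pub_core_machine.h). */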
#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}

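/* Tools may keep up to two shadow copies of the guest register state
   (vex_shadow1 and vex_shadow2).  The two functions below copy a byte
   range of the chosen state to or from a caller-supplied buffer;
   shadowNo selects the copy: 0 is the real state, 1 and 2 are the
   shadows. */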
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}

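/* Invoke f once for each general-purpose (integer) register of thread
   tid, passing the register's name and its current guest value. */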
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

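/* Iterate over the stacks of in-use threads: reset_iter primes *tid
   with an out-of-range sentinel; each thread_stack_next call then
   advances to the next non-empty thread and reports its current stack
   pointer (stack_min) and highest stack byte (stack_max). */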
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, arm, s390x and mips32 we'll need
   to do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
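
/* The probing idiom used below: prime env_unsup_insn with
   VG_MINIMAL_SETJMP, then execute the candidate instruction.  If the
   CPU rejects it, the resulting SIGILL (or SIGFPE) is caught by
   handler_unsup_insn, which longjmps back, and the corresponding
   feature is marked as absent. */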


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next block boundary; assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */


#ifdef VGA_s390x

/* Read /proc/cpuinfo.  Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model.  If the machine model could not be
   determined or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
      { "2965", VEX_S390X_MODEL_Z13S },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around brokenness in the /proc file system implementation:
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            /* If several processors report different models, keep the
               smallest (i.e. oldest, most conservative) model id. */
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* defined(VGA_s390x) */

#if defined(VGA_mips32) || defined(VGA_mips64)

/*
 * Initialize hwcaps by parsing /proc/cpuinfo.  Returns False if it
 * cannot determine what CPU it is (it searches only for the models
 * that are or may be supported by Valgrind).
 */
static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
   const char *search_Cavium_str   = "cpu model\t\t: Cavium";
   const char *search_Ingenic_str  = "cpu model\t\t: Ingenic";
   const char *search_Loongson_str = "cpu model\t\t: ICT Loongson";
   const char *search_MIPS_str     = "cpu model\t\t: MIPS";
   const char *search_Netlogic_str = "cpu model\t\t: Netlogic";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf, *isa;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around brokenness in the /proc file system implementation:
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_BROADCOM;
   else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
   else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_CAVIUM;
   else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_MIPS;
   else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
   else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
      vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
   else {
      /* Did not find string in the proc file. */
      vai.hwcaps = 0;
      VG_(free)(file_buf);
      return False;
   }

   isa = VG_(strstr)(file_buf, "isa\t\t\t: ");

   if (NULL != isa) {
      if (VG_(strstr) (isa, "mips32r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
      if (VG_(strstr) (isa, "mips32r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
      if (VG_(strstr) (isa, "mips32r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
      if (VG_(strstr) (isa, "mips64r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
      if (VG_(strstr) (isa, "mips64r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
      if (VG_(strstr) (isa, "mips64r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;

      /*
       * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
       * decide to change incorrect settings in
       * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
       * The current settings show mips32r1, mips32r2 and mips64r1 as
       * unsupported ISAs by Cavium MIPS CPUs.
       */
      if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
                       VEX_MIPS_CPU_ISA_M64R1;
      }
   } else {
      /*
       * Kernel does not provide information about supported ISAs.
       * Populate the isa level flags based on the CPU model.  That is
       * our best guess.
       */
      switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
         case VEX_PRID_COMP_CAVIUM:
         case VEX_PRID_COMP_NETLOGIC:
            vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
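            /* fall through: 64-bit-capable models also implement the
               32-bit ISAs below */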
         case VEX_PRID_COMP_INGENIC_E1:
         case VEX_PRID_COMP_MIPS:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
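            /* fall through */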
         case VEX_PRID_COMP_BROADCOM:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
            break;
         case VEX_PRID_COMP_LEGACY:
            if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
               vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
                             VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
            break;
         default:
            break;
      }
   }
   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGA_mips32) || defined(VGA_mips64) */

#if defined(VGP_arm64_linux)

/* Check to see whether we are running on a Cavium core, and if so
   auto-enable the fallback LLSC implementation.  See #369459. */

static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Cavium_str = "CPU implementer\t: 0x43";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around brokenness in the /proc file system implementation:
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.arm64_requires_fallback_LLSC = True;

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGP_arm64_linux) */

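/* Probe the host CPU and fill in va and vai accordingly.  Returns
   False if the host lacks the minimum feature set Valgrind requires
   (e.g. no CPUID or no cmpxchg8b on x86). */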
Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support an SSE1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just an SSE1 subset, always define it when we have SSE1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* Processor supports AVX instructions, and the OS has enabled
           both XGETBV and the AVX instructions. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
           }
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we already have AVX1
        (plus OS support). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE.  An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine",
                   "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine",
                   "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error.  Assume
        this is a brand-new machine model for which we don't have the
        identification yet.  Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps.  Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1.  Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE.  If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990.  For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as,
        or later than, STFLE itself; so the absence of STFLE implies the
        absence of any facility we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };

     /* Set hwcaps according to the detected facilities */
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) +   // %s %d
                   7 + 1 + 4 + 2                       // machine %4d
                   + 1];                               // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
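     /* Probe from the top down: try a v7-only insn (PLI); if that
        traps, fall back to the v5 baseline and then try a v6-only
        insn (PKHBT). */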
1589 archlevel = 5; /* v5 will be base level */
1590 if (archlevel < 7) {
1591 archlevel = 7;
1592 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1593 archlevel = 5;
1594 } else {
1595 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1596 }
1597 }
1598 if (archlevel < 6) {
1599 archlevel = 6;
1600 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1601 archlevel = 5;
1602 } else {
1603 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
1604 }
1605 }

     /* ARMv8 insns */
     have_V8 = True;
     if (archlevel == 7) {
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_V8 = False;
        } else {
           __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
        }
        if (have_V8 && have_NEON && have_VFP3) {
           archlevel = 8;
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
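
     /* For example (a hypothetical illustration, not computed here): a
        Cortex-A15-class core, which is ARMv7 with VFP3 and NEON but no
        v8 instructions, would finish with

           vai.hwcaps = VEX_ARM_ARCHLEVEL(7)
                        | VEX_HWCAPS_ARM_VFP  | VEX_HWCAPS_ARM_VFP2
                        | VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON;

        while a plain ARMv5 core would get just VEX_ARM_ARCHLEVEL(5). */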

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* Check whether we need to use the fallback LLSC implementation.
        If /proc/cpuinfo cannot be parsed, give up. */
     if (! VG_(parse_cpuinfo)())
        return False;

     /* 0 denotes 'not set'.  The range of legitimate values here,
        once set, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                   "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);
     VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
                   vai.arm64_requires_fallback_LLSC ? "yes" : "no");
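
     /* Worked example of the decode above (0x8444c004 is a typical
        Cortex-A53 CTR_EL0 value, used purely as an illustration):
        DminLine = (0x8444c004 >> 16) & 0xF = 4 and IminLine = 4; the
        fields count log2(words) and a word is 4 bytes, so both
        lg2_szB values become 4 + 2 = 6, i.e. 64-byte cache lines. */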

     return True;
   }

#elif defined(VGA_mips32)
   {
     /* Position of the F64 bit in the FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     if (!VG_(parse_cpuinfo)())
        return False;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

#    if defined(VGP_mips32_linux)
     Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
#    else
     Int fpmode = -1;
#    endif

     if (fpmode < 0) {
        /* prctl(PR_GET_FP_MODE) is not supported by the kernel; fall
           back to probing the FPU register model directly. */
        ULong result = 0;

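        /* How the probe below works (explanatory note): in FR=0 mode
           the even/odd single-precision registers pair up, so $f1 is
           the upper half of the $f0 double; the mtc1 into $f1 then
           rewrites the upper 32 bits of the double loaded from
           `result', and the sdc1 stores back 0x3FF0000000000000.  In
           FR=1 mode $f1 is an independent 64-bit register, $f0 stays
           untouched, and `result' remains 0.  So the comparison below
           yields 1 exactly when the FPU is in FR=1 mode. */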
        if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
           __asm__ volatile (
              ".set push\n\t"
              ".set noreorder\n\t"
              ".set oddspreg\n\t"
              ".set hardfloat\n\t"
              "lui $t0, 0x3FF0\n\t"
              "ldc1 $f0, %0\n\t"
              "mtc1 $t0, $f1\n\t"
              "sdc1 $f0, %0\n\t"
              ".set pop\n\t"
              : "+m"(result)
              :
              : "t0", "$f0", "$f1", "memory");

           fpmode = (result != 0x3FF0000000000000ull);
        }
     }

     if (fpmode != 0)
        vai.hwcaps |= VEX_MIPS_HOST_FR;

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     if (!VG_(parse_cpuinfo)())
        return False;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps |= VEX_MIPS_HOST_FR;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
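/* (Worked example of the rule above: with 64-byte lines and a 32-byte
   largest register, the worst-case transfer starts at offset 48 within
   a line and touches bytes 48..79, i.e. exactly two lines; since no
   transfer is larger than one line, three lines can never be
   touched.) */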
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux)       || defined(VGP_amd64_linux)    \
      || defined(VGP_arm_linux)    || defined(VGO_darwin)         \
      || defined(VGP_ppc32_linux)  || defined(VGP_ppc64le_linux)  \
      || defined(VGP_s390x_linux)  || defined(VGP_mips32_linux)   \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)    \
      || defined(VGP_x86_solaris)  || defined(VGP_amd64_solaris)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}
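
/* Illustrative use (a hypothetical caller, shown only as an example):
   obtaining the real entry point of a function in this file:

      void* entry = VG_(fnptr_to_fnentry)( (void*)&handler_unsup_insn );

   On ppc64be-linux (the ELFv1 ABI) the descriptor's remaining two
   words hold the function's TOC pointer and environment pointer, which
   is why `f' cannot be used directly as a code address there. */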

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/