1
2 /* HOW TO USE
3
4 13 Dec '05 - Linker no longer used (apart from mymalloc)
5 Simply compile and link switchback.c with test_xxx.c,
6 e.g. for ppc64:
7 $ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c
8
9 Test file test_xxx.c must have an entry point called "entry",
10 which expects to take a single argument which is a function pointer
11 (to "serviceFn").
12
13 Test file may not reference any other symbols.
14
15 NOTE: POWERPC: it is critical, when using this on ppc, to set
16 CacheLineSize to the right value. Values we currently know of:
17
18 imac (G3): 32
19 G5 (ppc970): 128
20 */
21
22 #include <stdio.h>
23 #include <assert.h>
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28
29 #include "../pub/libvex_basictypes.h"
30 #include "../pub/libvex_guest_x86.h"
31 #include "../pub/libvex_guest_amd64.h"
32 #include "../pub/libvex_guest_ppc32.h"
33 #include "../pub/libvex_guest_ppc64.h"
34 #include "../pub/libvex.h"
35 #include "../pub/libvex_trc_values.h"
36 #include "linker.h"
37
38 static ULong n_bbs_done = 0;
39 static Int n_translations_made = 0;
40
41
42 #if defined(__i386__)
43 # define VexGuestState VexGuestX86State
44 # define LibVEX_Guest_initialise LibVEX_GuestX86_initialise
45 # define VexArch VexArchX86
46 # define VexSubArch VexSubArchX86_sse1
47 # define GuestPC guest_EIP
48 # define CacheLineSize 0/*irrelevant*/
49 #elif defined(__x86_64__)
50 # define VexGuestState VexGuestAMD64State
51 # define LibVEX_Guest_initialise LibVEX_GuestAMD64_initialise
52 # define VexArch VexArchAMD64
53 # define VexSubArch VexSubArch_NONE
54 # define GuestPC guest_RIP
55 # define CacheLineSize 0/*irrelevant*/
56 #elif defined(__powerpc__)
57
58 #if !defined(__powerpc64__) // ppc32
59 # define VexGuestState VexGuestPPC32State
60 # define LibVEX_Guest_initialise LibVEX_GuestPPC32_initialise
61 # define VexArch VexArchPPC32
62 # define VexSubArch VexSubArchPPC32_FI
63 # define GuestPC guest_CIA
64 # define CacheLineSize 128
65 #else
66 # define VexGuestState VexGuestPPC64State
67 # define LibVEX_Guest_initialise LibVEX_GuestPPC64_initialise
68 # define VexArch VexArchPPC64
69 # define VexSubArch VexSubArchPPC64_FI
70 # define GuestPC guest_CIA
71 # define CacheLineSize 128
72 #endif
73
74 #else
75 # error "Unknown arch"
76 #endif
77
/* Trace flag bits (bit number: what is shown when the bit is set):
     7: conversion into IR
     6: after initial opt
     5: after instrumentation
     4: after second opt
     3: after tree building
     2: selected insns
     1: after reg-alloc
     0: final assembly

   TEST_FLAGS is the mask used for a verbose translation and
   DEBUG_TRACE_FLAGS the mask used otherwise.  Both expansions are
   fully parenthesised so they remain a single operand when used
   inside a larger expression (e.g. "TEST_FLAGS & x"). */
#define TEST_FLAGS        ((1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0))
#define DEBUG_TRACE_FLAGS (0) /* alt: ((1<<7)|(0<<6)|(0<<5)|(0<<4)|(1<<3)|(1<<2)|(1<<1)|(1<<0)) */
88
89
90 /* guest state */
91 UInt gstack[50000];
92 VexGuestState gst;
93 VexControl vcon;
94
95 /* only used for the switchback transition */
96 /* i386: helper1 = &gst, helper2 = %EFLAGS */
97 /* amd64: helper1 = &gst, helper2 = %EFLAGS */
98 /* ppc32: helper1 = &gst, helper2 = %CR, helper3 = %XER */
99 HWord sb_helper1 = 0;
100 HWord sb_helper2 = 0;
101 HWord sb_helper3 = 0;
102
103 /* translation cache */
104 #define N_TRANS_CACHE 1000000
105 #define N_TRANS_TABLE 10000
106
107 ULong trans_cache[N_TRANS_CACHE];
108 VexGuestExtents trans_table [N_TRANS_TABLE];
109 ULong* trans_tableP[N_TRANS_TABLE];
110
111 Int trans_cache_used = 0;
112 Int trans_table_used = 0;
113
chase_into_ok(Addr64 dst)114 static Bool chase_into_ok ( Addr64 dst ) { return False; }
115
#if 0
/* Disabled debugging aid: performs a raw ppc "write" system call
   with inline assembly instead of going through libc.  Example:
      local_sys_write_stderr(&c,1);
   NOTE(review): in spite of the name, r3 is loaded with 1 below
   rather than 2 -- confirm which descriptor is wanted before this
   is re-enabled. */
static void local_sys_write_stderr ( HChar* buf, Int n )
{
   UInt sc_result;
   __asm__ volatile (
      "li %%r0,4\n\t"      /* r0 = 4 (__NR_write)            */
      "li %%r3,1\n\t"      /* r3 = 1 (file descriptor)       */
      "mr %%r4,%1\n\t"     /* r4 = buf                       */
      "mr %%r5,%2\n\t"     /* r5 = n                         */
      "sc\n\t"             /* enter the kernel               */
      "mr %0,%%r3\n"       /* sc_result = r3 after the call  */
      : "=mr" (sc_result)
      : "g" (buf), "g" (n)
      : "r0", "r3", "r4", "r5" );
}
#endif
133
134 /* For providing services. */
serviceFn(HWord arg1,HWord arg2)135 static HWord serviceFn ( HWord arg1, HWord arg2 )
136 {
137 switch (arg1) {
138 case 0: /* EXIT */
139 printf("---STOP---\n");
140 printf("serviceFn:EXIT\n");
141 printf("%llu bbs simulated\n", n_bbs_done);
142 printf("%d translations made, %d tt bytes\n",
143 n_translations_made, 8*trans_cache_used);
144 exit(0);
145 case 1: /* PUTC */
146 putchar(arg2);
147 return 0;
148 case 2: /* MALLOC */
149 return (HWord)malloc(arg2);
150 case 3: /* FREE */
151 free((void*)arg2);
152 return 0;
153 default:
154 assert(0);
155 }
156 }
157
158
/* -------------------- */
/* Entry point of the assembly routine that resumes execution on the
   real CPU; control never comes back to the caller. */
extern void switchback_asm ( void );
162
163 #if defined(__i386__)
164
__asm__(
"switchback_asm:\n"
/* %eax := guest state pointer, as published in sb_helper1 */
" movl sb_helper1, %eax\n"
/* switch stacks */
" movl 16(%eax), %esp\n"
/* Build up, on the new stack (CA = continuation address):
      CA, then eflags:CA, then EAX:eflags:CA */
" pushl 56(%eax)\n"
" movl sb_helper2, %ebx\n"
" pushl %ebx\n"
" pushl 0(%eax)\n"
/* load the other integer registers from the guest state */
" movl 4(%eax), %ecx\n"
" movl 8(%eax), %edx\n"
" movl 12(%eax), %ebx\n"
" movl 20(%eax), %ebp\n"
" movl 24(%eax), %esi\n"
" movl 28(%eax), %edi\n"
/* pop EAX and the flags, then "return" to the continuation address */
" popl %eax\n"
" popfl\n"
" ret\n"
);
switchback(void)183 void switchback ( void )
184 {
185 sb_helper1 = (HWord)&gst;
186 sb_helper2 = LibVEX_GuestX86_get_eflags(&gst);
187 switchback_asm(); // never returns
188 }
189
190 #elif defined(__x86_64__)
191
__asm__(
"switchback_asm:\n"
/* %rax := guest state pointer, as published in sb_helper1 */
" movq sb_helper1, %rax\n"
/* switch stacks */
" movq 32(%rax), %rsp\n"
/* Build up, on the new stack (CA = continuation address):
      CA, then eflags:CA, then RAX:eflags:CA */
" pushq 168(%rax)\n"
" movq sb_helper2, %rbx\n"
" pushq %rbx\n"
" pushq 0(%rax)\n"
/* load the other legacy integer registers from the guest state */
" movq 8(%rax), %rcx\n"
" movq 16(%rax), %rdx\n"
" movq 24(%rax), %rbx\n"
" movq 40(%rax), %rbp\n"
" movq 48(%rax), %rsi\n"
" movq 56(%rax), %rdi\n"
/* ... and %r8 .. %r15 */
" movq 64(%rax), %r8\n"
" movq 72(%rax), %r9\n"
" movq 80(%rax), %r10\n"
" movq 88(%rax), %r11\n"
" movq 96(%rax), %r12\n"
" movq 104(%rax), %r13\n"
" movq 112(%rax), %r14\n"
" movq 120(%rax), %r15\n"
/* pop RAX and the flags, then "return" to the continuation address */
" popq %rax\n"
" popfq\n"
" ret\n"
);
switchback(void)220 void switchback ( void )
221 {
222 sb_helper1 = (HWord)&gst;
223 sb_helper2 = LibVEX_GuestAMD64_get_rflags(&gst);
224 switchback_asm(); // never returns
225 }
226
227 #elif defined(__powerpc__)
228
invalidate_icache(void * ptr,int nbytes)229 static void invalidate_icache(void *ptr, int nbytes)
230 {
231 unsigned long startaddr = (unsigned long) ptr;
232 unsigned long endaddr = startaddr + nbytes;
233 unsigned long addr;
234 unsigned long cls = CacheLineSize;
235
236 startaddr &= ~(cls - 1);
237 for (addr = startaddr; addr < endaddr; addr += cls)
238 asm volatile("dcbst 0,%0" : : "r" (addr));
239 asm volatile("sync");
240 for (addr = startaddr; addr < endaddr; addr += cls)
241 asm volatile("icbi 0,%0" : : "r" (addr));
242 asm volatile("sync; isync");
243 }
244
245
246 #if !defined(__powerpc64__) // ppc32
__asm__(
"switchback_asm:\n"
/* r31 := guest state pointer (the word stored in sb_helper1) */
" lis %r31,sb_helper1@ha\n"
" lwz %r31,sb_helper1@l(%r31)\n"

/* LR := guest_LR */
" lwz %r3,900(%r31)\n"
" mtlr %r3\n"

/* CR := the word stored in sb_helper2 */
" lis %r3,sb_helper2@ha\n"
" lwz %r3,sb_helper2@l(%r3)\n"
" mtcr %r3\n"

/* CTR := guest_CTR */
" lwz %r3,904(%r31)\n"
" mtctr %r3\n"

/* XER := the word stored in sb_helper3 */
" lis %r3,sb_helper3@ha\n"
" lwz %r3,sb_helper3@l(%r3)\n"
" mtxer %r3\n"

/* r0 .. r31 from the guest state.  Loading r1 switches stacks;
   r31 goes last because it is the base register for every load. */
" lwz %r0, 0(%r31)\n"
" lwz %r1, 4(%r31)\n"
" lwz %r2, 8(%r31)\n"
" lwz %r3, 12(%r31)\n"
" lwz %r4, 16(%r31)\n"
" lwz %r5, 20(%r31)\n"
" lwz %r6, 24(%r31)\n"
" lwz %r7, 28(%r31)\n"
" lwz %r8, 32(%r31)\n"
" lwz %r9, 36(%r31)\n"
" lwz %r10, 40(%r31)\n"
" lwz %r11, 44(%r31)\n"
" lwz %r12, 48(%r31)\n"
" lwz %r13, 52(%r31)\n"
" lwz %r14, 56(%r31)\n"
" lwz %r15, 60(%r31)\n"
" lwz %r16, 64(%r31)\n"
" lwz %r17, 68(%r31)\n"
" lwz %r18, 72(%r31)\n"
" lwz %r19, 76(%r31)\n"
" lwz %r20, 80(%r31)\n"
" lwz %r21, 84(%r31)\n"
" lwz %r22, 88(%r31)\n"
" lwz %r23, 92(%r31)\n"
" lwz %r24, 96(%r31)\n"
" lwz %r25, 100(%r31)\n"
" lwz %r26, 104(%r31)\n"
" lwz %r27, 108(%r31)\n"
" lwz %r28, 112(%r31)\n"
" lwz %r29, 116(%r31)\n"
" lwz %r30, 120(%r31)\n"
" lwz %r31, 124(%r31)\n"

/* Placeholder nops.  switchback() copies this routine and turns the
   first nop of the copy into a branch to the continuation address. */
"nop_start_point:\n"
" nop\n"
" nop\n"
" nop\n"
" nop\n"
" nop\n"
"nop_end_point:\n"
);
313
314 #else // ppc64
315
__asm__(
/* Function descriptor for switchback_asm in .opd, followed by the
   code itself.  switchback_asm_undotted labels the first
   instruction so that C code can take the address of the code. */
".text\n"
" .global switchback_asm\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
"switchback_asm:\n"
" .quad .switchback_asm,.TOC.@tocbase,0\n"
" .previous\n"
" .type .switchback_asm,@function\n"
" .global .switchback_asm\n"
".switchback_asm:\n"
"switchback_asm_undotted:\n"

/* r31 := guest state pointer (the doubleword stored in sb_helper1) */
" lis %r31,sb_helper1@highest\n"
" ori %r31,%r31,sb_helper1@higher\n"
" rldicr %r31,%r31,32,31\n"
" oris %r31,%r31,sb_helper1@h\n"
" ori %r31,%r31,sb_helper1@l\n"
" ld %r31,0(%r31)\n"

/* LR := guest_LR */
" ld %r3,1032(%r31)\n"
" mtlr %r3\n"

/* CR := the value stored in sb_helper2 */
" lis %r3,sb_helper2@highest\n"
" ori %r3,%r3,sb_helper2@higher\n"
" rldicr %r3,%r3,32,31\n"
" oris %r3,%r3,sb_helper2@h\n"
" ori %r3,%r3,sb_helper2@l\n"
" ld %r3,0(%r3)\n"
" mtcr %r3\n"

/* CTR := guest_CTR */
" ld %r3,1040(%r31)\n"
" mtctr %r3\n"

/* XER := the value stored in sb_helper3 */
" lis %r3,sb_helper3@highest\n"
" ori %r3,%r3,sb_helper3@higher\n"
" rldicr %r3,%r3,32,31\n"
" oris %r3,%r3,sb_helper3@h\n"
" ori %r3,%r3,sb_helper3@l\n"
" ld %r3,0(%r3)\n"
" mtxer %r3\n"

/* r0 .. r31 from the guest state.  Loading r1 switches stacks;
   r31 goes last because it is the base register for every load. */
" ld %r0, 0(%r31)\n"
" ld %r1, 8(%r31)\n"
" ld %r2, 16(%r31)\n"
" ld %r3, 24(%r31)\n"
" ld %r4, 32(%r31)\n"
" ld %r5, 40(%r31)\n"
" ld %r6, 48(%r31)\n"
" ld %r7, 56(%r31)\n"
" ld %r8, 64(%r31)\n"
" ld %r9, 72(%r31)\n"
" ld %r10, 80(%r31)\n"
" ld %r11, 88(%r31)\n"
" ld %r12, 96(%r31)\n"
" ld %r13, 104(%r31)\n"
" ld %r14, 112(%r31)\n"
" ld %r15, 120(%r31)\n"
" ld %r16, 128(%r31)\n"
" ld %r17, 136(%r31)\n"
" ld %r18, 144(%r31)\n"
" ld %r19, 152(%r31)\n"
" ld %r20, 160(%r31)\n"
" ld %r21, 168(%r31)\n"
" ld %r22, 176(%r31)\n"
" ld %r23, 184(%r31)\n"
" ld %r24, 192(%r31)\n"
" ld %r25, 200(%r31)\n"
" ld %r26, 208(%r31)\n"
" ld %r27, 216(%r31)\n"
" ld %r28, 224(%r31)\n"
" ld %r29, 232(%r31)\n"
" ld %r30, 240(%r31)\n"
" ld %r31, 248(%r31)\n"

/* Placeholder nops.  switchback() copies this routine and turns the
   first nop of the copy into a branch to the continuation address. */
"nop_start_point:\n"
" nop\n"
" nop\n"
" nop\n"
" nop\n"
" nop\n"
"nop_end_point:\n"
);
405 #endif
406
407 extern void switchback_asm_undotted;
408 extern void nop_start_point;
409 extern void nop_end_point;
switchback(void)410 void switchback ( void )
411 {
412 Int i;
413 /* blargh. Copy the entire switchback_asm procedure into new
414 memory on which can can set both write and execute permissions,
415 so we can poke around with it and then run the results. */
416
417 #if defined(__powerpc64__) // ppc32
418 UChar* sa_start = (UChar*)&switchback_asm_undotted;
419 #else
420 UChar* sa_start = (UChar*)&switchback_asm;
421 #endif
422 UChar* sa_nop_start = (UChar*)&nop_start_point;
423 UChar* sa_end = (UChar*)&nop_end_point;
424
425 #if 0
426 printf("sa_start %p\n", sa_start );
427 printf("sa_nop_start %p\n", sa_nop_start);
428 printf("sa_end %p\n", sa_end);
429 #endif
430 Int nbytes = sa_end - sa_start;
431 Int off_nopstart = sa_nop_start - sa_start;
432 if (0)
433 printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart);
434
435 /* copy it into mallocville */
436 UChar* copy = mymalloc(nbytes);
437 assert(copy);
438 for (i = 0; i < nbytes; i++)
439 copy[i] = sa_start[i];
440
441 UInt* p = (UInt*)(©[off_nopstart]);
442
443 #if !defined(__powerpc64__) // ppc32
444 Addr32 addr_of_nop = (Addr32)p;
445 Addr32 where_to_go = gst.guest_CIA;
446 Int diff = ((Int)where_to_go) - ((Int)addr_of_nop);
447
448 #if 0
449 printf("addr of first nop = 0x%x\n", addr_of_nop);
450 printf("where to go = 0x%x\n", where_to_go);
451 printf("diff = 0x%x\n", diff);
452 #endif
453
454 #else // ppc64
455 Addr64 addr_of_nop = (Addr64)p;
456 Addr64 where_to_go = gst.guest_CIA;
457 Long diff = ((Long)where_to_go) - ((Long)addr_of_nop);
458
459 #if 0
460 printf("addr of first nop = 0x%llx\n", addr_of_nop);
461 printf("where to go = 0x%llx\n", where_to_go);
462 printf("diff = 0x%llx\n", diff);
463 #endif
464 #endif
465
466 if (diff < -0x2000000 || diff >= 0x2000000) {
467 // we're hosed. Give up
468 printf("hosed -- offset too large\n");
469 assert(0);
470 }
471
472 sb_helper1 = (HWord)&gst;
473 #if !defined(__powerpc64__) // ppc32
474 sb_helper2 = LibVEX_GuestPPC32_get_CR(&gst);
475 sb_helper3 = LibVEX_GuestPPC32_get_XER(&gst);
476 #else // ppc64
477 sb_helper2 = LibVEX_GuestPPC64_get_CR(&gst);
478 sb_helper3 = LibVEX_GuestPPC64_get_XER(&gst);
479 #endif
480
481 /* stay sane ... */
482 assert(p[0] == 24<<26); /* nop */
483
484 /* branch to diff */
485 p[0] = ((18<<26) | (((diff >> 2) & 0xFFFFFF) << 2) | (0<<1) | (0<<0));
486
487 invalidate_icache( copy, nbytes );
488
489 #if defined(__powerpc64__)
490 //printf("jumping to %p\n", copy);
491 { ULong faketoc[3];
492 void* v;
493 faketoc[0] = (ULong)copy;
494 v = &faketoc[0];
495 ( (void(*)(void)) v )();
496 }
497 #else
498 ( (void(*)(void))copy )();
499 #endif
500 }
501
502 #else
503 # error "Unknown arch (switchback)"
504 #endif
505
506 /* -------------------- */
507 static HWord f, gp, res;
508 extern void run_translation_asm(void);
509
510 #if defined(__i386__)
__asm__(
"run_translation_asm:\n"
/* keep all integer registers across the call */
" pushal\n"
/* %ebp := gp, then call the code whose address is in f */
" movl gp, %ebp\n"
" movl f, %eax\n"
" call *%eax\n"
/* res := value left in %eax by the translation */
" movl %eax, res\n"
" popal\n"
" ret\n"
);
521
522 #elif defined(__x86_64__)
__asm__(
"run_translation_asm:\n"

/* save every integer register except %rsp */
" pushq %rax\n"
" pushq %rbx\n"
" pushq %rcx\n"
" pushq %rdx\n"
" pushq %rbp\n"
" pushq %rsi\n"
" pushq %rdi\n"
" pushq %r8\n"
" pushq %r9\n"
" pushq %r10\n"
" pushq %r11\n"
" pushq %r12\n"
" pushq %r13\n"
" pushq %r14\n"
" pushq %r15\n"

/* %rbp := gp, call the code whose address is in f, and store the
   value it leaves in %rax into res */
" movq gp, %rbp\n"
" movq f, %rax\n"
" call *%rax\n"
" movq %rax, res\n"

/* restore in the reverse order */
" popq %r15\n"
" popq %r14\n"
" popq %r13\n"
" popq %r12\n"
" popq %r11\n"
" popq %r10\n"
" popq %r9\n"
" popq %r8\n"
" popq %rdi\n"
" popq %rsi\n"
" popq %rbp\n"
" popq %rdx\n"
" popq %rcx\n"
" popq %rbx\n"
" popq %rax\n"

" ret\n"
);
565
566 #elif defined(__powerpc__)
567
568 #if !defined(__powerpc64__) // ppc32
__asm__(
"run_translation_asm:\n"

/* Open a 256-byte frame; stwu stores the old sp at the new 0(r1). */
" stwu 1,-256(1)\n"

/* LR is kept at 260(r1), i.e. 4 bytes above the old sp. */
" mflr %r0\n"
" stw %r0,260(%r1)\n"

/* The word at 4(%r1) is left free for a callee to save its LR.
   There are no parameters, and the non-volatile CR fields are not
   saved. */

/* Save r13 .. r31 (just the callee-saved registers). */
" stw %r13, 8(%r1)\n"
" stw %r14, 12(%r1)\n"
" stw %r15, 16(%r1)\n"
" stw %r16, 20(%r1)\n"
" stw %r17, 24(%r1)\n"
" stw %r18, 28(%r1)\n"
" stw %r19, 32(%r1)\n"
" stw %r20, 36(%r1)\n"
" stw %r21, 40(%r1)\n"
" stw %r22, 44(%r1)\n"
" stw %r23, 48(%r1)\n"
" stw %r24, 52(%r1)\n"
" stw %r25, 56(%r1)\n"
" stw %r26, 60(%r1)\n"
" stw %r27, 64(%r1)\n"
" stw %r28, 68(%r1)\n"
" stw %r29, 72(%r1)\n"
" stw %r30, 76(%r1)\n"
" stw %r31, 80(%r1)\n"

/* r31 (guest state pointer) := global "gp" */
" lis %r31,gp@ha\n"
" lwz %r31,gp@l(%r31)\n"

/* call the translation whose address is in global "f" */
" lis %r4,f@ha\n"
" lwz %r4,f@l(%r4)\n"
" mtctr %r4\n"
" bctrl\n"

/* global "res" := return value (r3) */
" lis %r5,res@ha\n"
" stw %r3,res@l(%r5)\n"

/* global "gp" := r31, which the translation may have changed */
" lis %r5,gp@ha\n"
" stw %r31,gp@l(%r5)\n"

/* Restore r13 .. r31. */
" lwz %r13, 8(%r1)\n"
" lwz %r14, 12(%r1)\n"
" lwz %r15, 16(%r1)\n"
" lwz %r16, 20(%r1)\n"
" lwz %r17, 24(%r1)\n"
" lwz %r18, 28(%r1)\n"
" lwz %r19, 32(%r1)\n"
" lwz %r20, 36(%r1)\n"
" lwz %r21, 40(%r1)\n"
" lwz %r22, 44(%r1)\n"
" lwz %r23, 48(%r1)\n"
" lwz %r24, 52(%r1)\n"
" lwz %r25, 56(%r1)\n"
" lwz %r26, 60(%r1)\n"
" lwz %r27, 64(%r1)\n"
" lwz %r28, 68(%r1)\n"
" lwz %r29, 72(%r1)\n"
" lwz %r30, 76(%r1)\n"
" lwz %r31, 80(%r1)\n"

/* Restore LR, drop the frame and return. */
" lwz %r0,260(%r1)\n"
" mtlr %r0\n"

" addi %r1,%r1,256\n"

" blr"
);
654
655 #else // ppc64
656
__asm__(
/* Function descriptor for run_translation_asm in .opd, followed by
   the code itself. */
".text\n"
" .global run_translation_asm\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
"run_translation_asm:\n"
" .quad .run_translation_asm,.TOC.@tocbase,0\n"
" .previous\n"
" .type .run_translation_asm,@function\n"
" .global .run_translation_asm\n"
".run_translation_asm:\n"

/* Before the frame is opened: LR goes to 16(r1), CTR to 8(r1). */
" mflr %r0\n"
" std %r0,16(%r1)\n"
" mfctr %r0\n"
" std %r0,8(%r1)\n"

/* Open a 256-byte frame; stdu stores the old sp at the new 0(r1). */
" stdu 1,-256(1)\n"

/* There are no parameters, and the non-volatile CR fields are not
   saved. */

/* Save r13 .. r31 (just the callee-saved registers). */
" std %r13, 48(%r1)\n"
" std %r14, 56(%r1)\n"
" std %r15, 64(%r1)\n"
" std %r16, 72(%r1)\n"
" std %r17, 80(%r1)\n"
" std %r18, 88(%r1)\n"
" std %r19, 96(%r1)\n"
" std %r20, 104(%r1)\n"
" std %r21, 112(%r1)\n"
" std %r22, 120(%r1)\n"
" std %r23, 128(%r1)\n"
" std %r24, 136(%r1)\n"
" std %r25, 144(%r1)\n"
" std %r26, 152(%r1)\n"
" std %r27, 160(%r1)\n"
" std %r28, 168(%r1)\n"
" std %r29, 176(%r1)\n"
" std %r30, 184(%r1)\n"
" std %r31, 192(%r1)\n"

/* r31 (guest state pointer) := global "gp" */
" lis %r31,gp@highest\n"
" ori %r31,%r31,gp@higher\n"
" rldicr %r31,%r31,32,31\n"
" oris %r31,%r31,gp@h\n"
" ori %r31,%r31,gp@l\n"
" ld %r31,0(%r31)\n"

/* call the translation whose address is in global "f" */
" lis %r4,f@highest\n"
" ori %r4,%r4,f@higher\n"
" rldicr %r4,%r4,32,31\n"
" oris %r4,%r4,f@h\n"
" ori %r4,%r4,f@l\n"
" ld %r4,0(%r4)\n"
" mtctr %r4\n"
" bctrl\n"

/* global "res" := return value (r3) */
" lis %r5,res@highest\n"
" ori %r5,%r5,res@higher\n"
" rldicr %r5,%r5,32,31\n"
" oris %r5,%r5,res@h\n"
" ori %r5,%r5,res@l\n"
" std %r3,0(%r5)\n"

/* global "gp" := r31, which the translation may have changed */
" lis %r5,gp@highest\n"
" ori %r5,%r5,gp@higher\n"
" rldicr %r5,%r5,32,31\n"
" oris %r5,%r5,gp@h\n"
" ori %r5,%r5,gp@l\n"
" std %r31,0(%r5)\n"

/* Restore r13 .. r31. */
" ld %r13, 48(%r1)\n"
" ld %r14, 56(%r1)\n"
" ld %r15, 64(%r1)\n"
" ld %r16, 72(%r1)\n"
" ld %r17, 80(%r1)\n"
" ld %r18, 88(%r1)\n"
" ld %r19, 96(%r1)\n"
" ld %r20, 104(%r1)\n"
" ld %r21, 112(%r1)\n"
" ld %r22, 120(%r1)\n"
" ld %r23, 128(%r1)\n"
" ld %r24, 136(%r1)\n"
" ld %r25, 144(%r1)\n"
" ld %r26, 152(%r1)\n"
" ld %r27, 160(%r1)\n"
" ld %r28, 168(%r1)\n"
" ld %r29, 176(%r1)\n"
" ld %r30, 184(%r1)\n"
" ld %r31, 192(%r1)\n"

/* Drop the frame, then restore LR and CTR and return. */
" addi %r1,%r1,256\n"

" ld %r0,16(%r1)\n"
" mtlr %r0\n"
" ld %r0,8(%r1)\n"
" mtctr %r0\n"

" blr"
);
771 #endif
772
773 #else
774
775 # error "Unknown arch"
776 #endif
777
778 /* Run a translation at host address 'translation'. Return
779 True if Vex asked for an translation cache flush as a result.
780 */
run_translation(HWord translation)781 Bool run_translation ( HWord translation )
782 {
783 if (0 && DEBUG_TRACE_FLAGS) {
784 printf(" run translation %p\n", (void*)translation );
785 printf(" simulated bb: %llu\n", n_bbs_done);
786 }
787 f = translation;
788 gp = (HWord)&gst;
789 run_translation_asm();
790 gst.GuestPC = res;
791 n_bbs_done ++;
792 return gp==VEX_TRC_JMP_TINVAL;
793 }
794
find_translation(Addr64 guest_addr)795 HWord find_translation ( Addr64 guest_addr )
796 {
797 Int i;
798 HWord __res;
799 if (0)
800 printf("find translation %p ... ", ULong_to_Ptr(guest_addr));
801 for (i = 0; i < trans_table_used; i++)
802 if (trans_table[i].base[0] == guest_addr)
803 break;
804 if (i == trans_table_used) {
805 if (0) printf("none\n");
806 return 0; /* not found */
807 }
808
809 /* Move this translation one step towards the front, so finding it
810 next time round is just that little bit cheaper. */
811 if (i > 2) {
812 VexGuestExtents tmpE = trans_table[i-1];
813 ULong* tmpP = trans_tableP[i-1];
814 trans_table[i-1] = trans_table[i];
815 trans_tableP[i-1] = trans_tableP[i];
816 trans_table[i] = tmpE;
817 trans_tableP[i] = tmpP;
818 i--;
819 }
820
821 __res = (HWord)trans_tableP[i];
822 if (0) printf("%p\n", (void*)__res);
823 return __res;
824 }
825
826 #define N_TRANSBUF 5000
827 static UChar transbuf[N_TRANSBUF];
make_translation(Addr64 guest_addr,Bool verbose)828 void make_translation ( Addr64 guest_addr, Bool verbose )
829 {
830 VexTranslateArgs vta;
831 VexTranslateResult tres;
832 VexArchInfo vex_archinfo;
833 Int trans_used, i, ws_needed;
834
835 if (trans_table_used >= N_TRANS_TABLE
836 || trans_cache_used >= N_TRANS_CACHE-1000) {
837 /* If things are looking to full, just dump
838 all the translations. */
839 trans_cache_used = 0;
840 trans_table_used = 0;
841 }
842
843 assert(trans_table_used < N_TRANS_TABLE);
844 if (0)
845 printf("make translation %p\n", ULong_to_Ptr(guest_addr));
846
847 LibVEX_default_VexArchInfo(&vex_archinfo);
848 vex_archinfo.subarch = VexSubArch;
849 vex_archinfo.ppc_cache_line_szB = CacheLineSize;
850
851 /* */
852 vta.arch_guest = VexArch;
853 vta.archinfo_guest = vex_archinfo;
854 vta.arch_host = VexArch;
855 vta.archinfo_host = vex_archinfo;
856 vta.guest_bytes = (UChar*)ULong_to_Ptr(guest_addr);
857 vta.guest_bytes_addr = (Addr64)guest_addr;
858 vta.guest_bytes_addr_noredir = (Addr64)guest_addr;
859 vta.chase_into_ok = chase_into_ok;
860 // vta.guest_extents = &vge;
861 vta.guest_extents = &trans_table[trans_table_used];
862 vta.host_bytes = transbuf;
863 vta.host_bytes_size = N_TRANSBUF;
864 vta.host_bytes_used = &trans_used;
865 vta.instrument1 = NULL;
866 vta.instrument2 = NULL;
867 vta.do_self_check = False;
868 vta.traceflags = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
869 vta.dispatch = NULL;
870
871 tres = LibVEX_Translate ( &vta );
872
873 assert(tres == VexTransOK);
874 ws_needed = (trans_used+7) / 8;
875 assert(ws_needed > 0);
876 assert(trans_cache_used + ws_needed < N_TRANS_CACHE);
877 n_translations_made++;
878
879 for (i = 0; i < trans_used; i++) {
880 HChar* dst = ((HChar*)(&trans_cache[trans_cache_used])) + i;
881 HChar* src = (HChar*)(&transbuf[i]);
882 *dst = *src;
883 }
884
885 #if defined(__powerpc__)
886 invalidate_icache( &trans_cache[trans_cache_used], trans_used );
887 #endif
888
889 trans_tableP[trans_table_used] = &trans_cache[trans_cache_used];
890 trans_table_used++;
891 trans_cache_used += ws_needed;
892 }
893
894
overlap(Addr64 start,UInt len,VexGuestExtents * vge)895 static Bool overlap ( Addr64 start, UInt len, VexGuestExtents* vge )
896 {
897 Int i;
898 for (i = 0; i < vge->n_used; i++) {
899 if (vge->base[i]+vge->len[i] <= start
900 || vge->base[i] >= start+len) {
901 /* ok */
902 } else {
903 return True;
904 }
905 }
906 return False; /* no overlap */
907 }
908
/* Remove from the translation table every translation with an
   extent overlapping [start, start+len).  The surviving entries are
   packed towards the front in their existing order and
   trans_table_used is lowered to match. */
static void dump_translations ( Addr64 start, UInt len )
{
   Int src;
   Int kept = 0;

   for (src = 0; src < trans_table_used; src++) {
      if (overlap(start, len, &trans_table[src]))
         continue;   /* this one is dropped */

      assert(kept <= src);
      trans_table[kept]  = trans_table[src];
      trans_tableP[kept] = trans_tableP[src];
      kept++;
   }

   assert(kept >= 0 && kept <= trans_table_used);
   if (0) printf("dumped %d translations\n", trans_table_used - kept);
   trans_table_used = kept;
}
927
928
929 static ULong stopAfter = 0;
930 static UChar* entryP = NULL;
931
932
933 __attribute__ ((noreturn))
934 static
failure_exit(void)935 void failure_exit ( void )
936 {
937 fprintf(stdout, "VEX did failure_exit. Bye.\n");
938 fprintf(stdout, "bb counter = %llu\n\n", n_bbs_done);
939 exit(1);
940 }
941
942 static
log_bytes(HChar * bytes,Int nbytes)943 void log_bytes ( HChar* bytes, Int nbytes )
944 {
945 fwrite ( bytes, 1, nbytes, stdout );
946 fflush ( stdout );
947 }
948
949
950 /* run simulated code forever (it will exit by calling
951 serviceFn(0)). */
run_simulator(void)952 static void run_simulator ( void )
953 {
954 static Addr64 last_guest = 0;
955 Addr64 next_guest;
956 HWord next_host;
957 Bool need_inval;
958 while (1) {
959 next_guest = gst.GuestPC;
960
961 if (0)
962 printf("\nnext_guest: 0x%x\n", (UInt)next_guest);
963
964 #if defined(__powerpc64__)
965 if (next_guest == Ptr_to_ULong( (void*)(*(ULong*)(&serviceFn)) )) {
966 #else
967 if (next_guest == Ptr_to_ULong(&serviceFn)) {
968 #endif
969 /* "do" the function call to serviceFn */
970 # if defined(__i386__)
971 {
972 HWord esp = gst.guest_ESP;
973 gst.guest_EIP = *(UInt*)(esp+0);
974 gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) );
975 gst.guest_ESP = esp+4;
976 next_guest = gst.guest_EIP;
977 }
978 # elif defined(__x86_64__)
979 {
980 HWord esp = gst.guest_RSP;
981 gst.guest_RIP = *(UInt*)(esp+0);
982 gst.guest_RAX = serviceFn( gst.guest_RDI, gst.guest_RSI );
983 gst.guest_RSP = esp+8;
984 next_guest = gst.guest_RIP;
985 }
986 # elif defined(__powerpc__)
987 {
988 gst.guest_GPR3 = serviceFn( gst.guest_GPR3, gst.guest_GPR4 );
989 gst.guest_CIA = gst.guest_LR;
990 next_guest = gst.guest_CIA;
991 }
992 # else
993 # error "Unknown arch"
994 # endif
995 }
996
997 next_host = find_translation(next_guest);
998 if (next_host == 0) {
999 make_translation(next_guest,False);
1000 next_host = find_translation(next_guest);
1001 assert(next_host != 0);
1002 }
1003
1004 // Switchback
1005 if (n_bbs_done == stopAfter) {
1006 printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done);
1007 #if 1
1008 if (last_guest) {
1009 printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1);
1010 make_translation(last_guest,True);
1011 }
1012 #endif
1013 #if 0
1014 if (next_guest) {
1015 printf("\n*** Current translation (bb:%llu):\n", n_bbs_done);
1016 make_translation(next_guest,True);
1017 }
1018 #endif
1019 printf("--- end SWITCHBACK at bb:%llu ---\n", n_bbs_done);
1020 switchback();
1021 assert(0); /*NOTREACHED*/
1022 }
1023
1024 last_guest = next_guest;
1025 need_inval = run_translation(next_host);
1026 if (need_inval) {
1027 #if defined(__powerpc__)
1028 dump_translations( (Addr64)gst.guest_TISTART, gst.guest_TILEN );
1029 if (0) printf("dump translations done\n");
1030 #endif
1031 }
1032 }
1033 }
1034
1035
/* Print the command line synopsis to stdout and exit with status 1. */
static void usage ( void )
{
   fputs("usage: switchback #bbs\n"
         " - begins switchback for basic block #bbs\n"
         " - use -1 for largest possible run without switchback\n\n",
         stdout);
   exit(1);
}
1043
#if defined(__powerpc__)

/* Small assembly routines that copy a live host register into a
   global: get_R2 stores r2 in saved_R2 and, on ppc64 only, get_R13
   stores r13 in saved_R13.  Both use r10 as scratch. */

#if !defined(__powerpc64__)   /* ppc32 */

UInt saved_R2;
__asm__(
"get_R2:\n"
" lis %r10,saved_R2@ha\n"
" stw %r2,saved_R2@l(%r10)\n"
" blr\n"
);

#else                         /* ppc64 */

ULong saved_R2;
ULong saved_R13;

/* descriptor in .opd, then: r10 := &saved_R2; store r2 there */
__asm__(
".text\n"
" .global get_R2\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
"get_R2:\n"
" .quad .get_R2,.TOC.@tocbase,0\n"
" .previous\n"
" .type .get_R2,@function\n"
" .global .get_R2\n"
".get_R2:\n"
" lis %r10,saved_R2@highest\n"
" ori %r10,%r10,saved_R2@higher\n"
" rldicr %r10,%r10,32,31\n"
" oris %r10,%r10,saved_R2@h\n"
" ori %r10,%r10,saved_R2@l\n"
" std %r2,0(%r10)\n"
" blr\n"
);

/* descriptor in .opd, then: r10 := &saved_R13; store r13 there */
__asm__(
".text\n"
" .global get_R13\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
"get_R13:\n"
" .quad .get_R13,.TOC.@tocbase,0\n"
" .previous\n"
" .type .get_R13,@function\n"
" .global .get_R13\n"
".get_R13:\n"
" lis %r10,saved_R13@highest\n"
" ori %r10,%r10,saved_R13@higher\n"
" rldicr %r10,%r10,32,31\n"
" oris %r10,%r10,saved_R13@h\n"
" ori %r10,%r10,saved_R13@l\n"
" std %r13,0(%r10)\n"
" blr\n"
);

#endif

extern void get_R2 ( void );
extern void get_R13 ( void );

#endif
1099
1100 int main ( Int argc, HChar** argv )
1101 {
1102 if (argc != 2)
1103 usage();
1104
1105 stopAfter = (ULong)atoll(argv[1]);
1106
1107 extern void entry ( void*(*service)(int,int) );
1108 entryP = (UChar*)&entry;
1109
1110 if (!entryP) {
1111 printf("switchback: can't find entry point\n");
1112 exit(1);
1113 }
1114
1115 LibVEX_default_VexControl(&vcon);
1116 vcon.guest_max_insns=50;
1117 vcon.guest_chase_thresh=0;
1118 vcon.iropt_level=2;
1119
1120 LibVEX_Init( failure_exit, log_bytes, 1, False, &vcon );
1121 LibVEX_Guest_initialise(&gst);
1122
1123 /* set up as if a call to the entry point passing serviceFn as
1124 the one and only parameter */
1125 # if defined(__i386__)
1126 gst.guest_EIP = (UInt)entryP;
1127 gst.guest_ESP = (UInt)&gstack[25000];
1128 *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn;
1129 *(UInt*)(gst.guest_ESP+0) = 0x12345678;
1130 # elif defined(__x86_64__)
1131 gst.guest_RIP = (ULong)entryP;
1132 gst.guest_RSP = (ULong)&gstack[25000];
1133 gst.guest_RDI = (ULong)serviceFn;
1134 *(ULong*)(gst.guest_RSP+0) = 0x12345678AABBCCDDULL;
1135 # elif defined(__powerpc__)
1136 get_R2();
1137
1138 #if !defined(__powerpc64__) // ppc32
1139 gst.guest_CIA = (UInt)entryP;
1140 gst.guest_GPR1 = (UInt)&gstack[25000]; /* stack pointer */
1141 gst.guest_GPR3 = (UInt)serviceFn; /* param to entry */
1142 gst.guest_GPR2 = saved_R2;
1143 gst.guest_LR = 0x12345678; /* bogus return address */
1144 #else // ppc64
1145 get_R13();
1146 gst.guest_CIA = * (ULong*)entryP;
1147 gst.guest_GPR1 = (ULong)&gstack[25000]; /* stack pointer */
1148 gst.guest_GPR3 = (ULong)serviceFn; /* param to entry */
1149 gst.guest_GPR2 = saved_R2;
1150 gst.guest_GPR13 = saved_R13;
1151 gst.guest_LR = 0x1234567812345678ULL; /* bogus return address */
1152 // printf("setting CIA to %p\n", (void*)gst.guest_CIA);
1153 #endif
1154
1155 # else
1156 # error "Unknown arch"
1157 # endif
1158
1159 printf("\n---START---\n");
1160
1161 #if 1
1162 run_simulator();
1163 #else
1164 ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn);
1165 #endif
1166
1167
1168 return 0;
1169 }
1170