• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* HOW TO USE
3 
4 13 Dec '05 - Linker no longer used (apart from mymalloc)
5 Simply compile and link switchback.c with test_xxx.c,
6 e.g. for ppc64:
7 $ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c
8 
9 Test file test_xxx.c must have an entry point called "entry",
10 which expects to take a single argument which is a function pointer
11 (to "serviceFn").
12 
13 Test file may not reference any other symbols.
14 
15 NOTE: POWERPC: it is critical, when using this on ppc, to set
16 CacheLineSize to the right value.  Values we currently know of:
17 
18    imac (G3):   32
19    G5 (ppc970): 128
20 */
21 
22 #include <stdio.h>
23 #include <assert.h>
24 #include <stdlib.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 
29 #include "../pub/libvex_basictypes.h"
30 #include "../pub/libvex_guest_x86.h"
31 #include "../pub/libvex_guest_amd64.h"
32 #include "../pub/libvex_guest_ppc32.h"
33 #include "../pub/libvex_guest_ppc64.h"
34 #include "../pub/libvex.h"
35 #include "../pub/libvex_trc_values.h"
36 #include "linker.h"
37 
38 static ULong n_bbs_done = 0;
39 static Int   n_translations_made = 0;
40 
41 
42 #if defined(__i386__)
43 #  define VexGuestState             VexGuestX86State
44 #  define LibVEX_Guest_initialise   LibVEX_GuestX86_initialise
45 #  define VexArch                   VexArchX86
46 #  define VexSubArch                VexSubArchX86_sse1
47 #  define GuestPC                   guest_EIP
48 #  define CacheLineSize             0/*irrelevant*/
49 #elif defined(__x86_64__)
50 #  define VexGuestState             VexGuestAMD64State
51 #  define LibVEX_Guest_initialise   LibVEX_GuestAMD64_initialise
52 #  define VexArch                   VexArchAMD64
53 #  define VexSubArch                VexSubArch_NONE
54 #  define GuestPC                   guest_RIP
55 #  define CacheLineSize             0/*irrelevant*/
56 #elif defined(__powerpc__)
57 
58 #if !defined(__powerpc64__) // ppc32
59 #  define VexGuestState             VexGuestPPC32State
60 #  define LibVEX_Guest_initialise   LibVEX_GuestPPC32_initialise
61 #  define VexArch                   VexArchPPC32
62 #  define VexSubArch                VexSubArchPPC32_FI
63 #  define GuestPC                   guest_CIA
64 #  define CacheLineSize             128
65 #else
66 #  define VexGuestState             VexGuestPPC64State
67 #  define LibVEX_Guest_initialise   LibVEX_GuestPPC64_initialise
68 #  define VexArch                   VexArchPPC64
69 #  define VexSubArch                VexSubArchPPC64_FI
70 #  define GuestPC                   guest_CIA
71 #  define CacheLineSize             128
72 #endif
73 
74 #else
75 #   error "Unknown arch"
76 #endif
77 
78 /* 7: show conversion into IR */
79 /* 6: show after initial opt */
80 /* 5: show after instrumentation */
81 /* 4: show after second opt */
82 /* 3: show after tree building */
83 /* 2: show selected insns */
84 /* 1: show after reg-alloc */
85 /* 0: show final assembly */
/* Trace-flag masks passed to LibVEX as vta.traceflags.  Each expansion is
   fully parenthesised so the macro acts as one value inside any
   surrounding expression (e.g. "TEST_FLAGS & mask"). */
#define TEST_FLAGS        ((1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0))
#define DEBUG_TRACE_FLAGS (0) /* e.g. (1<<7)|(0<<6)|(0<<5)|(0<<4)|(1<<3)|(1<<2)|(1<<1)|(1<<0) */
88 
89 
90 /* guest state */
91 UInt gstack[50000];
92 VexGuestState gst;
93 VexControl vcon;
94 
95 /* only used for the switchback transition */
96 /* i386:  helper1 = &gst, helper2 = %EFLAGS */
97 /* amd64: helper1 = &gst, helper2 = %EFLAGS */
98 /* ppc32/ppc64: helper1 = &gst, helper2 = %CR, helper3 = %XER */
99 HWord sb_helper1 = 0;
100 HWord sb_helper2 = 0;
101 HWord sb_helper3 = 0;
102 
103 /* translation cache */
104 #define N_TRANS_CACHE 1000000
105 #define N_TRANS_TABLE 10000
106 
107 ULong trans_cache[N_TRANS_CACHE];
108 VexGuestExtents trans_table [N_TRANS_TABLE];
109 ULong*          trans_tableP[N_TRANS_TABLE];
110 
111 Int trans_cache_used = 0;
112 Int trans_table_used = 0;
113 
chase_into_ok(Addr64 dst)114 static Bool chase_into_ok ( Addr64 dst ) { return False; }
115 
116 #if 0
117 // local_sys_write_stderr(&c,1);
118 static void local_sys_write_stderr ( HChar* buf, Int n )
119 {
120    UInt __res;
121    __asm__ volatile (
122       "li %%r0,4\n\t"      /* set %r0 = __NR_write */
123       "li %%r3,1\n\t"      /* set %r3 = stdout */
124       "mr %%r4,%1\n\t"     /* set %r4 = buf */
125       "mr %%r5,%2\n\t"     /* set %r5 = n */
126       "sc\n\t"             /* write(stderr, buf, n) */
127       "mr %0,%%r3\n"       /* set __res = r3 */
128       : "=mr" (__res)
129       : "g" (buf), "g" (n)
130       : "r0", "r3", "r4", "r5" );
131 }
132 #endif
133 
134 /* For providing services. */
serviceFn(HWord arg1,HWord arg2)135 static HWord serviceFn ( HWord arg1, HWord arg2 )
136 {
137    switch (arg1) {
138       case 0: /* EXIT */
139          printf("---STOP---\n");
140          printf("serviceFn:EXIT\n");
141 	 printf("%llu bbs simulated\n", n_bbs_done);
142 	 printf("%d translations made, %d tt bytes\n",
143                 n_translations_made, 8*trans_cache_used);
144          exit(0);
145       case 1: /* PUTC */
146          putchar(arg2);
147          return 0;
148       case 2: /* MALLOC */
149          return (HWord)malloc(arg2);
150       case 3: /* FREE */
151          free((void*)arg2);
152          return 0;
153       default:
154          assert(0);
155    }
156 }
157 
158 
159 /* -------------------- */
160 /* continue execution on the real CPU (never returns) */
161 extern void switchback_asm(void);
162 
163 #if defined(__i386__)
164 
/* i386 switchback_asm: resume the guest on the real CPU.
   Takes the guest-state address from sb_helper1 and the flags value from
   sb_helper2, switches to the stack pointer stored in the guest state,
   pushes the continuation address, the flags and the guest EAX, reloads
   the remaining integer registers, then pops EAX and the flags and
   'ret's to the continuation address.
   NOTE(review): the numeric offsets (0..28, 56) presumably mirror the
   field layout of VexGuestX86State -- confirm against the header. */
165 asm(
166 "switchback_asm:\n"
167 "   movl sb_helper1, %eax\n"  // eax = guest state ptr
168 "   movl  16(%eax), %esp\n"   // switch stacks
169 "   pushl 56(%eax)\n"         // push continuation addr
170 "   movl sb_helper2, %ebx\n"  // get eflags
171 "   pushl %ebx\n"             // eflags:CA
172 "   pushl 0(%eax)\n"          //  EAX:eflags:CA
173 "   movl 4(%eax), %ecx\n"
174 "   movl 8(%eax), %edx\n"
175 "   movl 12(%eax), %ebx\n"
176 "   movl 20(%eax), %ebp\n"
177 "   movl 24(%eax), %esi\n"
178 "   movl 28(%eax), %edi\n"
179 "   popl %eax\n"
180 "   popfl\n"
181 "   ret\n"
182 );
switchback(void)183 void switchback ( void )
184 {
185    sb_helper1 = (HWord)&gst;
186    sb_helper2 = LibVEX_GuestX86_get_eflags(&gst);
187    switchback_asm(); // never returns
188 }
189 
190 #elif defined(__x86_64__)
191 
/* x86_64 switchback_asm: resume the guest on the real CPU.
   Takes the guest-state address from sb_helper1 and the flags value from
   sb_helper2, switches to the stack pointer stored in the guest state,
   pushes the continuation address, the flags and the guest RAX, reloads
   the other integer registers (including r8-r15), then pops RAX and the
   flags and 'ret's to the continuation address.
   NOTE(review): the numeric offsets (0..120, 168) presumably mirror the
   field layout of VexGuestAMD64State -- confirm against the header. */
192 asm(
193 "switchback_asm:\n"
194 "   movq sb_helper1, %rax\n"  // rax = guest state ptr
195 "   movq  32(%rax), %rsp\n"   // switch stacks
196 "   pushq 168(%rax)\n"        // push continuation addr
197 "   movq sb_helper2, %rbx\n"  // get eflags
198 "   pushq %rbx\n"             // eflags:CA
199 "   pushq 0(%rax)\n"          // RAX:eflags:CA
200 "   movq 8(%rax), %rcx\n"
201 "   movq 16(%rax), %rdx\n"
202 "   movq 24(%rax), %rbx\n"
203 "   movq 40(%rax), %rbp\n"
204 "   movq 48(%rax), %rsi\n"
205 "   movq 56(%rax), %rdi\n"
206 
207 "   movq 64(%rax), %r8\n"
208 "   movq 72(%rax), %r9\n"
209 "   movq 80(%rax), %r10\n"
210 "   movq 88(%rax), %r11\n"
211 "   movq 96(%rax), %r12\n"
212 "   movq 104(%rax), %r13\n"
213 "   movq 112(%rax), %r14\n"
214 "   movq 120(%rax), %r15\n"
215 
216 "   popq %rax\n"
217 "   popfq\n"
218 "   ret\n"
219 );
switchback(void)220 void switchback ( void )
221 {
222    sb_helper1 = (HWord)&gst;
223    sb_helper2 = LibVEX_GuestAMD64_get_rflags(&gst);
224    switchback_asm(); // never returns
225 }
226 
227 #elif defined(__powerpc__)
228 
invalidate_icache(void * ptr,int nbytes)229 static void invalidate_icache(void *ptr, int nbytes)
230 {
231    unsigned long startaddr = (unsigned long) ptr;
232    unsigned long endaddr = startaddr + nbytes;
233    unsigned long addr;
234    unsigned long cls = CacheLineSize;
235 
236    startaddr &= ~(cls - 1);
237    for (addr = startaddr; addr < endaddr; addr += cls)
238       asm volatile("dcbst 0,%0" : : "r" (addr));
239    asm volatile("sync");
240    for (addr = startaddr; addr < endaddr; addr += cls)
241       asm volatile("icbi 0,%0" : : "r" (addr));
242    asm volatile("sync; isync");
243 }
244 
245 
246 #if !defined(__powerpc64__) // ppc32
/* ppc32 switchback_asm: reload the real CPU from the guest state.
   r31 is loaded with the value of sb_helper1 (the guest-state address);
   LR and CTR are loaded from the guest state, CR from sb_helper2 and XER
   from sb_helper3; then r0..r31 are loaded from consecutive 4-byte slots
   (r31 last, since it is the base register).  The code ends with five
   nops bracketed by nop_start_point/nop_end_point; switchback() patches
   the first nop of a copy of this routine into a branch to the guest's
   continuation address.
   NOTE(review): offsets 900/904 presumably match guest_LR/guest_CTR in
   VexGuestPPC32State -- confirm against the header. */
247 asm(
248 "switchback_asm:\n"
249 // gst
250 "   lis  %r31,sb_helper1@ha\n"      // get hi-wd of guest_state_ptr addr
251 "   lwz  %r31,sb_helper1@l(%r31)\n" // load word of guest_state_ptr to r31
252 
253 // LR
254 "   lwz  %r3,900(%r31)\n"           // guest_LR
255 "   mtlr %r3\n"                     // move to LR
256 
257 // CR
258 "   lis  %r3,sb_helper2@ha\n"       // get hi-wd of flags addr
259 "   lwz  %r3,sb_helper2@l(%r3)\n"   // load flags word to r3
260 "   mtcr %r3\n"                     // move r3 to CR
261 
262 // CTR
263 "   lwz %r3,904(%r31)\n"       // guest_CTR
264 "   mtctr %r3\n"               // move r3 to CTR
265 
266 // XER
267 "   lis  %r3,sb_helper3@ha\n"       // get hi-wd of xer addr
268 "   lwz  %r3,sb_helper3@l(%r3)\n"   // load xer word to r3
269 "   mtxer %r3\n"                     // move r3 to XER
270 
271 
272 // GPR's
273 "   lwz %r0,    0(%r31)\n"
274 "   lwz %r1,    4(%r31)\n"     // switch stacks (r1 = SP)
275 "   lwz %r2,    8(%r31)\n"
276 "   lwz %r3,   12(%r31)\n"
277 "   lwz %r4,   16(%r31)\n"
278 "   lwz %r5,   20(%r31)\n"
279 "   lwz %r6,   24(%r31)\n"
280 "   lwz %r7,   28(%r31)\n"
281 "   lwz %r8,   32(%r31)\n"
282 "   lwz %r9,   36(%r31)\n"
283 "   lwz %r10,  40(%r31)\n"
284 "   lwz %r11,  44(%r31)\n"
285 "   lwz %r12,  48(%r31)\n"
286 "   lwz %r13,  52(%r31)\n"
287 "   lwz %r14,  56(%r31)\n"
288 "   lwz %r15,  60(%r31)\n"
289 "   lwz %r16,  64(%r31)\n"
290 "   lwz %r17,  68(%r31)\n"
291 "   lwz %r18,  72(%r31)\n"
292 "   lwz %r19,  76(%r31)\n"
293 "   lwz %r20,  80(%r31)\n"
294 "   lwz %r21,  84(%r31)\n"
295 "   lwz %r22,  88(%r31)\n"
296 "   lwz %r23,  92(%r31)\n"
297 "   lwz %r24,  96(%r31)\n"
298 "   lwz %r25, 100(%r31)\n"
299 "   lwz %r26, 104(%r31)\n"
300 "   lwz %r27, 108(%r31)\n"
301 "   lwz %r28, 112(%r31)\n"
302 "   lwz %r29, 116(%r31)\n"
303 "   lwz %r30, 120(%r31)\n"
304 "   lwz %r31, 124(%r31)\n"
305 "nop_start_point:\n"
306 "   nop\n"
307 "   nop\n"
308 "   nop\n"
309 "   nop\n"
310 "   nop\n"
311 "nop_end_point:\n"
312 );
313 
314 #else // ppc64
315 
/* ppc64 switchback_asm: reload the real CPU from the guest state.
   'switchback_asm' itself is emitted in the .opd section as three quads
   (code address, TOC base, 0); the code carries the labels
   '.switchback_asm' and 'switchback_asm_undotted', the latter being the
   one switchback() uses to find the start of the instructions.
   The 64-bit address of each sb_helper variable is built from its
   @highest/@higher/@h/@l parts.  r31 receives the value of sb_helper1
   (the guest-state address); LR and CTR come from the guest state, CR
   from sb_helper2 and XER from sb_helper3; then r0..r31 are loaded from
   consecutive 8-byte slots (r31 last, since it is the base register).
   The code ends with five nops bracketed by
   nop_start_point/nop_end_point; switchback() patches the first nop of
   a copy of this routine into a branch to the continuation address.
   NOTE(review): offsets 1032/1040 presumably match guest_LR/guest_CTR in
   VexGuestPPC64State -- confirm against the header. */
316 asm(
317 ".text\n"
318 "   .global switchback_asm\n"
319 "   .section \".opd\",\"aw\"\n"
320 "   .align 3\n"
321 "switchback_asm:\n"
322 "   .quad .switchback_asm,.TOC.@tocbase,0\n"
323 "   .previous\n"
324 "   .type .switchback_asm,@function\n"
325 "   .global  .switchback_asm\n"
326 ".switchback_asm:\n"
327 "switchback_asm_undotted:\n"
328 
329 // gst: load word of guest_state_ptr to r31
330 "   lis    %r31,sb_helper1@highest\n"
331 "   ori    %r31,%r31,sb_helper1@higher\n"
332 "   rldicr %r31,%r31,32,31\n"
333 "   oris   %r31,%r31,sb_helper1@h\n"
334 "   ori    %r31,%r31,sb_helper1@l\n"
335 "   ld     %r31,0(%r31)\n"
336 
337 
338 // LR
339 "   ld   %r3,1032(%r31)\n"          // guest_LR
340 "   mtlr %r3\n"                     // move to LR
341 
342 // CR
343 "   lis    %r3,sb_helper2@highest\n"
344 "   ori    %r3,%r3,sb_helper2@higher\n"
345 "   rldicr %r3,%r3,32,31\n"
346 "   oris   %r3,%r3,sb_helper2@h\n"
347 "   ori    %r3,%r3,sb_helper2@l\n"
348 "   ld     %r3,0(%r3)\n"            // load flags word to r3
349 "   mtcr   %r3\n"                   // move r3 to CR
350 
351 // CTR
352 "   ld     %r3,1040(%r31)\n"        // guest_CTR
353 "   mtctr  %r3\n"                   // move r3 to CTR
354 
355 // XER
356 "   lis    %r3,sb_helper3@highest\n"
357 "   ori    %r3,%r3,sb_helper3@higher\n"
358 "   rldicr %r3,%r3,32,31\n"
359 "   oris   %r3,%r3,sb_helper3@h\n"
360 "   ori    %r3,%r3,sb_helper3@l\n"
361 "   ld     %r3,0(%r3)\n"            // load xer word to r3
362 "   mtxer  %r3\n"                   // move r3 to XER
363 
364 // GPR's
365 "   ld %r0,    0(%r31)\n"
366 "   ld %r1,    8(%r31)\n"     // switch stacks (r1 = SP)
367 "   ld %r2,   16(%r31)\n"
368 "   ld %r3,   24(%r31)\n"
369 "   ld %r4,   32(%r31)\n"
370 "   ld %r5,   40(%r31)\n"
371 "   ld %r6,   48(%r31)\n"
372 "   ld %r7,   56(%r31)\n"
373 "   ld %r8,   64(%r31)\n"
374 "   ld %r9,   72(%r31)\n"
375 "   ld %r10,  80(%r31)\n"
376 "   ld %r11,  88(%r31)\n"
377 "   ld %r12,  96(%r31)\n"
378 "   ld %r13, 104(%r31)\n"
379 "   ld %r14, 112(%r31)\n"
380 "   ld %r15, 120(%r31)\n"
381 "   ld %r16, 128(%r31)\n"
382 "   ld %r17, 136(%r31)\n"
383 "   ld %r18, 144(%r31)\n"
384 "   ld %r19, 152(%r31)\n"
385 "   ld %r20, 160(%r31)\n"
386 "   ld %r21, 168(%r31)\n"
387 "   ld %r22, 176(%r31)\n"
388 "   ld %r23, 184(%r31)\n"
389 "   ld %r24, 192(%r31)\n"
390 "   ld %r25, 200(%r31)\n"
391 "   ld %r26, 208(%r31)\n"
392 "   ld %r27, 216(%r31)\n"
393 "   ld %r28, 224(%r31)\n"
394 "   ld %r29, 232(%r31)\n"
395 "   ld %r30, 240(%r31)\n"
396 "   ld %r31, 248(%r31)\n"
397 "nop_start_point:\n"
398 "   nop\n"
399 "   nop\n"
400 "   nop\n"
401 "   nop\n"
402 "   nop\n"
403 "nop_end_point:\n"
404 );
405 #endif
406 
407 extern void switchback_asm_undotted;
408 extern void nop_start_point;
409 extern void nop_end_point;
switchback(void)410 void switchback ( void )
411 {
412    Int i;
413    /* blargh.  Copy the entire switchback_asm procedure into new
414       memory on which can can set both write and execute permissions,
415       so we can poke around with it and then run the results. */
416 
417 #if defined(__powerpc64__) // ppc32
418    UChar* sa_start     = (UChar*)&switchback_asm_undotted;
419 #else
420    UChar* sa_start     = (UChar*)&switchback_asm;
421 #endif
422    UChar* sa_nop_start = (UChar*)&nop_start_point;
423    UChar* sa_end       = (UChar*)&nop_end_point;
424 
425 #if 0
426    printf("sa_start     %p\n", sa_start );
427    printf("sa_nop_start %p\n", sa_nop_start);
428    printf("sa_end       %p\n", sa_end);
429 #endif
430    Int nbytes       = sa_end - sa_start;
431    Int off_nopstart = sa_nop_start - sa_start;
432    if (0)
433       printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart);
434 
435    /* copy it into mallocville */
436    UChar* copy = mymalloc(nbytes);
437    assert(copy);
438    for (i = 0; i < nbytes; i++)
439       copy[i] = sa_start[i];
440 
441    UInt* p = (UInt*)(&copy[off_nopstart]);
442 
443 #if !defined(__powerpc64__) // ppc32
444    Addr32 addr_of_nop = (Addr32)p;
445    Addr32 where_to_go = gst.guest_CIA;
446    Int    diff = ((Int)where_to_go) - ((Int)addr_of_nop);
447 
448 #if 0
449    printf("addr of first nop = 0x%x\n", addr_of_nop);
450    printf("where to go       = 0x%x\n", where_to_go);
451    printf("diff = 0x%x\n", diff);
452 #endif
453 
454 #else // ppc64
455    Addr64 addr_of_nop = (Addr64)p;
456    Addr64 where_to_go = gst.guest_CIA;
457    Long   diff = ((Long)where_to_go) - ((Long)addr_of_nop);
458 
459 #if 0
460    printf("addr of first nop = 0x%llx\n", addr_of_nop);
461    printf("where to go       = 0x%llx\n", where_to_go);
462    printf("diff = 0x%llx\n", diff);
463 #endif
464 #endif
465 
466    if (diff < -0x2000000 || diff >= 0x2000000) {
467      // we're hosed.  Give up
468      printf("hosed -- offset too large\n");
469      assert(0);
470    }
471 
472    sb_helper1 = (HWord)&gst;
473 #if !defined(__powerpc64__) // ppc32
474    sb_helper2 = LibVEX_GuestPPC32_get_CR(&gst);
475    sb_helper3 = LibVEX_GuestPPC32_get_XER(&gst);
476 #else // ppc64
477    sb_helper2 = LibVEX_GuestPPC64_get_CR(&gst);
478    sb_helper3 = LibVEX_GuestPPC64_get_XER(&gst);
479 #endif
480 
481    /* stay sane ... */
482    assert(p[0] == 24<<26); /* nop */
483 
484    /* branch to diff */
485    p[0] = ((18<<26) | (((diff >> 2) & 0xFFFFFF) << 2) | (0<<1) | (0<<0));
486 
487    invalidate_icache( copy, nbytes );
488 
489 #if defined(__powerpc64__)
490    //printf("jumping to %p\n", copy);
491    { ULong faketoc[3];
492      void* v;
493      faketoc[0] = (ULong)copy;
494      v = &faketoc[0];
495      ( (void(*)(void)) v )();
496    }
497 #else
498    ( (void(*)(void))copy )();
499 #endif
500 }
501 
502 #else
503 #   error "Unknown arch (switchback)"
504 #endif
505 
506 /* -------------------- */
507 static HWord f, gp, res;
508 extern void run_translation_asm(void);
509 
510 #if defined(__i386__)
/* i386 run_translation_asm: call one translation.
   Saves all general registers (pushal), loads %ebp from the global 'gp'
   and calls the address held in the global 'f'.  The value left in %eax
   is stored to the global 'res' before the registers are restored. */
511 asm(
512 "run_translation_asm:\n"
513 "   pushal\n"
514 "   movl gp, %ebp\n"
515 "   movl f, %eax\n"
516 "   call *%eax\n"
517 "   movl %eax, res\n"
518 "   popal\n"
519 "   ret\n"
520 );
521 
522 #elif defined(__x86_64__)
/* x86_64 run_translation_asm: call one translation.
   Pushes rax..rdi and r8..r15 (rsp excepted), loads %rbp from the global
   'gp' and calls the address held in the global 'f'.  The value left in
   %rax is stored to the global 'res'; the saved registers are then
   popped in reverse order. */
523 asm(
524 "run_translation_asm:\n"
525 
526 "   pushq %rax\n"
527 "   pushq %rbx\n"
528 "   pushq %rcx\n"
529 "   pushq %rdx\n"
530 "   pushq %rbp\n"
531 "   pushq %rsi\n"
532 "   pushq %rdi\n"
533 "   pushq %r8\n"
534 "   pushq %r9\n"
535 "   pushq %r10\n"
536 "   pushq %r11\n"
537 "   pushq %r12\n"
538 "   pushq %r13\n"
539 "   pushq %r14\n"
540 "   pushq %r15\n"
541 
542 "   movq gp, %rbp\n"
543 "   movq f, %rax\n"
544 "   call *%rax\n"
545 "   movq %rax, res\n"
546 
547 "   popq  %r15\n"
548 "   popq  %r14\n"
549 "   popq  %r13\n"
550 "   popq  %r12\n"
551 "   popq  %r11\n"
552 "   popq  %r10\n"
553 "   popq  %r9\n"
554 "   popq  %r8\n"
555 "   popq  %rdi\n"
556 "   popq  %rsi\n"
557 "   popq  %rbp\n"
558 "   popq  %rdx\n"
559 "   popq  %rcx\n"
560 "   popq  %rbx\n"
561 "   popq  %rax\n"
562 
563 "   ret\n"
564 );
565 
566 #elif defined(__powerpc__)
567 
568 #if !defined(__powerpc64__) // ppc32
/* ppc32 run_translation_asm: call one translation.
   Opens a 256-byte stack frame, saves LR and r13..r31, loads r31 from
   the global 'gp' and calls (via CTR) the address held in the global
   'f'.  Afterwards r3 is stored to the global 'res' and r31 is stored
   back to 'gp'; then the saved registers, LR and stack pointer are
   restored. */
569 asm(
570 "run_translation_asm:\n"
571 
572 // create new stack:
573 // save old sp at first word & update sp
574 "   stwu 1,-256(1)\n"
575 
576 // save LR
577 "   mflr %r0\n"
578 "   stw  %r0,260(%r1)\n"
579 
580 // leave hole @ 4(%r1) for a callee to save its LR
581 // no params
582 // no need to save non-volatile CR fields
583 
584 // store registers to stack: just the callee-saved regs
585 "   stw %r13,  8(%r1)\n"
586 "   stw %r14, 12(%r1)\n"
587 "   stw %r15, 16(%r1)\n"
588 "   stw %r16, 20(%r1)\n"
589 "   stw %r17, 24(%r1)\n"
590 "   stw %r18, 28(%r1)\n"
591 "   stw %r19, 32(%r1)\n"
592 "   stw %r20, 36(%r1)\n"
593 "   stw %r21, 40(%r1)\n"
594 "   stw %r22, 44(%r1)\n"
595 "   stw %r23, 48(%r1)\n"
596 "   stw %r24, 52(%r1)\n"
597 "   stw %r25, 56(%r1)\n"
598 "   stw %r26, 60(%r1)\n"
599 "   stw %r27, 64(%r1)\n"
600 "   stw %r28, 68(%r1)\n"
601 "   stw %r29, 72(%r1)\n"
602 "   stw %r30, 76(%r1)\n"
603 "   stw %r31, 80(%r1)\n"
604 
605 // r31 (guest state ptr) := global var "gp"
606 "   lis %r31,gp@ha\n"
607 "   lwz %r31,gp@l(%r31)\n"
608 
609 // call translation address in global var "f"
610 "   lis %r4,f@ha\n"
611 "   lwz %r4,f@l(%r4)\n"
612 "   mtctr %r4\n"
613 "   bctrl\n"
614 
615 // save return value (in r3) into global var "res"
616 "   lis %r5,res@ha\n"
617 "   stw %r3,res@l(%r5)\n"
618 
619 // save possibly modified guest state ptr (r31) in "gp"
620 "   lis %r5,gp@ha\n"
621 "   stw %r31,gp@l(%r5)\n"
622 
623 // reload registers from stack
624 "   lwz %r13,  8(%r1)\n"
625 "   lwz %r14, 12(%r1)\n"
626 "   lwz %r15, 16(%r1)\n"
627 "   lwz %r16, 20(%r1)\n"
628 "   lwz %r17, 24(%r1)\n"
629 "   lwz %r18, 28(%r1)\n"
630 "   lwz %r19, 32(%r1)\n"
631 "   lwz %r20, 36(%r1)\n"
632 "   lwz %r21, 40(%r1)\n"
633 "   lwz %r22, 44(%r1)\n"
634 "   lwz %r23, 48(%r1)\n"
635 "   lwz %r24, 52(%r1)\n"
636 "   lwz %r25, 56(%r1)\n"
637 "   lwz %r26, 60(%r1)\n"
638 "   lwz %r27, 64(%r1)\n"
639 "   lwz %r28, 68(%r1)\n"
640 "   lwz %r29, 72(%r1)\n"
641 "   lwz %r30, 76(%r1)\n"
642 "   lwz %r31, 80(%r1)\n"
643 
644 // restore LR
645 "   lwz  %r0,260(%r1)\n"
646 "   mtlr %r0\n"
647 
648 // restore previous stack pointer
649 "   addi %r1,%r1,256\n"
650 
651 // return
652 "   blr"
653 );
654 
655 #else // ppc64
656 
/* ppc64 run_translation_asm: call one translation.
   'run_translation_asm' is emitted in .opd as three quads (code address,
   TOC base, 0); the code itself is '.run_translation_asm'.
   Saves LR and CTR into the caller's frame, opens a 256-byte frame,
   saves r13..r31, loads r31 from the global 'gp' and calls (via CTR) the
   address held in the global 'f'.  Afterwards r3 is stored to the global
   'res' and r31 is stored back to 'gp'; then the saved registers, stack
   pointer, LR and CTR are restored.  Addresses of the globals are built
   from their @highest/@higher/@h/@l parts. */
657 asm(
658 ".text\n"
659 "   .global run_translation_asm\n"
660 "   .section \".opd\",\"aw\"\n"
661 "   .align 3\n"
662 "run_translation_asm:\n"
663 "   .quad .run_translation_asm,.TOC.@tocbase,0\n"
664 "   .previous\n"
665 "   .type .run_translation_asm,@function\n"
666 "   .global  .run_translation_asm\n"
667 ".run_translation_asm:\n"
668 
669 // save LR,CTR
670 "   mflr  %r0\n"
671 "   std   %r0,16(%r1)\n"
672 "   mfctr %r0\n"
673 "   std   %r0,8(%r1)\n"
674 
675 // create new stack:
676 // save old sp at first word & update sp
677 "   stdu 1,-256(1)\n"
678 
679 // leave hole @ 4(%r1) for a callee to save its LR
680 // no params
681 // no need to save non-volatile CR fields
682 
683 // store registers to stack: just the callee-saved regs
684 "   std %r13,  48(%r1)\n"
685 "   std %r14,  56(%r1)\n"
686 "   std %r15,  64(%r1)\n"
687 "   std %r16,  72(%r1)\n"
688 "   std %r17,  80(%r1)\n"
689 "   std %r18,  88(%r1)\n"
690 "   std %r19,  96(%r1)\n"
691 "   std %r20, 104(%r1)\n"
692 "   std %r21, 112(%r1)\n"
693 "   std %r22, 120(%r1)\n"
694 "   std %r23, 128(%r1)\n"
695 "   std %r24, 136(%r1)\n"
696 "   std %r25, 144(%r1)\n"
697 "   std %r26, 152(%r1)\n"
698 "   std %r27, 160(%r1)\n"
699 "   std %r28, 168(%r1)\n"
700 "   std %r29, 176(%r1)\n"
701 "   std %r30, 184(%r1)\n"
702 "   std %r31, 192(%r1)\n"
703 
704 // r31 (guest state ptr) := global var "gp"
705 "   lis    %r31,gp@highest\n"
706 "   ori    %r31,%r31,gp@higher\n"
707 "   rldicr %r31,%r31,32,31\n"
708 "   oris   %r31,%r31,gp@h\n"
709 "   ori    %r31,%r31,gp@l\n"
710 "   ld     %r31,0(%r31)\n"
711 
712 // call translation address in global var "f"
713 "   lis    %r4,f@highest\n"
714 "   ori    %r4,%r4,f@higher\n"
715 "   rldicr %r4,%r4,32,31\n"
716 "   oris   %r4,%r4,f@h\n"
717 "   ori    %r4,%r4,f@l\n"
718 "   ld     %r4,0(%r4)\n"
719 "   mtctr  %r4\n"
720 "   bctrl\n"
721 
722 // save return value (in r3) into global var "res"
723 "   lis    %r5,res@highest\n"
724 "   ori    %r5,%r5,res@higher\n"
725 "   rldicr %r5,%r5,32,31\n"
726 "   oris   %r5,%r5,res@h\n"
727 "   ori    %r5,%r5,res@l\n"
728 "   std    %r3,0(%r5)\n"
729 
730 // save possibly modified guest state ptr (r31) in "gp"
731 "   lis    %r5,gp@highest\n"
732 "   ori    %r5,%r5,gp@higher\n"
733 "   rldicr %r5,%r5,32,31\n"
734 "   oris   %r5,%r5,gp@h\n"
735 "   ori    %r5,%r5,gp@l\n"
736 "   std    %r31,0(%r5)\n"
737 
738 // reload registers from stack
739 "   ld %r13,  48(%r1)\n"
740 "   ld %r14,  56(%r1)\n"
741 "   ld %r15,  64(%r1)\n"
742 "   ld %r16,  72(%r1)\n"
743 "   ld %r17,  80(%r1)\n"
744 "   ld %r18,  88(%r1)\n"
745 "   ld %r19,  96(%r1)\n"
746 "   ld %r20, 104(%r1)\n"
747 "   ld %r21, 112(%r1)\n"
748 "   ld %r22, 120(%r1)\n"
749 "   ld %r23, 128(%r1)\n"
750 "   ld %r24, 136(%r1)\n"
751 "   ld %r25, 144(%r1)\n"
752 "   ld %r26, 152(%r1)\n"
753 "   ld %r27, 160(%r1)\n"
754 "   ld %r28, 168(%r1)\n"
755 "   ld %r29, 176(%r1)\n"
756 "   ld %r30, 184(%r1)\n"
757 "   ld %r31, 192(%r1)\n"
758 
759 // restore previous stack pointer
760 "   addi %r1,%r1,256\n"
761 
762 // restore LR,CTR
763 "   ld    %r0,16(%r1)\n"
764 "   mtlr  %r0\n"
765 "   ld    %r0,8(%r1)\n"
766 "   mtctr %r0\n"
767 
768 // return
769 "   blr"
770 );
771 #endif
772 
773 #else
774 
775 #   error "Unknown arch"
776 #endif
777 
778 /* Run a translation at host address 'translation'.  Return
779    True if Vex asked for an translation cache flush as a result.
780 */
run_translation(HWord translation)781 Bool run_translation ( HWord translation )
782 {
783    if (0 && DEBUG_TRACE_FLAGS) {
784       printf(" run translation %p\n", (void*)translation );
785       printf(" simulated bb: %llu\n", n_bbs_done);
786    }
787    f = translation;
788    gp = (HWord)&gst;
789    run_translation_asm();
790    gst.GuestPC = res;
791    n_bbs_done ++;
792    return gp==VEX_TRC_JMP_TINVAL;
793 }
794 
find_translation(Addr64 guest_addr)795 HWord find_translation ( Addr64 guest_addr )
796 {
797    Int i;
798    HWord __res;
799    if (0)
800       printf("find translation %p ... ", ULong_to_Ptr(guest_addr));
801    for (i = 0; i < trans_table_used; i++)
802      if (trans_table[i].base[0] == guest_addr)
803         break;
804    if (i == trans_table_used) {
805       if (0) printf("none\n");
806       return 0; /* not found */
807    }
808 
809    /* Move this translation one step towards the front, so finding it
810       next time round is just that little bit cheaper. */
811    if (i > 2) {
812       VexGuestExtents tmpE = trans_table[i-1];
813       ULong*          tmpP = trans_tableP[i-1];
814       trans_table[i-1]  = trans_table[i];
815       trans_tableP[i-1] = trans_tableP[i];
816       trans_table[i] = tmpE;
817       trans_tableP[i] = tmpP;
818       i--;
819    }
820 
821    __res = (HWord)trans_tableP[i];
822    if (0) printf("%p\n", (void*)__res);
823    return __res;
824 }
825 
826 #define N_TRANSBUF 5000
827 static UChar transbuf[N_TRANSBUF];
make_translation(Addr64 guest_addr,Bool verbose)828 void make_translation ( Addr64 guest_addr, Bool verbose )
829 {
830    VexTranslateArgs   vta;
831    VexTranslateResult tres;
832    VexArchInfo vex_archinfo;
833    Int trans_used, i, ws_needed;
834 
835    if (trans_table_used >= N_TRANS_TABLE
836        || trans_cache_used >= N_TRANS_CACHE-1000) {
837       /* If things are looking to full, just dump
838          all the translations. */
839       trans_cache_used = 0;
840       trans_table_used = 0;
841    }
842 
843    assert(trans_table_used < N_TRANS_TABLE);
844    if (0)
845       printf("make translation %p\n", ULong_to_Ptr(guest_addr));
846 
847    LibVEX_default_VexArchInfo(&vex_archinfo);
848    vex_archinfo.subarch = VexSubArch;
849    vex_archinfo.ppc_cache_line_szB = CacheLineSize;
850 
851    /* */
852    vta.arch_guest       = VexArch;
853    vta.archinfo_guest   = vex_archinfo;
854    vta.arch_host        = VexArch;
855    vta.archinfo_host    = vex_archinfo;
856    vta.guest_bytes      = (UChar*)ULong_to_Ptr(guest_addr);
857    vta.guest_bytes_addr = (Addr64)guest_addr;
858    vta.guest_bytes_addr_noredir = (Addr64)guest_addr;
859    vta.chase_into_ok    = chase_into_ok;
860 //   vta.guest_extents    = &vge;
861    vta.guest_extents    = &trans_table[trans_table_used];
862    vta.host_bytes       = transbuf;
863    vta.host_bytes_size  = N_TRANSBUF;
864    vta.host_bytes_used  = &trans_used;
865    vta.instrument1      = NULL;
866    vta.instrument2      = NULL;
867    vta.do_self_check    = False;
868    vta.traceflags       = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
869    vta.dispatch         = NULL;
870 
871    tres = LibVEX_Translate ( &vta );
872 
873    assert(tres == VexTransOK);
874    ws_needed = (trans_used+7) / 8;
875    assert(ws_needed > 0);
876    assert(trans_cache_used + ws_needed < N_TRANS_CACHE);
877    n_translations_made++;
878 
879    for (i = 0; i < trans_used; i++) {
880       HChar* dst = ((HChar*)(&trans_cache[trans_cache_used])) + i;
881       HChar* src = (HChar*)(&transbuf[i]);
882       *dst = *src;
883    }
884 
885 #if defined(__powerpc__)
886    invalidate_icache( &trans_cache[trans_cache_used], trans_used );
887 #endif
888 
889    trans_tableP[trans_table_used] = &trans_cache[trans_cache_used];
890    trans_table_used++;
891    trans_cache_used += ws_needed;
892 }
893 
894 
overlap(Addr64 start,UInt len,VexGuestExtents * vge)895 static Bool overlap ( Addr64 start, UInt len, VexGuestExtents* vge )
896 {
897    Int i;
898    for (i = 0; i < vge->n_used; i++) {
899      if (vge->base[i]+vge->len[i] <= start
900          || vge->base[i] >= start+len) {
901        /* ok */
902      } else {
903         return True;
904      }
905    }
906    return False; /* no overlap */
907 }
908 
/* Drop every translation whose guest extents overlap
   [start, start+len).  Surviving entries of trans_table / trans_tableP
   are compacted towards the front in their original order and
   trans_table_used is reduced to the number kept. */
static void dump_translations ( Addr64 start, UInt len )
{
   Int src;
   Int kept = 0;

   for (src = 0; src < trans_table_used; src++) {
      if (overlap(start, len, &trans_table[src]))
         continue; /* discard: not copied forward */

      assert(kept <= src);
      trans_table[kept]  = trans_table[src];
      trans_tableP[kept] = trans_tableP[src];
      kept++;
   }

   assert(kept >= 0 && kept <= trans_table_used);
   if (0) printf("dumped %d translations\n", trans_table_used - kept);
   trans_table_used = kept;
}
927 
928 
929 static ULong  stopAfter = 0;
930 static UChar* entryP    = NULL;
931 
932 
933 __attribute__ ((noreturn))
934 static
failure_exit(void)935 void failure_exit ( void )
936 {
937    fprintf(stdout, "VEX did failure_exit.  Bye.\n");
938    fprintf(stdout, "bb counter = %llu\n\n", n_bbs_done);
939    exit(1);
940 }
941 
942 static
log_bytes(HChar * bytes,Int nbytes)943 void log_bytes ( HChar* bytes, Int nbytes )
944 {
945    fwrite ( bytes, 1, nbytes, stdout );
946    fflush ( stdout );
947 }
948 
949 
950 /* run simulated code forever (it will exit by calling
951    serviceFn(0)). */
run_simulator(void)952 static void run_simulator ( void )
953 {
954    static Addr64 last_guest = 0;
955    Addr64 next_guest;
956    HWord next_host;
957    Bool need_inval;
958    while (1) {
959       next_guest = gst.GuestPC;
960 
961       if (0)
962          printf("\nnext_guest: 0x%x\n", (UInt)next_guest);
963 
964 #if defined(__powerpc64__)
965       if (next_guest == Ptr_to_ULong( (void*)(*(ULong*)(&serviceFn)) )) {
966 #else
967       if (next_guest == Ptr_to_ULong(&serviceFn)) {
968 #endif
969          /* "do" the function call to serviceFn */
970 #        if defined(__i386__)
971          {
972             HWord esp = gst.guest_ESP;
973             gst.guest_EIP = *(UInt*)(esp+0);
974             gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) );
975             gst.guest_ESP = esp+4;
976             next_guest = gst.guest_EIP;
977          }
978 #        elif defined(__x86_64__)
979          {
980             HWord esp = gst.guest_RSP;
981             gst.guest_RIP = *(UInt*)(esp+0);
982             gst.guest_RAX = serviceFn( gst.guest_RDI, gst.guest_RSI );
983             gst.guest_RSP = esp+8;
984             next_guest = gst.guest_RIP;
985          }
986 #        elif defined(__powerpc__)
987          {
988             gst.guest_GPR3 = serviceFn( gst.guest_GPR3, gst.guest_GPR4 );
989             gst.guest_CIA  = gst.guest_LR;
990             next_guest     = gst.guest_CIA;
991          }
992 #        else
993 #        error "Unknown arch"
994 #        endif
995       }
996 
997       next_host = find_translation(next_guest);
998       if (next_host == 0) {
999          make_translation(next_guest,False);
1000          next_host = find_translation(next_guest);
1001          assert(next_host != 0);
1002       }
1003 
1004       // Switchback
1005       if (n_bbs_done == stopAfter) {
1006          printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done);
1007 #if 1
1008          if (last_guest) {
1009             printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1);
1010             make_translation(last_guest,True);
1011          }
1012 #endif
1013 #if 0
1014          if (next_guest) {
1015             printf("\n*** Current translation (bb:%llu):\n", n_bbs_done);
1016             make_translation(next_guest,True);
1017          }
1018 #endif
1019          printf("---  end SWITCHBACK at bb:%llu ---\n", n_bbs_done);
1020          switchback();
1021          assert(0); /*NOTREACHED*/
1022       }
1023 
1024       last_guest = next_guest;
1025       need_inval = run_translation(next_host);
1026       if (need_inval) {
1027 #if defined(__powerpc__)
1028          dump_translations( (Addr64)gst.guest_TISTART, gst.guest_TILEN );
1029 	 if (0) printf("dump translations done\n");
1030 #endif
1031       }
1032    }
1033 }
1034 
1035 
/* Print the command-line synopsis on stdout and exit with status 1. */
static void usage ( void )
{
   fputs("usage: switchback #bbs\n", stdout);
   fputs("   - begins switchback for basic block #bbs\n", stdout);
   fputs("   - use -1 for largest possible run without switchback\n\n", stdout);
   exit(1);
}
1043 
1044 #if defined(__powerpc__)
1045 
1046 #if !defined(__powerpc64__) // ppc32
/* ppc32: get_R2 stores the current value of r2 into the global saved_R2
   (using r10 as scratch) and returns.  main() later copies saved_R2
   into the guest's GPR2. */
1047 UInt saved_R2;
1048 asm(
1049 "get_R2:\n"
1050 "   lis  %r10,saved_R2@ha\n"
1051 "   stw  %r2,saved_R2@l(%r10)\n"
1052 "   blr\n"
1053 );
1054 #else // ppc64
/* ppc64: get_R2 and get_R13 store the current values of r2 and r13 into
   the globals saved_R2 and saved_R13 respectively (using r10 as scratch)
   and return.  Each is emitted with a three-quad entry in .opd (code
   address, TOC base, 0) plus a dot-prefixed code label.  main() later
   copies the saved values into the guest's GPR2 and GPR13. */
1055 ULong saved_R2;
1056 ULong saved_R13;
1057 asm(
1058 ".text\n"
1059 "   .global get_R2\n"
1060 "   .section \".opd\",\"aw\"\n"
1061 "   .align 3\n"
1062 "get_R2:\n"
1063 "   .quad .get_R2,.TOC.@tocbase,0\n"
1064 "   .previous\n"
1065 "   .type .get_R2,@function\n"
1066 "   .global  .get_R2\n"
1067 ".get_R2:\n"
1068 "   lis    %r10,saved_R2@highest\n"
1069 "   ori    %r10,%r10,saved_R2@higher\n"
1070 "   rldicr %r10,%r10,32,31\n"
1071 "   oris   %r10,%r10,saved_R2@h\n"
1072 "   ori    %r10,%r10,saved_R2@l\n"
1073 "   std    %r2,0(%r10)\n"
1074 "   blr\n"
1075 );
1076 asm(
1077 ".text\n"
1078 "   .global get_R13\n"
1079 "   .section \".opd\",\"aw\"\n"
1080 "   .align 3\n"
1081 "get_R13:\n"
1082 "   .quad .get_R13,.TOC.@tocbase,0\n"
1083 "   .previous\n"
1084 "   .type .get_R13,@function\n"
1085 "   .global  .get_R13\n"
1086 ".get_R13:\n"
1087 "   lis    %r10,saved_R13@highest\n"
1088 "   ori    %r10,%r10,saved_R13@higher\n"
1089 "   rldicr %r10,%r10,32,31\n"
1090 "   oris   %r10,%r10,saved_R13@h\n"
1091 "   ori    %r10,%r10,saved_R13@l\n"
1092 "   std    %r13,0(%r10)\n"
1093 "   blr\n"
1094 );
1095 #endif
1096 extern void get_R2 ( void );
1097 extern void get_R13 ( void );
1098 #endif
1099 
1100 int main ( Int argc, HChar** argv )
1101 {
1102    if (argc != 2)
1103       usage();
1104 
1105    stopAfter = (ULong)atoll(argv[1]);
1106 
1107    extern void entry ( void*(*service)(int,int) );
1108    entryP = (UChar*)&entry;
1109 
1110    if (!entryP) {
1111       printf("switchback: can't find entry point\n");
1112       exit(1);
1113    }
1114 
1115    LibVEX_default_VexControl(&vcon);
1116    vcon.guest_max_insns=50;
1117    vcon.guest_chase_thresh=0;
1118    vcon.iropt_level=2;
1119 
1120    LibVEX_Init( failure_exit, log_bytes, 1, False, &vcon );
1121    LibVEX_Guest_initialise(&gst);
1122 
1123    /* set up as if a call to the entry point passing serviceFn as
1124       the one and only parameter */
1125 #  if defined(__i386__)
1126    gst.guest_EIP = (UInt)entryP;
1127    gst.guest_ESP = (UInt)&gstack[25000];
1128    *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn;
1129    *(UInt*)(gst.guest_ESP+0) = 0x12345678;
1130 #  elif defined(__x86_64__)
1131    gst.guest_RIP = (ULong)entryP;
1132    gst.guest_RSP = (ULong)&gstack[25000];
1133    gst.guest_RDI = (ULong)serviceFn;
1134    *(ULong*)(gst.guest_RSP+0) = 0x12345678AABBCCDDULL;
1135 #  elif defined(__powerpc__)
1136    get_R2();
1137 
1138 #if !defined(__powerpc64__) // ppc32
1139    gst.guest_CIA   = (UInt)entryP;
1140    gst.guest_GPR1  = (UInt)&gstack[25000]; /* stack pointer */
1141    gst.guest_GPR3  = (UInt)serviceFn; /* param to entry */
1142    gst.guest_GPR2  = saved_R2;
1143    gst.guest_LR    = 0x12345678; /* bogus return address */
1144 #else // ppc64
1145    get_R13();
1146    gst.guest_CIA   = * (ULong*)entryP;
1147    gst.guest_GPR1  = (ULong)&gstack[25000]; /* stack pointer */
1148    gst.guest_GPR3  = (ULong)serviceFn;      /* param to entry */
1149    gst.guest_GPR2  = saved_R2;
1150    gst.guest_GPR13 = saved_R13;
1151    gst.guest_LR    = 0x1234567812345678ULL; /* bogus return address */
1152 //   printf("setting CIA to %p\n", (void*)gst.guest_CIA);
1153 #endif
1154 
1155 #  else
1156 #  error "Unknown arch"
1157 #  endif
1158 
1159    printf("\n---START---\n");
1160 
1161 #if 1
1162    run_simulator();
1163 #else
1164    ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn);
1165 #endif
1166 
1167 
1168    return 0;
1169 }
1170