/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc64-aix5.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2006-2010 OpenWorks LLP
     info@open-works.co.uk

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.

  Neither the names of the U.S. Department of Energy nor the
  University of California nor the names of its contributors may be
  used to endorse or promote products derived from this software
  without prior written permission.
*/

#if defined(VGP_ppc64_aix5)

#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"   /* for OFFSET_ppc64_CIA */


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Incomprehensible TOC mumbo-jumbo nonsense.   ---*/
/*----------------------------------------------------*/

/* No, I don't have a clue either.  I just compiled a bit of
   C with gcc and copied the assembly code it produced. */

/* Basically "ld rd, tocent__foo(2)" gets &foo into rd. */
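
/* A concrete illustration (a sketch, not actual compiler output; the
   UInt type is an assumption based on the 32-bit loads/stores used for
   the counter further below): reading VG_(dispatch_ctr) -- which is
   "vgPlain_dispatch_ctr" after name mangling -- via its TOC entry is
   the moral equivalent of

      UInt* p = &VG_(dispatch_ctr);   // ld  5,tocent__vgPlain_dispatch_ctr(2)
      UInt  c = *p;                   // lwz 29,0(5)

   r2 is the TOC pointer, and each ".tc" entry declared just below
   reserves one pointer-sized TOC slot holding the address of the
   named symbol. */
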
        .file   "dispatch-ppc64-aix5.S"
        .machine "ppc64"
        .toc
        .csect .text[PR]
        .toc
tocent__vgPlain_dispatch_ctr:
        .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr[RW]
tocent__vgPlain_machine_ppc64_has_VMX:
        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX[RW]
tocent__vgPlain_tt_fast:
        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast[RW]
tocent__vgPlain_tt_fastN:
        .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN[RW]
        .csect .text[PR]
        .align 2
        .globl vgPlain_run_innerloop
        .globl .vgPlain_run_innerloop
        .csect vgPlain_run_innerloop[DS]
vgPlain_run_innerloop:
        .llong .vgPlain_run_innerloop, TOC[tc0], 0
        .csect .text[PR]

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/

/* signature:
UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
*/
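
/* How a caller is expected to use this (a sketch of the contract as
   implied by this file only -- the real scheduler code lives elsewhere,
   and the variable names here are illustrative):

      VG_(dispatch_ctr) = timeslice;   // 32-bit counter, read at entry
                                       // and written back at exit
      UWord trc = VG_(run_innerloop)( guest_state, do_profiling );
      // trc is either a VG_TRC_* code (VG_TRC_INNER_COUNTERZERO,
      // VG_TRC_INNER_FASTMISS, VG_TRC_INVARIANT_FAILED) or, if the
      // generated code changed the guest state pointer, that new pointer.
*/
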
.vgPlain_run_innerloop:

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        /* Rather than attempt to make sense of the AIX ABI, just
           drop r1 by 512 (to get away from the caller's frame), then
           1024 (to give ourselves a 1024-byte save area), and then
           another 512 (to clear our save area).  In all, drop r1 by 2048
           and dump stuff on the stack at 512(1)..1536(1). */

        /* ----- entry point to VG_(run_innerloop) ----- */
        /* For AIX/ppc64 we do: LR-> +16(parent_sp), CR-> +8(parent_sp) */

        /* Save lr and cr */
        mflr    0
        std     0,16(1)
        mfcr    0
        std     0,8(1)

        /* New stack frame */
        stdu    1,-2048(1)   /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */
        /* r3, r4 are live here, so use r5 */

        /* Floating-point reg save area : 144 bytes at r1[256+256 .. 256+399] */
        stfd    31,256+392(1)
        stfd    30,256+384(1)
        stfd    29,256+376(1)
        stfd    28,256+368(1)
        stfd    27,256+360(1)
        stfd    26,256+352(1)
        stfd    25,256+344(1)
        stfd    24,256+336(1)
        stfd    23,256+328(1)
        stfd    22,256+320(1)
        stfd    21,256+312(1)
        stfd    20,256+304(1)
        stfd    19,256+296(1)
        stfd    18,256+288(1)
        stfd    17,256+280(1)
        stfd    16,256+272(1)
        stfd    15,256+264(1)
        stfd    14,256+256(1)

        /* General reg save area : 152 bytes at r1[256+400 .. 256+551] */
        std     31,256+544(1)
        std     30,256+536(1)
        std     29,256+528(1)
        std     28,256+520(1)
        std     27,256+512(1)
        std     26,256+504(1)
        std     25,256+496(1)
        std     24,256+488(1)
        std     23,256+480(1)
        std     22,256+472(1)
        std     21,256+464(1)
        std     20,256+456(1)
        std     19,256+448(1)
        std     18,256+440(1)
        std     17,256+432(1)
        std     16,256+424(1)
        std     15,256+416(1)
        std     14,256+408(1)
        /* Probably not necessary to save r13 (thread-specific ptr),
           as VEX stays clear of it... but what the hell. */
        std     13,256+400(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
        /* r3, r4 are live here, so use r5 */
        ld      5,tocent__vgPlain_machine_ppc64_has_VMX(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     LafterVMX1

// Sigh.  AIX 5.2 has no idea that Altivec exists.
//        /* VRSAVE save word : 4 bytes at r1[476 .. 479] */
//        mfspr   5,256        /* vrsave reg is spr number 256 */
//        stw     5,476(1)
//
//        /* Vector reg save area (quadword aligned):
//           192 bytes at r1[480 .. 671] */
//        li      5,656
//        stvx    31,5,1
//        li      5,640
//        stvx    30,5,1
//        li      5,624
//        stvx    29,5,1
//        li      5,608
//        stvx    28,5,1
//        li      5,592
//        stvx    27,5,1
//        li      5,576
//        stvx    26,5,1
//        li      5,560
//        stvx    25,5,1
//        li      5,544
//        stvx    24,5,1
//        li      5,528
//        stvx    23,5,1
//        li      5,512
//        stvx    22,5,1
//        li      5,496
//        stvx    21,5,1
//        li      5,480
//        stvx    20,5,1
LafterVMX1:

        /* Local variable space... */
        /* Put the original guest state pointer at r1[256].  We
           will need to refer to it each time round the dispatch loop.
           Apart from that, we can use r1[0 .. 255] and r1[264 .. 511]
           as scratch space. */

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        mr      31,3        /* r31 (generated code gsp) = r3 */
        std     3,256(1)    /* stash orig guest_state ptr */

        /* hold dispatch_ctr (NOTE: 32-bit value) in r29 */
        ld      5,tocent__vgPlain_dispatch_ctr(2)
        lwz     29,0(5)     /* 32-bit zero-extending load */

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        /* get zero into f3 (tedious) */
        /* note: fsub 3,3,3 is not a reliable way to do this,
           since if f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      5,0
        std     5,128(1)    /* r1[128] is scratch */
        lfd     3,128(1)
        mtfsf   0xFF,3      /* fpscr = f3 */

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        ld      5,tocent__vgPlain_machine_ppc64_has_VMX(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     LafterVMX2

// Sigh.  AIX 5.2 has no idea that Altivec exists.
//        vspltisw 3,0x0      /* generate zero */
//        mtvscr  3
LafterVMX2:

        /* fetch %CIA into r3 */
        ld      3,OFFSET_ppc64_CIA(31)

        /* fall into main loop (the right one) */
        /* r4 = do_profiling.  It's probably trashed after here,
           but that's OK: we don't need it after here. */
        cmpldi  4,0
        beq     VG_(run_innerloop__dispatch_unprofiled)
        b       VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/

/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

.globl VG_(run_innerloop__dispatch_unprofiled)
VG_(run_innerloop__dispatch_unprofiled):
        /* At entry: Live regs:
                r1  (=sp)
                r3  (=CIA = next guest address)
                r29 (=dispatch_ctr)
                r31 (=guest_state)
           Stack state:
                256(r1) (=orig guest_state)
        */

        /* Has the guest state pointer been messed with?  If yes, exit. */
        ld      5,256(1)    /* original guest_state ptr */
        cmpd    5,31
        ld      5,tocent__vgPlain_tt_fast(2)  /* &VG_(tt_fast) */
        bne     gsp_changed

        /* save the jump address in the guest state */
        std     3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        addi    29,29,-1
        cmplwi  29,0        /* yes, cmplwi - dispatch_ctr is 32-bit */
        beq     counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)          * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4       /* entry# * sizeof(FastCacheEntry) */
        add     5,5,4       /* &VG_(tt_fast)[entry#] */
        ld      6,0(5)      /* .guest */
        ld      7,8(5)      /* .host */
        cmpd    3,6
        bne     fast_lookup_failed

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
           r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
        /* start over */
        b       VG_(run_innerloop__dispatch_unprofiled)
        /*NOTREACHED*/
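
/* For reference, the fast-lookup sequence above is the assembly
   rendering of roughly the following C (a sketch only; the struct and
   variable names are illustrative, with the field layout inferred from
   the 0(5)/8(5) loads above):

      typedef struct { UWord guest; UWord host; } FastCacheEntry;
      extern FastCacheEntry VG_(tt_fast)[];   // 1 << VG_TT_FAST_BITS entries

      UWord           entry_no = (cia >> 2) & VG_TT_FAST_MASK;
      FastCacheEntry* ent      = &VG_(tt_fast)[entry_no];
      if (ent->guest != cia)
         goto fast_lookup_failed;             // back out to the scheduler
      ((void(*)(void))ent->host)();           // mtctr 7 ; bctrl

   The profiling dispatcher below repeats the same probe and adds a
   per-entry counter bump. */
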
/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

.globl VG_(run_innerloop__dispatch_profiled)
VG_(run_innerloop__dispatch_profiled):
        /* At entry: Live regs:
                r1  (=sp)
                r3  (=CIA = next guest address)
                r29 (=dispatch_ctr)
                r31 (=guest_state)
           Stack state:
                256(r1) (=orig guest_state)
        */

        /* Has the guest state pointer been messed with?  If yes, exit. */
        ld      5,256(1)    /* original guest_state ptr */
        cmpd    5,31
        ld      5,tocent__vgPlain_tt_fast(2)  /* &VG_(tt_fast) */
        bne     gsp_changed

        /* save the jump address in the guest state */
        std     3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        addi    29,29,-1
        cmplwi  29,0        /* yes, cmplwi - dispatch_ctr is 32-bit */
        beq     counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)          * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4       /* entry# * sizeof(FastCacheEntry) */
        add     5,5,4       /* &VG_(tt_fast)[entry#] */
        ld      6,0(5)      /* .guest */
        ld      7,8(5)      /* .host */
        cmpd    3,6
        bne     fast_lookup_failed

        /* increment bb profile counter */
        ld      9,tocent__vgPlain_tt_fastN(2) /* r9 = &tt_fastN */
        srdi    4,4,1       /* entry# * sizeof(UInt*) */
        ldx     8,9,4       /* r8 = tt_fastN[entry#] */
        lwz     10,0(8)
        addi    10,10,1
        stw     10,0(8)

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
           r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
        /* start over */
        b       VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/
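
/* The only difference from the unprofiled loop is the counter bump
   above.  In C it is roughly (a sketch; the UInt* element type is
   inferred from the "sizeof(UInt*)" scaling and the 32-bit lwz/stw
   pair, and the array name follows the TOC entry used):

      extern UInt* VG_(tt_fastN)[];              // one counter ptr per
                                                 // fast-cache entry
      UInt* counter = VG_(tt_fastN)[entry_no];   // ldx 8,9,4
      (*counter)++;                              // lwz/addi/stw on 0(r8)
*/
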
/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

gsp_changed:
        /* Someone messed with the gsp (in r31).  Have to
           defer to scheduler to resolve this.  dispatch ctr
           is not yet decremented, so no need to increment. */
        /* %CIA is NOT up to date here.  First, need to write
           %r3 back to %CIA, but without trashing %r31 since
           that holds the value we want to return to the scheduler.
           Hence use %r5 transiently for the guest state pointer. */
        ld      5,256(1)    /* original guest_state ptr */
        std     3,OFFSET_ppc64_CIA(5)
        mr      3,31        /* r3 = new gsp value */
        b       run_innerloop_exit
        /*NOTREACHED*/

counter_is_zero:
        /* %CIA is up to date */
        /* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_COUNTERZERO
        b       run_innerloop_exit

fast_lookup_failed:
        /* %CIA is up to date */
        /* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_FASTMISS
        b       run_innerloop_exit



/* All exits from the dispatcher go through here.
   r3 holds the return value.
*/
run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR. */

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm]. */
        li      5,0
        std     5,128(1)    /* r1[128] is scratch */
        lfd     3,128(1)
        mtfsf   0xFF,3      /* fpscr = f3 */

        /* Using r11 - value used again further on, so don't trash! */
        ld      11,tocent__vgPlain_machine_ppc64_has_VMX(2)
        ld      11,0(11)
        cmpldi  11,0
        beq     LafterVMX8

// Sigh.  AIX 5.2 has no idea that Altivec exists.
//        /* Check VSCR[NJ] == 1 */
//        /* first generate 4x 0x00010000 */
//        vspltisw  4,0x1                  /* 4x 0x00000001 */
//        vspltisw  5,0x0                  /* zero */
//        vsldoi    6,4,5,0x2              /* <<2*8 => 4x 0x00010000 */
//        /* retrieve VSCR and mask wanted bits */
//        mfvscr    7
//        vand      7,7,6                  /* gives NJ flag */
//        vspltw    7,7,0x3                /* flags-word to all lanes */
//        vcmpequw. 8,6,7                  /* CR[24] = 1 if v6 == v7 */
//        bt        24,invariant_violation /* branch if all_equal */
LafterVMX8:

        /* otherwise we're OK */
        b       run_innerloop_exit_REALLY


invariant_violation:
        li      3,VG_TRC_INVARIANT_FAILED
        b       run_innerloop_exit_REALLY

run_innerloop_exit_REALLY:
        /* r3 holds VG_TRC_* value to return */

        /* Write ctr to VG_(dispatch_ctr) */
        ld      5,tocent__vgPlain_dispatch_ctr(2)
        stw     29,0(5)     /* yes, really stw */

        /* Restore callee-saved registers... */

        /* Floating-point regs */
        lfd     31,256+392(1)
        lfd     30,256+384(1)
        lfd     29,256+376(1)
        lfd     28,256+368(1)
        lfd     27,256+360(1)
        lfd     26,256+352(1)
        lfd     25,256+344(1)
        lfd     24,256+336(1)
        lfd     23,256+328(1)
        lfd     22,256+320(1)
        lfd     21,256+312(1)
        lfd     20,256+304(1)
        lfd     19,256+296(1)
        lfd     18,256+288(1)
        lfd     17,256+280(1)
        lfd     16,256+272(1)
        lfd     15,256+264(1)
        lfd     14,256+256(1)

        /* General regs */
        ld      31,256+544(1)
        ld      30,256+536(1)
        ld      29,256+528(1)
        ld      28,256+520(1)
        ld      27,256+512(1)
        ld      26,256+504(1)
        ld      25,256+496(1)
        ld      24,256+488(1)
        ld      23,256+480(1)
        ld      22,256+472(1)
        ld      21,256+464(1)
        ld      20,256+456(1)
        ld      19,256+448(1)
        ld      18,256+440(1)
        ld      17,256+432(1)
        ld      16,256+424(1)
        ld      15,256+416(1)
        ld      14,256+408(1)
        ld      13,256+400(1)

        /* r11 already holds VG_(machine_ppc64_has_VMX) value */
        cmpldi  11,0
        beq     LafterVMX9

// Sigh.  AIX 5.2 has no idea that Altivec exists.
//        /* VRSAVE */
//        lwz     4,476(1)
//        mtspr   256,4       /* VRSAVE reg is spr number 256 */
//
//        /* Vector regs */
//        li      4,656
//        lvx     31,4,1
//        li      4,640
//        lvx     30,4,1
//        li      4,624
//        lvx     29,4,1
//        li      4,608
//        lvx     28,4,1
//        li      4,592
//        lvx     27,4,1
//        li      4,576
//        lvx     26,4,1
//        li      4,560
//        lvx     25,4,1
//        li      4,544
//        lvx     24,4,1
//        li      4,528
//        lvx     23,4,1
//        li      4,512
//        lvx     22,4,1
//        li      4,496
//        lvx     21,4,1
//        li      4,480
//        lvx     20,4,1
LafterVMX9:

        /* r3 is live here; don't trash it */
        /* restore lr,cr,sp */
        addi    4,1,2048    /* r4 = old SP */
        ld      0,16(4)
        mtlr    0
        ld      0,8(4)
        mtcr    0
        mr      1,4
        blr

LT..vgPlain_run_innerloop:
        .long 0
        .byte 0,0,32,64,0,0,1,0
        .long 0
        .long LT..vgPlain_run_innerloop-.vgPlain_run_innerloop
        .short 3
        .byte "vgPlain_run_innerloop"
        .align 2
_section_.text:
        .csect .data[RW],3
        .llong _section_.text

/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
   and 2 to carry results:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)
*/
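
/* Caller-side view, in C (a sketch inferred from the block layout
   described above and the loads/stores below; the variable names are
   illustrative, not the actual calling code):

      UWord argblock[4];
      argblock[0] = (UWord)translation_entry;   // host code to run
      argblock[1] = (UWord)guest_state;         // passed in r31
      VG_(run_a_noredir_translation)( argblock );
      UWord next_guest_pc = argblock[2];        // r3  on return
      UWord gsp_or_trc    = argblock[3];        // r31 on return
*/
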
.csect .text[PR]
.align 2
.globl .VG_(run_a_noredir_translation)
.VG_(run_a_noredir_translation):
        /* Rather than attempt to make sense of the AIX ABI, just
           drop r1 by 512 (to get away from the caller's frame), then
           1024 (to give ourselves a 1024-byte save area), and then
           another 512 (to clear our save area).  In all, drop r1 by 2048
           and dump stuff on the stack at 512(1)..1536(1). */
        /* At entry, r3 points to argblock */

        /* ----- entry point to VG_(run_a_noredir_translation) ----- */
        /* For AIX/ppc64 we do: LR-> +16(parent_sp), CR-> +8(parent_sp) */

        /* Save lr and cr */
        mflr    0
        std     0,16(1)
        mfcr    0
        std     0,8(1)

        /* New stack frame */
        stdu    1,-2048(1)   /* sp should maintain 16-byte alignment */

        /* General reg save area : 160 bytes at r1[512 .. 671] */
        std     31,664(1)
        std     30,656(1)
        std     29,648(1)
        std     28,640(1)
        std     27,632(1)
        std     26,624(1)
        std     25,616(1)
        std     24,608(1)
        std     23,600(1)
        std     22,592(1)
        std     21,584(1)
        std     20,576(1)
        std     19,568(1)
        std     18,560(1)
        std     17,552(1)
        std     16,544(1)
        std     15,536(1)
        std     14,528(1)
        std     13,520(1)
        std     3,512(1)    /* will need it later */

        ld      31,8(3)     /* rd argblock[1] */
        ld      30,0(3)     /* rd argblock[0] */
        mtlr    30          /* run translation */
        blrl

        ld      4,512(1)    /* &argblock */
        std     3, 16(4)    /* wr argblock[2] */
        std     31,24(4)    /* wr argblock[3] */

        /* General regs */
        ld      31,664(1)
        ld      30,656(1)
        ld      29,648(1)
        ld      28,640(1)
        ld      27,632(1)
        ld      26,624(1)
        ld      25,616(1)
        ld      24,608(1)
        ld      23,600(1)
        ld      22,592(1)
        ld      21,584(1)
        ld      20,576(1)
        ld      19,568(1)
        ld      18,560(1)
        ld      17,552(1)
        ld      16,544(1)
        ld      15,536(1)
        ld      14,528(1)
        ld      13,520(1)

        /* restore lr,cr,sp */
        addi    4,1,2048    /* r4 = old SP */
        ld      0,16(4)
        mtlr    0
        ld      0,8(4)
        mtcr    0
        mr      1,4
        blr

#endif // defined(VGP_ppc64_aix5)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/