
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc32-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2005-2011 Cerion Armour-Brown <cerion@open-works.co.uk>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_ppc32_linux)

#include "config.h"
#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"	/* for OFFSET_ppc32_CIA */


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/

/* signature:
UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
*/

/* Summary of VG_(run_innerloop):

   In:   r3 = guest_state pointer
         r4 = do_profiling (zero selects the unprofiled dispatcher)
   Out:  r3 = a VG_TRC_* value, or the changed guest state pointer
              when the gsp_changed exit is taken

   Register roles while the dispatch loop is running:
         r1  = sp
         r3  = CIA, the next guest address
         r29 = dispatch_ctr (loaded from / stored to VG_(dispatch_ctr))
         r31 = guest state pointer as seen by generated code

   Layout of the 496-byte frame created in the prologue
   (offsets relative to sp right after "stwu 1,-496(1)"):
         352..495  f14-f31   (only if VG_(machine_ppc32_has_FP))
         276..351  r13-r31
         244       VRSAVE    (only if VMX and HAS_ALTIVEC)
         48..239   v20-v31   (only if VMX and HAS_ALTIVEC)
         44        CR
         28        original guest_state pointer
         20        scratch word used to load zero into the FPSCR
         0         back-chain
   LR is stored at 4(parent sp), i.e. 500 from the new sp.

   A further 16-byte frame is pushed just before entering the
   dispatchers, so inside the loop and on the exit paths every offset
   above is 16 larger until run_innerloop_exit_REALLY pops it.
*/
.text
.globl  VG_(run_innerloop)
.type  VG_(run_innerloop), @function
VG_(run_innerloop):
	/* r3 holds guest_state */
	/* r4 holds do_profiling */

        /* ----- entry point to VG_(run_innerloop) ----- */
        /* For Linux/ppc32 we need the SysV ABI, which uses
           LR->4(parent_sp), CR->anywhere.
           (The AIX ABI, used on Darwin,
           uses LR->8(prt_sp), CR->4(prt_sp))
        */

        /* Save lr */
        mflr    0
        stw     0,4(1)

        /* New stack frame */
        stwu    1,-496(1)  /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */
	/* r3, r4 are live here, so use r5 */
        lis     5,VG_(machine_ppc32_has_FP)@ha
        lwz     5,VG_(machine_ppc32_has_FP)@l(5)
        cmplwi  5,0
        beq     LafterFP1

        /* Floating-point reg save area : 144 bytes */
        stfd    31,488(1)
        stfd    30,480(1)
        stfd    29,472(1)
        stfd    28,464(1)
        stfd    27,456(1)
        stfd    26,448(1)
        stfd    25,440(1)
        stfd    24,432(1)
        stfd    23,424(1)
        stfd    22,416(1)
        stfd    21,408(1)
        stfd    20,400(1)
        stfd    19,392(1)
        stfd    18,384(1)
        stfd    17,376(1)
        stfd    16,368(1)
        stfd    15,360(1)
        stfd    14,352(1)
LafterFP1:

        /* General reg save area : 72 bytes */
        stw     31,348(1)
        stw     30,344(1)
        stw     29,340(1)
        stw     28,336(1)
        stw     27,332(1)
        stw     26,328(1)
        stw     25,324(1)
        stw     24,320(1)
        stw     23,316(1)
        stw     22,312(1)
        stw     21,308(1)
        stw     20,304(1)
        stw     19,300(1)
        stw     18,296(1)
        stw     17,292(1)
        stw     16,288(1)
        stw     15,284(1)
        stw     14,280(1)
        /* Probably not necessary to save r13 (thread-specific ptr),
           as VEX stays clear of it... but what the hey. */
        stw     13,276(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
	/* r3, r4 are live here, so use r5 */
        lis     5,VG_(machine_ppc32_has_VMX)@ha
        lwz     5,VG_(machine_ppc32_has_VMX)@l(5)
        cmplwi  5,0
        beq     LafterVMX1

#ifdef HAS_ALTIVEC
        /* VRSAVE save word : 32 bits (4 bytes) */
        mfspr   5,256         /* vrsave reg is spr number 256 */
        stw     5,244(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes.
           stvx only has a reg+reg addressing form, so the frame offset
           is put in r5 first.  Each vN must be stored at the offset
           the epilogue reloads it from (v31 at 224 down to v20 at 48,
           in steps of 16). */
        li      5,224
        stvx    31,5,1
        li      5,208
        stvx    30,5,1
        li      5,192
        stvx    29,5,1
        li      5,176
        stvx    28,5,1
        li      5,160
        stvx    27,5,1
        li      5,144
        stvx    26,5,1
        li      5,128
        stvx    25,5,1
        li      5,112
        stvx    24,5,1        /* v24 (not v25 again) goes in the 112 slot */
        li      5,96
        stvx    23,5,1
        li      5,80
        stvx    22,5,1
        li      5,64
        stvx    21,5,1
        li      5,48
        stvx    20,5,1
#endif

LafterVMX1:

        /* Save cr */
        mfcr    0
        stw     0,44(1)

        /* Local variable space... */

        /* 32(sp) used later to check FPSCR[RM] */

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        mr      31,3      /* r31 (generated code gsp) = r3 */
        stw     3,28(1)   /* spill orig guest_state ptr */

        /* 24(sp) used later to stop ctr reg being clobbered */
        /* 20(sp) used later to load fpscr with zero */
	/* 8:16(sp) free */

        /* Linkage Area (reserved)
           4(sp)  : LR
           0(sp)  : back-chain
        */

	/* CAB TODO: Use a caller-saved reg for orig guest_state ptr
	   - rem to set non-allocateable in isel.c */

        /* hold dispatch_ctr in r29 */
        lis     5,VG_(dispatch_ctr)@ha
        lwz     29,VG_(dispatch_ctr)@l(5)

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        lis     5,VG_(machine_ppc32_has_FP)@ha
        lwz     5,VG_(machine_ppc32_has_FP)@l(5)
        cmplwi  5,0
        beq     LafterFP2

        /* get zero into f3 (tedious) */
        /* note: fsub 3,3,3 is not a reliable way to do this,
           since if f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      5,0
        stw     5,20(1)
        lfs     3,20(1)
        mtfsf   0xFF,3   /* fpscr = f3 */
LafterFP2:

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        lis     5,VG_(machine_ppc32_has_VMX)@ha
        lwz     5,VG_(machine_ppc32_has_VMX)@l(5)
        cmplwi  5,0
        beq     LafterVMX2

#ifdef HAS_ALTIVEC
        vspltisw 3,0x0  /* generate zero */
        mtvscr  3
#endif

LafterVMX2:

        /* make a stack frame for the code we are calling */
        stwu    1,-16(1)

        /* fetch %CIA into r3 */
        lwz     3,OFFSET_ppc32_CIA(31)

        /* fall into main loop (the right one) */
	/* r4 = do_profiling.  It's probably trashed after here,
	   but that's OK: we don't need it after here. */
	cmplwi	4,0
	beq	VG_(run_innerloop__dispatch_unprofiled)
	b	VG_(run_innerloop__dispatch_profiled)
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

.global	VG_(run_innerloop__dispatch_unprofiled)
VG_(run_innerloop__dispatch_unprofiled):
	/* At entry: Live regs:
		r1  (=sp)
		r3  (=CIA = next guest address)
		r29 (=dispatch_ctr)
		r31 (=guest_state)
	*/
	/* Has the guest state pointer been messed with?  If yes, exit.
           Also set up & VG_(tt_fast) early in an attempt at better
           scheduling. */
        lis	5,VG_(tt_fast)@ha
        addi    5,5,VG_(tt_fast)@l  /* & VG_(tt_fast) */
        andi.   0,31,1              /* low bit of r31 set => gsp changed */
        bne	gsp_changed

        /* save the jump address in the guest state */
        stw     3,OFFSET_ppc32_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
	subi	29,29,1
	cmplwi	29,0
	beq	counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
	rlwinm	4,3,1, 29-VG_TT_FAST_BITS, 28	/* entry# * 8 */
	add	5,5,4	/* & VG_(tt_fast)[entry#] */
	lwz	6,0(5)   /* .guest */
	lwz	7,4(5)   /* .host */
        cmpw    3,6
        bne     fast_lookup_failed

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
	   r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
	/* start over */
	b	VG_(run_innerloop__dispatch_unprofiled)
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

.global	VG_(run_innerloop__dispatch_profiled)
VG_(run_innerloop__dispatch_profiled):
	/* At entry: Live regs:
		r1 (=sp)
		r3  (=CIA = next guest address)
		r29 (=dispatch_ctr)
		r31 (=guest_state)
	*/
	/* Has the guest state pointer been messed with?  If yes, exit.
           Also set up & VG_(tt_fast) early in an attempt at better
           scheduling. */
        lis	5,VG_(tt_fast)@ha
        addi    5,5,VG_(tt_fast)@l  /* & VG_(tt_fast) */
        andi.   0,31,1              /* low bit of r31 set => gsp changed */
        bne	gsp_changed

        /* save the jump address in the guest state */
        stw     3,OFFSET_ppc32_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
	subi	29,29,1
	cmplwi	29,0
	beq	counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
	rlwinm	4,3,1, 29-VG_TT_FAST_BITS, 28	/* entry# * 8 */
	add	5,5,4	/* & VG_(tt_fast)[entry#] */
	lwz	6,0(5)   /* .guest */
	lwz	7,4(5)   /* .host */
        cmpw    3,6
        bne     fast_lookup_failed

        /* increment bb profile counter */
	srwi	4,4,1	/* entry# * sizeof(UInt*) */
	addis	6,4,VG_(tt_fastN)@ha
	lwz	9,VG_(tt_fastN)@l(6)   /* r9 = VG_(tt_fastN)[entry#] */
	lwz	8,0(9)
	addi	8,8,1
	stw	8,0(9)

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
	   r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
	/* start over */
	b	VG_(run_innerloop__dispatch_profiled)
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

gsp_changed:
	/* Someone messed with the gsp (in r31).  Have to
           defer to scheduler to resolve this.  dispatch ctr
	   is not yet decremented, so no need to increment. */
	/* %CIA is NOT up to date here.  First, need to write
	   %r3 back to %CIA, but without trashing %r31 since
	   that holds the value we want to return to the scheduler.
	   Hence use %r5 transiently for the guest state pointer. */
        /* 44 = the 28(sp) spill slot plus the extra 16-byte frame */
        lwz     5,44(1)         /* original guest_state ptr */
        stw     3,OFFSET_ppc32_CIA(5)
	mr	3,31		/* r3 = new gsp value */
	b	run_innerloop_exit
	/*NOTREACHED*/

counter_is_zero:
	/* %CIA is up to date */
	/* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_COUNTERZERO
        b       run_innerloop_exit

fast_lookup_failed:
	/* %CIA is up to date */
	/* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_FASTMISS
	b       run_innerloop_exit



/* All exits from the dispatcher go through here.
   r3 holds the return value.
*/
run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR. */

        /* Using r10 - value used again further on, so don't trash! */
        lis     10,VG_(machine_ppc32_has_FP)@ha
        lwz     10,VG_(machine_ppc32_has_FP)@l(10)
        cmplwi  10,0
        beq     LafterFP8

	/* Set fpscr back to a known state, since vex-generated code
	   may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16
        mtfsf   0xFF,3   /* fpscr = f3 */
LafterFP8:

	/* Using r11 - value used again further on, so don't trash! */
        lis     11,VG_(machine_ppc32_has_VMX)@ha
        lwz     11,VG_(machine_ppc32_has_VMX)@l(11)
        cmplwi  11,0
        beq     LafterVMX8

#ifdef HAS_ALTIVEC
        /* Check for VSCR[NJ] == 1.  The prologue wrote zero to VSCR,
           so finding NJ set here is reported as an invariant
           violation. */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,invariant_violation  /* branch if all_equal */
#endif
LafterVMX8:

	/* otherwise we're OK */
        b       run_innerloop_exit_REALLY


invariant_violation:
        li      3,VG_TRC_INVARIANT_FAILED
        b       run_innerloop_exit_REALLY

run_innerloop_exit_REALLY:
        /* r3 holds VG_TRC_* value to return */

        /* Return to parent stack */
        addi    1,1,16

        /* Write ctr to VG(dispatch_ctr) */
        lis     5,VG_(dispatch_ctr)@ha
        stw     29,VG_(dispatch_ctr)@l(5)

        /* Restore cr */
        lwz     0,44(1)
        mtcr    0

        /* Restore callee-saved registers... */

        /* r10 already holds VG_(machine_ppc32_has_FP) value */
        cmplwi  10,0
        beq     LafterFP9

        /* Floating-point regs */
        lfd     31,488(1)
        lfd     30,480(1)
        lfd     29,472(1)
        lfd     28,464(1)
        lfd     27,456(1)
        lfd     26,448(1)
        lfd     25,440(1)
        lfd     24,432(1)
        lfd     23,424(1)
        lfd     22,416(1)
        lfd     21,408(1)
        lfd     20,400(1)
        lfd     19,392(1)
        lfd     18,384(1)
        lfd     17,376(1)
        lfd     16,368(1)
        lfd     15,360(1)
        lfd     14,352(1)
LafterFP9:

        /* General regs */
        lwz     31,348(1)
        lwz     30,344(1)
        lwz     29,340(1)
        lwz     28,336(1)
        lwz     27,332(1)
        lwz     26,328(1)
        lwz     25,324(1)
        lwz     24,320(1)
        lwz     23,316(1)
        lwz     22,312(1)
        lwz     21,308(1)
        lwz     20,304(1)
        lwz     19,300(1)
        lwz     18,296(1)
        lwz     17,292(1)
        lwz     16,288(1)
        lwz     15,284(1)
        lwz     14,280(1)
        lwz     13,276(1)

        /* r11 already holds VG_(machine_ppc32_has_VMX) value */
        cmplwi  11,0
        beq     LafterVMX9

#ifdef HAS_ALTIVEC
        /* VRSAVE: reload the word saved in the prologue and write it
           back to the SPR (mtspr, not mfspr, so the saved value is
           actually restored). */
        lwz     4,244(1)
        mtspr   256,4         /* VRSAVE reg is spr number 256 */

        /* Vector regs */
        li      4,224
        lvx     31,4,1
        li      4,208
        lvx     30,4,1
        li      4,192
        lvx     29,4,1
        li      4,176
        lvx     28,4,1
        li      4,160
        lvx     27,4,1
        li      4,144
        lvx     26,4,1
        li      4,128
        lvx     25,4,1
        li      4,112
        lvx     24,4,1
        li      4,96
        lvx     23,4,1
        li      4,80
        lvx     22,4,1
        li      4,64
        lvx     21,4,1
        li      4,48
        lvx     20,4,1
#endif
LafterVMX9:

        /* reset lr & sp */
        lwz     0,500(1)  /* stack_size + 4 */
        mtlr    0
        addi    1,1,496   /* stack_size */
        blr
.size VG_(run_innerloop), .-VG_(run_innerloop)


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
   and 2 to carry results:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)
*/
/* Frame (256 bytes, offsets from sp after the stwu):
      128..196  r14-r31
      200       LR
      204       argblock pointer (r3 at entry)
*/
.global VG_(run_a_noredir_translation)
.type VG_(run_a_noredir_translation), @function
VG_(run_a_noredir_translation):
	/* save callee-save int regs, & lr */
	stwu 1,-256(1)
	stw  14,128(1)
	stw  15,132(1)
	stw  16,136(1)
	stw  17,140(1)
	stw  18,144(1)
	stw  19,148(1)
	stw  20,152(1)
	stw  21,156(1)
	stw  22,160(1)
	stw  23,164(1)
	stw  24,168(1)
	stw  25,172(1)
	stw  26,176(1)
	stw  27,180(1)
	stw  28,184(1)
	stw  29,188(1)
	stw  30,192(1)
	stw  31,196(1)
	mflr 31
	stw  31,200(1)

	/* keep argblock across the call; r31 = guest state pointer,
	   r30 = translation address, which is called via lr */
	stw  3,204(1)
	lwz  31,4(3)
	lwz  30,0(3)
	mtlr 30
	blrl

	/* write the two results back into argblock[2] and argblock[3] */
	lwz  4,204(1)
	stw  3,  8(4)
	stw  31,12(4)

	lwz  14,128(1)
	lwz  15,132(1)
	lwz  16,136(1)
	lwz  17,140(1)
	lwz  18,144(1)
	lwz  19,148(1)
	lwz  20,152(1)
	lwz  21,156(1)
	lwz  22,160(1)
	lwz  23,164(1)
	lwz  24,168(1)
	lwz  25,172(1)
	lwz  26,176(1)
	lwz  27,180(1)
	lwz  28,184(1)
	lwz  29,188(1)
	lwz  30,192(1)
	/* r31 is used as a temporary to restore lr, then restored itself */
	lwz  31,200(1)
	mtlr 31
	lwz  31,196(1)
	addi 1,1,256
	blr
.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)


/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits

#endif // defined(VGP_ppc32_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/