1 2/*--------------------------------------------------------------------*/ 3/*--- The core dispatch loop, for jumping to a code address. ---*/ 4/*--- dispatch-ppc64-linux.S ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2005-2013 Cerion Armour-Brown <cerion@open-works.co.uk> 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29*/ 30 31#if defined(VGP_ppc64le_linux) 32 33#include "pub_core_basics_asm.h" 34#include "pub_core_dispatch_asm.h" 35#include "pub_core_transtab_asm.h" 36#include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */ 37 38/* NOTE: PPC64 supports Big Endian and Little Endian. It also supports the 39 ELF version 1 and ELF version 2 APIs. 40 41 Currently LE uses ELF version 2 and BE uses ELF version 1. However, 42 BE and LE may support the other ELF version in the future. So, the 43 _CALL_ELF is used in the assembly function to enable code for a 44 specific ELF version independently of the Enianess of the machine. 45 The test "#if _CALL_ELF == 2" checks if ELF version 2 is being used. 46*/ 47 48/* References to globals via the TOC */ 49 50/* 51 .globl vgPlain_tt_fast 52 .lcomm vgPlain_tt_fast,4,4 53 .type vgPlain_tt_fast, @object 54*/ 55.section ".toc","aw" 56.tocent__vgPlain_tt_fast: 57 .tc vgPlain_tt_fast[TC],vgPlain_tt_fast 58.tocent__vgPlain_stats__n_xindirs_32: 59 .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32 60.tocent__vgPlain_stats__n_xindir_misses_32: 61 .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32 62.tocent__vgPlain_machine_ppc64_has_VMX: 63 .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX 64 65/*------------------------------------------------------------*/ 66/*--- ---*/ 67/*--- The dispatch loop. VG_(disp_run_translations) is ---*/ 68/*--- used to run all translations, ---*/ 69/*--- including no-redir ones. ---*/ 70/*--- ---*/ 71/*------------------------------------------------------------*/ 72 73/*----------------------------------------------------*/ 74/*--- Entry and preamble (set everything up) ---*/ 75/*----------------------------------------------------*/ 76 77/* signature: 78void VG_(disp_run_translations)( UWord* two_words, 79 void* guest_state, 80 Addr host_addr ); 81*/ 82 83.section ".text" 84.align 2 85.globl VG_(disp_run_translations) 86#if _CALL_ELF == 2 87.type VG_(disp_run_translations),@function 88VG_(disp_run_translations): 89.type .VG_(disp_run_translations),@function 90#else 91.section ".opd","aw" 92.align 3 93VG_(disp_run_translations): 94.quad .VG_(disp_run_translations),.TOC.@tocbase,0 95.previous 96.type .VG_(disp_run_translations),@function 97#endif 98.globl .VG_(disp_run_translations) 99.VG_(disp_run_translations): 100#if _CALL_ELF == 2 1010: addis 2, 12,.TOC.-0b@ha 102 addi 2,2,.TOC.-0b@l 103 .localentry VG_(disp_run_translations), .-VG_(disp_run_translations) 104#endif 105 106 /* r3 holds two_words */ 107 /* r4 holds guest_state */ 108 /* r5 holds host_addr */ 109 110 /* ----- entry point to VG_(disp_run_translations) ----- */ 111 /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */ 112 113 /* Save lr, cr */ 114 mflr 6 115 std 6,16(1) 116 mfcr 6 117 std 6,8(1) 118 119 /* New stack frame */ 120 stdu 1,-624(1) /* sp should maintain 16-byte alignment */ 121 122 /* General reg save area : 152 bytes */ 123 std 31,472(1) 124 std 30,464(1) 125 std 29,456(1) 126 std 28,448(1) 127 std 27,440(1) 128 std 26,432(1) 129 std 25,424(1) 130 std 24,416(1) 131 std 23,408(1) 132 std 22,400(1) 133 std 21,392(1) 134 std 20,384(1) 135 std 19,376(1) 136 std 18,368(1) 137 std 17,360(1) 138 std 16,352(1) 139 std 15,344(1) 140 std 14,336(1) 141 std 13,328(1) 142 std 3,104(1) /* save two_words for later */ 143 144 /* Save callee-saved registers... */ 145 /* Floating-point reg save area : 144 bytes */ 146 stfd 31,616(1) 147 stfd 30,608(1) 148 stfd 29,600(1) 149 stfd 28,592(1) 150 stfd 27,584(1) 151 stfd 26,576(1) 152 stfd 25,568(1) 153 stfd 24,560(1) 154 stfd 23,552(1) 155 stfd 22,544(1) 156 stfd 21,536(1) 157 stfd 20,528(1) 158 stfd 19,520(1) 159 stfd 18,512(1) 160 stfd 17,504(1) 161 stfd 16,496(1) 162 stfd 15,488(1) 163 stfd 14,480(1) 164 165 /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI. 166 The Linux kernel might not actually use VRSAVE for its intended 167 purpose, but it should be harmless to preserve anyway. */ 168 /* r3, r4, r5 are live here, so use r6 */ 169 ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) 170 ld 6,0(6) 171 cmpldi 6,0 172 beq .LafterVMX1 173 174 /* VRSAVE save word : 32 bytes */ 175 mfspr 6,256 /* vrsave reg is spr number 256 */ 176 stw 6,324(1) 177 178 /* Alignment padding : 4 bytes */ 179 180 /* Vector reg save area (quadword aligned) : 192 bytes */ 181 li 6,304 182 stvx 31,6,1 183 li 6,288 184 stvx 30,6,1 185 li 6,272 186 stvx 29,6,1 187 li 6,256 188 stvx 28,6,1 189 li 6,240 190 stvx 27,6,1 191 li 6,224 192 stvx 26,6,1 193 li 6,208 194 stvx 25,6,1 195 li 6,192 196 stvx 24,6,1 197 li 6,176 198 stvx 23,6,1 199 li 6,160 200 stvx 22,6,1 201 li 6,144 202 stvx 21,6,1 203 li 6,128 204 stvx 20,6,1 205.LafterVMX1: 206 207 /* Local variable space... */ 208 209 /* r3 holds two_words */ 210 /* r4 holds guest_state */ 211 /* r5 holds host_addr */ 212 213 /* 96(sp) used later to check FPSCR[RM] */ 214 /* 88(sp) used later to load fpscr with zero */ 215 /* 48:87(sp) free */ 216 217 /* Linkage Area (reserved) BE ABI 218 40(sp) : TOC 219 32(sp) : link editor doubleword 220 24(sp) : compiler doubleword 221 16(sp) : LR 222 8(sp) : CR 223 0(sp) : back-chain 224 */ 225 226 /* set host FPU control word to the default mode expected 227 by VEX-generated code. See comments in libvex.h for 228 more info. */ 229 /* => get zero into f3 (tedious) 230 fsub 3,3,3 is not a reliable way to do this, since if 231 f3 holds a NaN or similar then we don't necessarily 232 wind up with zero. */ 233 li 6,0 234 stw 6,88(1) 235 lfs 3,88(1) 236 mtfsf 0xFF,3 /* fpscr = lo32 of f3 */ 237 238 /* set host AltiVec control word to the default mode expected 239 by VEX-generated code. */ 240 ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) 241 ld 6,0(6) 242 cmpldi 6,0 243 beq .LafterVMX2 244 245 vspltisw 3,0x0 /* generate zero */ 246 mtvscr 3 247.LafterVMX2: 248 249 /* make a stack frame for the code we are calling */ 250 stdu 1,-48(1) 251 252 /* Set up the guest state ptr */ 253 mr 31,4 /* r31 (generated code gsp) = r4 */ 254#if _CALL_ELF == 2 255/* for the LE ABI need to setup r2 and r12 */ 2560: addis 2, 12,.TOC.-0b@ha 257 addi 2,2,.TOC.-0b@l 258#endif 259 260 /* and jump into the code cache. Chained translations in 261 the code cache run, until for whatever reason, they can't 262 continue. When that happens, the translation in question 263 will jump (or call) to one of the continuation points 264 VG_(cp_...) below. */ 265 mtctr 5 266 bctr 267 /*NOTREACHED*/ 268 269/*----------------------------------------------------*/ 270/*--- Postamble and exit. ---*/ 271/*----------------------------------------------------*/ 272 273.postamble: 274 /* At this point, r6 and r7 contain two 275 words to be returned to the caller. r6 276 holds a TRC value, and r7 optionally may 277 hold another word (for CHAIN_ME exits, the 278 address of the place to patch.) */ 279 280 /* undo the "make a stack frame for the code we are calling" */ 281 addi 1,1,48 282 283 /* We're leaving. Check that nobody messed with 284 VSCR or FPSCR in ways we don't expect. */ 285 /* Using r11 - value used again further on, so don't trash! */ 286 ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) 287 ld 11,0(11) 288 289 /* Set fpscr back to a known state, since vex-generated code 290 may have messed with fpscr[rm]. */ 291 li 5,0 292 addi 1,1,-16 293 stw 5,0(1) 294 lfs 3,0(1) 295 addi 1,1,16 296 mtfsf 0xFF,3 /* fpscr = f3 */ 297 298 cmpldi 11,0 /* Do we have altivec? */ 299 beq .LafterVMX8 300 301 /* Check VSCR[NJ] == 1 */ 302 /* first generate 4x 0x00010000 */ 303 vspltisw 4,0x1 /* 4x 0x00000001 */ 304 vspltisw 5,0x0 /* zero */ 305 vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */ 306 /* retrieve VSCR and mask wanted bits */ 307 mfvscr 7 308 vand 7,7,6 /* gives NJ flag */ 309 vspltw 7,7,0x3 /* flags-word to all lanes */ 310 vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */ 311 bt 24,.invariant_violation /* branch if all_equal */ 312 313.LafterVMX8: 314 /* otherwise we're OK */ 315 b .remove_frame 316 317.invariant_violation: 318 li 6,VG_TRC_INVARIANT_FAILED 319 li 7,0 320 /* fall through */ 321 322.remove_frame: 323 /* r11 already holds VG_(machine_ppc32_has_VMX) value */ 324 cmplwi 11,0 325 beq .LafterVMX9 326 327 /* Restore Altivec regs. 328 Use r5 as scratch since r6/r7 are live. */ 329 /* VRSAVE */ 330 lwz 5,324(1) 331 mfspr 5,256 /* VRSAVE reg is spr number 256 */ 332 333 /* Vector regs */ 334 li 5,304 335 lvx 31,5,1 336 li 5,288 337 lvx 30,5,1 338 li 5,272 339 lvx 29,5,1 340 li 5,256 341 lvx 28,5,1 342 li 5,240 343 lvx 27,5,1 344 li 5,224 345 lvx 26,5,1 346 li 5,208 347 lvx 25,5,1 348 li 5,192 349 lvx 24,5,1 350 li 5,176 351 lvx 23,5,1 352 li 5,160 353 lvx 22,5,1 354 li 5,144 355 lvx 21,5,1 356 li 5,128 357 lvx 20,5,1 358.LafterVMX9: 359 360 /* Restore FP regs */ 361 /* Floating-point regs */ 362 lfd 31,616(1) 363 lfd 30,608(1) 364 lfd 29,600(1) 365 lfd 28,592(1) 366 lfd 27,584(1) 367 lfd 26,576(1) 368 lfd 25,568(1) 369 lfd 24,560(1) 370 lfd 23,552(1) 371 lfd 22,544(1) 372 lfd 21,536(1) 373 lfd 20,528(1) 374 lfd 19,520(1) 375 lfd 18,512(1) 376 lfd 17,504(1) 377 lfd 16,496(1) 378 lfd 15,488(1) 379 lfd 14,480(1) 380 381 /* restore int regs, including importantly r3 (two_words) */ 382 ld 31,472(1) 383 ld 30,464(1) 384 ld 29,456(1) 385 ld 28,448(1) 386 ld 27,440(1) 387 ld 26,432(1) 388 ld 25,424(1) 389 ld 24,416(1) 390 ld 23,408(1) 391 ld 22,400(1) 392 ld 21,392(1) 393 ld 20,384(1) 394 ld 19,376(1) 395 ld 18,368(1) 396 ld 17,360(1) 397 ld 16,352(1) 398 ld 15,344(1) 399 ld 14,336(1) 400 ld 13,328(1) 401 ld 3,104(1) 402 /* Stash return values */ 403 std 6,0(3) 404 std 7,8(3) 405 406 /* restore lr & sp, and leave */ 407 ld 0,632(1) /* stack_size + 8 */ 408 mtcr 0 409 ld 0,640(1) /* stack_size + 16 */ 410 mtlr 0 411 addi 1,1,624 /* stack_size */ 412 blr 413#if _CALL_ELF == 2 414 .size VG_(disp_run_translations),.-VG_(disp_run_translations) 415#endif 416 417 418/*----------------------------------------------------*/ 419/*--- Continuation points ---*/ 420/*----------------------------------------------------*/ 421 422/* ------ Chain me to slow entry point ------ */ 423 .section ".text" 424 .align 2 425 .globl VG_(disp_cp_chain_me_to_slowEP) 426#if _CALL_ELF == 2 427 .type VG_(disp_cp_chain_me_to_slowEP),@function 428 VG_(disp_cp_chain_me_to_slowEP): 429#else 430 .section ".opd","aw" 431 .align 3 432VG_(disp_cp_chain_me_to_slowEP): 433 .quad .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0 434 .previous 435#endif 436 .type .VG_(disp_cp_chain_me_to_slowEP),@function 437 .globl .VG_(disp_cp_chain_me_to_slowEP) 438.VG_(disp_cp_chain_me_to_slowEP): 439#if _CALL_ELF == 2 4400: addis 2, 12,.TOC.-0b@ha 441 addi 2,2,.TOC.-0b@l 442 .localentry VG_(disp_cp_chain_me_to_slowEP), .-VG_(disp_cp_chain_me_to_slowEP) 443#endif 444 /* We got called. The return address indicates 445 where the patching needs to happen. Collect 446 the return address and, exit back to C land, 447 handing the caller the pair (Chain_me_S, RA) */ 448 li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP 449 mflr 7 450 /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP 451 4 = mtctr r30 452 4 = btctr 453 */ 454 subi 7,7,20+4+4 455 b .postamble 456#if _CALL_ELF == 2 457 .size VG_(disp_cp_chain_me_to_slowEP),.-VG_(disp_cp_chain_me_to_slowEP) 458#endif 459 460/* ------ Chain me to fast entry point ------ */ 461 .section ".text" 462 .align 2 463 .globl VG_(disp_cp_chain_me_to_fastEP) 464#if _CALL_ELF == 2 465 .type VG_(disp_cp_chain_me_to_fastEP),@function 466VG_(disp_cp_chain_me_to_fastEP): 467#else 468 .section ".opd","aw" 469 .align 3 470VG_(disp_cp_chain_me_to_fastEP): 471 .quad .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0 472 .previous 473#endif 474 .type .VG_(disp_cp_chain_me_to_fastEP),@function 475 .globl .VG_(disp_cp_chain_me_to_fastEP) 476.VG_(disp_cp_chain_me_to_fastEP): 477#if _CALL_ELF == 2 4780: addis 2, 12,.TOC.-0b@ha 479 addi 2,2,.TOC.-0b@l 480 .localentry VG_(disp_cp_chain_me_to_fastEP), .-VG_(disp_cp_chain_me_to_fastEP) 481#endif 482 /* We got called. The return address indicates 483 where the patching needs to happen. Collect 484 the return address and, exit back to C land, 485 handing the caller the pair (Chain_me_S, RA) */ 486 li 6, VG_TRC_CHAIN_ME_TO_FAST_EP 487 mflr 7 488 /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP 489 4 = mtctr r30 490 4 = btctr 491 */ 492 subi 7,7,20+4+4 493 b .postamble 494#if _CALL_ELF == 2 495 .size VG_(disp_cp_chain_me_to_fastEP),.-VG_(disp_cp_chain_me_to_fastEP) 496#endif 497 498/* ------ Indirect but boring jump ------ */ 499 .section ".text" 500 .align 2 501 .globl VG_(disp_cp_xindir) 502#if _CALL_ELF == 2 503 .type VG_(disp_cp_xindir),@function 504VG_(disp_cp_xindir): 505#else 506 .section ".opd","aw" 507 .align 3 508VG_(disp_cp_xindir): 509 .quad .VG_(disp_cp_xindir),.TOC.@tocbase,0 510 .previous 511#endif 512 .type .VG_(disp_cp_xindir),@function 513 .globl .VG_(disp_cp_xindir) 514.VG_(disp_cp_xindir): 515#if _CALL_ELF == 2 5160: addis 2, 12,.TOC.-0b@ha 517 addi 2,2,.TOC.-0b@l 518 .localentry VG_(disp_cp_xindir), .-VG_(disp_cp_xindir) 519#endif 520 /* Where are we going? */ 521 ld 3,OFFSET_ppc64_CIA(31) 522 523 /* stats only */ 524 ld 5, .tocent__vgPlain_stats__n_xindirs_32@toc(2) 525 lwz 6,0(5) 526 addi 6,6,1 527 stw 6,0(5) 528 529 /* r5 = &VG_(tt_fast) */ 530 ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */ 531 532 /* try a fast lookup in the translation cache */ 533 /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) 534 = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */ 535 rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */ 536 sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */ 537 add 5,5,4 /* & VG_(tt_fast)[entry#] */ 538 ld 6,0(5) /* .guest */ 539 ld 7,8(5) /* .host */ 540 cmpd 3,6 541 bne .fast_lookup_failed 542 543 /* Found a match. Jump to .host. */ 544 mtctr 7 545 bctr 546#if _CALL_ELF == 2 547 .size VG_(disp_cp_xindir),.-VG_(disp_cp_xindir) 548#endif 549 550.fast_lookup_failed: 551 /* stats only */ 552 ld 5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2) 553 lwz 6,0(5) 554 addi 6,6,1 555 stw 6,0(5) 556 557 li 6,VG_TRC_INNER_FASTMISS 558 li 7,0 559 b .postamble 560 /*NOTREACHED*/ 561 562/* ------ Assisted jump ------ */ 563.section ".text" 564 .align 2 565 .globl VG_(disp_cp_xassisted) 566#if _CALL_ELF == 2 567 .type VG_(disp_cp_xassisted),@function 568VG_(disp_cp_xassisted): 569#else 570 .section ".opd","aw" 571 .align 3 572VG_(disp_cp_xassisted): 573 .quad .VG_(disp_cp_xassisted),.TOC.@tocbase,0 574 .previous 575#endif 576#if _CALL_ELF == 2 5770: addis 2, 12,.TOC.-0b@ha 578 addi 2,2,.TOC.-0b@l 579 .localentry VG_(disp_cp_xassisted), .-VG_(disp_cp_xassisted) 580#endif 581 .type .VG_(disp_cp_xassisted),@function 582 .globl .VG_(disp_cp_xassisted) 583.VG_(disp_cp_xassisted): 584 /* r31 contains the TRC */ 585 mr 6,31 586 li 7,0 587 b .postamble 588#if _CALL_ELF == 2 589 .size VG_(disp_cp_xassisted),.-VG_(disp_cp_xassisted) 590#endif 591 592/* ------ Event check failed ------ */ 593 .section ".text" 594 .align 2 595 .globl VG_(disp_cp_evcheck_fail) 596#if _CALL_ELF == 2 597 .type VG_(disp_cp_evcheck_fail),@function 598VG_(disp_cp_evcheck_fail): 599#else 600 .section ".opd","aw" 601 .align 3 602VG_(disp_cp_evcheck_fail): 603 .quad .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0 604 .previous 605#endif 606#if _CALL_ELF == 2 6070: addis 2, 12,.TOC.-0b@ha 608 addi 2,2,.TOC.-0b@l 609 .localentry VG_(disp_cp_evcheck_fail), .-VG_(disp_cp_evcheck_fail) 610#endif 611 .type .VG_(disp_cp_evcheck_fail),@function 612 .globl .VG_(disp_cp_evcheck_fail) 613.VG_(disp_cp_evcheck_fail): 614 li 6,VG_TRC_INNER_COUNTERZERO 615 li 7,0 616 b .postamble 617#if _CALL_ELF == 2 618 .size VG_(disp_cp_evcheck_fail),.-VG_(disp_cp_evcheck_fail) 619#endif 620 621.size .VG_(disp_run_translations), .-.VG_(disp_run_translations) 622 623/* Let the linker know we don't need an executable stack */ 624.section .note.GNU-stack,"",@progbits 625 626#endif // defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) 627 628/*--------------------------------------------------------------------*/ 629/*--- end ---*/ 630/*--------------------------------------------------------------------*/ 631