1 2/*--------------------------------------------------------------------*/ 3/*--- The core dispatch loop, for jumping to a code address. ---*/ 4/*--- dispatch-ppc64-linux.S ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2005-2015 Cerion Armour-Brown <cerion@open-works.co.uk> 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29*/ 30 31#include "pub_core_basics_asm.h" 32 33#if defined(VGP_ppc64le_linux) 34 35#include "pub_core_dispatch_asm.h" 36#include "pub_core_transtab_asm.h" 37#include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */ 38 39/* NOTE: PPC64 supports Big Endian and Little Endian. It also supports the 40 ELF version 1 and ELF version 2 APIs. 41 42 Currently LE uses ELF version 2 and BE uses ELF version 1. However, 43 BE and LE may support the other ELF version in the future. So, the 44 _CALL_ELF is used in the assembly function to enable code for a 45 specific ELF version independently of the Enianess of the machine. 46 The test "#if _CALL_ELF == 2" checks if ELF version 2 is being used. 47*/ 48 49/* References to globals via the TOC */ 50 51/* 52 .globl vgPlain_tt_fast 53 .lcomm vgPlain_tt_fast,4,4 54 .type vgPlain_tt_fast, @object 55*/ 56.section ".toc","aw" 57.tocent__vgPlain_tt_fast: 58 .tc vgPlain_tt_fast[TC],vgPlain_tt_fast 59.tocent__vgPlain_stats__n_xindirs_32: 60 .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32 61.tocent__vgPlain_stats__n_xindir_misses_32: 62 .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32 63.tocent__vgPlain_machine_ppc64_has_VMX: 64 .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX 65 66/*------------------------------------------------------------*/ 67/*--- ---*/ 68/*--- The dispatch loop. VG_(disp_run_translations) is ---*/ 69/*--- used to run all translations, ---*/ 70/*--- including no-redir ones. ---*/ 71/*--- ---*/ 72/*------------------------------------------------------------*/ 73 74/*----------------------------------------------------*/ 75/*--- Entry and preamble (set everything up) ---*/ 76/*----------------------------------------------------*/ 77 78/* signature: 79void VG_(disp_run_translations)( UWord* two_words, 80 void* guest_state, 81 Addr host_addr ); 82*/ 83 84.section ".text" 85.align 2 86.globl VG_(disp_run_translations) 87#if _CALL_ELF == 2 88.type VG_(disp_run_translations),@function 89VG_(disp_run_translations): 90.type .VG_(disp_run_translations),@function 91#else 92.section ".opd","aw" 93.align 3 94VG_(disp_run_translations): 95.quad .VG_(disp_run_translations),.TOC.@tocbase,0 96.previous 97.type .VG_(disp_run_translations),@function 98#endif 99.globl .VG_(disp_run_translations) 100.VG_(disp_run_translations): 101#if _CALL_ELF == 2 1020: addis 2, 12,.TOC.-0b@ha 103 addi 2,2,.TOC.-0b@l 104 .localentry VG_(disp_run_translations), .-VG_(disp_run_translations) 105#endif 106 107 /* r3 holds two_words */ 108 /* r4 holds guest_state */ 109 /* r5 holds host_addr */ 110 111 /* ----- entry point to VG_(disp_run_translations) ----- */ 112 /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */ 113 114 /* Save lr, cr */ 115 mflr 6 116 std 6,16(1) 117 mfcr 6 118 std 6,8(1) 119 120 /* New stack frame */ 121 stdu 1,-624(1) /* sp should maintain 16-byte alignment */ 122 123 /* General reg save area : 152 bytes */ 124 std 31,472(1) 125 std 30,464(1) 126 std 29,456(1) 127 std 28,448(1) 128 std 27,440(1) 129 std 26,432(1) 130 std 25,424(1) 131 std 24,416(1) 132 std 23,408(1) 133 std 22,400(1) 134 std 21,392(1) 135 std 20,384(1) 136 std 19,376(1) 137 std 18,368(1) 138 std 17,360(1) 139 std 16,352(1) 140 std 15,344(1) 141 std 14,336(1) 142 std 13,328(1) 143 std 3,104(1) /* save two_words for later */ 144 145 /* Save callee-saved registers... */ 146 /* Floating-point reg save area : 144 bytes */ 147 stfd 31,616(1) 148 stfd 30,608(1) 149 stfd 29,600(1) 150 stfd 28,592(1) 151 stfd 27,584(1) 152 stfd 26,576(1) 153 stfd 25,568(1) 154 stfd 24,560(1) 155 stfd 23,552(1) 156 stfd 22,544(1) 157 stfd 21,536(1) 158 stfd 20,528(1) 159 stfd 19,520(1) 160 stfd 18,512(1) 161 stfd 17,504(1) 162 stfd 16,496(1) 163 stfd 15,488(1) 164 stfd 14,480(1) 165 166 /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI. 167 The Linux kernel might not actually use VRSAVE for its intended 168 purpose, but it should be harmless to preserve anyway. */ 169 /* r3, r4, r5 are live here, so use r6 */ 170 ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) 171 ld 6,0(6) 172 cmpldi 6,0 173 beq .LafterVMX1 174 175 /* VRSAVE save word : 32 bytes */ 176 mfspr 6,256 /* vrsave reg is spr number 256 */ 177 stw 6,324(1) 178 179 /* Alignment padding : 4 bytes */ 180 181 /* Vector reg save area (quadword aligned) : 192 bytes */ 182 li 6,304 183 stvx 31,6,1 184 li 6,288 185 stvx 30,6,1 186 li 6,272 187 stvx 29,6,1 188 li 6,256 189 stvx 28,6,1 190 li 6,240 191 stvx 27,6,1 192 li 6,224 193 stvx 26,6,1 194 li 6,208 195 stvx 25,6,1 196 li 6,192 197 stvx 24,6,1 198 li 6,176 199 stvx 23,6,1 200 li 6,160 201 stvx 22,6,1 202 li 6,144 203 stvx 21,6,1 204 li 6,128 205 stvx 20,6,1 206.LafterVMX1: 207 208 /* Local variable space... */ 209 210 /* r3 holds two_words */ 211 /* r4 holds guest_state */ 212 /* r5 holds host_addr */ 213 214 /* 96(sp) used later to check FPSCR[RM] */ 215 /* 88(sp) used later to load fpscr with zero */ 216 /* 48:87(sp) free */ 217 218 /* Linkage Area (reserved) BE ABI 219 40(sp) : TOC 220 32(sp) : link editor doubleword 221 24(sp) : compiler doubleword 222 16(sp) : LR 223 8(sp) : CR 224 0(sp) : back-chain 225 */ 226 227 /* set host FPU control word to the default mode expected 228 by VEX-generated code. See comments in libvex.h for 229 more info. */ 230 /* => get zero into f3 (tedious) 231 fsub 3,3,3 is not a reliable way to do this, since if 232 f3 holds a NaN or similar then we don't necessarily 233 wind up with zero. */ 234 li 6,0 235 stw 6,88(1) 236 lfs 3,88(1) 237 mtfsf 0xFF,3 /* fpscr = lo32 of f3 */ 238 239 /* set host AltiVec control word to the default mode expected 240 by VEX-generated code. */ 241 ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) 242 ld 6,0(6) 243 cmpldi 6,0 244 beq .LafterVMX2 245 246 vspltisw 3,0x0 /* generate zero */ 247 mtvscr 3 248.LafterVMX2: 249 250 /* make a stack frame for the code we are calling */ 251 stdu 1,-48(1) 252 253 /* Set up the guest state ptr */ 254 mr 31,4 /* r31 (generated code gsp) = r4 */ 255#if _CALL_ELF == 2 256/* for the LE ABI need to setup r2 and r12 */ 2570: addis 2, 12,.TOC.-0b@ha 258 addi 2,2,.TOC.-0b@l 259#endif 260 261 /* and jump into the code cache. Chained translations in 262 the code cache run, until for whatever reason, they can't 263 continue. When that happens, the translation in question 264 will jump (or call) to one of the continuation points 265 VG_(cp_...) below. */ 266 mtctr 5 267 bctr 268 /*NOTREACHED*/ 269 270/*----------------------------------------------------*/ 271/*--- Postamble and exit. ---*/ 272/*----------------------------------------------------*/ 273 274.postamble: 275 /* At this point, r6 and r7 contain two 276 words to be returned to the caller. r6 277 holds a TRC value, and r7 optionally may 278 hold another word (for CHAIN_ME exits, the 279 address of the place to patch.) */ 280 281 /* undo the "make a stack frame for the code we are calling" */ 282 addi 1,1,48 283 284 /* We're leaving. Check that nobody messed with 285 VSCR or FPSCR in ways we don't expect. */ 286 /* Using r11 - value used again further on, so don't trash! */ 287 ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) 288 ld 11,0(11) 289 290 /* Set fpscr back to a known state, since vex-generated code 291 may have messed with fpscr[rm]. */ 292 li 5,0 293 addi 1,1,-16 294 stw 5,0(1) 295 lfs 3,0(1) 296 addi 1,1,16 297 mtfsf 0xFF,3 /* fpscr = f3 */ 298 299 cmpldi 11,0 /* Do we have altivec? */ 300 beq .LafterVMX8 301 302 /* Check VSCR[NJ] == 1 */ 303 /* first generate 4x 0x00010000 */ 304 vspltisw 4,0x1 /* 4x 0x00000001 */ 305 vspltisw 5,0x0 /* zero */ 306 vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */ 307 /* retrieve VSCR and mask wanted bits */ 308 mfvscr 7 309 vand 7,7,6 /* gives NJ flag */ 310 vspltw 7,7,0x3 /* flags-word to all lanes */ 311 vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */ 312 bt 24,.invariant_violation /* branch if all_equal */ 313 314.LafterVMX8: 315 /* otherwise we're OK */ 316 b .remove_frame 317 318.invariant_violation: 319 li 6,VG_TRC_INVARIANT_FAILED 320 li 7,0 321 /* fall through */ 322 323.remove_frame: 324 /* r11 already holds VG_(machine_ppc32_has_VMX) value */ 325 cmplwi 11,0 326 beq .LafterVMX9 327 328 /* Restore Altivec regs. 329 Use r5 as scratch since r6/r7 are live. */ 330 /* VRSAVE */ 331 lwz 5,324(1) 332 mfspr 5,256 /* VRSAVE reg is spr number 256 */ 333 334 /* Vector regs */ 335 li 5,304 336 lvx 31,5,1 337 li 5,288 338 lvx 30,5,1 339 li 5,272 340 lvx 29,5,1 341 li 5,256 342 lvx 28,5,1 343 li 5,240 344 lvx 27,5,1 345 li 5,224 346 lvx 26,5,1 347 li 5,208 348 lvx 25,5,1 349 li 5,192 350 lvx 24,5,1 351 li 5,176 352 lvx 23,5,1 353 li 5,160 354 lvx 22,5,1 355 li 5,144 356 lvx 21,5,1 357 li 5,128 358 lvx 20,5,1 359.LafterVMX9: 360 361 /* Restore FP regs */ 362 /* Floating-point regs */ 363 lfd 31,616(1) 364 lfd 30,608(1) 365 lfd 29,600(1) 366 lfd 28,592(1) 367 lfd 27,584(1) 368 lfd 26,576(1) 369 lfd 25,568(1) 370 lfd 24,560(1) 371 lfd 23,552(1) 372 lfd 22,544(1) 373 lfd 21,536(1) 374 lfd 20,528(1) 375 lfd 19,520(1) 376 lfd 18,512(1) 377 lfd 17,504(1) 378 lfd 16,496(1) 379 lfd 15,488(1) 380 lfd 14,480(1) 381 382 /* restore int regs, including importantly r3 (two_words) */ 383 ld 31,472(1) 384 ld 30,464(1) 385 ld 29,456(1) 386 ld 28,448(1) 387 ld 27,440(1) 388 ld 26,432(1) 389 ld 25,424(1) 390 ld 24,416(1) 391 ld 23,408(1) 392 ld 22,400(1) 393 ld 21,392(1) 394 ld 20,384(1) 395 ld 19,376(1) 396 ld 18,368(1) 397 ld 17,360(1) 398 ld 16,352(1) 399 ld 15,344(1) 400 ld 14,336(1) 401 ld 13,328(1) 402 ld 3,104(1) 403 /* Stash return values */ 404 std 6,0(3) 405 std 7,8(3) 406 407 /* restore lr & sp, and leave */ 408 ld 0,632(1) /* stack_size + 8 */ 409 mtcr 0 410 ld 0,640(1) /* stack_size + 16 */ 411 mtlr 0 412 addi 1,1,624 /* stack_size */ 413 blr 414#if _CALL_ELF == 2 415 .size VG_(disp_run_translations),.-VG_(disp_run_translations) 416#endif 417 418 419/*----------------------------------------------------*/ 420/*--- Continuation points ---*/ 421/*----------------------------------------------------*/ 422 423/* ------ Chain me to slow entry point ------ */ 424 .section ".text" 425 .align 2 426 .globl VG_(disp_cp_chain_me_to_slowEP) 427#if _CALL_ELF == 2 428 .type VG_(disp_cp_chain_me_to_slowEP),@function 429 VG_(disp_cp_chain_me_to_slowEP): 430#else 431 .section ".opd","aw" 432 .align 3 433VG_(disp_cp_chain_me_to_slowEP): 434 .quad .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0 435 .previous 436#endif 437 .type .VG_(disp_cp_chain_me_to_slowEP),@function 438 .globl .VG_(disp_cp_chain_me_to_slowEP) 439.VG_(disp_cp_chain_me_to_slowEP): 440#if _CALL_ELF == 2 4410: addis 2, 12,.TOC.-0b@ha 442 addi 2,2,.TOC.-0b@l 443 .localentry VG_(disp_cp_chain_me_to_slowEP), .-VG_(disp_cp_chain_me_to_slowEP) 444#endif 445 /* We got called. The return address indicates 446 where the patching needs to happen. Collect 447 the return address and, exit back to C land, 448 handing the caller the pair (Chain_me_S, RA) */ 449 li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP 450 mflr 7 451 /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP 452 4 = mtctr r30 453 4 = btctr 454 */ 455 subi 7,7,20+4+4 456 b .postamble 457#if _CALL_ELF == 2 458 .size VG_(disp_cp_chain_me_to_slowEP),.-VG_(disp_cp_chain_me_to_slowEP) 459#endif 460 461/* ------ Chain me to fast entry point ------ */ 462 .section ".text" 463 .align 2 464 .globl VG_(disp_cp_chain_me_to_fastEP) 465#if _CALL_ELF == 2 466 .type VG_(disp_cp_chain_me_to_fastEP),@function 467VG_(disp_cp_chain_me_to_fastEP): 468#else 469 .section ".opd","aw" 470 .align 3 471VG_(disp_cp_chain_me_to_fastEP): 472 .quad .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0 473 .previous 474#endif 475 .type .VG_(disp_cp_chain_me_to_fastEP),@function 476 .globl .VG_(disp_cp_chain_me_to_fastEP) 477.VG_(disp_cp_chain_me_to_fastEP): 478#if _CALL_ELF == 2 4790: addis 2, 12,.TOC.-0b@ha 480 addi 2,2,.TOC.-0b@l 481 .localentry VG_(disp_cp_chain_me_to_fastEP), .-VG_(disp_cp_chain_me_to_fastEP) 482#endif 483 /* We got called. The return address indicates 484 where the patching needs to happen. Collect 485 the return address and, exit back to C land, 486 handing the caller the pair (Chain_me_S, RA) */ 487 li 6, VG_TRC_CHAIN_ME_TO_FAST_EP 488 mflr 7 489 /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP 490 4 = mtctr r30 491 4 = btctr 492 */ 493 subi 7,7,20+4+4 494 b .postamble 495#if _CALL_ELF == 2 496 .size VG_(disp_cp_chain_me_to_fastEP),.-VG_(disp_cp_chain_me_to_fastEP) 497#endif 498 499/* ------ Indirect but boring jump ------ */ 500 .section ".text" 501 .align 2 502 .globl VG_(disp_cp_xindir) 503#if _CALL_ELF == 2 504 .type VG_(disp_cp_xindir),@function 505VG_(disp_cp_xindir): 506#else 507 .section ".opd","aw" 508 .align 3 509VG_(disp_cp_xindir): 510 .quad .VG_(disp_cp_xindir),.TOC.@tocbase,0 511 .previous 512#endif 513 .type .VG_(disp_cp_xindir),@function 514 .globl .VG_(disp_cp_xindir) 515.VG_(disp_cp_xindir): 516#if _CALL_ELF == 2 5170: addis 2, 12,.TOC.-0b@ha 518 addi 2,2,.TOC.-0b@l 519 .localentry VG_(disp_cp_xindir), .-VG_(disp_cp_xindir) 520#endif 521 /* Where are we going? */ 522 ld 3,OFFSET_ppc64_CIA(31) 523 524 /* stats only */ 525 ld 5, .tocent__vgPlain_stats__n_xindirs_32@toc(2) 526 lwz 6,0(5) 527 addi 6,6,1 528 stw 6,0(5) 529 530 /* r5 = &VG_(tt_fast) */ 531 ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */ 532 533 /* try a fast lookup in the translation cache */ 534 /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) 535 = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */ 536 rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */ 537 sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */ 538 add 5,5,4 /* & VG_(tt_fast)[entry#] */ 539 ld 6,0(5) /* .guest */ 540 ld 7,8(5) /* .host */ 541 cmpd 3,6 542 bne .fast_lookup_failed 543 544 /* Found a match. Jump to .host. */ 545 mtctr 7 546 bctr 547#if _CALL_ELF == 2 548 .size VG_(disp_cp_xindir),.-VG_(disp_cp_xindir) 549#endif 550 551.fast_lookup_failed: 552 /* stats only */ 553 ld 5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2) 554 lwz 6,0(5) 555 addi 6,6,1 556 stw 6,0(5) 557 558 li 6,VG_TRC_INNER_FASTMISS 559 li 7,0 560 b .postamble 561 /*NOTREACHED*/ 562 563/* ------ Assisted jump ------ */ 564.section ".text" 565 .align 2 566 .globl VG_(disp_cp_xassisted) 567#if _CALL_ELF == 2 568 .type VG_(disp_cp_xassisted),@function 569VG_(disp_cp_xassisted): 570#else 571 .section ".opd","aw" 572 .align 3 573VG_(disp_cp_xassisted): 574 .quad .VG_(disp_cp_xassisted),.TOC.@tocbase,0 575 .previous 576#endif 577#if _CALL_ELF == 2 5780: addis 2, 12,.TOC.-0b@ha 579 addi 2,2,.TOC.-0b@l 580 .localentry VG_(disp_cp_xassisted), .-VG_(disp_cp_xassisted) 581#endif 582 .type .VG_(disp_cp_xassisted),@function 583 .globl .VG_(disp_cp_xassisted) 584.VG_(disp_cp_xassisted): 585 /* r31 contains the TRC */ 586 mr 6,31 587 li 7,0 588 b .postamble 589#if _CALL_ELF == 2 590 .size VG_(disp_cp_xassisted),.-VG_(disp_cp_xassisted) 591#endif 592 593/* ------ Event check failed ------ */ 594 .section ".text" 595 .align 2 596 .globl VG_(disp_cp_evcheck_fail) 597#if _CALL_ELF == 2 598 .type VG_(disp_cp_evcheck_fail),@function 599VG_(disp_cp_evcheck_fail): 600#else 601 .section ".opd","aw" 602 .align 3 603VG_(disp_cp_evcheck_fail): 604 .quad .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0 605 .previous 606#endif 607#if _CALL_ELF == 2 6080: addis 2, 12,.TOC.-0b@ha 609 addi 2,2,.TOC.-0b@l 610 .localentry VG_(disp_cp_evcheck_fail), .-VG_(disp_cp_evcheck_fail) 611#endif 612 .type .VG_(disp_cp_evcheck_fail),@function 613 .globl .VG_(disp_cp_evcheck_fail) 614.VG_(disp_cp_evcheck_fail): 615 li 6,VG_TRC_INNER_COUNTERZERO 616 li 7,0 617 b .postamble 618#if _CALL_ELF == 2 619 .size VG_(disp_cp_evcheck_fail),.-VG_(disp_cp_evcheck_fail) 620#endif 621 622.size .VG_(disp_run_translations), .-.VG_(disp_run_translations) 623 624#endif // defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) 625 626/* Let the linker know we don't need an executable stack */ 627MARK_STACK_NO_EXEC 628 629/*--------------------------------------------------------------------*/ 630/*--- end ---*/ 631/*--------------------------------------------------------------------*/ 632