
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc32-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2005-2012 Cerion Armour-Brown <cerion@open-works.co.uk>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_ppc32_linux)

#include "config.h"
#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"	/* for OFFSET_ppc32_CIA */


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
/*--- used to run all translations,                        ---*/
/*--- including no-redir ones.                             ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Entry and preamble (set everything up)       ---*/
/*----------------------------------------------------*/

/* signature:
void VG_(disp_run_translations)( UWord* two_words,
                                 void*  guest_state,
                                 Addr   host_addr );
*/
/* Register roles on entry:
      r3 = two_words    (saved at 272(sp); the result pair is written
                         through it on exit)
      r4 = guest_state  (copied to r31 for the generated code)
      r5 = host_addr    (jumped to via CTR)

   Layout of the 496-byte frame, as offsets from sp immediately
   after the "stwu 1,-496(1)" below:
      500        LR (stored at 4(parent_sp))
      352..495   f14-f31   (only if machine_ppc32_has_FP)
      276..351   r13-r31
      272        two_words
      244        VRSAVE    (only if machine_ppc32_has_VMX)
      48..239    v20-v31   (only if machine_ppc32_has_VMX)
      44         CR
      20..32     local scratch words
      0..4       linkage area (back-chain, LR slot)

   A further 16-byte frame is pushed just before jumping into the
   code cache, so on arrival at 'postamble' sp is 16 bytes below the
   frame described above.  All save-area offsets are only valid once
   that extra frame has been popped again.
*/
.text
.globl  VG_(disp_run_translations)
.type  VG_(disp_run_translations), @function
VG_(disp_run_translations):
        /* r3 holds two_words */
        /* r4 holds guest_state */
        /* r5 holds host_addr */

        /* ----- entry point to VG_(disp_run_translations) ----- */
        /* For Linux/ppc32 we need the SysV ABI, which uses
           LR->4(parent_sp), CR->anywhere.
           (The AIX ABI, used on Darwin,
           uses LR->8(prt_sp), CR->4(prt_sp))
        */

        /* Save lr */
        mflr    6
        stw     6,4(1)

        /* New stack frame */
        stwu    1,-496(1)  /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */
        /* r3, r4, r5 are live here, so use r6 */
        lis     6,VG_(machine_ppc32_has_FP)@ha
        lwz     6,VG_(machine_ppc32_has_FP)@l(6)
        cmplwi  6,0
        beq     LafterFP1

        /* Floating-point reg save area : 144 bytes */
        stfd    31,488(1)
        stfd    30,480(1)
        stfd    29,472(1)
        stfd    28,464(1)
        stfd    27,456(1)
        stfd    26,448(1)
        stfd    25,440(1)
        stfd    24,432(1)
        stfd    23,424(1)
        stfd    22,416(1)
        stfd    21,408(1)
        stfd    20,400(1)
        stfd    19,392(1)
        stfd    18,384(1)
        stfd    17,376(1)
        stfd    16,368(1)
        stfd    15,360(1)
        stfd    14,352(1)
LafterFP1:

        /* General reg save area : 76 bytes */
        stw     31,348(1)
        stw     30,344(1)
        stw     29,340(1)
        stw     28,336(1)
        stw     27,332(1)
        stw     26,328(1)
        stw     25,324(1)
        stw     24,320(1)
        stw     23,316(1)
        stw     22,312(1)
        stw     21,308(1)
        stw     20,304(1)
        stw     19,300(1)
        stw     18,296(1)
        stw     17,292(1)
        stw     16,288(1)
        stw     15,284(1)
        stw     14,280(1)
        stw     13,276(1)
        stw     3,272(1)  /* save two_words for later */

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
        /* r3, r4, r5 are live here, so use r6 */
        lis     6,VG_(machine_ppc32_has_VMX)@ha
        lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
        cmplwi  6,0
        beq     LafterVMX1

#ifdef HAS_ALTIVEC
        /* VRSAVE save word : 32 bits */
        mfspr   6,256         /* vrsave reg is spr number 256 */
        stw     6,244(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes */
        /* Each slot here must pair with the lvx of the same register
           number and the same offset in remove_frame. */
        li      6,224
        stvx    31,6,1
        li      6,208
        stvx    30,6,1
        li      6,192
        stvx    29,6,1
        li      6,176
        stvx    28,6,1
        li      6,160
        stvx    27,6,1
        li      6,144
        stvx    26,6,1
        li      6,128
        stvx    25,6,1
        li      6,112
        stvx    24,6,1        /* v24 goes in slot 112 (restored by lvx 24) */
        li      6,96
        stvx    23,6,1
        li      6,80
        stvx    22,6,1
        li      6,64
        stvx    21,6,1
        li      6,48
        stvx    20,6,1
#endif

LafterVMX1:

        /* Save cr */
        mfcr    6
        stw     6,44(1)

        /* Local variable space... */

        /* 32(sp) used later to check FPSCR[RM] */

        /* r3 holds two_words */
        /* r4 holds guest_state */
        /* r5 holds host_addr */

        /* 24(sp) used later to stop ctr reg being clobbered */
        /* 20(sp) used later to load fpscr with zero */
        /* 8:16(sp) free */

        /* Linkage Area (reserved)
           4(sp)  : LR
           0(sp)  : back-chain
        */

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        lis     6,VG_(machine_ppc32_has_FP)@ha
        lwz     6,VG_(machine_ppc32_has_FP)@l(6)
        cmplwi  6,0
        beq     LafterFP2

        /* get zero into f3 (tedious) */
        /* note: fsub 3,3,3 is not a reliable way to do this,
           since if f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      6,0
        stw     6,20(1)
        lfs     3,20(1)
        mtfsf   0xFF,3   /* fpscr = f3 */
LafterFP2:

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        lis     6,VG_(machine_ppc32_has_VMX)@ha
        lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
        cmplwi  6,0
        beq     LafterVMX2

#ifdef HAS_ALTIVEC
        vspltisw 3,0x0  /* generate zero */
        mtvscr  3       /* VSCR = 0, so in particular VSCR[NJ] = 0 */
#endif

LafterVMX2:

        /* make a stack frame for the code we are calling */
        /* (popped again at remove_frame, before any save-area access) */
        stwu    1,-16(1)

        /* Set up the guest state ptr */
        mr      31,4      /* r31 (generated code gsp) = r4 */

        /* and jump into the code cache.  Chained translations in
           the code cache run, until for whatever reason, they can't
           continue.  When that happens, the translation in question
           will jump (or call) to one of the continuation points
           VG_(cp_...) below. */
        mtctr   5
        bctr
        /*NOTREACHED*/

/*----------------------------------------------------*/
/*--- Postamble and exit.                          ---*/
/*----------------------------------------------------*/

postamble:
        /* At this point, r6 and r7 contain two
           words to be returned to the caller.  r6
           holds a TRC value, and r7 optionally may
           hold another word (for CHAIN_ME exits, the
           address of the place to patch.) */

        /* sp is still 16 bytes below the 496-byte frame here (the
           extra frame pushed before entering the code cache has not
           been popped yet). */

        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR in ways we don't expect. */
        /* Using r10 - value used again further on, so don't trash! */
        lis     10,VG_(machine_ppc32_has_FP)@ha
        lwz     10,VG_(machine_ppc32_has_FP)@l(10)

        /* Using r11 - value used again further on, so don't trash! */
        lis     11,VG_(machine_ppc32_has_VMX)@ha
        lwz     11,VG_(machine_ppc32_has_VMX)@l(11)

        cmplwi  10,0    /* Do we have FP ? */
        beq     LafterFP8

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm]. */
        /* r6/r7 are live, so r5 is the scratch register; the zero word
           is bounced through a temporary 16-byte stack slot. */
        li      5,0
        addi    1,1,-16
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16
        mtfsf   0xFF,3   /* fpscr = f3 */
LafterFP8:

        cmplwi  11,0    /* Do we have altivec? */
        beq     LafterVMX8

#ifdef HAS_ALTIVEC
        /* Check whether VSCR[NJ] == 1.  VSCR was zeroed in the
           preamble, so finding NJ set means the invariant was broken. */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,invariant_violation  /* branch if all_equal */
#endif

LafterVMX8:
        /* otherwise we're OK */
        b       remove_frame

invariant_violation:
        li      6,VG_TRC_INVARIANT_FAILED
        li      7,0
        /* fall through */

remove_frame:
        /* Pop the 16-byte frame that was pushed for the generated
           code.  This must happen before touching any save area:
           every offset below is relative to the sp established by
           "stwu 1,-496(1)" in the preamble, so reloading with sp
           still 16 bytes lower would fetch each register from the
           wrong slot. */
        addi    1,1,16

        /* Restore FP regs */
        /* r10 already holds VG_(machine_ppc32_has_FP) value */
        cmplwi  10,0
        beq     LafterFP9

        /* Floating-point regs */
        lfd     31,488(1)
        lfd     30,480(1)
        lfd     29,472(1)
        lfd     28,464(1)
        lfd     27,456(1)
        lfd     26,448(1)
        lfd     25,440(1)
        lfd     24,432(1)
        lfd     23,424(1)
        lfd     22,416(1)
        lfd     21,408(1)
        lfd     20,400(1)
        lfd     19,392(1)
        lfd     18,384(1)
        lfd     17,376(1)
        lfd     16,368(1)
        lfd     15,360(1)
        lfd     14,352(1)
LafterFP9:

        /* r11 already holds VG_(machine_ppc32_has_VMX) value */
        cmplwi  11,0
        beq     LafterVMX9

        /* Restore Altivec regs */
        /* r6/r7 hold the return pair, so r4 is the scratch register */
#ifdef HAS_ALTIVEC
        /* VRSAVE */
        lwz     4,244(1)
        mtspr   256,4         /* VRSAVE reg is spr number 256 */

        /* Vector regs */
        li      4,224
        lvx     31,4,1
        li      4,208
        lvx     30,4,1
        li      4,192
        lvx     29,4,1
        li      4,176
        lvx     28,4,1
        li      4,160
        lvx     27,4,1
        li      4,144
        lvx     26,4,1
        li      4,128
        lvx     25,4,1
        li      4,112
        lvx     24,4,1
        li      4,96
        lvx     23,4,1
        li      4,80
        lvx     22,4,1
        li      4,64
        lvx     21,4,1
        li      4,48
        lvx     20,4,1
#endif
LafterVMX9:

        /* restore int regs, including importantly r3 (two_words) */
        lwz     31,348(1)
        lwz     30,344(1)
        lwz     29,340(1)
        lwz     28,336(1)
        lwz     27,332(1)
        lwz     26,328(1)
        lwz     25,324(1)
        lwz     24,320(1)
        lwz     23,316(1)
        lwz     22,312(1)
        lwz     21,308(1)
        lwz     20,304(1)
        lwz     19,300(1)
        lwz     18,296(1)
        lwz     17,292(1)
        lwz     16,288(1)
        lwz     15,284(1)
        lwz     14,280(1)
        lwz     13,276(1)
        lwz     3,272(1)
        /* Stash return values */
        stw     6,0(3)          /* two_words[0] = TRC value */
        stw     7,4(3)          /* two_words[1] = optional second word */

        /* restore cr (saved at 44(sp) in the preamble) */
        lwz     0,44(1)
        mtcr    0

        /* restore lr & sp, and leave */
        lwz     0,500(1)  /* stack_size + 4 */
        mtlr    0
        addi    1,1,496   /* stack_size */
        blr


/*----------------------------------------------------*/
/*--- Continuation points                          ---*/
/*----------------------------------------------------*/

/* ------ Chain me to slow entry point ------ */
.global VG_(disp_cp_chain_me_to_slowEP)
VG_(disp_cp_chain_me_to_slowEP):
        /* We got called.  The return address indicates
           where the patching needs to happen.  Collect
           the return address and exit back to C land,
           handing the caller the pair (Chain_me_S, RA) */
        li   6, VG_TRC_CHAIN_ME_TO_SLOW_EP
        mflr 7
        /* Step r7 back over the 16-byte call sequence so that it
           points at the start of the place to patch:
           8 = imm32-fixed2 r30, disp_cp_chain_me_to_slowEP
           4 = mtctr r30
           4 = bctrl
        */
        subi 7,7,8+4+4
        b    postamble

/* ------ Chain me to fast entry point ------ */
.global VG_(disp_cp_chain_me_to_fastEP)
VG_(disp_cp_chain_me_to_fastEP):
        /* We got called.  The return address indicates
           where the patching needs to happen.  Collect
           the return address and exit back to C land,
           handing the caller the pair (Chain_me_F, RA) */
        li   6, VG_TRC_CHAIN_ME_TO_FAST_EP
        mflr 7
        /* Step r7 back over the 16-byte call sequence so that it
           points at the start of the place to patch:
           8 = imm32-fixed2 r30, disp_cp_chain_me_to_fastEP
           4 = mtctr r30
           4 = bctrl
        */
        subi 7,7,8+4+4
        b    postamble

/* ------ Indirect but boring jump ------ */
.global VG_(disp_cp_xindir)
VG_(disp_cp_xindir):
        /* Where are we going? */
        lwz     3,OFFSET_ppc32_CIA(31)   /* r3 = guest CIA (r31 = gsp) */

        /* stats only */
        lis     5,VG_(stats__n_xindirs_32)@ha
        addi    5,5,VG_(stats__n_xindirs_32)@l
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        /* r5 = &VG_(tt_fast) */
        lis     5,VG_(tt_fast)@ha
        addi    5,5,VG_(tt_fast)@l   /* & VG_(tt_fast) */

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
        rlwinm  4,3,1, 29-VG_TT_FAST_BITS, 28   /* entry# * 8 */
        add     5,5,4   /* & VG_(tt_fast)[entry#] */
        lwz     6,0(5)   /* .guest */
        lwz     7,4(5)   /* .host */
        cmpw    3,6
        bne     fast_lookup_failed

        /* Found a match.  Jump to .host. */
        mtctr   7
        bctr

fast_lookup_failed:
        /* stats only */
        lis     5,VG_(stats__n_xindir_misses_32)@ha
        addi    5,5,VG_(stats__n_xindir_misses_32)@l
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        li      6,VG_TRC_INNER_FASTMISS
        li      7,0
        b       postamble
        /*NOTREACHED*/

/* ------ Assisted jump ------ */
.global VG_(disp_cp_xassisted)
VG_(disp_cp_xassisted):
        /* r31 contains the TRC */
        mr      6,31
        li      7,0
        b       postamble

/* ------ Event check failed ------ */
.global VG_(disp_cp_evcheck_fail)
VG_(disp_cp_evcheck_fail):
        li      6,VG_TRC_INNER_COUNTERZERO
        li      7,0
        b       postamble


.size VG_(disp_run_translations), .-VG_(disp_run_translations)

/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits

#endif // defined(VGP_ppc32_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/