• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc32-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2005-2011 Cerion Armour-Brown <cerion@open-works.co.uk>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/
30
31#if defined(VGP_ppc32_linux)
32
33#include "config.h"
34#include "pub_core_basics_asm.h"
35#include "pub_core_dispatch_asm.h"
36#include "pub_core_transtab_asm.h"
37#include "libvex_guest_offsets.h"	/* for OFFSET_ppc32_CIA */
38
39
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/

/* signature:
UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );

   On entry:  r3 = guest_state, r4 = do_profiling.

   Frame layout after the 'stwu 1,-496(1)' below (offsets from sp):
      352..495  f14..f31  (written only if VG_(machine_ppc32_has_FP))
      276..351  r13..r31
      244       VRSAVE    (HAS_ALTIVEC builds, only if
                           VG_(machine_ppc32_has_VMX))
      240       alignment padding
       48..239  v20..v31  (same conditions as VRSAVE)
       44       CR
       32       reserved: used later to check FPSCR[RM]
       28       original guest_state pointer
       24       reserved: used later to stop ctr being clobbered
       20       scratch word used to load zero into the FPSCR
        8..16   free
        4       LR         (linkage area, reserved)
        0       back-chain

   A further 16-byte frame is pushed just before control passes to
   the dispatch loop, so from there on each of the offsets above is
   16 bytes larger when addressed through r1.
*/
.text
.globl  VG_(run_innerloop)
.type  VG_(run_innerloop), @function
VG_(run_innerloop):
        /* r3 holds guest_state */
        /* r4 holds do_profiling */

        /* ----- entry point to VG_(run_innerloop) ----- */
        /* For Linux/ppc32 we need the SysV ABI, which uses
           LR->4(parent_sp), CR->anywhere.
           (The AIX ABI, used on Darwin,
           uses LR->8(prt_sp), CR->4(prt_sp))
        */

        /* Save lr in the caller's frame */
        mflr    0
        stw     0,4(1)

        /* New stack frame */
        stwu    1,-496(1)  /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */
        /* r3, r4 are live here, so use r5 */
        lis     5,VG_(machine_ppc32_has_FP)@ha
        lwz     5,VG_(machine_ppc32_has_FP)@l(5)
        cmplwi  5,0
        beq     LafterFP1

        /* Floating-point reg save area : 144 bytes */
        stfd    31,488(1)
        stfd    30,480(1)
        stfd    29,472(1)
        stfd    28,464(1)
        stfd    27,456(1)
        stfd    26,448(1)
        stfd    25,440(1)
        stfd    24,432(1)
        stfd    23,424(1)
        stfd    22,416(1)
        stfd    21,408(1)
        stfd    20,400(1)
        stfd    19,392(1)
        stfd    18,384(1)
        stfd    17,376(1)
        stfd    16,368(1)
        stfd    15,360(1)
        stfd    14,352(1)
LafterFP1:

        /* General reg save area : 72 bytes */
        stw     31,348(1)
        stw     30,344(1)
        stw     29,340(1)
        stw     28,336(1)
        stw     27,332(1)
        stw     26,328(1)
        stw     25,324(1)
        stw     24,320(1)
        stw     23,316(1)
        stw     22,312(1)
        stw     21,308(1)
        stw     20,304(1)
        stw     19,300(1)
        stw     18,296(1)
        stw     17,292(1)
        stw     16,288(1)
        stw     15,284(1)
        stw     14,280(1)
        /* Probably not necessary to save r13 (thread-specific ptr),
           as VEX stays clear of it... but what the hey. */
        stw     13,276(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
        /* r3, r4 are live here, so use r5 */
        lis     5,VG_(machine_ppc32_has_VMX)@ha
        lwz     5,VG_(machine_ppc32_has_VMX)@l(5)
        cmplwi  5,0
        beq     LafterVMX1

#ifdef HAS_ALTIVEC
        /* VRSAVE save word : 32 bits */
        mfspr   5,256         /* vrsave reg is spr number 256 */
        stw     5,244(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes.
           stvx has no displacement form, so r5 carries the offset. */
        li      5,224
        stvx    31,5,1
        li      5,208
        stvx    30,5,1
        li      5,192
        stvx    29,5,1
        li      5,176
        stvx    28,5,1
        li      5,160
        stvx    27,5,1
        li      5,144
        stvx    26,5,1
        li      5,128
        stvx    25,5,1
        li      5,112
        stvx    24,5,1        /* v24 goes in the slot the exit path
                                 reloads v24 from (offset 112) */
        li      5,96
        stvx    23,5,1
        li      5,80
        stvx    22,5,1
        li      5,64
        stvx    21,5,1
        li      5,48
        stvx    20,5,1
#endif

LafterVMX1:

        /* Save cr */
        mfcr    0
        stw     0,44(1)

        /* Local variable space... */

        /* 32(sp) used later to check FPSCR[RM] */

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        mr      31,3      /* r31 (generated code gsp) = r3 */
        stw     3,28(1)   /* spill orig guest_state ptr */

        /* 24(sp) used later to stop ctr reg being clobbered */
        /* 20(sp) used later to load fpscr with zero */
        /* 8:16(sp) free */

        /* Linkage Area (reserved)
           4(sp)  : LR
           0(sp)  : back-chain
        */

        /* CAB TODO: Use a caller-saved reg for orig guest_state ptr
           - rem to set non-allocateable in isel.c */

        /* hold dispatch_ctr in r29 */
        lis     5,VG_(dispatch_ctr)@ha
        lwz     29,VG_(dispatch_ctr)@l(5)

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        lis     5,VG_(machine_ppc32_has_FP)@ha
        lwz     5,VG_(machine_ppc32_has_FP)@l(5)
        cmplwi  5,0
        beq     LafterFP2

        /* get zero into f3 (tedious) */
        /* note: fsub 3,3,3 is not a reliable way to do this,
           since if f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      5,0
        stw     5,20(1)
        lfs     3,20(1)
        mtfsf   0xFF,3   /* fpscr = f3 */
LafterFP2:

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        lis     5,VG_(machine_ppc32_has_VMX)@ha
        lwz     5,VG_(machine_ppc32_has_VMX)@l(5)
        cmplwi  5,0
        beq     LafterVMX2

#ifdef HAS_ALTIVEC
        vspltisw 3,0x0  /* generate zero */
        mtvscr  3
#endif

LafterVMX2:

        /* make a stack frame for the code we are calling */
        stwu    1,-16(1)

        /* fetch %CIA into r3 */
        lwz     3,OFFSET_ppc32_CIA(31)

        /* fall into main loop (the right one) */
        /* r4 = do_profiling.  It's probably trashed after here,
           but that's OK: we don't need it after here. */
        cmplwi  4,0
        beq     VG_(run_innerloop__dispatch_unprofiled)
        b       VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/
245
/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

.globl  VG_(run_innerloop__dispatch_unprofiled)
VG_(run_innerloop__dispatch_unprofiled):
        /* Registers live at the top of every iteration:
              r1  = sp
              r3  = CIA, i.e. the guest address to run next
              r29 = dispatch_ctr
              r31 = guest_state
        */

        /* Form &VG_(tt_fast) in r5 up front (an attempt at better
           scheduling), then test the low bit of r31: if it is set,
           the guest state pointer has been messed with, so exit. */
        lis     5,VG_(tt_fast)@ha
        addi    5,5,VG_(tt_fast)@l      /* r5 = &VG_(tt_fast) */
        andi.   0,31,1
        bne     gsp_changed

        /* Record the jump target as the guest's CIA. */
        stw     3,OFFSET_ppc32_CIA(31)

        /* Use up one unit of timeslice; if that leaves zero, defer
           to the scheduler. */
        addi    29,29,-1
        cmplwi  29,0
        beq     counter_is_zero

        /* Fast translation-cache probe.
           r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
        rlwinm  4,3,1, 29-VG_TT_FAST_BITS, 28   /* r4 = entry# * 8 */
        add     5,5,4                   /* r5 = &VG_(tt_fast)[entry#] */
        lwz     6,0(5)                  /* r6 = entry's .guest */
        lwz     7,4(5)                  /* r7 = entry's .host */
        cmpw    3,6
        bne     fast_lookup_failed

        /* Hit: call the translation at .host. */
        mtctr   7
        bctrl

        /* Back from generated code:
              r3  = destination (original) address
              r31 = guest_state if unchanged; otherwise it may indicate
                    further details of the control transfer requested
                    to *r3
        */
        b       VG_(run_innerloop__dispatch_unprofiled)  /* go round again */
        /*NOTREACHED*/
296
/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

.globl  VG_(run_innerloop__dispatch_profiled)
VG_(run_innerloop__dispatch_profiled):
        /* Registers live at the top of every iteration:
              r1  = sp
              r3  = CIA, i.e. the guest address to run next
              r29 = dispatch_ctr
              r31 = guest_state
        */

        /* Form &VG_(tt_fast) in r5 up front (an attempt at better
           scheduling), then test the low bit of r31: if it is set,
           the guest state pointer has been messed with, so exit. */
        lis     5,VG_(tt_fast)@ha
        addi    5,5,VG_(tt_fast)@l      /* r5 = &VG_(tt_fast) */
        andi.   0,31,1
        bne     gsp_changed

        /* Record the jump target as the guest's CIA. */
        stw     3,OFFSET_ppc32_CIA(31)

        /* Use up one unit of timeslice; if that leaves zero, defer
           to the scheduler. */
        addi    29,29,-1
        cmplwi  29,0
        beq     counter_is_zero

        /* Fast translation-cache probe.
           r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
        rlwinm  4,3,1, 29-VG_TT_FAST_BITS, 28   /* r4 = entry# * 8 */
        add     5,5,4                   /* r5 = &VG_(tt_fast)[entry#] */
        lwz     6,0(5)                  /* r6 = entry's .guest */
        lwz     7,4(5)                  /* r7 = entry's .host */
        cmpw    3,6
        bne     fast_lookup_failed

        /* Bump the bb profile counter for this cache slot: load the
           pointer held in VG_(tt_fastN)[entry#] and increment the
           word it points at. */
        srwi    4,4,1                   /* r4 = entry# * sizeof(UInt*) */
        addis   6,4,VG_(tt_fastN)@ha
        lwz     9,VG_(tt_fastN)@l(6)    /* r9 = VG_(tt_fastN)[entry#] */
        lwz     8,0(9)
        addi    8,8,1
        stw     8,0(9)

        /* Hit: call the translation at .host. */
        mtctr   7
        bctrl

        /* Back from generated code:
              r3  = destination (original) address
              r31 = guest_state if unchanged; otherwise it may indicate
                    further details of the control transfer requested
                    to *r3
        */
        b       VG_(run_innerloop__dispatch_profiled)    /* go round again */
        /*NOTREACHED*/
355
/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

gsp_changed:
        /* The guest state pointer in r31 was altered, which the
           scheduler has to sort out.  The dispatchers branch here
           before decrementing dispatch_ctr, so there is nothing to
           back out. */
        /* %CIA has NOT been written yet.  r3 must go to %CIA, but r31
           is the value being returned to the scheduler and must not
           be trashed, so the original guest state pointer is reloaded
           into r5 from its spill slot (28 in the main frame, hence 44
           from here because of the extra 16-byte frame). */
        lwz     5,44(1)                 /* r5 = original guest_state ptr */
        stw     3,OFFSET_ppc32_CIA(5)
        mr      3,31                    /* return value = new gsp value */
        b       run_innerloop_exit
        /*NOTREACHED*/

counter_is_zero:
        /* Timeslice exhausted.  %CIA is already up to date; undo the
           decrement of the dispatch counter before leaving. */
        addi    29,29,1
        li      3,VG_TRC_INNER_COUNTERZERO
        b       run_innerloop_exit

fast_lookup_failed:
        /* No entry for r3 in the fast cache.  %CIA is already up to
           date; undo the decrement of the dispatch counter before
           leaving. */
        addi    29,29,1
        li      3,VG_TRC_INNER_FASTMISS
        b       run_innerloop_exit
387
388
389
/* All exits from the dispatcher go through here.
   r3 holds the return value.

   On arrival r1 still points at the extra 16-byte frame pushed just
   before the dispatch loop was entered, and r29 holds the current
   dispatch counter.  This code resets FPSCR, checks VSCR, writes r29
   back to VG_(dispatch_ctr), restores every register saved by the
   preamble, pops the 496-byte frame and returns to the caller of
   VG_(run_innerloop).
*/
run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR. */

        /* Using r10 - value used again further on, so don't trash! */
        lis     10,VG_(machine_ppc32_has_FP)@ha
        lwz     10,VG_(machine_ppc32_has_FP)@l(10)
        cmplwi  10,0
        beq     LafterFP8

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16
        mtfsf   0xFF,3   /* fpscr = f3 */
LafterFP8:

        /* Using r11 - value used again further on, so don't trash! */
        lis     11,VG_(machine_ppc32_has_VMX)@ha
        lwz     11,VG_(machine_ppc32_has_VMX)@l(11)
        cmplwi  11,0
        beq     LafterVMX8

#ifdef HAS_ALTIVEC
        /* Invariant is violated if VSCR[NJ] == 1 */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,invariant_violation  /* branch if all_equal */
#endif
LafterVMX8:

        /* otherwise we're OK */
        b       run_innerloop_exit_REALLY


invariant_violation:
        li      3,VG_TRC_INVARIANT_FAILED
        b       run_innerloop_exit_REALLY

run_innerloop_exit_REALLY:
        /* r3 holds VG_TRC_* value to return */

        /* Return to parent stack */
        addi    1,1,16

        /* Write ctr to VG(dispatch_ctr) */
        lis     5,VG_(dispatch_ctr)@ha
        stw     29,VG_(dispatch_ctr)@l(5)

        /* Restore cr */
        lwz     0,44(1)
        mtcr    0

        /* Restore callee-saved registers... */

        /* r10 already holds VG_(machine_ppc32_has_FP) value */
        cmplwi  10,0
        beq     LafterFP9

        /* Floating-point regs */
        lfd     31,488(1)
        lfd     30,480(1)
        lfd     29,472(1)
        lfd     28,464(1)
        lfd     27,456(1)
        lfd     26,448(1)
        lfd     25,440(1)
        lfd     24,432(1)
        lfd     23,424(1)
        lfd     22,416(1)
        lfd     21,408(1)
        lfd     20,400(1)
        lfd     19,392(1)
        lfd     18,384(1)
        lfd     17,376(1)
        lfd     16,368(1)
        lfd     15,360(1)
        lfd     14,352(1)
LafterFP9:

        /* General regs */
        lwz     31,348(1)
        lwz     30,344(1)
        lwz     29,340(1)
        lwz     28,336(1)
        lwz     27,332(1)
        lwz     26,328(1)
        lwz     25,324(1)
        lwz     24,320(1)
        lwz     23,316(1)
        lwz     22,312(1)
        lwz     21,308(1)
        lwz     20,304(1)
        lwz     19,300(1)
        lwz     18,296(1)
        lwz     17,292(1)
        lwz     16,288(1)
        lwz     15,284(1)
        lwz     14,280(1)
        lwz     13,276(1)

        /* r11 already holds VG_(machine_ppc32_has_VMX) value */
        cmplwi  11,0
        beq     LafterVMX9

#ifdef HAS_ALTIVEC
        /* VRSAVE: reload the word saved by the preamble and write it
           back to the SPR.  This must be mtspr; an mfspr here would
           merely overwrite r4 and leave VRSAVE unrestored. */
        lwz     4,244(1)
        mtspr   256,4         /* VRSAVE reg is spr number 256 */

        /* Vector regs (lvx has no displacement form, so r4 carries
           the offset) */
        li      4,224
        lvx     31,4,1
        li      4,208
        lvx     30,4,1
        li      4,192
        lvx     29,4,1
        li      4,176
        lvx     28,4,1
        li      4,160
        lvx     27,4,1
        li      4,144
        lvx     26,4,1
        li      4,128
        lvx     25,4,1
        li      4,112
        lvx     24,4,1
        li      4,96
        lvx     23,4,1
        li      4,80
        lvx     22,4,1
        li      4,64
        lvx     21,4,1
        li      4,48
        lvx     20,4,1
#endif
LafterVMX9:

        /* reset lr & sp */
        lwz     0,500(1)  /* stack_size + 4 */
        mtlr    0
        addi    1,1,496   /* stack_size */
        blr
.size VG_(run_innerloop), .-VG_(run_innerloop)
547
548
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Runs one no-redir translation.  r3 = argblock on entry; argblock
   is an array of 4 UWords, the first two read here and the last two
   written here:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)

   Frame (256 bytes, offsets from sp):
      128..196  r14..r31
      200       LR
      204       argblock pointer
*/
.globl VG_(run_a_noredir_translation)
.type VG_(run_a_noredir_translation), @function
VG_(run_a_noredir_translation):
        /* Push a frame and save the callee-saved integer registers. */
        stwu 1,-256(1)
        stw  14,128(1)
        stw  15,132(1)
        stw  16,136(1)
        stw  17,140(1)
        stw  18,144(1)
        stw  19,148(1)
        stw  20,152(1)
        stw  21,156(1)
        stw  22,160(1)
        stw  23,164(1)
        stw  24,168(1)
        stw  25,172(1)
        stw  26,176(1)
        stw  27,180(1)
        stw  28,184(1)
        stw  29,188(1)
        stw  30,192(1)
        stw  31,196(1)
        /* r31 is saved now, so borrow it to spill lr. */
        mflr 31
        stw  31,200(1)

        /* Keep argblock for later, then call the translation via lr
           with r31 = argblock[1] (guest state pointer). */
        stw  3,204(1)
        lwz  30,0(3)            /* argblock[0]: translation address */
        mtlr 30
        lwz  31,4(3)            /* argblock[1]: guest state pointer */
        blrl

        /* Hand the results back through argblock. */
        lwz  4,204(1)           /* r4 = argblock */
        stw  3,  8(4)           /* argblock[2] = r3  (next guest PC) */
        stw  31,12(4)           /* argblock[3] = r31 (guest state ptr) */

        /* Restore the integer registers; r31 goes last because it
           first carries the saved lr. */
        lwz  14,128(1)
        lwz  15,132(1)
        lwz  16,136(1)
        lwz  17,140(1)
        lwz  18,144(1)
        lwz  19,148(1)
        lwz  20,152(1)
        lwz  21,156(1)
        lwz  22,160(1)
        lwz  23,164(1)
        lwz  24,168(1)
        lwz  25,172(1)
        lwz  26,176(1)
        lwz  27,180(1)
        lwz  28,184(1)
        lwz  29,188(1)
        lwz  30,192(1)
        lwz  31,200(1)
        mtlr 31
        lwz  31,196(1)
        addi 1,1,256
        blr
.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)


/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
630
631#endif // defined(VGP_ppc32_linux)
632
/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
636