/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                        dispatch-ppc64-aix5.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2006-2010 OpenWorks LLP
     info@open-works.co.uk

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.

  Neither the names of the U.S. Department of Energy nor the
  University of California nor the names of its contributors may be
  used to endorse or promote products derived from this software
  without prior written permission.
*/

#if defined(VGP_ppc64_aix5)

#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Incomprehensible TOC mumbo-jumbo nonsense.   ---*/
/*----------------------------------------------------*/

/* No, I don't have a clue either.  I just compiled a bit of
   C with gcc and copied the assembly code it produced. */

/* Basically "ld rd, tocent__foo(2)" gets &foo into rd. */

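/* (Added note, a hedged illustration only.)  The .tc entries below give
   each global a TOC slot holding its address; r2 is the TOC pointer, so
   "ld rd, tocent__foo(2)" recovers &foo.  In C terms it corresponds to
   something like:

       extern unsigned int vgPlain_dispatch_ctr;
       unsigned int* get_ctr_addr ( void )
       {
          return &vgPlain_dispatch_ctr;   // compiles to one TOC load
       }
*/
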
    .file       "dispatch-ppc64-aix5.S"
    .machine	"ppc64"
    .toc
    .csect .text[PR]
    .toc
tocent__vgPlain_dispatch_ctr:
    .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr[RW]
tocent__vgPlain_machine_ppc64_has_VMX:
    .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX[RW]
tocent__vgPlain_tt_fast:
    .tc vgPlain_tt_fast[TC],vgPlain_tt_fast[RW]
tocent__vgPlain_tt_fastN:
    .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN[RW]
    .csect .text[PR]
    .align 2
    .globl vgPlain_run_innerloop
    .globl .vgPlain_run_innerloop
    .csect vgPlain_run_innerloop[DS]
vgPlain_run_innerloop:
    .llong .vgPlain_run_innerloop, TOC[tc0], 0
    .csect .text[PR]

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/

/* signature:
UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
*/
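/* (Added note, a hedged sketch; the caller-side code is illustrative
   and not taken from this file.)  Roughly, from C:

       UWord run_once ( void* guest_state )
       {
          // 2nd arg is do_profiling; the return value is a VG_TRC_*
          // code from the exit stubs below, or the modified guest-state
          // pointer if the translation changed it.
          return VG_(run_innerloop)( guest_state, 0 );
       }
*/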
.vgPlain_run_innerloop:

	/* r3 holds guest_state */
	/* r4 holds do_profiling */
	/* Rather than attempt to make sense of the AIX ABI, just
           drop r1 by 512 (to get away from the caller's frame), then
	   1024 (to give ourselves a 1024-byte save area), and then
	   another 512 (to clear our save area).  In all, drop r1 by 2048
	   and dump stuff on the stack at 512(1)..1536(1).  */

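        /* (Added summary; offsets are relative to the new r1 set up by
           the stdu below, and are read off the stores that follow.)
                0(r1) ..  255(r1)   scratch (128(r1) is used below)
              256(r1)               stashed original guest_state ptr
              264(r1) ..  511(r1)   more scratch
              512(r1) ..  655(r1)   f14-f31 save area (144 bytes)
              656(r1) ..  807(r1)   r13-r31 save area (152 bytes)
             2056(r1), 2064(r1)     caller-frame slots for CR and LR   */
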
        /* ----- entry point to VG_(run_innerloop) ----- */
        /* For AIX/ppc64 we do:  LR-> +16(parent_sp), CR-> +8(parent_sp) */

        /* Save lr and cr */
        mflr    0
        std     0,16(1)
	mfcr	0
	std	0,8(1)

        /* New stack frame */
        stdu    1,-2048(1)  /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */
	/* r3, r4 are live here, so use r5 */

        /* Floating-point reg save area : 144 bytes at r1[256+256..256+399] */
        stfd    31,256+392(1)
        stfd    30,256+384(1)
        stfd    29,256+376(1)
        stfd    28,256+368(1)
        stfd    27,256+360(1)
        stfd    26,256+352(1)
        stfd    25,256+344(1)
        stfd    24,256+336(1)
        stfd    23,256+328(1)
        stfd    22,256+320(1)
        stfd    21,256+312(1)
        stfd    20,256+304(1)
        stfd    19,256+296(1)
        stfd    18,256+288(1)
        stfd    17,256+280(1)
        stfd    16,256+272(1)
        stfd    15,256+264(1)
        stfd    14,256+256(1)

        /* General reg save area : 152 bytes at r1[256+400 .. 256+551] */
        std     31,256+544(1)
        std     30,256+536(1)
        std     29,256+528(1)
        std     28,256+520(1)
        std     27,256+512(1)
        std     26,256+504(1)
        std     25,256+496(1)
        std     24,256+488(1)
        std     23,256+480(1)
        std     22,256+472(1)
        std     21,256+464(1)
        std     20,256+456(1)
        std     19,256+448(1)
        std     18,256+440(1)
        std     17,256+432(1)
        std     16,256+424(1)
        std     15,256+416(1)
        std     14,256+408(1)
        /* Probably not necessary to save r13 (thread-specific ptr),
           as VEX stays clear of it... but what the hell. */
        std     13,256+400(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
	/* r3, r4 are live here, so use r5 */
        ld      5,tocent__vgPlain_machine_ppc64_has_VMX(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     LafterVMX1

//	Sigh.  AIX 5.2 has no idea that Altivec exists.
//        /* VRSAVE save word : 4 bytes at r1[476 .. 479] */
//        mfspr   5,256        /* vrsave reg is spr number 256 */
//        stw     5,476(1)
//
//        /* Vector reg save area (quadword aligned):
//	   192 bytes at r1[480 .. 671] */
//        li      5,656
//        stvx    31,5,1
//        li      5,640
//        stvx    30,5,1
//        li      5,624
//        stvx    29,5,1
//        li      5,608
//        stvx    28,5,1
//        li      5,592
//        stvx    27,5,1
//        li      5,576
//        stvx    26,5,1
//        li      5,560
//        stvx    25,5,1
//        li      5,544
//        stvx    24,5,1
//        li      5,528
//        stvx    23,5,1
//        li      5,512
//        stvx    22,5,1
//        li      5,496
//        stvx    21,5,1
//        li      5,480
//        stvx    20,5,1
LafterVMX1:

        /* Local variable space... */
	/* Put the original guest state pointer at r1[256].  We
           will need to refer to it each time round the dispatch loop.
	   Apart from that, we can use r1[0 .. 255] and r1[264 .. 511]
	   as scratch space. */

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        mr      31,3      /* r31 (generated code gsp) = r3 */
        std     3,256(1)  /* stash orig guest_state ptr */

        /* hold dispatch_ctr (NOTE: 32-bit value) in r29 */
        ld      5,tocent__vgPlain_dispatch_ctr(2)
        lwz     29,0(5)	/* 32-bit zero-extending load */

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        /* get zero into f3 (tedious) */
        /* note: fsub 3,3,3 is not a reliable way to do this,
           since if f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      5,0
        std     5,128(1) /* r1[128] is scratch */
        lfd     3,128(1)
        mtfsf   0xFF,3   /* fpscr = f3 */

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        ld      5,tocent__vgPlain_machine_ppc64_has_VMX(2)
        ld      5,0(5)
        cmpldi  5,0
        beq     LafterVMX2

//	Sigh.  AIX 5.2 has no idea that Altivec exists.
//        vspltisw 3,0x0  /* generate zero */
//        mtvscr  3
LafterVMX2:

        /* fetch %CIA into r3 */
        ld     3,OFFSET_ppc64_CIA(31)

        /* fall into main loop (the right one) */
	/* r4 = do_profiling.  It's probably trashed after here,
           but that's OK: we don't need it any more. */
	cmpldi	4,0
	beq	VG_(run_innerloop__dispatch_unprofiled)
	b	VG_(run_innerloop__dispatch_profiled)
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

.globl VG_(run_innerloop__dispatch_unprofiled)
VG_(run_innerloop__dispatch_unprofiled):
	/* At entry: Live regs:
		r1  (=sp)
		r3  (=CIA = next guest address)
		r29 (=dispatch_ctr)
		r31 (=guest_state)
	   Stack state:
		256(r1) (=orig guest_state)
	*/

	/* Has the guest state pointer been messed with?  If yes, exit. */
        ld      5,256(1)         /* original guest_state ptr */
        cmpd    5,31
	ld	5,tocent__vgPlain_tt_fast(2)	/* &VG_(tt_fast) */
        bne	gsp_changed

        /* save the jump address in the guest state */
        std     3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
	addi	29,29,-1
	cmplwi	29,0	/* yes, lwi - is 32-bit */
        beq	counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
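        /* (Added note, a hedged C-level sketch of the next few insns,
           assuming VG_TT_FAST_MASK == (1 << VG_TT_FAST_BITS) - 1 and a
           16-byte entry laid out as { guest at +0, host at +8 }:

               UWord e = (addr >> 2) & VG_TT_FAST_MASK;   // rldicl
               FastCacheEntry* ent = &VG_(tt_fast)[e];    // sldi/add
               if (ent->guest != addr)
                  goto fast_lookup_failed;
               ((void(*)(void))ent->host)();              // mtctr/bctrl
        */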
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4    /* entry# * sizeof(FastCacheEntry) */
	add     5,5,4	/* &VG_(tt_fast)[entry#] */
	ld      6,0(5)  /* .guest */
	ld      7,8(5)  /* .host */
        cmpd    3,6
        bne     fast_lookup_failed

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
	   r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
	/* start over */
	b	VG_(run_innerloop__dispatch_unprofiled)
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

.globl VG_(run_innerloop__dispatch_profiled)
VG_(run_innerloop__dispatch_profiled):
	/* At entry: Live regs:
		r1  (=sp)
		r3  (=CIA = next guest address)
		r29 (=dispatch_ctr)
		r31 (=guest_state)
	   Stack state:
		256(r1) (=orig guest_state)
	*/

	/* Has the guest state pointer been messed with?  If yes, exit. */
        ld      5,256(1)         /* original guest_state ptr */
        cmpd    5,31
	ld	5,tocent__vgPlain_tt_fast(2)	/* &VG_(tt_fast) */
        bne	gsp_changed

        /* save the jump address in the guest state */
        std     3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
	addi	29,29,-1
	cmplwi	29,0	/* yes, lwi - is 32-bit */
        beq	counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4    /* entry# * sizeof(FastCacheEntry) */
	add     5,5,4	/* &VG_(tt_fast)[entry#] */
	ld      6,0(5)  /* .guest */
	ld      7,8(5)  /* .host */
        cmpd    3,6
        bne     fast_lookup_failed

        /* increment bb profile counter */
	ld      9,tocent__vgPlain_tt_fastN(2)   /* r9 = &tt_fastN */
	srdi	4,4,1   /* entry# * sizeof(UInt*) */
        ldx     8,9,4   /* r8 = tt_fastN[entry#] */
        lwz     10,0(8)
        addi    10,10,1
        stw     10,0(8)

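        /* (Added note, a hedged sketch of the increment just done: r4
           held entry# * 16, and halving it gives entry# * 8, a byte
           offset into an array of UInt pointers.  Roughly:

               UInt* counter = VG_(tt_fastN)[e];   // ldx
               *counter += 1;                      // lwz/addi/stw
        */
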
        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
	   r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
	/* start over */
	b	VG_(run_innerloop__dispatch_profiled)
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

gsp_changed:
	/* Someone messed with the gsp (in r31).  Have to
           defer to scheduler to resolve this.  dispatch ctr
	   is not yet decremented, so no need to increment. */
	/* %CIA is NOT up to date here.  First, need to write
	   %r3 back to %CIA, but without trashing %r31 since
	   that holds the value we want to return to the scheduler.
	   Hence use %r5 transiently for the guest state pointer. */
        ld      5,256(1)        /* original guest_state ptr */
        std     3,OFFSET_ppc64_CIA(5)
	mr	3,31		/* r3 = new gsp value */
	b	run_innerloop_exit
	/*NOTREACHED*/

counter_is_zero:
	/* %CIA is up to date */
	/* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_COUNTERZERO
        b       run_innerloop_exit

fast_lookup_failed:
	/* %CIA is up to date */
	/* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_FASTMISS
	b       run_innerloop_exit


/* All exits from the dispatcher go through here.
   r3 holds the return value.
*/
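/* (Added note.)  Concretely, r3 carries either one of the VG_TRC_*
   codes loaded at the stubs above and below (VG_TRC_INNER_COUNTERZERO,
   VG_TRC_INNER_FASTMISS, VG_TRC_INVARIANT_FAILED), or, via gsp_changed,
   the new guest-state pointer that the translation left in r31. */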
run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR. */

	/* Set fpscr back to a known state, since vex-generated code
	   may have messed with fpscr[rm]. */
        li      5,0
        std     5,128(1) /* r1[128] is scratch */
        lfd     3,128(1)
        mtfsf   0xFF,3   /* fpscr = f3 */

	/* Using r11 - value used again further on, so don't trash! */
        ld      11,tocent__vgPlain_machine_ppc64_has_VMX(2)
        ld      11,0(11)
        cmpldi  11,0
        beq     LafterVMX8

//	Sigh.  AIX 5.2 has no idea that Altivec exists.
//        /* Check VSCR[NJ] == 1 */
//        /* first generate 4x 0x00010000 */
//        vspltisw  4,0x1                   /* 4x 0x00000001 */
//        vspltisw  5,0x0                   /* zero */
//        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
//        /* retrieve VSCR and mask wanted bits */
//        mfvscr    7
//        vand      7,7,6                   /* gives NJ flag */
//        vspltw    7,7,0x3                 /* flags-word to all lanes */
//        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
//        bt        24,invariant_violation  /* branch if all_equal */
LafterVMX8:

	/* otherwise we're OK */
        b       run_innerloop_exit_REALLY


invariant_violation:
        li      3,VG_TRC_INVARIANT_FAILED
        b       run_innerloop_exit_REALLY

run_innerloop_exit_REALLY:
        /* r3 holds VG_TRC_* value to return */

        /* Write ctr to VG_(dispatch_ctr) */
        ld      5,tocent__vgPlain_dispatch_ctr(2)
        stw     29,0(5)	 /* yes, really stw */

        /* Restore callee-saved registers... */

        /* Floating-point regs */
        lfd     31,256+392(1)
        lfd     30,256+384(1)
        lfd     29,256+376(1)
        lfd     28,256+368(1)
        lfd     27,256+360(1)
        lfd     26,256+352(1)
        lfd     25,256+344(1)
        lfd     24,256+336(1)
        lfd     23,256+328(1)
        lfd     22,256+320(1)
        lfd     21,256+312(1)
        lfd     20,256+304(1)
        lfd     19,256+296(1)
        lfd     18,256+288(1)
        lfd     17,256+280(1)
        lfd     16,256+272(1)
        lfd     15,256+264(1)
        lfd     14,256+256(1)

        /* General regs */
        ld      31,256+544(1)
        ld      30,256+536(1)
        ld      29,256+528(1)
        ld      28,256+520(1)
        ld      27,256+512(1)
        ld      26,256+504(1)
        ld      25,256+496(1)
        ld      24,256+488(1)
        ld      23,256+480(1)
        ld      22,256+472(1)
        ld      21,256+464(1)
        ld      20,256+456(1)
        ld      19,256+448(1)
        ld      18,256+440(1)
        ld      17,256+432(1)
        ld      16,256+424(1)
        ld      15,256+416(1)
        ld      14,256+408(1)
        ld      13,256+400(1)

        /* r11 already holds VG_(machine_ppc64_has_VMX) value */
        cmpldi  11,0
        beq     LafterVMX9

//       Sigh.  AIX 5.2 has no idea that Altivec exists.
//        /* VRSAVE */
//        lwz     4,476(1)
//        mtspr   256,4         /* VRSAVE reg is spr number 256 */
//
//        /* Vector regs */
//        li      4,656
//        lvx     31,4,1
//        li      4,640
//        lvx     30,4,1
//        li      4,624
//        lvx     29,4,1
//        li      4,608
//        lvx     28,4,1
//        li      4,592
//        lvx     27,4,1
//        li      4,576
//        lvx     26,4,1
//        li      4,560
//        lvx     25,4,1
//        li      4,544
//        lvx     24,4,1
//        li      4,528
//        lvx     23,4,1
//        li      4,512
//        lvx     22,4,1
//        li      4,496
//        lvx     21,4,1
//        li      4,480
//        lvx     20,4,1
LafterVMX9:

	/* r3 is live here; don't trash it */
        /* restore lr,cr,sp */
	addi	4,1,2048 /* r4 = old SP */
	ld	0,16(4)
	mtlr	0
	ld	0,8(4)
	mtcr	0
	mr	1,4
	blr

LT..vgPlain_run_innerloop:
    .long 0
    .byte 0,0,32,64,0,0,1,0
    .long 0
    .long LT..vgPlain_run_innerloop-.vgPlain_run_innerloop
    .short 3
    .byte "vgPlain_run_innerloop"
    .align 2
_section_.text:
    .csect .data[RW],3
    .llong _section_.text

/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
   and 2 to carry results:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)
*/
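/* (Added note, a hedged usage sketch; the variable names are
   illustrative only.)

       UWord argblock[4];
       argblock[0] = (UWord)tx_code;       // input: translation to run
       argblock[1] = (UWord)guest_state;   // input: guest state ptr
       VG_(run_a_noredir_translation)( argblock );
       UWord next_pc = argblock[2];        // output
       UWord new_gsp = argblock[3];        // output
*/
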
.csect .text[PR]
.align 2
.globl  .VG_(run_a_noredir_translation)
.VG_(run_a_noredir_translation):
	/* Rather than attempt to make sense of the AIX ABI, just
           drop r1 by 512 (to get away from the caller's frame), then
	   1024 (to give ourselves a 1024-byte save area), and then
	   another 512 (to clear our save area).  In all, drop r1 by 2048
	   and dump stuff on the stack at 512(1)..1536(1).  */
	/* At entry, r3 points to argblock */

        /* ----- entry point to VG_(run_a_noredir_translation) ----- */
        /* For AIX/ppc64 we do:  LR-> +16(parent_sp), CR-> +8(parent_sp) */

        /* Save lr and cr */
        mflr    0
        std     0,16(1)
	mfcr	0
	std	0,8(1)

        /* New stack frame */
        stdu    1,-2048(1)  /* sp should maintain 16-byte alignment */

        /* General reg save area : 160 bytes at r1[512 .. 671] */
        std     31,664(1)
        std     30,656(1)
        std     29,648(1)
        std     28,640(1)
        std     27,632(1)
        std     26,624(1)
        std     25,616(1)
        std     24,608(1)
        std     23,600(1)
        std     22,592(1)
        std     21,584(1)
        std     20,576(1)
        std     19,568(1)
        std     18,560(1)
        std     17,552(1)
        std     16,544(1)
        std     15,536(1)
        std     14,528(1)
        std     13,520(1)
	std	 3,512(1)	/* will need it later */

        ld      31,8(3)		/* rd argblock[1] */
        ld      30,0(3)		/* rd argblock[0] */
        mtlr    30		/* run translation */
        blrl

        ld      4,512(1)	/* &argblock */
        std     3, 16(4)	/* wr argblock[2] */
        std     31,24(4)	/* wr argblock[3] */

        /* General regs */
        ld      31,664(1)
        ld      30,656(1)
        ld      29,648(1)
        ld      28,640(1)
        ld      27,632(1)
        ld      26,624(1)
        ld      25,616(1)
        ld      24,608(1)
        ld      23,600(1)
        ld      22,592(1)
        ld      21,584(1)
        ld      20,576(1)
        ld      19,568(1)
        ld      18,560(1)
        ld      17,552(1)
        ld      16,544(1)
        ld      15,536(1)
        ld      14,528(1)
        ld      13,520(1)

        /* restore lr,cr,sp */
	addi	4,1,2048  /* r4 = old SP */
	ld	0,16(4)
	mtlr	0
	ld	0,8(4)
	mtcr	0
	mr	1,4
	blr

#endif // defined(VGP_ppc64_aix5)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/