• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- begin                               guest_generic_bb_to_IR.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2010 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "guest_generic_bb_to_IR.h"
42 
43 
44 /* Forwards .. */
45 __attribute__((regparm(2)))
46 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s );
47 __attribute__((regparm(1)))
48 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
49 __attribute__((regparm(1)))
50 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
51 __attribute__((regparm(1)))
52 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
53 __attribute__((regparm(1)))
54 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
55 __attribute__((regparm(1)))
56 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
57 __attribute__((regparm(1)))
58 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
59 __attribute__((regparm(1)))
60 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
61 __attribute__((regparm(1)))
62 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
63 __attribute__((regparm(1)))
64 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
65 __attribute__((regparm(1)))
66 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
67 __attribute__((regparm(1)))
68 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
69 __attribute__((regparm(1)))
70 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
71 
72 /* Small helpers */
const_False(void * callback_opaque,Addr64 a)73 static Bool const_False ( void* callback_opaque, Addr64 a ) {
74    return False;
75 }
76 
77 /* Disassemble a complete basic block, starting at guest_IP_start,
78    returning a new IRSB.  The disassembler may chase across basic
79    block boundaries if it wishes and if chase_into_ok allows it.
80    The precise guest address ranges from which code has been taken
81    are written into vge.  guest_IP_bbstart is taken to be the IP in
82    the guest's address space corresponding to the instruction at
83    &guest_code[0].
84 
85    dis_instr_fn is the arch-specific fn to disassemble on function; it
86    is this that does the real work.
87 
88    do_self_check indicates that the caller needs a self-checking
89    translation.
90 
91    preamble_function is a callback which allows the caller to add
92    its own IR preamble (following the self-check, if any).  May be
93    NULL.  If non-NULL, the IRSB under construction is handed to
94    this function, which presumably adds IR statements to it.  The
95    callback may optionally complete the block and direct bb_to_IR
96    not to disassemble any instructions into it; this is indicated
97    by the callback returning True.
98 
99    offB_TIADDR and offB_TILEN are the offsets of guest_TIADDR and
100    guest_TILEN.  Since this routine has to work for any guest state,
101    without knowing what it is, those offsets have to passed in.
102 
103    callback_opaque is a caller-supplied pointer to data which the
104    callbacks may want to see.  Vex has no idea what it is.
105    (In fact it's a VgInstrumentClosure.)
106 */
107 
bb_to_IR(VexGuestExtents * vge,void * callback_opaque,DisOneInstrFn dis_instr_fn,UChar * guest_code,Addr64 guest_IP_bbstart,Bool (* chase_into_ok)(void *,Addr64),Bool host_bigendian,VexArch arch_guest,VexArchInfo * archinfo_guest,VexAbiInfo * abiinfo_both,IRType guest_word_type,Bool do_self_check,Bool (* preamble_function)(void *,IRSB *),Int offB_TISTART,Int offB_TILEN)108 IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge,
109                  /*IN*/ void*            callback_opaque,
110                  /*IN*/ DisOneInstrFn    dis_instr_fn,
111                  /*IN*/ UChar*           guest_code,
112                  /*IN*/ Addr64           guest_IP_bbstart,
113                  /*IN*/ Bool             (*chase_into_ok)(void*,Addr64),
114                  /*IN*/ Bool             host_bigendian,
115                  /*IN*/ VexArch          arch_guest,
116                  /*IN*/ VexArchInfo*     archinfo_guest,
117                  /*IN*/ VexAbiInfo*      abiinfo_both,
118                  /*IN*/ IRType           guest_word_type,
119                  /*IN*/ Bool             do_self_check,
120                  /*IN*/ Bool             (*preamble_function)(void*,IRSB*),
121                  /*IN*/ Int              offB_TISTART,
122                  /*IN*/ Int              offB_TILEN )
123 {
124    Long       delta;
125    Int        i, n_instrs, first_stmt_idx;
126    Bool       resteerOK, need_to_put_IP, debug_print;
127    DisResult  dres;
128    IRStmt*    imark;
129    static Int n_resteers = 0;
130    Int        d_resteers = 0;
131    Int        selfcheck_idx = 0;
132    IRSB*      irsb;
133    Addr64     guest_IP_curr_instr;
134    IRConst*   guest_IP_bbstart_IRConst = NULL;
135    Int        n_cond_resteers_allowed = 2;
136 
137    Bool (*resteerOKfn)(void*,Addr64) = NULL;
138 
139    debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
140 
141    /* Note: for adler32 to work without % operation for the self
142       check, need to limit length of stuff it scans to 5552 bytes.
143       Therefore limiting the max bb len to 100 insns seems generously
144       conservative. */
145 
146    /* check sanity .. */
147    vassert(sizeof(HWord) == sizeof(void*));
148    vassert(vex_control.guest_max_insns >= 1);
149    vassert(vex_control.guest_max_insns < 100);
150    vassert(vex_control.guest_chase_thresh >= 0);
151    vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
152    vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
153 
154    /* Start a new, empty extent. */
155    vge->n_used  = 1;
156    vge->base[0] = guest_IP_bbstart;
157    vge->len[0]  = 0;
158 
159    /* And a new IR superblock to dump the result into. */
160    irsb = emptyIRSB();
161 
162    /* Delta keeps track of how far along the guest_code array we have
163       so far gone. */
164    delta    = 0;
165    n_instrs = 0;
166 
167    /* Guest addresses as IRConsts.  Used in the two self-checks
168       generated. */
169    if (do_self_check) {
170       guest_IP_bbstart_IRConst
171          = guest_word_type==Ity_I32
172               ? IRConst_U32(toUInt(guest_IP_bbstart))
173               : IRConst_U64(guest_IP_bbstart);
174    }
175 
176    /* If asked to make a self-checking translation, leave 5 spaces
177       in which to put the check statements.  We'll fill them in later
178       when we know the length and adler32 of the area to check. */
179    if (do_self_check) {
180       selfcheck_idx = irsb->stmts_used;
181       addStmtToIRSB( irsb, IRStmt_NoOp() );
182       addStmtToIRSB( irsb, IRStmt_NoOp() );
183       addStmtToIRSB( irsb, IRStmt_NoOp() );
184       addStmtToIRSB( irsb, IRStmt_NoOp() );
185       addStmtToIRSB( irsb, IRStmt_NoOp() );
186    }
187 
188    /* If the caller supplied a function to add its own preamble, use
189       it now. */
190    if (preamble_function) {
191       Bool stopNow = preamble_function( callback_opaque, irsb );
192       if (stopNow) {
193          /* The callback has completed the IR block without any guest
194             insns being disassembled into it, so just return it at
195             this point, even if a self-check was requested - as there
196             is nothing to self-check.  The five self-check no-ops will
197             still be in place, but they are harmless. */
198          return irsb;
199       }
200    }
201 
202    /* Process instructions. */
203    while (True) {
204       vassert(n_instrs < vex_control.guest_max_insns);
205 
206       /* Regardless of what chase_into_ok says, is chasing permissible
207          at all right now?  Set resteerOKfn accordingly. */
208       resteerOK
209          = toBool(
210               n_instrs < vex_control.guest_chase_thresh
211               /* If making self-checking translations, don't chase
212                  .. it makes the checks too complicated.  We only want
213                  to scan just one sequence of bytes in the check, not
214                  a whole bunch. */
215               && !do_self_check
216               /* we can't afford to have a resteer once we're on the
217                  last extent slot. */
218               && vge->n_used < 3
219            );
220 
221       resteerOKfn
222          = resteerOK ? chase_into_ok : const_False;
223 
224       /* n_cond_resteers_allowed keeps track of whether we're still
225          allowing dis_instr_fn to chase conditional branches.  It
226          starts (at 2) and gets decremented each time dis_instr_fn
227          tells us it has chased a conditional branch.  We then
228          decrement it, and use it to tell later calls to dis_instr_fn
229          whether or not it is allowed to chase conditional
230          branches. */
231       vassert(n_cond_resteers_allowed >= 0 && n_cond_resteers_allowed <= 2);
232 
233       /* This is the IP of the instruction we're just about to deal
234          with. */
235       guest_IP_curr_instr = guest_IP_bbstart + delta;
236 
237       /* This is the irsb statement array index of the first stmt in
238          this insn.  That will always be the instruction-mark
239          descriptor. */
240       first_stmt_idx = irsb->stmts_used;
241 
242       /* Add an instruction-mark statement.  We won't know until after
243          disassembling the instruction how long it instruction is, so
244          just put in a zero length and we'll fix it up later. */
245       addStmtToIRSB( irsb, IRStmt_IMark( guest_IP_curr_instr, 0 ));
246 
247       /* for the first insn, the dispatch loop will have set
248          %IP, but for all the others we have to do it ourselves. */
249       need_to_put_IP = toBool(n_instrs > 0);
250 
251       /* Finally, actually disassemble an instruction. */
252       dres = dis_instr_fn ( irsb,
253                             need_to_put_IP,
254                             resteerOKfn,
255                             toBool(n_cond_resteers_allowed > 0),
256                             callback_opaque,
257                             guest_code,
258                             delta,
259                             guest_IP_curr_instr,
260                             arch_guest,
261                             archinfo_guest,
262                             abiinfo_both,
263                             host_bigendian );
264 
265       /* stay sane ... */
266       vassert(dres.whatNext == Dis_StopHere
267               || dres.whatNext == Dis_Continue
268               || dres.whatNext == Dis_ResteerU
269               || dres.whatNext == Dis_ResteerC);
270       /* ... disassembled insn length is sane ... */
271       vassert(dres.len >= 0 && dres.len <= 20);
272       /* ... continueAt is zero if no resteer requested ... */
273       if (dres.whatNext != Dis_ResteerU && dres.whatNext != Dis_ResteerC)
274          vassert(dres.continueAt == 0);
275       /* ... if we disallowed conditional resteers, check that one
276              didn't actually happen anyway ... */
277       if (n_cond_resteers_allowed == 0)
278          vassert(dres.whatNext != Dis_ResteerC);
279 
280       /* Fill in the insn-mark length field. */
281       vassert(first_stmt_idx >= 0 && first_stmt_idx < irsb->stmts_used);
282       imark = irsb->stmts[first_stmt_idx];
283       vassert(imark);
284       vassert(imark->tag == Ist_IMark);
285       vassert(imark->Ist.IMark.len == 0);
286       imark->Ist.IMark.len = toUInt(dres.len);
287 
288       /* Print the resulting IR, if needed. */
289       if (vex_traceflags & VEX_TRACE_FE) {
290          for (i = first_stmt_idx; i < irsb->stmts_used; i++) {
291             vex_printf("              ");
292             ppIRStmt(irsb->stmts[i]);
293             vex_printf("\n");
294          }
295       }
296 
297       /* If dis_instr_fn terminated the BB at this point, check it
298          also filled in the irsb->next field. */
299       if (dres.whatNext == Dis_StopHere) {
300          vassert(irsb->next != NULL);
301          if (debug_print) {
302             vex_printf("              ");
303             vex_printf( "goto {");
304             ppIRJumpKind(irsb->jumpkind);
305             vex_printf( "} ");
306             ppIRExpr( irsb->next );
307             vex_printf( "\n");
308          }
309       }
310 
311       /* Update the VexGuestExtents we are constructing. */
312       /* If vex_control.guest_max_insns is required to be < 100 and
313          each insn is at max 20 bytes long, this limit of 5000 then
314          seems reasonable since the max possible extent length will be
315          100 * 20 == 2000. */
316       vassert(vge->len[vge->n_used-1] < 5000);
317       vge->len[vge->n_used-1]
318          = toUShort(toUInt( vge->len[vge->n_used-1] + dres.len ));
319       n_instrs++;
320       if (debug_print)
321          vex_printf("\n");
322 
323       /* Advance delta (inconspicuous but very important :-) */
324       delta += (Long)dres.len;
325 
326       switch (dres.whatNext) {
327          case Dis_Continue:
328             vassert(irsb->next == NULL);
329             if (n_instrs < vex_control.guest_max_insns) {
330                /* keep going */
331             } else {
332                /* We have to stop. */
333                irsb->next
334                   = IRExpr_Const(
335                        guest_word_type == Ity_I32
336                           ? IRConst_U32(toUInt(guest_IP_bbstart+delta))
337                           : IRConst_U64(guest_IP_bbstart+delta)
338                     );
339                goto done;
340             }
341             break;
342          case Dis_StopHere:
343             vassert(irsb->next != NULL);
344             goto done;
345          case Dis_ResteerU:
346          case Dis_ResteerC:
347             /* Check that we actually allowed a resteer .. */
348             vassert(resteerOK);
349             vassert(irsb->next == NULL);
350             if (dres.whatNext == Dis_ResteerC) {
351                vassert(n_cond_resteers_allowed > 0);
352                n_cond_resteers_allowed--;
353             }
354             /* figure out a new delta to continue at. */
355             vassert(resteerOKfn(callback_opaque,dres.continueAt));
356             delta = dres.continueAt - guest_IP_bbstart;
357             /* we now have to start a new extent slot. */
358             vge->n_used++;
359             vassert(vge->n_used <= 3);
360             vge->base[vge->n_used-1] = dres.continueAt;
361             vge->len[vge->n_used-1] = 0;
362             n_resteers++;
363             d_resteers++;
364             if (0 && (n_resteers & 0xFF) == 0)
365             vex_printf("resteer[%d,%d] to 0x%llx (delta = %lld)\n",
366                        n_resteers, d_resteers,
367                        dres.continueAt, delta);
368             break;
369          default:
370             vpanic("bb_to_IR");
371       }
372    }
373    /*NOTREACHED*/
374    vassert(0);
375 
376   done:
377    /* We're done.  The only thing that might need attending to is that
378       a self-checking preamble may need to be created.
379 
380       The scheme is to compute a rather crude checksum of the code
381       we're making a translation of, and add to the IR a call to a
382       helper routine which recomputes the checksum every time the
383       translation is run, and requests a retranslation if it doesn't
384       match.  This is obviously very expensive and considerable
385       efforts are made to speed it up:
386 
387       * the checksum is computed from all the 32-bit words that
388         overlap the translated code.  That means it could depend on up
389         to 3 bytes before and 3 bytes after which aren't part of the
390         translated area, and so if those change then we'll
391         unnecessarily have to discard and retranslate.  This seems
392         like a pretty remote possibility and it seems as if the
393         benefit of not having to deal with the ends of the range at
394         byte precision far outweigh any possible extra translations
395         needed.
396 
397       * there's a generic routine and 12 specialised cases, which
398         handle the cases of 1 through 12-word lengths respectively.
399         They seem to cover about 90% of the cases that occur in
400         practice.
401    */
402    if (do_self_check) {
403 
404       UInt     len2check, expected32;
405       IRTemp   tistart_tmp, tilen_tmp;
406       UInt     (*fn_generic)(HWord, HWord) __attribute__((regparm(2)));
407       UInt     (*fn_spec)(HWord) __attribute__((regparm(1)));
408       HChar*   nm_generic;
409       HChar*   nm_spec;
410       HWord    fn_generic_entry = 0;
411       HWord    fn_spec_entry = 0;
412 
413       vassert(vge->n_used == 1);
414       len2check = vge->len[0];
415 
416       /* stay sane */
417       vassert(len2check >= 0 && len2check < 1000/*arbitrary*/);
418 
419       /* Skip the check if the translation involved zero bytes */
420       if (len2check > 0) {
421          HWord first_w32 = ((HWord)guest_code) & ~(HWord)3;
422          HWord last_w32  = (((HWord)guest_code) + len2check - 1) & ~(HWord)3;
423          vassert(first_w32 <= last_w32);
424          HWord w32_diff = last_w32 - first_w32;
425          vassert(0 == (w32_diff & 3));
426          HWord w32s_to_check = (w32_diff + 4) / 4;
427          vassert(w32s_to_check > 0 && w32s_to_check < 1004/*arbitrary*//4);
428 
429          /* vex_printf("%lx %lx  %ld\n", first_w32, last_w32, w32s_to_check); */
430 
431          fn_generic =  genericg_compute_checksum_4al;
432          nm_generic = "genericg_compute_checksum_4al";
433          fn_spec = NULL;
434          nm_spec = NULL;
435 
436          switch (w32s_to_check) {
437              case 1:  fn_spec =  genericg_compute_checksum_4al_1;
438                       nm_spec = "genericg_compute_checksum_4al_1"; break;
439              case 2:  fn_spec =  genericg_compute_checksum_4al_2;
440                       nm_spec = "genericg_compute_checksum_4al_2"; break;
441              case 3:  fn_spec =  genericg_compute_checksum_4al_3;
442                       nm_spec = "genericg_compute_checksum_4al_3"; break;
443              case 4:  fn_spec =  genericg_compute_checksum_4al_4;
444                       nm_spec = "genericg_compute_checksum_4al_4"; break;
445              case 5:  fn_spec =  genericg_compute_checksum_4al_5;
446                       nm_spec = "genericg_compute_checksum_4al_5"; break;
447              case 6:  fn_spec =  genericg_compute_checksum_4al_6;
448                       nm_spec = "genericg_compute_checksum_4al_6"; break;
449              case 7:  fn_spec =  genericg_compute_checksum_4al_7;
450                       nm_spec = "genericg_compute_checksum_4al_7"; break;
451              case 8:  fn_spec =  genericg_compute_checksum_4al_8;
452                       nm_spec = "genericg_compute_checksum_4al_8"; break;
453              case 9:  fn_spec =  genericg_compute_checksum_4al_9;
454                       nm_spec = "genericg_compute_checksum_4al_9"; break;
455              case 10: fn_spec =  genericg_compute_checksum_4al_10;
456                       nm_spec = "genericg_compute_checksum_4al_10"; break;
457              case 11: fn_spec =  genericg_compute_checksum_4al_11;
458                       nm_spec = "genericg_compute_checksum_4al_11"; break;
459              case 12: fn_spec =  genericg_compute_checksum_4al_12;
460                       nm_spec = "genericg_compute_checksum_4al_12"; break;
461              default: break;
462          }
463 
464          expected32 = fn_generic( first_w32, w32s_to_check );
465          /* If we got a specialised version, check it produces the same
466             result as the generic version! */
467          if (fn_spec) {
468             vassert(nm_spec);
469             vassert(expected32 == fn_spec( first_w32 ));
470          } else {
471             vassert(!nm_spec);
472          }
473 
474          /* Set TISTART and TILEN.  These will describe to the despatcher
475             the area of guest code to invalidate should we exit with a
476             self-check failure. */
477 
478          tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
479          tilen_tmp   = newIRTemp(irsb->tyenv, guest_word_type);
480 
481          irsb->stmts[selfcheck_idx+0]
482             = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(guest_IP_bbstart_IRConst) );
483 
484          irsb->stmts[selfcheck_idx+1]
485             = IRStmt_WrTmp(tilen_tmp,
486                            guest_word_type==Ity_I32
487                               ? IRExpr_Const(IRConst_U32(len2check))
488                               : IRExpr_Const(IRConst_U64(len2check))
489               );
490 
491          irsb->stmts[selfcheck_idx+2]
492             = IRStmt_Put( offB_TISTART, IRExpr_RdTmp(tistart_tmp) );
493 
494          irsb->stmts[selfcheck_idx+3]
495             = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
496 
497          /* Generate the entry point descriptors */
498          if (abiinfo_both->host_ppc_calls_use_fndescrs) {
499             HWord* descr = (HWord*)fn_generic;
500             fn_generic_entry = descr[0];
501             if (fn_spec) {
502                descr = (HWord*)fn_spec;
503                fn_spec_entry = descr[0];
504             } else {
505                fn_spec_entry = (HWord)NULL;
506             }
507          } else {
508             fn_generic_entry = (HWord)fn_generic;
509             if (fn_spec) {
510                fn_spec_entry = (HWord)fn_spec;
511             } else {
512                fn_spec_entry = (HWord)NULL;
513             }
514          }
515 
516          IRExpr* callexpr = NULL;
517          if (fn_spec) {
518             callexpr = mkIRExprCCall(
519                           Ity_I32, 1/*regparms*/,
520                           nm_spec, (void*)fn_spec_entry,
521                           mkIRExprVec_1(
522                              mkIRExpr_HWord( (HWord)first_w32 )
523                           )
524                        );
525          } else {
526             callexpr = mkIRExprCCall(
527                           Ity_I32, 2/*regparms*/,
528                           nm_generic, (void*)fn_generic_entry,
529                           mkIRExprVec_2(
530                              mkIRExpr_HWord( (HWord)first_w32 ),
531                              mkIRExpr_HWord( (HWord)w32s_to_check )
532                           )
533                        );
534          }
535 
536          irsb->stmts[selfcheck_idx+4]
537             = IRStmt_Exit(
538                  IRExpr_Binop(
539                     Iop_CmpNE32,
540                     callexpr,
541                     IRExpr_Const(IRConst_U32(expected32))
542                  ),
543                  Ijk_TInval,
544                  guest_IP_bbstart_IRConst
545               );
546       }
547    }
548 
549    return irsb;
550 }
551 
552 
553 /*-------------------------------------------------------------
554   A support routine for doing self-checking translations.
555   -------------------------------------------------------------*/
556 
557 /* CLEAN HELPER */
558 /* CALLED FROM GENERATED CODE */
559 
/* Compute a checksum of the n_w32s 32-bit words of host memory
   starting at the 4-aligned address first_w32, as fast as possible.
   The generic version below takes the word count as an argument and
   handles any count, down to zero; the specialised _1 .. _12
   versions that follow it handle exactly that many words and must
   return the same value as the generic version.  This fn is called
   once for every use of a self-checking translation, so it needs to
   be as fast as possible. */
567 
/* Rotate the 32-bit value w left by n bit positions and return the
   result.  Only the low five bits of n are used, so a count of 0 (or
   any multiple of 32) returns w unchanged rather than performing an
   undefined shift by 32.  For counts in 1..31 the result is
   (w << n) | (w >> (32-n)). */
static inline UInt ROL32 ( UInt w, Int n ) {
   UInt k = (UInt)n & 31u;
   /* The right-shift count is masked as well so that k == 0 shifts
      by 0, not by 32. */
   return (w << k) | (w >> ((32u - k) & 31u));
}
572 
573 __attribute((regparm(2)))
genericg_compute_checksum_4al(HWord first_w32,HWord n_w32s)574 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
575 {
576    UInt  sum1 = 0, sum2 = 0;
577    UInt* p = (UInt*)first_w32;
578    /* unrolled */
579    while (n_w32s >= 4) {
580       UInt  w;
581       w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
582       w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
583       w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
584       w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
585       p += 4;
586       n_w32s -= 4;
587       sum1 ^= sum2;
588    }
589    while (n_w32s >= 1) {
590       UInt  w;
591       w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
592       p += 1;
593       n_w32s -= 1;
594       sum1 ^= sum2;
595    }
596    return sum1 + sum2;
597 }
598 
599 /* Specialised versions of the above function */
600 
601 __attribute__((regparm(1)))
genericg_compute_checksum_4al_1(HWord first_w32)602 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
603 {
604    UInt  sum1 = 0, sum2 = 0;
605    UInt* p = (UInt*)first_w32;
606    UInt  w;
607    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
608    sum1 ^= sum2;
609    return sum1 + sum2;
610 }
611 
612 __attribute__((regparm(1)))
genericg_compute_checksum_4al_2(HWord first_w32)613 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
614 {
615    UInt  sum1 = 0, sum2 = 0;
616    UInt* p = (UInt*)first_w32;
617    UInt  w;
618    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
619    sum1 ^= sum2;
620    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
621    sum1 ^= sum2;
622    return sum1 + sum2;
623 }
624 
625 __attribute__((regparm(1)))
genericg_compute_checksum_4al_3(HWord first_w32)626 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
627 {
628    UInt  sum1 = 0, sum2 = 0;
629    UInt* p = (UInt*)first_w32;
630    UInt  w;
631    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
632    sum1 ^= sum2;
633    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
634    sum1 ^= sum2;
635    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
636    sum1 ^= sum2;
637    return sum1 + sum2;
638 }
639 
640 __attribute__((regparm(1)))
genericg_compute_checksum_4al_4(HWord first_w32)641 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
642 {
643    UInt  sum1 = 0, sum2 = 0;
644    UInt* p = (UInt*)first_w32;
645    UInt  w;
646    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
647    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
648    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
649    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
650    sum1 ^= sum2;
651    return sum1 + sum2;
652 }
653 
654 __attribute__((regparm(1)))
genericg_compute_checksum_4al_5(HWord first_w32)655 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
656 {
657    UInt  sum1 = 0, sum2 = 0;
658    UInt* p = (UInt*)first_w32;
659    UInt  w;
660    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
661    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
662    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
663    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
664    sum1 ^= sum2;
665    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
666    sum1 ^= sum2;
667    return sum1 + sum2;
668 }
669 
670 __attribute__((regparm(1)))
genericg_compute_checksum_4al_6(HWord first_w32)671 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
672 {
673    UInt  sum1 = 0, sum2 = 0;
674    UInt* p = (UInt*)first_w32;
675    UInt  w;
676    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
677    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
678    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
679    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
680    sum1 ^= sum2;
681    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
682    sum1 ^= sum2;
683    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
684    sum1 ^= sum2;
685    return sum1 + sum2;
686 }
687 
688 __attribute__((regparm(1)))
genericg_compute_checksum_4al_7(HWord first_w32)689 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
690 {
691    UInt  sum1 = 0, sum2 = 0;
692    UInt* p = (UInt*)first_w32;
693    UInt  w;
694    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
695    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
696    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
697    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
698    sum1 ^= sum2;
699    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
700    sum1 ^= sum2;
701    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
702    sum1 ^= sum2;
703    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
704    sum1 ^= sum2;
705    return sum1 + sum2;
706 }
707 
708 __attribute__((regparm(1)))
genericg_compute_checksum_4al_8(HWord first_w32)709 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
710 {
711    UInt  sum1 = 0, sum2 = 0;
712    UInt* p = (UInt*)first_w32;
713    UInt  w;
714    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
715    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
716    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
717    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
718    sum1 ^= sum2;
719    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
720    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
721    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
722    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
723    sum1 ^= sum2;
724    return sum1 + sum2;
725 }
726 
727 __attribute__((regparm(1)))
genericg_compute_checksum_4al_9(HWord first_w32)728 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
729 {
730    UInt  sum1 = 0, sum2 = 0;
731    UInt* p = (UInt*)first_w32;
732    UInt  w;
733    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
734    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
735    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
736    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
737    sum1 ^= sum2;
738    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
739    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
740    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
741    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
742    sum1 ^= sum2;
743    w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
744    sum1 ^= sum2;
745    return sum1 + sum2;
746 }
747 
748 __attribute__((regparm(1)))
genericg_compute_checksum_4al_10(HWord first_w32)749 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
750 {
751    UInt  sum1 = 0, sum2 = 0;
752    UInt* p = (UInt*)first_w32;
753    UInt  w;
754    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
755    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
756    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
757    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
758    sum1 ^= sum2;
759    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
760    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
761    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
762    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
763    sum1 ^= sum2;
764    w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
765    sum1 ^= sum2;
766    w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
767    sum1 ^= sum2;
768    return sum1 + sum2;
769 }
770 
771 __attribute__((regparm(1)))
genericg_compute_checksum_4al_11(HWord first_w32)772 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
773 {
774    UInt  sum1 = 0, sum2 = 0;
775    UInt* p = (UInt*)first_w32;
776    UInt  w;
777    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
778    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
779    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
780    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
781    sum1 ^= sum2;
782    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
783    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
784    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
785    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
786    sum1 ^= sum2;
787    w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
788    sum1 ^= sum2;
789    w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
790    sum1 ^= sum2;
791    w = p[10]; sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
792    sum1 ^= sum2;
793    return sum1 + sum2;
794 }
795 
796 __attribute__((regparm(1)))
genericg_compute_checksum_4al_12(HWord first_w32)797 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
798 {
799    UInt  sum1 = 0, sum2 = 0;
800    UInt* p = (UInt*)first_w32;
801    UInt  w;
802    w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
803    w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
804    w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
805    w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
806    sum1 ^= sum2;
807    w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
808    w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
809    w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
810    w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
811    sum1 ^= sum2;
812    w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
813    w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
814    w = p[10]; sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
815    w = p[11]; sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
816    sum1 ^= sum2;
817    return sum1 + sum2;
818 }
819 
820 /*--------------------------------------------------------------------*/
821 /*--- end                                 guest_generic_bb_to_IR.c ---*/
822 /*--------------------------------------------------------------------*/
823