1
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_generic_bb_to_IR.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2010 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 #include "main_util.h"
40 #include "main_globals.h"
41 #include "guest_generic_bb_to_IR.h"
42
43
44 /* Forwards .. */
45 __attribute__((regparm(2)))
46 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s );
47 __attribute__((regparm(1)))
48 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
49 __attribute__((regparm(1)))
50 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
51 __attribute__((regparm(1)))
52 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
53 __attribute__((regparm(1)))
54 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
55 __attribute__((regparm(1)))
56 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
57 __attribute__((regparm(1)))
58 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
59 __attribute__((regparm(1)))
60 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
61 __attribute__((regparm(1)))
62 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
63 __attribute__((regparm(1)))
64 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
65 __attribute__((regparm(1)))
66 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
67 __attribute__((regparm(1)))
68 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
69 __attribute__((regparm(1)))
70 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
71
72 /* Small helpers */
const_False(void * callback_opaque,Addr64 a)73 static Bool const_False ( void* callback_opaque, Addr64 a ) {
74 return False;
75 }
76
77 /* Disassemble a complete basic block, starting at guest_IP_start,
78 returning a new IRSB. The disassembler may chase across basic
79 block boundaries if it wishes and if chase_into_ok allows it.
80 The precise guest address ranges from which code has been taken
81 are written into vge. guest_IP_bbstart is taken to be the IP in
82 the guest's address space corresponding to the instruction at
83 &guest_code[0].
84
85 dis_instr_fn is the arch-specific fn to disassemble on function; it
86 is this that does the real work.
87
88 do_self_check indicates that the caller needs a self-checking
89 translation.
90
91 preamble_function is a callback which allows the caller to add
92 its own IR preamble (following the self-check, if any). May be
93 NULL. If non-NULL, the IRSB under construction is handed to
94 this function, which presumably adds IR statements to it. The
95 callback may optionally complete the block and direct bb_to_IR
96 not to disassemble any instructions into it; this is indicated
97 by the callback returning True.
98
99 offB_TIADDR and offB_TILEN are the offsets of guest_TIADDR and
100 guest_TILEN. Since this routine has to work for any guest state,
101 without knowing what it is, those offsets have to passed in.
102
103 callback_opaque is a caller-supplied pointer to data which the
104 callbacks may want to see. Vex has no idea what it is.
105 (In fact it's a VgInstrumentClosure.)
106 */
107
bb_to_IR(VexGuestExtents * vge,void * callback_opaque,DisOneInstrFn dis_instr_fn,UChar * guest_code,Addr64 guest_IP_bbstart,Bool (* chase_into_ok)(void *,Addr64),Bool host_bigendian,VexArch arch_guest,VexArchInfo * archinfo_guest,VexAbiInfo * abiinfo_both,IRType guest_word_type,Bool do_self_check,Bool (* preamble_function)(void *,IRSB *),Int offB_TISTART,Int offB_TILEN)108 IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge,
109 /*IN*/ void* callback_opaque,
110 /*IN*/ DisOneInstrFn dis_instr_fn,
111 /*IN*/ UChar* guest_code,
112 /*IN*/ Addr64 guest_IP_bbstart,
113 /*IN*/ Bool (*chase_into_ok)(void*,Addr64),
114 /*IN*/ Bool host_bigendian,
115 /*IN*/ VexArch arch_guest,
116 /*IN*/ VexArchInfo* archinfo_guest,
117 /*IN*/ VexAbiInfo* abiinfo_both,
118 /*IN*/ IRType guest_word_type,
119 /*IN*/ Bool do_self_check,
120 /*IN*/ Bool (*preamble_function)(void*,IRSB*),
121 /*IN*/ Int offB_TISTART,
122 /*IN*/ Int offB_TILEN )
123 {
124 Long delta;
125 Int i, n_instrs, first_stmt_idx;
126 Bool resteerOK, need_to_put_IP, debug_print;
127 DisResult dres;
128 IRStmt* imark;
129 static Int n_resteers = 0;
130 Int d_resteers = 0;
131 Int selfcheck_idx = 0;
132 IRSB* irsb;
133 Addr64 guest_IP_curr_instr;
134 IRConst* guest_IP_bbstart_IRConst = NULL;
135 Int n_cond_resteers_allowed = 2;
136
137 Bool (*resteerOKfn)(void*,Addr64) = NULL;
138
139 debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
140
141 /* Note: for adler32 to work without % operation for the self
142 check, need to limit length of stuff it scans to 5552 bytes.
143 Therefore limiting the max bb len to 100 insns seems generously
144 conservative. */
145
146 /* check sanity .. */
147 vassert(sizeof(HWord) == sizeof(void*));
148 vassert(vex_control.guest_max_insns >= 1);
149 vassert(vex_control.guest_max_insns < 100);
150 vassert(vex_control.guest_chase_thresh >= 0);
151 vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
152 vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
153
154 /* Start a new, empty extent. */
155 vge->n_used = 1;
156 vge->base[0] = guest_IP_bbstart;
157 vge->len[0] = 0;
158
159 /* And a new IR superblock to dump the result into. */
160 irsb = emptyIRSB();
161
162 /* Delta keeps track of how far along the guest_code array we have
163 so far gone. */
164 delta = 0;
165 n_instrs = 0;
166
167 /* Guest addresses as IRConsts. Used in the two self-checks
168 generated. */
169 if (do_self_check) {
170 guest_IP_bbstart_IRConst
171 = guest_word_type==Ity_I32
172 ? IRConst_U32(toUInt(guest_IP_bbstart))
173 : IRConst_U64(guest_IP_bbstart);
174 }
175
176 /* If asked to make a self-checking translation, leave 5 spaces
177 in which to put the check statements. We'll fill them in later
178 when we know the length and adler32 of the area to check. */
179 if (do_self_check) {
180 selfcheck_idx = irsb->stmts_used;
181 addStmtToIRSB( irsb, IRStmt_NoOp() );
182 addStmtToIRSB( irsb, IRStmt_NoOp() );
183 addStmtToIRSB( irsb, IRStmt_NoOp() );
184 addStmtToIRSB( irsb, IRStmt_NoOp() );
185 addStmtToIRSB( irsb, IRStmt_NoOp() );
186 }
187
188 /* If the caller supplied a function to add its own preamble, use
189 it now. */
190 if (preamble_function) {
191 Bool stopNow = preamble_function( callback_opaque, irsb );
192 if (stopNow) {
193 /* The callback has completed the IR block without any guest
194 insns being disassembled into it, so just return it at
195 this point, even if a self-check was requested - as there
196 is nothing to self-check. The five self-check no-ops will
197 still be in place, but they are harmless. */
198 return irsb;
199 }
200 }
201
202 /* Process instructions. */
203 while (True) {
204 vassert(n_instrs < vex_control.guest_max_insns);
205
206 /* Regardless of what chase_into_ok says, is chasing permissible
207 at all right now? Set resteerOKfn accordingly. */
208 resteerOK
209 = toBool(
210 n_instrs < vex_control.guest_chase_thresh
211 /* If making self-checking translations, don't chase
212 .. it makes the checks too complicated. We only want
213 to scan just one sequence of bytes in the check, not
214 a whole bunch. */
215 && !do_self_check
216 /* we can't afford to have a resteer once we're on the
217 last extent slot. */
218 && vge->n_used < 3
219 );
220
221 resteerOKfn
222 = resteerOK ? chase_into_ok : const_False;
223
224 /* n_cond_resteers_allowed keeps track of whether we're still
225 allowing dis_instr_fn to chase conditional branches. It
226 starts (at 2) and gets decremented each time dis_instr_fn
227 tells us it has chased a conditional branch. We then
228 decrement it, and use it to tell later calls to dis_instr_fn
229 whether or not it is allowed to chase conditional
230 branches. */
231 vassert(n_cond_resteers_allowed >= 0 && n_cond_resteers_allowed <= 2);
232
233 /* This is the IP of the instruction we're just about to deal
234 with. */
235 guest_IP_curr_instr = guest_IP_bbstart + delta;
236
237 /* This is the irsb statement array index of the first stmt in
238 this insn. That will always be the instruction-mark
239 descriptor. */
240 first_stmt_idx = irsb->stmts_used;
241
242 /* Add an instruction-mark statement. We won't know until after
243 disassembling the instruction how long it instruction is, so
244 just put in a zero length and we'll fix it up later. */
245 addStmtToIRSB( irsb, IRStmt_IMark( guest_IP_curr_instr, 0 ));
246
247 /* for the first insn, the dispatch loop will have set
248 %IP, but for all the others we have to do it ourselves. */
249 need_to_put_IP = toBool(n_instrs > 0);
250
251 /* Finally, actually disassemble an instruction. */
252 dres = dis_instr_fn ( irsb,
253 need_to_put_IP,
254 resteerOKfn,
255 toBool(n_cond_resteers_allowed > 0),
256 callback_opaque,
257 guest_code,
258 delta,
259 guest_IP_curr_instr,
260 arch_guest,
261 archinfo_guest,
262 abiinfo_both,
263 host_bigendian );
264
265 /* stay sane ... */
266 vassert(dres.whatNext == Dis_StopHere
267 || dres.whatNext == Dis_Continue
268 || dres.whatNext == Dis_ResteerU
269 || dres.whatNext == Dis_ResteerC);
270 /* ... disassembled insn length is sane ... */
271 vassert(dres.len >= 0 && dres.len <= 20);
272 /* ... continueAt is zero if no resteer requested ... */
273 if (dres.whatNext != Dis_ResteerU && dres.whatNext != Dis_ResteerC)
274 vassert(dres.continueAt == 0);
275 /* ... if we disallowed conditional resteers, check that one
276 didn't actually happen anyway ... */
277 if (n_cond_resteers_allowed == 0)
278 vassert(dres.whatNext != Dis_ResteerC);
279
280 /* Fill in the insn-mark length field. */
281 vassert(first_stmt_idx >= 0 && first_stmt_idx < irsb->stmts_used);
282 imark = irsb->stmts[first_stmt_idx];
283 vassert(imark);
284 vassert(imark->tag == Ist_IMark);
285 vassert(imark->Ist.IMark.len == 0);
286 imark->Ist.IMark.len = toUInt(dres.len);
287
288 /* Print the resulting IR, if needed. */
289 if (vex_traceflags & VEX_TRACE_FE) {
290 for (i = first_stmt_idx; i < irsb->stmts_used; i++) {
291 vex_printf(" ");
292 ppIRStmt(irsb->stmts[i]);
293 vex_printf("\n");
294 }
295 }
296
297 /* If dis_instr_fn terminated the BB at this point, check it
298 also filled in the irsb->next field. */
299 if (dres.whatNext == Dis_StopHere) {
300 vassert(irsb->next != NULL);
301 if (debug_print) {
302 vex_printf(" ");
303 vex_printf( "goto {");
304 ppIRJumpKind(irsb->jumpkind);
305 vex_printf( "} ");
306 ppIRExpr( irsb->next );
307 vex_printf( "\n");
308 }
309 }
310
311 /* Update the VexGuestExtents we are constructing. */
312 /* If vex_control.guest_max_insns is required to be < 100 and
313 each insn is at max 20 bytes long, this limit of 5000 then
314 seems reasonable since the max possible extent length will be
315 100 * 20 == 2000. */
316 vassert(vge->len[vge->n_used-1] < 5000);
317 vge->len[vge->n_used-1]
318 = toUShort(toUInt( vge->len[vge->n_used-1] + dres.len ));
319 n_instrs++;
320 if (debug_print)
321 vex_printf("\n");
322
323 /* Advance delta (inconspicuous but very important :-) */
324 delta += (Long)dres.len;
325
326 switch (dres.whatNext) {
327 case Dis_Continue:
328 vassert(irsb->next == NULL);
329 if (n_instrs < vex_control.guest_max_insns) {
330 /* keep going */
331 } else {
332 /* We have to stop. */
333 irsb->next
334 = IRExpr_Const(
335 guest_word_type == Ity_I32
336 ? IRConst_U32(toUInt(guest_IP_bbstart+delta))
337 : IRConst_U64(guest_IP_bbstart+delta)
338 );
339 goto done;
340 }
341 break;
342 case Dis_StopHere:
343 vassert(irsb->next != NULL);
344 goto done;
345 case Dis_ResteerU:
346 case Dis_ResteerC:
347 /* Check that we actually allowed a resteer .. */
348 vassert(resteerOK);
349 vassert(irsb->next == NULL);
350 if (dres.whatNext == Dis_ResteerC) {
351 vassert(n_cond_resteers_allowed > 0);
352 n_cond_resteers_allowed--;
353 }
354 /* figure out a new delta to continue at. */
355 vassert(resteerOKfn(callback_opaque,dres.continueAt));
356 delta = dres.continueAt - guest_IP_bbstart;
357 /* we now have to start a new extent slot. */
358 vge->n_used++;
359 vassert(vge->n_used <= 3);
360 vge->base[vge->n_used-1] = dres.continueAt;
361 vge->len[vge->n_used-1] = 0;
362 n_resteers++;
363 d_resteers++;
364 if (0 && (n_resteers & 0xFF) == 0)
365 vex_printf("resteer[%d,%d] to 0x%llx (delta = %lld)\n",
366 n_resteers, d_resteers,
367 dres.continueAt, delta);
368 break;
369 default:
370 vpanic("bb_to_IR");
371 }
372 }
373 /*NOTREACHED*/
374 vassert(0);
375
376 done:
377 /* We're done. The only thing that might need attending to is that
378 a self-checking preamble may need to be created.
379
380 The scheme is to compute a rather crude checksum of the code
381 we're making a translation of, and add to the IR a call to a
382 helper routine which recomputes the checksum every time the
383 translation is run, and requests a retranslation if it doesn't
384 match. This is obviously very expensive and considerable
385 efforts are made to speed it up:
386
387 * the checksum is computed from all the 32-bit words that
388 overlap the translated code. That means it could depend on up
389 to 3 bytes before and 3 bytes after which aren't part of the
390 translated area, and so if those change then we'll
391 unnecessarily have to discard and retranslate. This seems
392 like a pretty remote possibility and it seems as if the
393 benefit of not having to deal with the ends of the range at
394 byte precision far outweigh any possible extra translations
395 needed.
396
397 * there's a generic routine and 12 specialised cases, which
398 handle the cases of 1 through 12-word lengths respectively.
399 They seem to cover about 90% of the cases that occur in
400 practice.
401 */
402 if (do_self_check) {
403
404 UInt len2check, expected32;
405 IRTemp tistart_tmp, tilen_tmp;
406 UInt (*fn_generic)(HWord, HWord) __attribute__((regparm(2)));
407 UInt (*fn_spec)(HWord) __attribute__((regparm(1)));
408 HChar* nm_generic;
409 HChar* nm_spec;
410 HWord fn_generic_entry = 0;
411 HWord fn_spec_entry = 0;
412
413 vassert(vge->n_used == 1);
414 len2check = vge->len[0];
415
416 /* stay sane */
417 vassert(len2check >= 0 && len2check < 1000/*arbitrary*/);
418
419 /* Skip the check if the translation involved zero bytes */
420 if (len2check > 0) {
421 HWord first_w32 = ((HWord)guest_code) & ~(HWord)3;
422 HWord last_w32 = (((HWord)guest_code) + len2check - 1) & ~(HWord)3;
423 vassert(first_w32 <= last_w32);
424 HWord w32_diff = last_w32 - first_w32;
425 vassert(0 == (w32_diff & 3));
426 HWord w32s_to_check = (w32_diff + 4) / 4;
427 vassert(w32s_to_check > 0 && w32s_to_check < 1004/*arbitrary*//4);
428
429 /* vex_printf("%lx %lx %ld\n", first_w32, last_w32, w32s_to_check); */
430
431 fn_generic = genericg_compute_checksum_4al;
432 nm_generic = "genericg_compute_checksum_4al";
433 fn_spec = NULL;
434 nm_spec = NULL;
435
436 switch (w32s_to_check) {
437 case 1: fn_spec = genericg_compute_checksum_4al_1;
438 nm_spec = "genericg_compute_checksum_4al_1"; break;
439 case 2: fn_spec = genericg_compute_checksum_4al_2;
440 nm_spec = "genericg_compute_checksum_4al_2"; break;
441 case 3: fn_spec = genericg_compute_checksum_4al_3;
442 nm_spec = "genericg_compute_checksum_4al_3"; break;
443 case 4: fn_spec = genericg_compute_checksum_4al_4;
444 nm_spec = "genericg_compute_checksum_4al_4"; break;
445 case 5: fn_spec = genericg_compute_checksum_4al_5;
446 nm_spec = "genericg_compute_checksum_4al_5"; break;
447 case 6: fn_spec = genericg_compute_checksum_4al_6;
448 nm_spec = "genericg_compute_checksum_4al_6"; break;
449 case 7: fn_spec = genericg_compute_checksum_4al_7;
450 nm_spec = "genericg_compute_checksum_4al_7"; break;
451 case 8: fn_spec = genericg_compute_checksum_4al_8;
452 nm_spec = "genericg_compute_checksum_4al_8"; break;
453 case 9: fn_spec = genericg_compute_checksum_4al_9;
454 nm_spec = "genericg_compute_checksum_4al_9"; break;
455 case 10: fn_spec = genericg_compute_checksum_4al_10;
456 nm_spec = "genericg_compute_checksum_4al_10"; break;
457 case 11: fn_spec = genericg_compute_checksum_4al_11;
458 nm_spec = "genericg_compute_checksum_4al_11"; break;
459 case 12: fn_spec = genericg_compute_checksum_4al_12;
460 nm_spec = "genericg_compute_checksum_4al_12"; break;
461 default: break;
462 }
463
464 expected32 = fn_generic( first_w32, w32s_to_check );
465 /* If we got a specialised version, check it produces the same
466 result as the generic version! */
467 if (fn_spec) {
468 vassert(nm_spec);
469 vassert(expected32 == fn_spec( first_w32 ));
470 } else {
471 vassert(!nm_spec);
472 }
473
474 /* Set TISTART and TILEN. These will describe to the despatcher
475 the area of guest code to invalidate should we exit with a
476 self-check failure. */
477
478 tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
479 tilen_tmp = newIRTemp(irsb->tyenv, guest_word_type);
480
481 irsb->stmts[selfcheck_idx+0]
482 = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(guest_IP_bbstart_IRConst) );
483
484 irsb->stmts[selfcheck_idx+1]
485 = IRStmt_WrTmp(tilen_tmp,
486 guest_word_type==Ity_I32
487 ? IRExpr_Const(IRConst_U32(len2check))
488 : IRExpr_Const(IRConst_U64(len2check))
489 );
490
491 irsb->stmts[selfcheck_idx+2]
492 = IRStmt_Put( offB_TISTART, IRExpr_RdTmp(tistart_tmp) );
493
494 irsb->stmts[selfcheck_idx+3]
495 = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
496
497 /* Generate the entry point descriptors */
498 if (abiinfo_both->host_ppc_calls_use_fndescrs) {
499 HWord* descr = (HWord*)fn_generic;
500 fn_generic_entry = descr[0];
501 if (fn_spec) {
502 descr = (HWord*)fn_spec;
503 fn_spec_entry = descr[0];
504 } else {
505 fn_spec_entry = (HWord)NULL;
506 }
507 } else {
508 fn_generic_entry = (HWord)fn_generic;
509 if (fn_spec) {
510 fn_spec_entry = (HWord)fn_spec;
511 } else {
512 fn_spec_entry = (HWord)NULL;
513 }
514 }
515
516 IRExpr* callexpr = NULL;
517 if (fn_spec) {
518 callexpr = mkIRExprCCall(
519 Ity_I32, 1/*regparms*/,
520 nm_spec, (void*)fn_spec_entry,
521 mkIRExprVec_1(
522 mkIRExpr_HWord( (HWord)first_w32 )
523 )
524 );
525 } else {
526 callexpr = mkIRExprCCall(
527 Ity_I32, 2/*regparms*/,
528 nm_generic, (void*)fn_generic_entry,
529 mkIRExprVec_2(
530 mkIRExpr_HWord( (HWord)first_w32 ),
531 mkIRExpr_HWord( (HWord)w32s_to_check )
532 )
533 );
534 }
535
536 irsb->stmts[selfcheck_idx+4]
537 = IRStmt_Exit(
538 IRExpr_Binop(
539 Iop_CmpNE32,
540 callexpr,
541 IRExpr_Const(IRConst_U32(expected32))
542 ),
543 Ijk_TInval,
544 guest_IP_bbstart_IRConst
545 );
546 }
547 }
548
549 return irsb;
550 }
551
552
553 /*-------------------------------------------------------------
554 A support routine for doing self-checking translations.
555 -------------------------------------------------------------*/
556
557 /* CLEAN HELPER */
558 /* CALLED FROM GENERATED CODE */
559
/* Compute a checksum of host memory, as fast as possible.  Every
   routine below is handed first_w32, the address of the first 32-bit
   word to scan, which bb_to_IR has already rounded down to a
   4-aligned address.  genericg_compute_checksum_4al handles any
   number of words (down to zero), whilst each of the
   genericg_compute_checksum_4al_N versions handles exactly N words,
   for N in 1 .. 12.  One of these fns is called once for every use
   of a self-checking translation, so it needs to be as fast as
   possible. */
567
/* Rotate w left by n bit positions and return the result.  The count
   is taken modulo 32, so any n (including 0 and negative values) is
   safe.  Assumes UInt is a 32-bit unsigned type, as the name of this
   helper implies -- TODO confirm against libvex_basictypes.h. */
static inline UInt ROL32 ( UInt w, Int n ) {
   /* Mask both shift counts into 0 .. 31.  Shifting a 32-bit value by
      32 (which the unmasked form "w >> (32-n)" does when n == 0), or
      by a negative amount, is undefined behaviour in C.  For n in
      1 .. 31 the result is exactly what the unmasked form gives. */
   UInt k = (UInt)n & 31;
   return (w << k) | (w >> ((32 - k) & 31));
}
572
573 __attribute((regparm(2)))
genericg_compute_checksum_4al(HWord first_w32,HWord n_w32s)574 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
575 {
576 UInt sum1 = 0, sum2 = 0;
577 UInt* p = (UInt*)first_w32;
578 /* unrolled */
579 while (n_w32s >= 4) {
580 UInt w;
581 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
582 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
583 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
584 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
585 p += 4;
586 n_w32s -= 4;
587 sum1 ^= sum2;
588 }
589 while (n_w32s >= 1) {
590 UInt w;
591 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
592 p += 1;
593 n_w32s -= 1;
594 sum1 ^= sum2;
595 }
596 return sum1 + sum2;
597 }
598
599 /* Specialised versions of the above function */
600
601 __attribute__((regparm(1)))
genericg_compute_checksum_4al_1(HWord first_w32)602 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
603 {
604 UInt sum1 = 0, sum2 = 0;
605 UInt* p = (UInt*)first_w32;
606 UInt w;
607 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
608 sum1 ^= sum2;
609 return sum1 + sum2;
610 }
611
612 __attribute__((regparm(1)))
genericg_compute_checksum_4al_2(HWord first_w32)613 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
614 {
615 UInt sum1 = 0, sum2 = 0;
616 UInt* p = (UInt*)first_w32;
617 UInt w;
618 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
619 sum1 ^= sum2;
620 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
621 sum1 ^= sum2;
622 return sum1 + sum2;
623 }
624
625 __attribute__((regparm(1)))
genericg_compute_checksum_4al_3(HWord first_w32)626 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
627 {
628 UInt sum1 = 0, sum2 = 0;
629 UInt* p = (UInt*)first_w32;
630 UInt w;
631 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
632 sum1 ^= sum2;
633 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
634 sum1 ^= sum2;
635 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
636 sum1 ^= sum2;
637 return sum1 + sum2;
638 }
639
640 __attribute__((regparm(1)))
genericg_compute_checksum_4al_4(HWord first_w32)641 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
642 {
643 UInt sum1 = 0, sum2 = 0;
644 UInt* p = (UInt*)first_w32;
645 UInt w;
646 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
647 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
648 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
649 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
650 sum1 ^= sum2;
651 return sum1 + sum2;
652 }
653
654 __attribute__((regparm(1)))
genericg_compute_checksum_4al_5(HWord first_w32)655 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
656 {
657 UInt sum1 = 0, sum2 = 0;
658 UInt* p = (UInt*)first_w32;
659 UInt w;
660 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
661 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
662 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
663 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
664 sum1 ^= sum2;
665 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
666 sum1 ^= sum2;
667 return sum1 + sum2;
668 }
669
670 __attribute__((regparm(1)))
genericg_compute_checksum_4al_6(HWord first_w32)671 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
672 {
673 UInt sum1 = 0, sum2 = 0;
674 UInt* p = (UInt*)first_w32;
675 UInt w;
676 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
677 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
678 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
679 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
680 sum1 ^= sum2;
681 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
682 sum1 ^= sum2;
683 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
684 sum1 ^= sum2;
685 return sum1 + sum2;
686 }
687
688 __attribute__((regparm(1)))
genericg_compute_checksum_4al_7(HWord first_w32)689 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
690 {
691 UInt sum1 = 0, sum2 = 0;
692 UInt* p = (UInt*)first_w32;
693 UInt w;
694 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
695 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
696 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
697 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
698 sum1 ^= sum2;
699 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
700 sum1 ^= sum2;
701 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
702 sum1 ^= sum2;
703 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
704 sum1 ^= sum2;
705 return sum1 + sum2;
706 }
707
708 __attribute__((regparm(1)))
genericg_compute_checksum_4al_8(HWord first_w32)709 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
710 {
711 UInt sum1 = 0, sum2 = 0;
712 UInt* p = (UInt*)first_w32;
713 UInt w;
714 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
715 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
716 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
717 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
718 sum1 ^= sum2;
719 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
720 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
721 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
722 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
723 sum1 ^= sum2;
724 return sum1 + sum2;
725 }
726
727 __attribute__((regparm(1)))
genericg_compute_checksum_4al_9(HWord first_w32)728 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
729 {
730 UInt sum1 = 0, sum2 = 0;
731 UInt* p = (UInt*)first_w32;
732 UInt w;
733 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
734 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
735 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
736 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
737 sum1 ^= sum2;
738 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
739 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
740 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
741 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
742 sum1 ^= sum2;
743 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
744 sum1 ^= sum2;
745 return sum1 + sum2;
746 }
747
748 __attribute__((regparm(1)))
genericg_compute_checksum_4al_10(HWord first_w32)749 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
750 {
751 UInt sum1 = 0, sum2 = 0;
752 UInt* p = (UInt*)first_w32;
753 UInt w;
754 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
755 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
756 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
757 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
758 sum1 ^= sum2;
759 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
760 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
761 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
762 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
763 sum1 ^= sum2;
764 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
765 sum1 ^= sum2;
766 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
767 sum1 ^= sum2;
768 return sum1 + sum2;
769 }
770
771 __attribute__((regparm(1)))
genericg_compute_checksum_4al_11(HWord first_w32)772 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
773 {
774 UInt sum1 = 0, sum2 = 0;
775 UInt* p = (UInt*)first_w32;
776 UInt w;
777 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
778 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
779 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
780 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
781 sum1 ^= sum2;
782 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
783 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
784 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
785 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
786 sum1 ^= sum2;
787 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
788 sum1 ^= sum2;
789 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
790 sum1 ^= sum2;
791 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
792 sum1 ^= sum2;
793 return sum1 + sum2;
794 }
795
796 __attribute__((regparm(1)))
genericg_compute_checksum_4al_12(HWord first_w32)797 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
798 {
799 UInt sum1 = 0, sum2 = 0;
800 UInt* p = (UInt*)first_w32;
801 UInt w;
802 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
803 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
804 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
805 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
806 sum1 ^= sum2;
807 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
808 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
809 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
810 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
811 sum1 ^= sum2;
812 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
813 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
814 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
815 w = p[11]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
816 sum1 ^= sum2;
817 return sum1 + sum2;
818 }
819
820 /*--------------------------------------------------------------------*/
821 /*--- end guest_generic_bb_to_IR.c ---*/
822 /*--------------------------------------------------------------------*/
823