• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "threefish_api.h"
2 
/**
 * threefish_encrypt_256() - encrypt one block of four 64-bit words
 * @key_ctx: supplies the key words key[0..4] and tweak words tweak[0..2]
 * @input:   four 64-bit words to encrypt
 * @output:  receives the four encrypted 64-bit words
 *
 * Runs 72 rounds.  Before every fourth round (and once more after the last
 * round) subkey number s is added to the state:
 *
 *	b0 += key[s % 5]
 *	b1 += key[(s + 1) % 5] + tweak[s % 3]
 *	b2 += key[(s + 2) % 5] + tweak[(s + 1) % 3]
 *	b3 += key[(s + 3) % 5] + s
 *
 * Each round performs two independent add/rotate/xor steps.  Even rounds
 * pair (b0, b1) and (b2, b3); odd rounds pair (b0, b3) and (b2, b1).  The
 * rotation amounts repeat every eight rounds.
 *
 * All of @input, key and tweak are copied to locals before @output is
 * written, so @output may alias @input.
 *
 * NOTE(review): key[4] and tweak[2] are presumably the extended key/tweak
 * words precomputed by the caller - confirm against threefish_api.h.
 */
void threefish_encrypt_256(struct threefish_key *key_ctx, u64 *input,
			   u64 *output)
{
	enum {
		TF256_WORDS = 4,
		TF256_KEY_WORDS = 5,
		TF256_TWEAK_WORDS = 3,
		TF256_ROUNDS_PER_SUBKEY = 4,
		TF256_LAST_SUBKEY = 18
	};
	/* rot[round % 8][0] rotates b0's partner, [1] rotates b2's partner. */
	static const unsigned char rot[8][2] = {
		{ 14, 16 }, { 52, 57 }, { 23, 40 }, {  5, 37 },
		{ 25, 33 }, { 46, 12 }, { 58, 22 }, { 32, 32 },
	};
	u64 b[TF256_WORDS];
	u64 k[TF256_KEY_WORDS];
	u64 t[TF256_TWEAK_WORDS];
	unsigned int i, j, s;

	for (i = 0; i < TF256_WORDS; i++)
		b[i] = input[i];
	for (i = 0; i < TF256_KEY_WORDS; i++)
		k[i] = key_ctx->key[i];
	for (i = 0; i < TF256_TWEAK_WORDS; i++)
		t[i] = key_ctx->tweak[i];

	for (s = 0; ; s++) {
		/* Subkey injection (additions wrap modulo 2^64). */
		b[0] += k[s % TF256_KEY_WORDS];
		b[1] += k[(s + 1) % TF256_KEY_WORDS] +
			t[s % TF256_TWEAK_WORDS];
		b[2] += k[(s + 2) % TF256_KEY_WORDS] +
			t[(s + 1) % TF256_TWEAK_WORDS];
		b[3] += k[(s + 3) % TF256_KEY_WORDS] + (u64)s;

		/* The final subkey is added after the last round. */
		if (s == TF256_LAST_SUBKEY)
			break;

		for (j = 0; j < TF256_ROUNDS_PER_SUBKEY; j++) {
			unsigned int round = TF256_ROUNDS_PER_SUBKEY * s + j;
			const unsigned char *r = rot[round & 7];
			/* Partner of b0: b1 on even rounds, b3 on odd ones. */
			unsigned int x = (round & 1) ? 3 : 1;
			/* Partner of b2 is whichever of b1/b3 is left. */
			unsigned int y = 4 - x;

			/* Rotation amounts are 5..58, so no shift reaches 64. */
			b[0] += b[x];
			b[x] = ((b[x] << r[0]) | (b[x] >> (64 - r[0]))) ^ b[0];

			b[2] += b[y];
			b[y] = ((b[y] << r[1]) | (b[y] >> (64 - r[1]))) ^ b[2];
		}
	}

	for (i = 0; i < TF256_WORDS; i++)
		output[i] = b[i];
}
495 
/**
 * threefish_decrypt_256() - decrypt one block of four 64-bit words
 * @key_ctx: supplies the key words key[0..4] and tweak words tweak[0..2]
 * @input:   four 64-bit words to decrypt
 * @output:  receives the four decrypted 64-bit words
 *
 * Undoes 72 rounds in reverse order (round 71 down to round 0).  Subkey
 * number s, for s = 18 down to 0, is subtracted from the state before the
 * four rounds below it are undone:
 *
 *	b0 -= key[s % 5]
 *	b1 -= key[(s + 1) % 5] + tweak[s % 3]
 *	b2 -= key[(s + 2) % 5] + tweak[(s + 1) % 3]
 *	b3 -= key[(s + 3) % 5] + s
 *
 * Undoing a round means: xor the pair, rotate right, then subtract.  Even
 * rounds pair (b0, b1) and (b2, b3); odd rounds pair (b0, b3) and (b2, b1).
 * The rotation amounts repeat every eight rounds.
 *
 * All of @input, key and tweak are copied to locals before @output is
 * written, so @output may alias @input.
 *
 * NOTE(review): key[4] and tweak[2] are presumably the extended key/tweak
 * words precomputed by the caller - confirm against threefish_api.h.
 */
void threefish_decrypt_256(struct threefish_key *key_ctx, u64 *input,
			   u64 *output)
{
	enum {
		TF256_WORDS = 4,
		TF256_KEY_WORDS = 5,
		TF256_TWEAK_WORDS = 3,
		TF256_ROUNDS_PER_SUBKEY = 4,
		TF256_LAST_SUBKEY = 18
	};
	/* rot[round % 8][0] rotates b0's partner, [1] rotates b2's partner. */
	static const unsigned char rot[8][2] = {
		{ 14, 16 }, { 52, 57 }, { 23, 40 }, {  5, 37 },
		{ 25, 33 }, { 46, 12 }, { 58, 22 }, { 32, 32 },
	};
	u64 b[TF256_WORDS];
	u64 k[TF256_KEY_WORDS];
	u64 t[TF256_TWEAK_WORDS];
	u64 tmp;
	unsigned int i, j, s;

	for (i = 0; i < TF256_WORDS; i++)
		b[i] = input[i];
	for (i = 0; i < TF256_KEY_WORDS; i++)
		k[i] = key_ctx->key[i];
	for (i = 0; i < TF256_TWEAK_WORDS; i++)
		t[i] = key_ctx->tweak[i];

	for (s = TF256_LAST_SUBKEY; ; s--) {
		/* Remove subkey s (subtractions wrap modulo 2^64). */
		b[0] -= k[s % TF256_KEY_WORDS];
		b[1] -= k[(s + 1) % TF256_KEY_WORDS] +
			t[s % TF256_TWEAK_WORDS];
		b[2] -= k[(s + 2) % TF256_KEY_WORDS] +
			t[(s + 1) % TF256_TWEAK_WORDS];
		b[3] -= k[(s + 3) % TF256_KEY_WORDS] + (u64)s;

		/* Subkey 0 is the last thing to remove. */
		if (s == 0)
			break;

		/* Undo rounds 4*(s-1)+3 down to 4*(s-1). */
		for (j = TF256_ROUNDS_PER_SUBKEY; j-- > 0; ) {
			unsigned int round =
				TF256_ROUNDS_PER_SUBKEY * (s - 1) + j;
			const unsigned char *r = rot[round & 7];
			/* Partner of b0: b1 on even rounds, b3 on odd ones. */
			unsigned int x = (round & 1) ? 3 : 1;
			/* Partner of b2 is whichever of b1/b3 is left. */
			unsigned int y = 4 - x;

			/* Rotation amounts are 5..58, so no shift reaches 64. */
			tmp = b[x] ^ b[0];
			b[x] = (tmp >> r[0]) | (tmp << (64 - r[0]));
			b[0] -= b[x];

			tmp = b[y] ^ b[2];
			b[y] = (tmp >> r[1]) | (tmp << (64 - r[1]));
			b[2] -= b[y];
		}
	}

	for (i = 0; i < TF256_WORDS; i++)
		output[i] = b[i];
}
1138 
threefish_encrypt_512(struct threefish_key * key_ctx,u64 * input,u64 * output)1139 void threefish_encrypt_512(struct threefish_key *key_ctx, u64 *input,
1140 			   u64 *output)
1141 {
1142 	u64 b0 = input[0], b1 = input[1],
1143 	    b2 = input[2], b3 = input[3],
1144 	    b4 = input[4], b5 = input[5],
1145 	    b6 = input[6], b7 = input[7];
1146 	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
1147 	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
1148 	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
1149 	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
1150 	    k8 = key_ctx->key[8];
1151 	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
1152 	    t2 = key_ctx->tweak[2];
1153 
1154 	b1 += k1;
1155 	b0 += b1 + k0;
1156 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1157 
1158 	b3 += k3;
1159 	b2 += b3 + k2;
1160 	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1161 
1162 	b5 += k5 + t0;
1163 	b4 += b5 + k4;
1164 	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1165 
1166 	b7 += k7;
1167 	b6 += b7 + k6 + t1;
1168 	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1169 
1170 	b2 += b1;
1171 	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1172 
1173 	b4 += b7;
1174 	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1175 
1176 	b6 += b5;
1177 	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1178 
1179 	b0 += b3;
1180 	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1181 
1182 	b4 += b1;
1183 	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1184 
1185 	b6 += b3;
1186 	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1187 
1188 	b0 += b5;
1189 	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1190 
1191 	b2 += b7;
1192 	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1193 
1194 	b6 += b1;
1195 	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1196 
1197 	b0 += b7;
1198 	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1199 
1200 	b2 += b5;
1201 	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1202 
1203 	b4 += b3;
1204 	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1205 
1206 	b1 += k2;
1207 	b0 += b1 + k1;
1208 	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1209 
1210 	b3 += k4;
1211 	b2 += b3 + k3;
1212 	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1213 
1214 	b5 += k6 + t1;
1215 	b4 += b5 + k5;
1216 	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1217 
1218 	b7 += k8 + 1;
1219 	b6 += b7 + k7 + t2;
1220 	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1221 
1222 	b2 += b1;
1223 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1224 
1225 	b4 += b7;
1226 	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1227 
1228 	b6 += b5;
1229 	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1230 
1231 	b0 += b3;
1232 	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1233 
1234 	b4 += b1;
1235 	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1236 
1237 	b6 += b3;
1238 	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1239 
1240 	b0 += b5;
1241 	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1242 
1243 	b2 += b7;
1244 	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1245 
1246 	b6 += b1;
1247 	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1248 
1249 	b0 += b7;
1250 	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1251 
1252 	b2 += b5;
1253 	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1254 
1255 	b4 += b3;
1256 	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1257 
1258 	b1 += k3;
1259 	b0 += b1 + k2;
1260 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1261 
1262 	b3 += k5;
1263 	b2 += b3 + k4;
1264 	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1265 
1266 	b5 += k7 + t2;
1267 	b4 += b5 + k6;
1268 	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1269 
1270 	b7 += k0 + 2;
1271 	b6 += b7 + k8 + t0;
1272 	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1273 
1274 	b2 += b1;
1275 	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1276 
1277 	b4 += b7;
1278 	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1279 
1280 	b6 += b5;
1281 	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1282 
1283 	b0 += b3;
1284 	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1285 
1286 	b4 += b1;
1287 	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1288 
1289 	b6 += b3;
1290 	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1291 
1292 	b0 += b5;
1293 	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1294 
1295 	b2 += b7;
1296 	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1297 
1298 	b6 += b1;
1299 	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1300 
1301 	b0 += b7;
1302 	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1303 
1304 	b2 += b5;
1305 	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1306 
1307 	b4 += b3;
1308 	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1309 
1310 	b1 += k4;
1311 	b0 += b1 + k3;
1312 	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1313 
1314 	b3 += k6;
1315 	b2 += b3 + k5;
1316 	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1317 
1318 	b5 += k8 + t0;
1319 	b4 += b5 + k7;
1320 	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1321 
1322 	b7 += k1 + 3;
1323 	b6 += b7 + k0 + t1;
1324 	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1325 
1326 	b2 += b1;
1327 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1328 
1329 	b4 += b7;
1330 	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1331 
1332 	b6 += b5;
1333 	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1334 
1335 	b0 += b3;
1336 	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1337 
1338 	b4 += b1;
1339 	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1340 
1341 	b6 += b3;
1342 	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1343 
1344 	b0 += b5;
1345 	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1346 
1347 	b2 += b7;
1348 	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1349 
1350 	b6 += b1;
1351 	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1352 
1353 	b0 += b7;
1354 	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1355 
1356 	b2 += b5;
1357 	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1358 
1359 	b4 += b3;
1360 	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1361 
1362 	b1 += k5;
1363 	b0 += b1 + k4;
1364 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1365 
1366 	b3 += k7;
1367 	b2 += b3 + k6;
1368 	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1369 
1370 	b5 += k0 + t1;
1371 	b4 += b5 + k8;
1372 	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1373 
1374 	b7 += k2 + 4;
1375 	b6 += b7 + k1 + t2;
1376 	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1377 
1378 	b2 += b1;
1379 	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1380 
1381 	b4 += b7;
1382 	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1383 
1384 	b6 += b5;
1385 	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1386 
1387 	b0 += b3;
1388 	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1389 
1390 	b4 += b1;
1391 	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1392 
1393 	b6 += b3;
1394 	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1395 
1396 	b0 += b5;
1397 	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1398 
1399 	b2 += b7;
1400 	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1401 
1402 	b6 += b1;
1403 	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1404 
1405 	b0 += b7;
1406 	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1407 
1408 	b2 += b5;
1409 	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1410 
1411 	b4 += b3;
1412 	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1413 
1414 	b1 += k6;
1415 	b0 += b1 + k5;
1416 	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1417 
1418 	b3 += k8;
1419 	b2 += b3 + k7;
1420 	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1421 
1422 	b5 += k1 + t2;
1423 	b4 += b5 + k0;
1424 	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1425 
1426 	b7 += k3 + 5;
1427 	b6 += b7 + k2 + t0;
1428 	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1429 
1430 	b2 += b1;
1431 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1432 
1433 	b4 += b7;
1434 	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1435 
1436 	b6 += b5;
1437 	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1438 
1439 	b0 += b3;
1440 	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1441 
1442 	b4 += b1;
1443 	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1444 
1445 	b6 += b3;
1446 	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1447 
1448 	b0 += b5;
1449 	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1450 
1451 	b2 += b7;
1452 	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1453 
1454 	b6 += b1;
1455 	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1456 
1457 	b0 += b7;
1458 	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1459 
1460 	b2 += b5;
1461 	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1462 
1463 	b4 += b3;
1464 	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1465 
1466 	b1 += k7;
1467 	b0 += b1 + k6;
1468 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1469 
1470 	b3 += k0;
1471 	b2 += b3 + k8;
1472 	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1473 
1474 	b5 += k2 + t0;
1475 	b4 += b5 + k1;
1476 	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1477 
1478 	b7 += k4 + 6;
1479 	b6 += b7 + k3 + t1;
1480 	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1481 
1482 	b2 += b1;
1483 	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1484 
1485 	b4 += b7;
1486 	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1487 
1488 	b6 += b5;
1489 	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1490 
1491 	b0 += b3;
1492 	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1493 
1494 	b4 += b1;
1495 	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1496 
1497 	b6 += b3;
1498 	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1499 
1500 	b0 += b5;
1501 	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1502 
1503 	b2 += b7;
1504 	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1505 
1506 	b6 += b1;
1507 	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1508 
1509 	b0 += b7;
1510 	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1511 
1512 	b2 += b5;
1513 	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1514 
1515 	b4 += b3;
1516 	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1517 
1518 	b1 += k8;
1519 	b0 += b1 + k7;
1520 	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1521 
1522 	b3 += k1;
1523 	b2 += b3 + k0;
1524 	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1525 
1526 	b5 += k3 + t1;
1527 	b4 += b5 + k2;
1528 	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1529 
1530 	b7 += k5 + 7;
1531 	b6 += b7 + k4 + t2;
1532 	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1533 
1534 	b2 += b1;
1535 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1536 
1537 	b4 += b7;
1538 	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1539 
1540 	b6 += b5;
1541 	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1542 
1543 	b0 += b3;
1544 	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1545 
1546 	b4 += b1;
1547 	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1548 
1549 	b6 += b3;
1550 	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1551 
1552 	b0 += b5;
1553 	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1554 
1555 	b2 += b7;
1556 	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1557 
1558 	b6 += b1;
1559 	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1560 
1561 	b0 += b7;
1562 	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1563 
1564 	b2 += b5;
1565 	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1566 
1567 	b4 += b3;
1568 	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1569 
1570 	b1 += k0;
1571 	b0 += b1 + k8;
1572 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1573 
1574 	b3 += k2;
1575 	b2 += b3 + k1;
1576 	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1577 
1578 	b5 += k4 + t2;
1579 	b4 += b5 + k3;
1580 	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1581 
1582 	b7 += k6 + 8;
1583 	b6 += b7 + k5 + t0;
1584 	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1585 
1586 	b2 += b1;
1587 	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1588 
1589 	b4 += b7;
1590 	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1591 
1592 	b6 += b5;
1593 	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1594 
1595 	b0 += b3;
1596 	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1597 
1598 	b4 += b1;
1599 	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1600 
1601 	b6 += b3;
1602 	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1603 
1604 	b0 += b5;
1605 	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1606 
1607 	b2 += b7;
1608 	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1609 
1610 	b6 += b1;
1611 	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1612 
1613 	b0 += b7;
1614 	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1615 
1616 	b2 += b5;
1617 	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1618 
1619 	b4 += b3;
1620 	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1621 
1622 	b1 += k1;
1623 	b0 += b1 + k0;
1624 	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1625 
1626 	b3 += k3;
1627 	b2 += b3 + k2;
1628 	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1629 
1630 	b5 += k5 + t0;
1631 	b4 += b5 + k4;
1632 	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1633 
1634 	b7 += k7 + 9;
1635 	b6 += b7 + k6 + t1;
1636 	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1637 
1638 	b2 += b1;
1639 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1640 
1641 	b4 += b7;
1642 	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1643 
1644 	b6 += b5;
1645 	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1646 
1647 	b0 += b3;
1648 	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1649 
1650 	b4 += b1;
1651 	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1652 
1653 	b6 += b3;
1654 	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1655 
1656 	b0 += b5;
1657 	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1658 
1659 	b2 += b7;
1660 	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1661 
1662 	b6 += b1;
1663 	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1664 
1665 	b0 += b7;
1666 	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1667 
1668 	b2 += b5;
1669 	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1670 
1671 	b4 += b3;
1672 	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1673 
1674 	b1 += k2;
1675 	b0 += b1 + k1;
1676 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1677 
1678 	b3 += k4;
1679 	b2 += b3 + k3;
1680 	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1681 
1682 	b5 += k6 + t1;
1683 	b4 += b5 + k5;
1684 	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1685 
1686 	b7 += k8 + 10;
1687 	b6 += b7 + k7 + t2;
1688 	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1689 
1690 	b2 += b1;
1691 	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1692 
1693 	b4 += b7;
1694 	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1695 
1696 	b6 += b5;
1697 	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1698 
1699 	b0 += b3;
1700 	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1701 
1702 	b4 += b1;
1703 	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1704 
1705 	b6 += b3;
1706 	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1707 
1708 	b0 += b5;
1709 	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1710 
1711 	b2 += b7;
1712 	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1713 
1714 	b6 += b1;
1715 	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1716 
1717 	b0 += b7;
1718 	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1719 
1720 	b2 += b5;
1721 	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1722 
1723 	b4 += b3;
1724 	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1725 
1726 	b1 += k3;
1727 	b0 += b1 + k2;
1728 	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1729 
1730 	b3 += k5;
1731 	b2 += b3 + k4;
1732 	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1733 
1734 	b5 += k7 + t2;
1735 	b4 += b5 + k6;
1736 	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1737 
1738 	b7 += k0 + 11;
1739 	b6 += b7 + k8 + t0;
1740 	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1741 
1742 	b2 += b1;
1743 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1744 
1745 	b4 += b7;
1746 	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1747 
1748 	b6 += b5;
1749 	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1750 
1751 	b0 += b3;
1752 	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1753 
1754 	b4 += b1;
1755 	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1756 
1757 	b6 += b3;
1758 	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1759 
1760 	b0 += b5;
1761 	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1762 
1763 	b2 += b7;
1764 	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1765 
1766 	b6 += b1;
1767 	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1768 
1769 	b0 += b7;
1770 	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1771 
1772 	b2 += b5;
1773 	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1774 
1775 	b4 += b3;
1776 	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1777 
1778 	b1 += k4;
1779 	b0 += b1 + k3;
1780 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1781 
1782 	b3 += k6;
1783 	b2 += b3 + k5;
1784 	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1785 
1786 	b5 += k8 + t0;
1787 	b4 += b5 + k7;
1788 	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1789 
1790 	b7 += k1 + 12;
1791 	b6 += b7 + k0 + t1;
1792 	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1793 
1794 	b2 += b1;
1795 	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1796 
1797 	b4 += b7;
1798 	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1799 
1800 	b6 += b5;
1801 	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1802 
1803 	b0 += b3;
1804 	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1805 
1806 	b4 += b1;
1807 	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1808 
1809 	b6 += b3;
1810 	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1811 
1812 	b0 += b5;
1813 	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1814 
1815 	b2 += b7;
1816 	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1817 
1818 	b6 += b1;
1819 	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1820 
1821 	b0 += b7;
1822 	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1823 
1824 	b2 += b5;
1825 	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1826 
1827 	b4 += b3;
1828 	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1829 
1830 	b1 += k5;
1831 	b0 += b1 + k4;
1832 	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1833 
1834 	b3 += k7;
1835 	b2 += b3 + k6;
1836 	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1837 
1838 	b5 += k0 + t1;
1839 	b4 += b5 + k8;
1840 	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1841 
1842 	b7 += k2 + 13;
1843 	b6 += b7 + k1 + t2;
1844 	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1845 
1846 	b2 += b1;
1847 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1848 
1849 	b4 += b7;
1850 	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1851 
1852 	b6 += b5;
1853 	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1854 
1855 	b0 += b3;
1856 	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1857 
1858 	b4 += b1;
1859 	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1860 
1861 	b6 += b3;
1862 	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1863 
1864 	b0 += b5;
1865 	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1866 
1867 	b2 += b7;
1868 	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1869 
1870 	b6 += b1;
1871 	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1872 
1873 	b0 += b7;
1874 	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1875 
1876 	b2 += b5;
1877 	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1878 
1879 	b4 += b3;
1880 	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1881 
1882 	b1 += k6;
1883 	b0 += b1 + k5;
1884 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1885 
1886 	b3 += k8;
1887 	b2 += b3 + k7;
1888 	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1889 
1890 	b5 += k1 + t2;
1891 	b4 += b5 + k0;
1892 	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1893 
1894 	b7 += k3 + 14;
1895 	b6 += b7 + k2 + t0;
1896 	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
1897 
1898 	b2 += b1;
1899 	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
1900 
1901 	b4 += b7;
1902 	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
1903 
1904 	b6 += b5;
1905 	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
1906 
1907 	b0 += b3;
1908 	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
1909 
1910 	b4 += b1;
1911 	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
1912 
1913 	b6 += b3;
1914 	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
1915 
1916 	b0 += b5;
1917 	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
1918 
1919 	b2 += b7;
1920 	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
1921 
1922 	b6 += b1;
1923 	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
1924 
1925 	b0 += b7;
1926 	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
1927 
1928 	b2 += b5;
1929 	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
1930 
1931 	b4 += b3;
1932 	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
1933 
1934 	b1 += k7;
1935 	b0 += b1 + k6;
1936 	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
1937 
1938 	b3 += k0;
1939 	b2 += b3 + k8;
1940 	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
1941 
1942 	b5 += k2 + t0;
1943 	b4 += b5 + k1;
1944 	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
1945 
1946 	b7 += k4 + 15;
1947 	b6 += b7 + k3 + t1;
1948 	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
1949 
1950 	b2 += b1;
1951 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
1952 
1953 	b4 += b7;
1954 	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
1955 
1956 	b6 += b5;
1957 	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
1958 
1959 	b0 += b3;
1960 	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
1961 
1962 	b4 += b1;
1963 	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
1964 
1965 	b6 += b3;
1966 	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
1967 
1968 	b0 += b5;
1969 	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
1970 
1971 	b2 += b7;
1972 	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
1973 
1974 	b6 += b1;
1975 	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
1976 
1977 	b0 += b7;
1978 	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
1979 
1980 	b2 += b5;
1981 	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
1982 
1983 	b4 += b3;
1984 	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
1985 
1986 	b1 += k8;
1987 	b0 += b1 + k7;
1988 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
1989 
1990 	b3 += k1;
1991 	b2 += b3 + k0;
1992 	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
1993 
1994 	b5 += k3 + t1;
1995 	b4 += b5 + k2;
1996 	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
1997 
1998 	b7 += k5 + 16;
1999 	b6 += b7 + k4 + t2;
2000 	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
2001 
2002 	b2 += b1;
2003 	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
2004 
2005 	b4 += b7;
2006 	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
2007 
2008 	b6 += b5;
2009 	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
2010 
2011 	b0 += b3;
2012 	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
2013 
2014 	b4 += b1;
2015 	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
2016 
2017 	b6 += b3;
2018 	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
2019 
2020 	b0 += b5;
2021 	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
2022 
2023 	b2 += b7;
2024 	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
2025 
2026 	b6 += b1;
2027 	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
2028 
2029 	b0 += b7;
2030 	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
2031 
2032 	b2 += b5;
2033 	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
2034 
2035 	b4 += b3;
2036 	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
2037 
2038 	b1 += k0;
2039 	b0 += b1 + k8;
2040 	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
2041 
2042 	b3 += k2;
2043 	b2 += b3 + k1;
2044 	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
2045 
2046 	b5 += k4 + t2;
2047 	b4 += b5 + k3;
2048 	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
2049 
2050 	b7 += k6 + 17;
2051 	b6 += b7 + k5 + t0;
2052 	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
2053 
2054 	b2 += b1;
2055 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
2056 
2057 	b4 += b7;
2058 	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
2059 
2060 	b6 += b5;
2061 	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
2062 
2063 	b0 += b3;
2064 	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
2065 
2066 	b4 += b1;
2067 	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
2068 
2069 	b6 += b3;
2070 	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
2071 
2072 	b0 += b5;
2073 	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
2074 
2075 	b2 += b7;
2076 	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
2077 
2078 	b6 += b1;
2079 	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
2080 
2081 	b0 += b7;
2082 	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
2083 
2084 	b2 += b5;
2085 	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
2086 
2087 	b4 += b3;
2088 	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
2089 
2090 	output[0] = b0 + k0;
2091 	output[1] = b1 + k1;
2092 	output[2] = b2 + k2;
2093 	output[3] = b3 + k3;
2094 	output[4] = b4 + k4;
2095 	output[5] = b5 + k5 + t0;
2096 	output[6] = b6 + k6 + t1;
2097 	output[7] = b7 + k7 + 18;
2098 }
2099 
2100 void threefish_decrypt_512(struct threefish_key *key_ctx, u64 *input,
2101 			   u64 *output)
2102 {
2103 	u64 b0 = input[0], b1 = input[1],
2104 	    b2 = input[2], b3 = input[3],
2105 	    b4 = input[4], b5 = input[5],
2106 	    b6 = input[6], b7 = input[7];
2107 	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
2108 	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
2109 	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
2110 	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
2111 	    k8 = key_ctx->key[8];
2112 	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
2113 	    t2 = key_ctx->tweak[2];
2114 
2115 	u64 tmp;
2116 
2117 	b0 -= k0;
2118 	b1 -= k1;
2119 	b2 -= k2;
2120 	b3 -= k3;
2121 	b4 -= k4;
2122 	b5 -= k5 + t0;
2123 	b6 -= k6 + t1;
2124 	b7 -= k7 + 18;
2125 
2126 	tmp = b3 ^ b4;
2127 	b3 = (tmp >> 22) | (tmp << (64 - 22));
2128 	b4 -= b3;
2129 
2130 	tmp = b5 ^ b2;
2131 	b5 = (tmp >> 56) | (tmp << (64 - 56));
2132 	b2 -= b5;
2133 
2134 	tmp = b7 ^ b0;
2135 	b7 = (tmp >> 35) | (tmp << (64 - 35));
2136 	b0 -= b7;
2137 
2138 	tmp = b1 ^ b6;
2139 	b1 = (tmp >> 8) | (tmp << (64 - 8));
2140 	b6 -= b1;
2141 
2142 	tmp = b7 ^ b2;
2143 	b7 = (tmp >> 43) | (tmp << (64 - 43));
2144 	b2 -= b7;
2145 
2146 	tmp = b5 ^ b0;
2147 	b5 = (tmp >> 39) | (tmp << (64 - 39));
2148 	b0 -= b5;
2149 
2150 	tmp = b3 ^ b6;
2151 	b3 = (tmp >> 29) | (tmp << (64 - 29));
2152 	b6 -= b3;
2153 
2154 	tmp = b1 ^ b4;
2155 	b1 = (tmp >> 25) | (tmp << (64 - 25));
2156 	b4 -= b1;
2157 
2158 	tmp = b3 ^ b0;
2159 	b3 = (tmp >> 17) | (tmp << (64 - 17));
2160 	b0 -= b3;
2161 
2162 	tmp = b5 ^ b6;
2163 	b5 = (tmp >> 10) | (tmp << (64 - 10));
2164 	b6 -= b5;
2165 
2166 	tmp = b7 ^ b4;
2167 	b7 = (tmp >> 50) | (tmp << (64 - 50));
2168 	b4 -= b7;
2169 
2170 	tmp = b1 ^ b2;
2171 	b1 = (tmp >> 13) | (tmp << (64 - 13));
2172 	b2 -= b1;
2173 
2174 	tmp = b7 ^ b6;
2175 	b7 = (tmp >> 24) | (tmp << (64 - 24));
2176 	b6 -= b7 + k5 + t0;
2177 	b7 -= k6 + 17;
2178 
2179 	tmp = b5 ^ b4;
2180 	b5 = (tmp >> 34) | (tmp << (64 - 34));
2181 	b4 -= b5 + k3;
2182 	b5 -= k4 + t2;
2183 
2184 	tmp = b3 ^ b2;
2185 	b3 = (tmp >> 30) | (tmp << (64 - 30));
2186 	b2 -= b3 + k1;
2187 	b3 -= k2;
2188 
2189 	tmp = b1 ^ b0;
2190 	b1 = (tmp >> 39) | (tmp << (64 - 39));
2191 	b0 -= b1 + k8;
2192 	b1 -= k0;
2193 
2194 	tmp = b3 ^ b4;
2195 	b3 = (tmp >> 56) | (tmp << (64 - 56));
2196 	b4 -= b3;
2197 
2198 	tmp = b5 ^ b2;
2199 	b5 = (tmp >> 54) | (tmp << (64 - 54));
2200 	b2 -= b5;
2201 
2202 	tmp = b7 ^ b0;
2203 	b7 = (tmp >> 9) | (tmp << (64 - 9));
2204 	b0 -= b7;
2205 
2206 	tmp = b1 ^ b6;
2207 	b1 = (tmp >> 44) | (tmp << (64 - 44));
2208 	b6 -= b1;
2209 
2210 	tmp = b7 ^ b2;
2211 	b7 = (tmp >> 39) | (tmp << (64 - 39));
2212 	b2 -= b7;
2213 
2214 	tmp = b5 ^ b0;
2215 	b5 = (tmp >> 36) | (tmp << (64 - 36));
2216 	b0 -= b5;
2217 
2218 	tmp = b3 ^ b6;
2219 	b3 = (tmp >> 49) | (tmp << (64 - 49));
2220 	b6 -= b3;
2221 
2222 	tmp = b1 ^ b4;
2223 	b1 = (tmp >> 17) | (tmp << (64 - 17));
2224 	b4 -= b1;
2225 
2226 	tmp = b3 ^ b0;
2227 	b3 = (tmp >> 42) | (tmp << (64 - 42));
2228 	b0 -= b3;
2229 
2230 	tmp = b5 ^ b6;
2231 	b5 = (tmp >> 14) | (tmp << (64 - 14));
2232 	b6 -= b5;
2233 
2234 	tmp = b7 ^ b4;
2235 	b7 = (tmp >> 27) | (tmp << (64 - 27));
2236 	b4 -= b7;
2237 
2238 	tmp = b1 ^ b2;
2239 	b1 = (tmp >> 33) | (tmp << (64 - 33));
2240 	b2 -= b1;
2241 
2242 	tmp = b7 ^ b6;
2243 	b7 = (tmp >> 37) | (tmp << (64 - 37));
2244 	b6 -= b7 + k4 + t2;
2245 	b7 -= k5 + 16;
2246 
2247 	tmp = b5 ^ b4;
2248 	b5 = (tmp >> 19) | (tmp << (64 - 19));
2249 	b4 -= b5 + k2;
2250 	b5 -= k3 + t1;
2251 
2252 	tmp = b3 ^ b2;
2253 	b3 = (tmp >> 36) | (tmp << (64 - 36));
2254 	b2 -= b3 + k0;
2255 	b3 -= k1;
2256 
2257 	tmp = b1 ^ b0;
2258 	b1 = (tmp >> 46) | (tmp << (64 - 46));
2259 	b0 -= b1 + k7;
2260 	b1 -= k8;
2261 
2262 	tmp = b3 ^ b4;
2263 	b3 = (tmp >> 22) | (tmp << (64 - 22));
2264 	b4 -= b3;
2265 
2266 	tmp = b5 ^ b2;
2267 	b5 = (tmp >> 56) | (tmp << (64 - 56));
2268 	b2 -= b5;
2269 
2270 	tmp = b7 ^ b0;
2271 	b7 = (tmp >> 35) | (tmp << (64 - 35));
2272 	b0 -= b7;
2273 
2274 	tmp = b1 ^ b6;
2275 	b1 = (tmp >> 8) | (tmp << (64 - 8));
2276 	b6 -= b1;
2277 
2278 	tmp = b7 ^ b2;
2279 	b7 = (tmp >> 43) | (tmp << (64 - 43));
2280 	b2 -= b7;
2281 
2282 	tmp = b5 ^ b0;
2283 	b5 = (tmp >> 39) | (tmp << (64 - 39));
2284 	b0 -= b5;
2285 
2286 	tmp = b3 ^ b6;
2287 	b3 = (tmp >> 29) | (tmp << (64 - 29));
2288 	b6 -= b3;
2289 
2290 	tmp = b1 ^ b4;
2291 	b1 = (tmp >> 25) | (tmp << (64 - 25));
2292 	b4 -= b1;
2293 
2294 	tmp = b3 ^ b0;
2295 	b3 = (tmp >> 17) | (tmp << (64 - 17));
2296 	b0 -= b3;
2297 
2298 	tmp = b5 ^ b6;
2299 	b5 = (tmp >> 10) | (tmp << (64 - 10));
2300 	b6 -= b5;
2301 
2302 	tmp = b7 ^ b4;
2303 	b7 = (tmp >> 50) | (tmp << (64 - 50));
2304 	b4 -= b7;
2305 
2306 	tmp = b1 ^ b2;
2307 	b1 = (tmp >> 13) | (tmp << (64 - 13));
2308 	b2 -= b1;
2309 
2310 	tmp = b7 ^ b6;
2311 	b7 = (tmp >> 24) | (tmp << (64 - 24));
2312 	b6 -= b7 + k3 + t1;
2313 	b7 -= k4 + 15;
2314 
2315 	tmp = b5 ^ b4;
2316 	b5 = (tmp >> 34) | (tmp << (64 - 34));
2317 	b4 -= b5 + k1;
2318 	b5 -= k2 + t0;
2319 
2320 	tmp = b3 ^ b2;
2321 	b3 = (tmp >> 30) | (tmp << (64 - 30));
2322 	b2 -= b3 + k8;
2323 	b3 -= k0;
2324 
2325 	tmp = b1 ^ b0;
2326 	b1 = (tmp >> 39) | (tmp << (64 - 39));
2327 	b0 -= b1 + k6;
2328 	b1 -= k7;
2329 
2330 	tmp = b3 ^ b4;
2331 	b3 = (tmp >> 56) | (tmp << (64 - 56));
2332 	b4 -= b3;
2333 
2334 	tmp = b5 ^ b2;
2335 	b5 = (tmp >> 54) | (tmp << (64 - 54));
2336 	b2 -= b5;
2337 
2338 	tmp = b7 ^ b0;
2339 	b7 = (tmp >> 9) | (tmp << (64 - 9));
2340 	b0 -= b7;
2341 
2342 	tmp = b1 ^ b6;
2343 	b1 = (tmp >> 44) | (tmp << (64 - 44));
2344 	b6 -= b1;
2345 
2346 	tmp = b7 ^ b2;
2347 	b7 = (tmp >> 39) | (tmp << (64 - 39));
2348 	b2 -= b7;
2349 
2350 	tmp = b5 ^ b0;
2351 	b5 = (tmp >> 36) | (tmp << (64 - 36));
2352 	b0 -= b5;
2353 
2354 	tmp = b3 ^ b6;
2355 	b3 = (tmp >> 49) | (tmp << (64 - 49));
2356 	b6 -= b3;
2357 
2358 	tmp = b1 ^ b4;
2359 	b1 = (tmp >> 17) | (tmp << (64 - 17));
2360 	b4 -= b1;
2361 
2362 	tmp = b3 ^ b0;
2363 	b3 = (tmp >> 42) | (tmp << (64 - 42));
2364 	b0 -= b3;
2365 
2366 	tmp = b5 ^ b6;
2367 	b5 = (tmp >> 14) | (tmp << (64 - 14));
2368 	b6 -= b5;
2369 
2370 	tmp = b7 ^ b4;
2371 	b7 = (tmp >> 27) | (tmp << (64 - 27));
2372 	b4 -= b7;
2373 
2374 	tmp = b1 ^ b2;
2375 	b1 = (tmp >> 33) | (tmp << (64 - 33));
2376 	b2 -= b1;
2377 
2378 	tmp = b7 ^ b6;
2379 	b7 = (tmp >> 37) | (tmp << (64 - 37));
2380 	b6 -= b7 + k2 + t0;
2381 	b7 -= k3 + 14;
2382 
2383 	tmp = b5 ^ b4;
2384 	b5 = (tmp >> 19) | (tmp << (64 - 19));
2385 	b4 -= b5 + k0;
2386 	b5 -= k1 + t2;
2387 
2388 	tmp = b3 ^ b2;
2389 	b3 = (tmp >> 36) | (tmp << (64 - 36));
2390 	b2 -= b3 + k7;
2391 	b3 -= k8;
2392 
2393 	tmp = b1 ^ b0;
2394 	b1 = (tmp >> 46) | (tmp << (64 - 46));
2395 	b0 -= b1 + k5;
2396 	b1 -= k6;
2397 
2398 	tmp = b3 ^ b4;
2399 	b3 = (tmp >> 22) | (tmp << (64 - 22));
2400 	b4 -= b3;
2401 
2402 	tmp = b5 ^ b2;
2403 	b5 = (tmp >> 56) | (tmp << (64 - 56));
2404 	b2 -= b5;
2405 
2406 	tmp = b7 ^ b0;
2407 	b7 = (tmp >> 35) | (tmp << (64 - 35));
2408 	b0 -= b7;
2409 
2410 	tmp = b1 ^ b6;
2411 	b1 = (tmp >> 8) | (tmp << (64 - 8));
2412 	b6 -= b1;
2413 
2414 	tmp = b7 ^ b2;
2415 	b7 = (tmp >> 43) | (tmp << (64 - 43));
2416 	b2 -= b7;
2417 
2418 	tmp = b5 ^ b0;
2419 	b5 = (tmp >> 39) | (tmp << (64 - 39));
2420 	b0 -= b5;
2421 
2422 	tmp = b3 ^ b6;
2423 	b3 = (tmp >> 29) | (tmp << (64 - 29));
2424 	b6 -= b3;
2425 
2426 	tmp = b1 ^ b4;
2427 	b1 = (tmp >> 25) | (tmp << (64 - 25));
2428 	b4 -= b1;
2429 
2430 	tmp = b3 ^ b0;
2431 	b3 = (tmp >> 17) | (tmp << (64 - 17));
2432 	b0 -= b3;
2433 
2434 	tmp = b5 ^ b6;
2435 	b5 = (tmp >> 10) | (tmp << (64 - 10));
2436 	b6 -= b5;
2437 
2438 	tmp = b7 ^ b4;
2439 	b7 = (tmp >> 50) | (tmp << (64 - 50));
2440 	b4 -= b7;
2441 
2442 	tmp = b1 ^ b2;
2443 	b1 = (tmp >> 13) | (tmp << (64 - 13));
2444 	b2 -= b1;
2445 
2446 	tmp = b7 ^ b6;
2447 	b7 = (tmp >> 24) | (tmp << (64 - 24));
2448 	b6 -= b7 + k1 + t2;
2449 	b7 -= k2 + 13;
2450 
2451 	tmp = b5 ^ b4;
2452 	b5 = (tmp >> 34) | (tmp << (64 - 34));
2453 	b4 -= b5 + k8;
2454 	b5 -= k0 + t1;
2455 
2456 	tmp = b3 ^ b2;
2457 	b3 = (tmp >> 30) | (tmp << (64 - 30));
2458 	b2 -= b3 + k6;
2459 	b3 -= k7;
2460 
2461 	tmp = b1 ^ b0;
2462 	b1 = (tmp >> 39) | (tmp << (64 - 39));
2463 	b0 -= b1 + k4;
2464 	b1 -= k5;
2465 
2466 	tmp = b3 ^ b4;
2467 	b3 = (tmp >> 56) | (tmp << (64 - 56));
2468 	b4 -= b3;
2469 
2470 	tmp = b5 ^ b2;
2471 	b5 = (tmp >> 54) | (tmp << (64 - 54));
2472 	b2 -= b5;
2473 
2474 	tmp = b7 ^ b0;
2475 	b7 = (tmp >> 9) | (tmp << (64 - 9));
2476 	b0 -= b7;
2477 
2478 	tmp = b1 ^ b6;
2479 	b1 = (tmp >> 44) | (tmp << (64 - 44));
2480 	b6 -= b1;
2481 
2482 	tmp = b7 ^ b2;
2483 	b7 = (tmp >> 39) | (tmp << (64 - 39));
2484 	b2 -= b7;
2485 
2486 	tmp = b5 ^ b0;
2487 	b5 = (tmp >> 36) | (tmp << (64 - 36));
2488 	b0 -= b5;
2489 
2490 	tmp = b3 ^ b6;
2491 	b3 = (tmp >> 49) | (tmp << (64 - 49));
2492 	b6 -= b3;
2493 
2494 	tmp = b1 ^ b4;
2495 	b1 = (tmp >> 17) | (tmp << (64 - 17));
2496 	b4 -= b1;
2497 
2498 	tmp = b3 ^ b0;
2499 	b3 = (tmp >> 42) | (tmp << (64 - 42));
2500 	b0 -= b3;
2501 
2502 	tmp = b5 ^ b6;
2503 	b5 = (tmp >> 14) | (tmp << (64 - 14));
2504 	b6 -= b5;
2505 
2506 	tmp = b7 ^ b4;
2507 	b7 = (tmp >> 27) | (tmp << (64 - 27));
2508 	b4 -= b7;
2509 
2510 	tmp = b1 ^ b2;
2511 	b1 = (tmp >> 33) | (tmp << (64 - 33));
2512 	b2 -= b1;
2513 
2514 	tmp = b7 ^ b6;
2515 	b7 = (tmp >> 37) | (tmp << (64 - 37));
2516 	b6 -= b7 + k0 + t1;
2517 	b7 -= k1 + 12;
2518 
2519 	tmp = b5 ^ b4;
2520 	b5 = (tmp >> 19) | (tmp << (64 - 19));
2521 	b4 -= b5 + k7;
2522 	b5 -= k8 + t0;
2523 
2524 	tmp = b3 ^ b2;
2525 	b3 = (tmp >> 36) | (tmp << (64 - 36));
2526 	b2 -= b3 + k5;
2527 	b3 -= k6;
2528 
2529 	tmp = b1 ^ b0;
2530 	b1 = (tmp >> 46) | (tmp << (64 - 46));
2531 	b0 -= b1 + k3;
2532 	b1 -= k4;
2533 
2534 	tmp = b3 ^ b4;
2535 	b3 = (tmp >> 22) | (tmp << (64 - 22));
2536 	b4 -= b3;
2537 
2538 	tmp = b5 ^ b2;
2539 	b5 = (tmp >> 56) | (tmp << (64 - 56));
2540 	b2 -= b5;
2541 
2542 	tmp = b7 ^ b0;
2543 	b7 = (tmp >> 35) | (tmp << (64 - 35));
2544 	b0 -= b7;
2545 
2546 	tmp = b1 ^ b6;
2547 	b1 = (tmp >> 8) | (tmp << (64 - 8));
2548 	b6 -= b1;
2549 
2550 	tmp = b7 ^ b2;
2551 	b7 = (tmp >> 43) | (tmp << (64 - 43));
2552 	b2 -= b7;
2553 
2554 	tmp = b5 ^ b0;
2555 	b5 = (tmp >> 39) | (tmp << (64 - 39));
2556 	b0 -= b5;
2557 
2558 	tmp = b3 ^ b6;
2559 	b3 = (tmp >> 29) | (tmp << (64 - 29));
2560 	b6 -= b3;
2561 
2562 	tmp = b1 ^ b4;
2563 	b1 = (tmp >> 25) | (tmp << (64 - 25));
2564 	b4 -= b1;
2565 
2566 	tmp = b3 ^ b0;
2567 	b3 = (tmp >> 17) | (tmp << (64 - 17));
2568 	b0 -= b3;
2569 
2570 	tmp = b5 ^ b6;
2571 	b5 = (tmp >> 10) | (tmp << (64 - 10));
2572 	b6 -= b5;
2573 
2574 	tmp = b7 ^ b4;
2575 	b7 = (tmp >> 50) | (tmp << (64 - 50));
2576 	b4 -= b7;
2577 
2578 	tmp = b1 ^ b2;
2579 	b1 = (tmp >> 13) | (tmp << (64 - 13));
2580 	b2 -= b1;
2581 
2582 	tmp = b7 ^ b6;
2583 	b7 = (tmp >> 24) | (tmp << (64 - 24));
2584 	b6 -= b7 + k8 + t0;
2585 	b7 -= k0 + 11;
2586 
2587 	tmp = b5 ^ b4;
2588 	b5 = (tmp >> 34) | (tmp << (64 - 34));
2589 	b4 -= b5 + k6;
2590 	b5 -= k7 + t2;
2591 
2592 	tmp = b3 ^ b2;
2593 	b3 = (tmp >> 30) | (tmp << (64 - 30));
2594 	b2 -= b3 + k4;
2595 	b3 -= k5;
2596 
2597 	tmp = b1 ^ b0;
2598 	b1 = (tmp >> 39) | (tmp << (64 - 39));
2599 	b0 -= b1 + k2;
2600 	b1 -= k3;
2601 
2602 	tmp = b3 ^ b4;
2603 	b3 = (tmp >> 56) | (tmp << (64 - 56));
2604 	b4 -= b3;
2605 
2606 	tmp = b5 ^ b2;
2607 	b5 = (tmp >> 54) | (tmp << (64 - 54));
2608 	b2 -= b5;
2609 
2610 	tmp = b7 ^ b0;
2611 	b7 = (tmp >> 9) | (tmp << (64 - 9));
2612 	b0 -= b7;
2613 
2614 	tmp = b1 ^ b6;
2615 	b1 = (tmp >> 44) | (tmp << (64 - 44));
2616 	b6 -= b1;
2617 
2618 	tmp = b7 ^ b2;
2619 	b7 = (tmp >> 39) | (tmp << (64 - 39));
2620 	b2 -= b7;
2621 
2622 	tmp = b5 ^ b0;
2623 	b5 = (tmp >> 36) | (tmp << (64 - 36));
2624 	b0 -= b5;
2625 
2626 	tmp = b3 ^ b6;
2627 	b3 = (tmp >> 49) | (tmp << (64 - 49));
2628 	b6 -= b3;
2629 
2630 	tmp = b1 ^ b4;
2631 	b1 = (tmp >> 17) | (tmp << (64 - 17));
2632 	b4 -= b1;
2633 
2634 	tmp = b3 ^ b0;
2635 	b3 = (tmp >> 42) | (tmp << (64 - 42));
2636 	b0 -= b3;
2637 
2638 	tmp = b5 ^ b6;
2639 	b5 = (tmp >> 14) | (tmp << (64 - 14));
2640 	b6 -= b5;
2641 
2642 	tmp = b7 ^ b4;
2643 	b7 = (tmp >> 27) | (tmp << (64 - 27));
2644 	b4 -= b7;
2645 
2646 	tmp = b1 ^ b2;
2647 	b1 = (tmp >> 33) | (tmp << (64 - 33));
2648 	b2 -= b1;
2649 
2650 	tmp = b7 ^ b6;
2651 	b7 = (tmp >> 37) | (tmp << (64 - 37));
2652 	b6 -= b7 + k7 + t2;
2653 	b7 -= k8 + 10;
2654 
2655 	tmp = b5 ^ b4;
2656 	b5 = (tmp >> 19) | (tmp << (64 - 19));
2657 	b4 -= b5 + k5;
2658 	b5 -= k6 + t1;
2659 
2660 	tmp = b3 ^ b2;
2661 	b3 = (tmp >> 36) | (tmp << (64 - 36));
2662 	b2 -= b3 + k3;
2663 	b3 -= k4;
2664 
2665 	tmp = b1 ^ b0;
2666 	b1 = (tmp >> 46) | (tmp << (64 - 46));
2667 	b0 -= b1 + k1;
2668 	b1 -= k2;
2669 
2670 	tmp = b3 ^ b4;
2671 	b3 = (tmp >> 22) | (tmp << (64 - 22));
2672 	b4 -= b3;
2673 
2674 	tmp = b5 ^ b2;
2675 	b5 = (tmp >> 56) | (tmp << (64 - 56));
2676 	b2 -= b5;
2677 
2678 	tmp = b7 ^ b0;
2679 	b7 = (tmp >> 35) | (tmp << (64 - 35));
2680 	b0 -= b7;
2681 
2682 	tmp = b1 ^ b6;
2683 	b1 = (tmp >> 8) | (tmp << (64 - 8));
2684 	b6 -= b1;
2685 
2686 	tmp = b7 ^ b2;
2687 	b7 = (tmp >> 43) | (tmp << (64 - 43));
2688 	b2 -= b7;
2689 
2690 	tmp = b5 ^ b0;
2691 	b5 = (tmp >> 39) | (tmp << (64 - 39));
2692 	b0 -= b5;
2693 
2694 	tmp = b3 ^ b6;
2695 	b3 = (tmp >> 29) | (tmp << (64 - 29));
2696 	b6 -= b3;
2697 
2698 	tmp = b1 ^ b4;
2699 	b1 = (tmp >> 25) | (tmp << (64 - 25));
2700 	b4 -= b1;
2701 
2702 	tmp = b3 ^ b0;
2703 	b3 = (tmp >> 17) | (tmp << (64 - 17));
2704 	b0 -= b3;
2705 
2706 	tmp = b5 ^ b6;
2707 	b5 = (tmp >> 10) | (tmp << (64 - 10));
2708 	b6 -= b5;
2709 
2710 	tmp = b7 ^ b4;
2711 	b7 = (tmp >> 50) | (tmp << (64 - 50));
2712 	b4 -= b7;
2713 
2714 	tmp = b1 ^ b2;
2715 	b1 = (tmp >> 13) | (tmp << (64 - 13));
2716 	b2 -= b1;
2717 
2718 	tmp = b7 ^ b6;
2719 	b7 = (tmp >> 24) | (tmp << (64 - 24));
2720 	b6 -= b7 + k6 + t1;
2721 	b7 -= k7 + 9;
2722 
2723 	tmp = b5 ^ b4;
2724 	b5 = (tmp >> 34) | (tmp << (64 - 34));
2725 	b4 -= b5 + k4;
2726 	b5 -= k5 + t0;
2727 
2728 	tmp = b3 ^ b2;
2729 	b3 = (tmp >> 30) | (tmp << (64 - 30));
2730 	b2 -= b3 + k2;
2731 	b3 -= k3;
2732 
2733 	tmp = b1 ^ b0;
2734 	b1 = (tmp >> 39) | (tmp << (64 - 39));
2735 	b0 -= b1 + k0;
2736 	b1 -= k1;
2737 
2738 	tmp = b3 ^ b4;
2739 	b3 = (tmp >> 56) | (tmp << (64 - 56));
2740 	b4 -= b3;
2741 
2742 	tmp = b5 ^ b2;
2743 	b5 = (tmp >> 54) | (tmp << (64 - 54));
2744 	b2 -= b5;
2745 
2746 	tmp = b7 ^ b0;
2747 	b7 = (tmp >> 9) | (tmp << (64 - 9));
2748 	b0 -= b7;
2749 
2750 	tmp = b1 ^ b6;
2751 	b1 = (tmp >> 44) | (tmp << (64 - 44));
2752 	b6 -= b1;
2753 
2754 	tmp = b7 ^ b2;
2755 	b7 = (tmp >> 39) | (tmp << (64 - 39));
2756 	b2 -= b7;
2757 
2758 	tmp = b5 ^ b0;
2759 	b5 = (tmp >> 36) | (tmp << (64 - 36));
2760 	b0 -= b5;
2761 
2762 	tmp = b3 ^ b6;
2763 	b3 = (tmp >> 49) | (tmp << (64 - 49));
2764 	b6 -= b3;
2765 
2766 	tmp = b1 ^ b4;
2767 	b1 = (tmp >> 17) | (tmp << (64 - 17));
2768 	b4 -= b1;
2769 
2770 	tmp = b3 ^ b0;
2771 	b3 = (tmp >> 42) | (tmp << (64 - 42));
2772 	b0 -= b3;
2773 
2774 	tmp = b5 ^ b6;
2775 	b5 = (tmp >> 14) | (tmp << (64 - 14));
2776 	b6 -= b5;
2777 
2778 	tmp = b7 ^ b4;
2779 	b7 = (tmp >> 27) | (tmp << (64 - 27));
2780 	b4 -= b7;
2781 
2782 	tmp = b1 ^ b2;
2783 	b1 = (tmp >> 33) | (tmp << (64 - 33));
2784 	b2 -= b1;
2785 
2786 	tmp = b7 ^ b6;
2787 	b7 = (tmp >> 37) | (tmp << (64 - 37));
2788 	b6 -= b7 + k5 + t0;
2789 	b7 -= k6 + 8;
2790 
2791 	tmp = b5 ^ b4;
2792 	b5 = (tmp >> 19) | (tmp << (64 - 19));
2793 	b4 -= b5 + k3;
2794 	b5 -= k4 + t2;
2795 
2796 	tmp = b3 ^ b2;
2797 	b3 = (tmp >> 36) | (tmp << (64 - 36));
2798 	b2 -= b3 + k1;
2799 	b3 -= k2;
2800 
2801 	tmp = b1 ^ b0;
2802 	b1 = (tmp >> 46) | (tmp << (64 - 46));
2803 	b0 -= b1 + k8;
2804 	b1 -= k0;
2805 
2806 	tmp = b3 ^ b4;
2807 	b3 = (tmp >> 22) | (tmp << (64 - 22));
2808 	b4 -= b3;
2809 
2810 	tmp = b5 ^ b2;
2811 	b5 = (tmp >> 56) | (tmp << (64 - 56));
2812 	b2 -= b5;
2813 
2814 	tmp = b7 ^ b0;
2815 	b7 = (tmp >> 35) | (tmp << (64 - 35));
2816 	b0 -= b7;
2817 
2818 	tmp = b1 ^ b6;
2819 	b1 = (tmp >> 8) | (tmp << (64 - 8));
2820 	b6 -= b1;
2821 
2822 	tmp = b7 ^ b2;
2823 	b7 = (tmp >> 43) | (tmp << (64 - 43));
2824 	b2 -= b7;
2825 
2826 	tmp = b5 ^ b0;
2827 	b5 = (tmp >> 39) | (tmp << (64 - 39));
2828 	b0 -= b5;
2829 
2830 	tmp = b3 ^ b6;
2831 	b3 = (tmp >> 29) | (tmp << (64 - 29));
2832 	b6 -= b3;
2833 
2834 	tmp = b1 ^ b4;
2835 	b1 = (tmp >> 25) | (tmp << (64 - 25));
2836 	b4 -= b1;
2837 
2838 	tmp = b3 ^ b0;
2839 	b3 = (tmp >> 17) | (tmp << (64 - 17));
2840 	b0 -= b3;
2841 
2842 	tmp = b5 ^ b6;
2843 	b5 = (tmp >> 10) | (tmp << (64 - 10));
2844 	b6 -= b5;
2845 
2846 	tmp = b7 ^ b4;
2847 	b7 = (tmp >> 50) | (tmp << (64 - 50));
2848 	b4 -= b7;
2849 
2850 	tmp = b1 ^ b2;
2851 	b1 = (tmp >> 13) | (tmp << (64 - 13));
2852 	b2 -= b1;
2853 
2854 	tmp = b7 ^ b6;
2855 	b7 = (tmp >> 24) | (tmp << (64 - 24));
2856 	b6 -= b7 + k4 + t2;
2857 	b7 -= k5 + 7;
2858 
2859 	tmp = b5 ^ b4;
2860 	b5 = (tmp >> 34) | (tmp << (64 - 34));
2861 	b4 -= b5 + k2;
2862 	b5 -= k3 + t1;
2863 
2864 	tmp = b3 ^ b2;
2865 	b3 = (tmp >> 30) | (tmp << (64 - 30));
2866 	b2 -= b3 + k0;
2867 	b3 -= k1;
2868 
2869 	tmp = b1 ^ b0;
2870 	b1 = (tmp >> 39) | (tmp << (64 - 39));
2871 	b0 -= b1 + k7;
2872 	b1 -= k8;
2873 
2874 	tmp = b3 ^ b4;
2875 	b3 = (tmp >> 56) | (tmp << (64 - 56));
2876 	b4 -= b3;
2877 
2878 	tmp = b5 ^ b2;
2879 	b5 = (tmp >> 54) | (tmp << (64 - 54));
2880 	b2 -= b5;
2881 
2882 	tmp = b7 ^ b0;
2883 	b7 = (tmp >> 9) | (tmp << (64 - 9));
2884 	b0 -= b7;
2885 
2886 	tmp = b1 ^ b6;
2887 	b1 = (tmp >> 44) | (tmp << (64 - 44));
2888 	b6 -= b1;
2889 
2890 	tmp = b7 ^ b2;
2891 	b7 = (tmp >> 39) | (tmp << (64 - 39));
2892 	b2 -= b7;
2893 
2894 	tmp = b5 ^ b0;
2895 	b5 = (tmp >> 36) | (tmp << (64 - 36));
2896 	b0 -= b5;
2897 
2898 	tmp = b3 ^ b6;
2899 	b3 = (tmp >> 49) | (tmp << (64 - 49));
2900 	b6 -= b3;
2901 
2902 	tmp = b1 ^ b4;
2903 	b1 = (tmp >> 17) | (tmp << (64 - 17));
2904 	b4 -= b1;
2905 
2906 	tmp = b3 ^ b0;
2907 	b3 = (tmp >> 42) | (tmp << (64 - 42));
2908 	b0 -= b3;
2909 
2910 	tmp = b5 ^ b6;
2911 	b5 = (tmp >> 14) | (tmp << (64 - 14));
2912 	b6 -= b5;
2913 
2914 	tmp = b7 ^ b4;
2915 	b7 = (tmp >> 27) | (tmp << (64 - 27));
2916 	b4 -= b7;
2917 
2918 	tmp = b1 ^ b2;
2919 	b1 = (tmp >> 33) | (tmp << (64 - 33));
2920 	b2 -= b1;
2921 
2922 	tmp = b7 ^ b6;
2923 	b7 = (tmp >> 37) | (tmp << (64 - 37));
2924 	b6 -= b7 + k3 + t1;
2925 	b7 -= k4 + 6;
2926 
2927 	tmp = b5 ^ b4;
2928 	b5 = (tmp >> 19) | (tmp << (64 - 19));
2929 	b4 -= b5 + k1;
2930 	b5 -= k2 + t0;
2931 
2932 	tmp = b3 ^ b2;
2933 	b3 = (tmp >> 36) | (tmp << (64 - 36));
2934 	b2 -= b3 + k8;
2935 	b3 -= k0;
2936 
2937 	tmp = b1 ^ b0;
2938 	b1 = (tmp >> 46) | (tmp << (64 - 46));
2939 	b0 -= b1 + k6;
2940 	b1 -= k7;
2941 
2942 	tmp = b3 ^ b4;
2943 	b3 = (tmp >> 22) | (tmp << (64 - 22));
2944 	b4 -= b3;
2945 
2946 	tmp = b5 ^ b2;
2947 	b5 = (tmp >> 56) | (tmp << (64 - 56));
2948 	b2 -= b5;
2949 
2950 	tmp = b7 ^ b0;
2951 	b7 = (tmp >> 35) | (tmp << (64 - 35));
2952 	b0 -= b7;
2953 
2954 	tmp = b1 ^ b6;
2955 	b1 = (tmp >> 8) | (tmp << (64 - 8));
2956 	b6 -= b1;
2957 
2958 	tmp = b7 ^ b2;
2959 	b7 = (tmp >> 43) | (tmp << (64 - 43));
2960 	b2 -= b7;
2961 
2962 	tmp = b5 ^ b0;
2963 	b5 = (tmp >> 39) | (tmp << (64 - 39));
2964 	b0 -= b5;
2965 
2966 	tmp = b3 ^ b6;
2967 	b3 = (tmp >> 29) | (tmp << (64 - 29));
2968 	b6 -= b3;
2969 
2970 	tmp = b1 ^ b4;
2971 	b1 = (tmp >> 25) | (tmp << (64 - 25));
2972 	b4 -= b1;
2973 
2974 	tmp = b3 ^ b0;
2975 	b3 = (tmp >> 17) | (tmp << (64 - 17));
2976 	b0 -= b3;
2977 
2978 	tmp = b5 ^ b6;
2979 	b5 = (tmp >> 10) | (tmp << (64 - 10));
2980 	b6 -= b5;
2981 
2982 	tmp = b7 ^ b4;
2983 	b7 = (tmp >> 50) | (tmp << (64 - 50));
2984 	b4 -= b7;
2985 
2986 	tmp = b1 ^ b2;
2987 	b1 = (tmp >> 13) | (tmp << (64 - 13));
2988 	b2 -= b1;
2989 
2990 	tmp = b7 ^ b6;
2991 	b7 = (tmp >> 24) | (tmp << (64 - 24));
2992 	b6 -= b7 + k2 + t0;
2993 	b7 -= k3 + 5;
2994 
2995 	tmp = b5 ^ b4;
2996 	b5 = (tmp >> 34) | (tmp << (64 - 34));
2997 	b4 -= b5 + k0;
2998 	b5 -= k1 + t2;
2999 
3000 	tmp = b3 ^ b2;
3001 	b3 = (tmp >> 30) | (tmp << (64 - 30));
3002 	b2 -= b3 + k7;
3003 	b3 -= k8;
3004 
3005 	tmp = b1 ^ b0;
3006 	b1 = (tmp >> 39) | (tmp << (64 - 39));
3007 	b0 -= b1 + k5;
3008 	b1 -= k6;
3009 
3010 	tmp = b3 ^ b4;
3011 	b3 = (tmp >> 56) | (tmp << (64 - 56));
3012 	b4 -= b3;
3013 
3014 	tmp = b5 ^ b2;
3015 	b5 = (tmp >> 54) | (tmp << (64 - 54));
3016 	b2 -= b5;
3017 
3018 	tmp = b7 ^ b0;
3019 	b7 = (tmp >> 9) | (tmp << (64 - 9));
3020 	b0 -= b7;
3021 
3022 	tmp = b1 ^ b6;
3023 	b1 = (tmp >> 44) | (tmp << (64 - 44));
3024 	b6 -= b1;
3025 
3026 	tmp = b7 ^ b2;
3027 	b7 = (tmp >> 39) | (tmp << (64 - 39));
3028 	b2 -= b7;
3029 
3030 	tmp = b5 ^ b0;
3031 	b5 = (tmp >> 36) | (tmp << (64 - 36));
3032 	b0 -= b5;
3033 
3034 	tmp = b3 ^ b6;
3035 	b3 = (tmp >> 49) | (tmp << (64 - 49));
3036 	b6 -= b3;
3037 
3038 	tmp = b1 ^ b4;
3039 	b1 = (tmp >> 17) | (tmp << (64 - 17));
3040 	b4 -= b1;
3041 
3042 	tmp = b3 ^ b0;
3043 	b3 = (tmp >> 42) | (tmp << (64 - 42));
3044 	b0 -= b3;
3045 
3046 	tmp = b5 ^ b6;
3047 	b5 = (tmp >> 14) | (tmp << (64 - 14));
3048 	b6 -= b5;
3049 
3050 	tmp = b7 ^ b4;
3051 	b7 = (tmp >> 27) | (tmp << (64 - 27));
3052 	b4 -= b7;
3053 
3054 	tmp = b1 ^ b2;
3055 	b1 = (tmp >> 33) | (tmp << (64 - 33));
3056 	b2 -= b1;
3057 
3058 	tmp = b7 ^ b6;
3059 	b7 = (tmp >> 37) | (tmp << (64 - 37));
3060 	b6 -= b7 + k1 + t2;
3061 	b7 -= k2 + 4;
3062 
3063 	tmp = b5 ^ b4;
3064 	b5 = (tmp >> 19) | (tmp << (64 - 19));
3065 	b4 -= b5 + k8;
3066 	b5 -= k0 + t1;
3067 
3068 	tmp = b3 ^ b2;
3069 	b3 = (tmp >> 36) | (tmp << (64 - 36));
3070 	b2 -= b3 + k6;
3071 	b3 -= k7;
3072 
3073 	tmp = b1 ^ b0;
3074 	b1 = (tmp >> 46) | (tmp << (64 - 46));
3075 	b0 -= b1 + k4;
3076 	b1 -= k5;
3077 
3078 	tmp = b3 ^ b4;
3079 	b3 = (tmp >> 22) | (tmp << (64 - 22));
3080 	b4 -= b3;
3081 
3082 	tmp = b5 ^ b2;
3083 	b5 = (tmp >> 56) | (tmp << (64 - 56));
3084 	b2 -= b5;
3085 
3086 	tmp = b7 ^ b0;
3087 	b7 = (tmp >> 35) | (tmp << (64 - 35));
3088 	b0 -= b7;
3089 
3090 	tmp = b1 ^ b6;
3091 	b1 = (tmp >> 8) | (tmp << (64 - 8));
3092 	b6 -= b1;
3093 
3094 	tmp = b7 ^ b2;
3095 	b7 = (tmp >> 43) | (tmp << (64 - 43));
3096 	b2 -= b7;
3097 
3098 	tmp = b5 ^ b0;
3099 	b5 = (tmp >> 39) | (tmp << (64 - 39));
3100 	b0 -= b5;
3101 
3102 	tmp = b3 ^ b6;
3103 	b3 = (tmp >> 29) | (tmp << (64 - 29));
3104 	b6 -= b3;
3105 
3106 	tmp = b1 ^ b4;
3107 	b1 = (tmp >> 25) | (tmp << (64 - 25));
3108 	b4 -= b1;
3109 
3110 	tmp = b3 ^ b0;
3111 	b3 = (tmp >> 17) | (tmp << (64 - 17));
3112 	b0 -= b3;
3113 
3114 	tmp = b5 ^ b6;
3115 	b5 = (tmp >> 10) | (tmp << (64 - 10));
3116 	b6 -= b5;
3117 
3118 	tmp = b7 ^ b4;
3119 	b7 = (tmp >> 50) | (tmp << (64 - 50));
3120 	b4 -= b7;
3121 
3122 	tmp = b1 ^ b2;
3123 	b1 = (tmp >> 13) | (tmp << (64 - 13));
3124 	b2 -= b1;
3125 
3126 	tmp = b7 ^ b6;
3127 	b7 = (tmp >> 24) | (tmp << (64 - 24));
3128 	b6 -= b7 + k0 + t1;
3129 	b7 -= k1 + 3;
3130 
3131 	tmp = b5 ^ b4;
3132 	b5 = (tmp >> 34) | (tmp << (64 - 34));
3133 	b4 -= b5 + k7;
3134 	b5 -= k8 + t0;
3135 
3136 	tmp = b3 ^ b2;
3137 	b3 = (tmp >> 30) | (tmp << (64 - 30));
3138 	b2 -= b3 + k5;
3139 	b3 -= k6;
3140 
3141 	tmp = b1 ^ b0;
3142 	b1 = (tmp >> 39) | (tmp << (64 - 39));
3143 	b0 -= b1 + k3;
3144 	b1 -= k4;
3145 
3146 	tmp = b3 ^ b4;
3147 	b3 = (tmp >> 56) | (tmp << (64 - 56));
3148 	b4 -= b3;
3149 
3150 	tmp = b5 ^ b2;
3151 	b5 = (tmp >> 54) | (tmp << (64 - 54));
3152 	b2 -= b5;
3153 
3154 	tmp = b7 ^ b0;
3155 	b7 = (tmp >> 9) | (tmp << (64 - 9));
3156 	b0 -= b7;
3157 
3158 	tmp = b1 ^ b6;
3159 	b1 = (tmp >> 44) | (tmp << (64 - 44));
3160 	b6 -= b1;
3161 
3162 	tmp = b7 ^ b2;
3163 	b7 = (tmp >> 39) | (tmp << (64 - 39));
3164 	b2 -= b7;
3165 
3166 	tmp = b5 ^ b0;
3167 	b5 = (tmp >> 36) | (tmp << (64 - 36));
3168 	b0 -= b5;
3169 
3170 	tmp = b3 ^ b6;
3171 	b3 = (tmp >> 49) | (tmp << (64 - 49));
3172 	b6 -= b3;
3173 
3174 	tmp = b1 ^ b4;
3175 	b1 = (tmp >> 17) | (tmp << (64 - 17));
3176 	b4 -= b1;
3177 
3178 	tmp = b3 ^ b0;
3179 	b3 = (tmp >> 42) | (tmp << (64 - 42));
3180 	b0 -= b3;
3181 
3182 	tmp = b5 ^ b6;
3183 	b5 = (tmp >> 14) | (tmp << (64 - 14));
3184 	b6 -= b5;
3185 
3186 	tmp = b7 ^ b4;
3187 	b7 = (tmp >> 27) | (tmp << (64 - 27));
3188 	b4 -= b7;
3189 
3190 	tmp = b1 ^ b2;
3191 	b1 = (tmp >> 33) | (tmp << (64 - 33));
3192 	b2 -= b1;
3193 
3194 	tmp = b7 ^ b6;
3195 	b7 = (tmp >> 37) | (tmp << (64 - 37));
3196 	b6 -= b7 + k8 + t0;
3197 	b7 -= k0 + 2;
3198 
3199 	tmp = b5 ^ b4;
3200 	b5 = (tmp >> 19) | (tmp << (64 - 19));
3201 	b4 -= b5 + k6;
3202 	b5 -= k7 + t2;
3203 
3204 	tmp = b3 ^ b2;
3205 	b3 = (tmp >> 36) | (tmp << (64 - 36));
3206 	b2 -= b3 + k4;
3207 	b3 -= k5;
3208 
3209 	tmp = b1 ^ b0;
3210 	b1 = (tmp >> 46) | (tmp << (64 - 46));
3211 	b0 -= b1 + k2;
3212 	b1 -= k3;
3213 
3214 	tmp = b3 ^ b4;
3215 	b3 = (tmp >> 22) | (tmp << (64 - 22));
3216 	b4 -= b3;
3217 
3218 	tmp = b5 ^ b2;
3219 	b5 = (tmp >> 56) | (tmp << (64 - 56));
3220 	b2 -= b5;
3221 
3222 	tmp = b7 ^ b0;
3223 	b7 = (tmp >> 35) | (tmp << (64 - 35));
3224 	b0 -= b7;
3225 
3226 	tmp = b1 ^ b6;
3227 	b1 = (tmp >> 8) | (tmp << (64 - 8));
3228 	b6 -= b1;
3229 
3230 	tmp = b7 ^ b2;
3231 	b7 = (tmp >> 43) | (tmp << (64 - 43));
3232 	b2 -= b7;
3233 
3234 	tmp = b5 ^ b0;
3235 	b5 = (tmp >> 39) | (tmp << (64 - 39));
3236 	b0 -= b5;
3237 
3238 	tmp = b3 ^ b6;
3239 	b3 = (tmp >> 29) | (tmp << (64 - 29));
3240 	b6 -= b3;
3241 
3242 	tmp = b1 ^ b4;
3243 	b1 = (tmp >> 25) | (tmp << (64 - 25));
3244 	b4 -= b1;
3245 
3246 	tmp = b3 ^ b0;
3247 	b3 = (tmp >> 17) | (tmp << (64 - 17));
3248 	b0 -= b3;
3249 
3250 	tmp = b5 ^ b6;
3251 	b5 = (tmp >> 10) | (tmp << (64 - 10));
3252 	b6 -= b5;
3253 
3254 	tmp = b7 ^ b4;
3255 	b7 = (tmp >> 50) | (tmp << (64 - 50));
3256 	b4 -= b7;
3257 
3258 	tmp = b1 ^ b2;
3259 	b1 = (tmp >> 13) | (tmp << (64 - 13));
3260 	b2 -= b1;
3261 
3262 	tmp = b7 ^ b6;
3263 	b7 = (tmp >> 24) | (tmp << (64 - 24));
3264 	b6 -= b7 + k7 + t2;
3265 	b7 -= k8 + 1;
3266 
3267 	tmp = b5 ^ b4;
3268 	b5 = (tmp >> 34) | (tmp << (64 - 34));
3269 	b4 -= b5 + k5;
3270 	b5 -= k6 + t1;
3271 
3272 	tmp = b3 ^ b2;
3273 	b3 = (tmp >> 30) | (tmp << (64 - 30));
3274 	b2 -= b3 + k3;
3275 	b3 -= k4;
3276 
3277 	tmp = b1 ^ b0;
3278 	b1 = (tmp >> 39) | (tmp << (64 - 39));
3279 	b0 -= b1 + k1;
3280 	b1 -= k2;
3281 
3282 	tmp = b3 ^ b4;
3283 	b3 = (tmp >> 56) | (tmp << (64 - 56));
3284 	b4 -= b3;
3285 
3286 	tmp = b5 ^ b2;
3287 	b5 = (tmp >> 54) | (tmp << (64 - 54));
3288 	b2 -= b5;
3289 
3290 	tmp = b7 ^ b0;
3291 	b7 = (tmp >> 9) | (tmp << (64 - 9));
3292 	b0 -= b7;
3293 
3294 	tmp = b1 ^ b6;
3295 	b1 = (tmp >> 44) | (tmp << (64 - 44));
3296 	b6 -= b1;
3297 
3298 	tmp = b7 ^ b2;
3299 	b7 = (tmp >> 39) | (tmp << (64 - 39));
3300 	b2 -= b7;
3301 
3302 	tmp = b5 ^ b0;
3303 	b5 = (tmp >> 36) | (tmp << (64 - 36));
3304 	b0 -= b5;
3305 
3306 	tmp = b3 ^ b6;
3307 	b3 = (tmp >> 49) | (tmp << (64 - 49));
3308 	b6 -= b3;
3309 
3310 	tmp = b1 ^ b4;
3311 	b1 = (tmp >> 17) | (tmp << (64 - 17));
3312 	b4 -= b1;
3313 
3314 	tmp = b3 ^ b0;
3315 	b3 = (tmp >> 42) | (tmp << (64 - 42));
3316 	b0 -= b3;
3317 
3318 	tmp = b5 ^ b6;
3319 	b5 = (tmp >> 14) | (tmp << (64 - 14));
3320 	b6 -= b5;
3321 
3322 	tmp = b7 ^ b4;
3323 	b7 = (tmp >> 27) | (tmp << (64 - 27));
3324 	b4 -= b7;
3325 
3326 	tmp = b1 ^ b2;
3327 	b1 = (tmp >> 33) | (tmp << (64 - 33));
3328 	b2 -= b1;
3329 
3330 	tmp = b7 ^ b6;
3331 	b7 = (tmp >> 37) | (tmp << (64 - 37));
3332 	b6 -= b7 + k6 + t1;
3333 	b7 -= k7;
3334 
3335 	tmp = b5 ^ b4;
3336 	b5 = (tmp >> 19) | (tmp << (64 - 19));
3337 	b4 -= b5 + k4;
3338 	b5 -= k5 + t0;
3339 
3340 	tmp = b3 ^ b2;
3341 	b3 = (tmp >> 36) | (tmp << (64 - 36));
3342 	b2 -= b3 + k2;
3343 	b3 -= k3;
3344 
3345 	tmp = b1 ^ b0;
3346 	b1 = (tmp >> 46) | (tmp << (64 - 46));
3347 	b0 -= b1 + k0;
3348 	b1 -= k1;
3349 
3350 	output[0] = b0;
3351 	output[1] = b1;
3352 	output[2] = b2;
3353 	output[3] = b3;
3354 
3355 	output[7] = b7;
3356 	output[6] = b6;
3357 	output[5] = b5;
3358 	output[4] = b4;
3359 }
3360 
3361 void threefish_encrypt_1024(struct threefish_key *key_ctx, u64 *input,
3362 			    u64 *output)
3363 {
3364 	u64 b0 = input[0], b1 = input[1],
3365 	    b2 = input[2], b3 = input[3],
3366 	    b4 = input[4], b5 = input[5],
3367 	    b6 = input[6], b7 = input[7],
3368 	    b8 = input[8], b9 = input[9],
3369 	    b10 = input[10], b11 = input[11],
3370 	    b12 = input[12], b13 = input[13],
3371 	    b14 = input[14], b15 = input[15];
3372 	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
3373 	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
3374 	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
3375 	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
3376 	    k8 = key_ctx->key[8], k9 = key_ctx->key[9],
3377 	    k10 = key_ctx->key[10], k11 = key_ctx->key[11],
3378 	    k12 = key_ctx->key[12], k13 = key_ctx->key[13],
3379 	    k14 = key_ctx->key[14], k15 = key_ctx->key[15],
3380 	    k16 = key_ctx->key[16];
3381 	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
3382 	    t2 = key_ctx->tweak[2];
3383 
3384 	b1 += k1;
3385 	b0 += b1 + k0;
3386 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
3387 
3388 	b3 += k3;
3389 	b2 += b3 + k2;
3390 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
3391 
3392 	b5 += k5;
3393 	b4 += b5 + k4;
3394 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
3395 
3396 	b7 += k7;
3397 	b6 += b7 + k6;
3398 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
3399 
3400 	b9 += k9;
3401 	b8 += b9 + k8;
3402 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
3403 
3404 	b11 += k11;
3405 	b10 += b11 + k10;
3406 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
3407 
3408 	b13 += k13 + t0;
3409 	b12 += b13 + k12;
3410 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
3411 
3412 	b15 += k15;
3413 	b14 += b15 + k14 + t1;
3414 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
3415 
3416 	b0 += b9;
3417 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
3418 
3419 	b2 += b13;
3420 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
3421 
3422 	b6 += b11;
3423 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
3424 
3425 	b4 += b15;
3426 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
3427 
3428 	b10 += b7;
3429 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
3430 
3431 	b12 += b3;
3432 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
3433 
3434 	b14 += b5;
3435 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
3436 
3437 	b8 += b1;
3438 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
3439 
3440 	b0 += b7;
3441 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
3442 
3443 	b2 += b5;
3444 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
3445 
3446 	b4 += b3;
3447 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
3448 
3449 	b6 += b1;
3450 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
3451 
3452 	b12 += b15;
3453 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
3454 
3455 	b14 += b13;
3456 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
3457 
3458 	b8 += b11;
3459 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
3460 
3461 	b10 += b9;
3462 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
3463 
3464 	b0 += b15;
3465 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
3466 
3467 	b2 += b11;
3468 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
3469 
3470 	b6 += b13;
3471 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
3472 
3473 	b4 += b9;
3474 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
3475 
3476 	b14 += b1;
3477 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
3478 
3479 	b8 += b5;
3480 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
3481 
3482 	b10 += b3;
3483 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
3484 
3485 	b12 += b7;
3486 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
3487 
3488 	b1 += k2;
3489 	b0 += b1 + k1;
3490 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
3491 
3492 	b3 += k4;
3493 	b2 += b3 + k3;
3494 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
3495 
3496 	b5 += k6;
3497 	b4 += b5 + k5;
3498 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
3499 
3500 	b7 += k8;
3501 	b6 += b7 + k7;
3502 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
3503 
3504 	b9 += k10;
3505 	b8 += b9 + k9;
3506 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
3507 
3508 	b11 += k12;
3509 	b10 += b11 + k11;
3510 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
3511 
3512 	b13 += k14 + t1;
3513 	b12 += b13 + k13;
3514 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
3515 
3516 	b15 += k16 + 1;
3517 	b14 += b15 + k15 + t2;
3518 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
3519 
3520 	b0 += b9;
3521 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
3522 
3523 	b2 += b13;
3524 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
3525 
3526 	b6 += b11;
3527 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
3528 
3529 	b4 += b15;
3530 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
3531 
3532 	b10 += b7;
3533 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
3534 
3535 	b12 += b3;
3536 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
3537 
3538 	b14 += b5;
3539 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
3540 
3541 	b8 += b1;
3542 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
3543 
3544 	b0 += b7;
3545 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
3546 
3547 	b2 += b5;
3548 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
3549 
3550 	b4 += b3;
3551 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
3552 
3553 	b6 += b1;
3554 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
3555 
3556 	b12 += b15;
3557 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
3558 
3559 	b14 += b13;
3560 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
3561 
3562 	b8 += b11;
3563 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
3564 
3565 	b10 += b9;
3566 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
3567 
3568 	b0 += b15;
3569 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
3570 
3571 	b2 += b11;
3572 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
3573 
3574 	b6 += b13;
3575 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
3576 
3577 	b4 += b9;
3578 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
3579 
3580 	b14 += b1;
3581 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
3582 
3583 	b8 += b5;
3584 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
3585 
3586 	b10 += b3;
3587 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
3588 
3589 	b12 += b7;
3590 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
3591 
3592 	b1 += k3;
3593 	b0 += b1 + k2;
3594 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
3595 
3596 	b3 += k5;
3597 	b2 += b3 + k4;
3598 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
3599 
3600 	b5 += k7;
3601 	b4 += b5 + k6;
3602 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
3603 
3604 	b7 += k9;
3605 	b6 += b7 + k8;
3606 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
3607 
3608 	b9 += k11;
3609 	b8 += b9 + k10;
3610 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
3611 
3612 	b11 += k13;
3613 	b10 += b11 + k12;
3614 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
3615 
3616 	b13 += k15 + t2;
3617 	b12 += b13 + k14;
3618 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
3619 
3620 	b15 += k0 + 2;
3621 	b14 += b15 + k16 + t0;
3622 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
3623 
3624 	b0 += b9;
3625 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
3626 
3627 	b2 += b13;
3628 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
3629 
3630 	b6 += b11;
3631 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
3632 
3633 	b4 += b15;
3634 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
3635 
3636 	b10 += b7;
3637 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
3638 
3639 	b12 += b3;
3640 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
3641 
3642 	b14 += b5;
3643 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
3644 
3645 	b8 += b1;
3646 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
3647 
3648 	b0 += b7;
3649 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
3650 
3651 	b2 += b5;
3652 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
3653 
3654 	b4 += b3;
3655 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
3656 
3657 	b6 += b1;
3658 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
3659 
3660 	b12 += b15;
3661 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
3662 
3663 	b14 += b13;
3664 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
3665 
3666 	b8 += b11;
3667 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
3668 
3669 	b10 += b9;
3670 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
3671 
3672 	b0 += b15;
3673 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
3674 
3675 	b2 += b11;
3676 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
3677 
3678 	b6 += b13;
3679 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
3680 
3681 	b4 += b9;
3682 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
3683 
3684 	b14 += b1;
3685 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
3686 
3687 	b8 += b5;
3688 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
3689 
3690 	b10 += b3;
3691 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
3692 
3693 	b12 += b7;
3694 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
3695 
3696 	b1 += k4;
3697 	b0 += b1 + k3;
3698 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
3699 
3700 	b3 += k6;
3701 	b2 += b3 + k5;
3702 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
3703 
3704 	b5 += k8;
3705 	b4 += b5 + k7;
3706 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
3707 
3708 	b7 += k10;
3709 	b6 += b7 + k9;
3710 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
3711 
3712 	b9 += k12;
3713 	b8 += b9 + k11;
3714 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
3715 
3716 	b11 += k14;
3717 	b10 += b11 + k13;
3718 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
3719 
3720 	b13 += k16 + t0;
3721 	b12 += b13 + k15;
3722 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
3723 
3724 	b15 += k1 + 3;
3725 	b14 += b15 + k0 + t1;
3726 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
3727 
3728 	b0 += b9;
3729 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
3730 
3731 	b2 += b13;
3732 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
3733 
3734 	b6 += b11;
3735 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
3736 
3737 	b4 += b15;
3738 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
3739 
3740 	b10 += b7;
3741 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
3742 
3743 	b12 += b3;
3744 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
3745 
3746 	b14 += b5;
3747 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
3748 
3749 	b8 += b1;
3750 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
3751 
3752 	b0 += b7;
3753 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
3754 
3755 	b2 += b5;
3756 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
3757 
3758 	b4 += b3;
3759 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
3760 
3761 	b6 += b1;
3762 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
3763 
3764 	b12 += b15;
3765 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
3766 
3767 	b14 += b13;
3768 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
3769 
3770 	b8 += b11;
3771 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
3772 
3773 	b10 += b9;
3774 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
3775 
3776 	b0 += b15;
3777 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
3778 
3779 	b2 += b11;
3780 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
3781 
3782 	b6 += b13;
3783 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
3784 
3785 	b4 += b9;
3786 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
3787 
3788 	b14 += b1;
3789 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
3790 
3791 	b8 += b5;
3792 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
3793 
3794 	b10 += b3;
3795 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
3796 
3797 	b12 += b7;
3798 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
3799 
3800 	b1 += k5;
3801 	b0 += b1 + k4;
3802 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
3803 
3804 	b3 += k7;
3805 	b2 += b3 + k6;
3806 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
3807 
3808 	b5 += k9;
3809 	b4 += b5 + k8;
3810 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
3811 
3812 	b7 += k11;
3813 	b6 += b7 + k10;
3814 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
3815 
3816 	b9 += k13;
3817 	b8 += b9 + k12;
3818 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
3819 
3820 	b11 += k15;
3821 	b10 += b11 + k14;
3822 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
3823 
3824 	b13 += k0 + t1;
3825 	b12 += b13 + k16;
3826 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
3827 
3828 	b15 += k2 + 4;
3829 	b14 += b15 + k1 + t2;
3830 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
3831 
3832 	b0 += b9;
3833 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
3834 
3835 	b2 += b13;
3836 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
3837 
3838 	b6 += b11;
3839 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
3840 
3841 	b4 += b15;
3842 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
3843 
3844 	b10 += b7;
3845 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
3846 
3847 	b12 += b3;
3848 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
3849 
3850 	b14 += b5;
3851 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
3852 
3853 	b8 += b1;
3854 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
3855 
3856 	b0 += b7;
3857 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
3858 
3859 	b2 += b5;
3860 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
3861 
3862 	b4 += b3;
3863 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
3864 
3865 	b6 += b1;
3866 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
3867 
3868 	b12 += b15;
3869 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
3870 
3871 	b14 += b13;
3872 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
3873 
3874 	b8 += b11;
3875 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
3876 
3877 	b10 += b9;
3878 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
3879 
3880 	b0 += b15;
3881 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
3882 
3883 	b2 += b11;
3884 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
3885 
3886 	b6 += b13;
3887 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
3888 
3889 	b4 += b9;
3890 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
3891 
3892 	b14 += b1;
3893 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
3894 
3895 	b8 += b5;
3896 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
3897 
3898 	b10 += b3;
3899 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
3900 
3901 	b12 += b7;
3902 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
3903 
3904 	b1 += k6;
3905 	b0 += b1 + k5;
3906 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
3907 
3908 	b3 += k8;
3909 	b2 += b3 + k7;
3910 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
3911 
3912 	b5 += k10;
3913 	b4 += b5 + k9;
3914 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
3915 
3916 	b7 += k12;
3917 	b6 += b7 + k11;
3918 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
3919 
3920 	b9 += k14;
3921 	b8 += b9 + k13;
3922 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
3923 
3924 	b11 += k16;
3925 	b10 += b11 + k15;
3926 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
3927 
3928 	b13 += k1 + t2;
3929 	b12 += b13 + k0;
3930 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
3931 
3932 	b15 += k3 + 5;
3933 	b14 += b15 + k2 + t0;
3934 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
3935 
3936 	b0 += b9;
3937 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
3938 
3939 	b2 += b13;
3940 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
3941 
3942 	b6 += b11;
3943 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
3944 
3945 	b4 += b15;
3946 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
3947 
3948 	b10 += b7;
3949 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
3950 
3951 	b12 += b3;
3952 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
3953 
3954 	b14 += b5;
3955 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
3956 
3957 	b8 += b1;
3958 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
3959 
3960 	b0 += b7;
3961 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
3962 
3963 	b2 += b5;
3964 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
3965 
3966 	b4 += b3;
3967 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
3968 
3969 	b6 += b1;
3970 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
3971 
3972 	b12 += b15;
3973 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
3974 
3975 	b14 += b13;
3976 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
3977 
3978 	b8 += b11;
3979 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
3980 
3981 	b10 += b9;
3982 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
3983 
3984 	b0 += b15;
3985 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
3986 
3987 	b2 += b11;
3988 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
3989 
3990 	b6 += b13;
3991 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
3992 
3993 	b4 += b9;
3994 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
3995 
3996 	b14 += b1;
3997 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
3998 
3999 	b8 += b5;
4000 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4001 
4002 	b10 += b3;
4003 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4004 
4005 	b12 += b7;
4006 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4007 
4008 	b1 += k7;
4009 	b0 += b1 + k6;
4010 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4011 
4012 	b3 += k9;
4013 	b2 += b3 + k8;
4014 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4015 
4016 	b5 += k11;
4017 	b4 += b5 + k10;
4018 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4019 
4020 	b7 += k13;
4021 	b6 += b7 + k12;
4022 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4023 
4024 	b9 += k15;
4025 	b8 += b9 + k14;
4026 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4027 
4028 	b11 += k0;
4029 	b10 += b11 + k16;
4030 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4031 
4032 	b13 += k2 + t0;
4033 	b12 += b13 + k1;
4034 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4035 
4036 	b15 += k4 + 6;
4037 	b14 += b15 + k3 + t1;
4038 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4039 
4040 	b0 += b9;
4041 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4042 
4043 	b2 += b13;
4044 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4045 
4046 	b6 += b11;
4047 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4048 
4049 	b4 += b15;
4050 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4051 
4052 	b10 += b7;
4053 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4054 
4055 	b12 += b3;
4056 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4057 
4058 	b14 += b5;
4059 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4060 
4061 	b8 += b1;
4062 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4063 
4064 	b0 += b7;
4065 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4066 
4067 	b2 += b5;
4068 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4069 
4070 	b4 += b3;
4071 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4072 
4073 	b6 += b1;
4074 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4075 
4076 	b12 += b15;
4077 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4078 
4079 	b14 += b13;
4080 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4081 
4082 	b8 += b11;
4083 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4084 
4085 	b10 += b9;
4086 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4087 
4088 	b0 += b15;
4089 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4090 
4091 	b2 += b11;
4092 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4093 
4094 	b6 += b13;
4095 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4096 
4097 	b4 += b9;
4098 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4099 
4100 	b14 += b1;
4101 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4102 
4103 	b8 += b5;
4104 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4105 
4106 	b10 += b3;
4107 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4108 
4109 	b12 += b7;
4110 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4111 
4112 	b1 += k8;
4113 	b0 += b1 + k7;
4114 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4115 
4116 	b3 += k10;
4117 	b2 += b3 + k9;
4118 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4119 
4120 	b5 += k12;
4121 	b4 += b5 + k11;
4122 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4123 
4124 	b7 += k14;
4125 	b6 += b7 + k13;
4126 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4127 
4128 	b9 += k16;
4129 	b8 += b9 + k15;
4130 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4131 
4132 	b11 += k1;
4133 	b10 += b11 + k0;
4134 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4135 
4136 	b13 += k3 + t1;
4137 	b12 += b13 + k2;
4138 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4139 
4140 	b15 += k5 + 7;
4141 	b14 += b15 + k4 + t2;
4142 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4143 
4144 	b0 += b9;
4145 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4146 
4147 	b2 += b13;
4148 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4149 
4150 	b6 += b11;
4151 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4152 
4153 	b4 += b15;
4154 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4155 
4156 	b10 += b7;
4157 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4158 
4159 	b12 += b3;
4160 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4161 
4162 	b14 += b5;
4163 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4164 
4165 	b8 += b1;
4166 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4167 
4168 	b0 += b7;
4169 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
4170 
4171 	b2 += b5;
4172 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
4173 
4174 	b4 += b3;
4175 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
4176 
4177 	b6 += b1;
4178 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
4179 
4180 	b12 += b15;
4181 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
4182 
4183 	b14 += b13;
4184 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
4185 
4186 	b8 += b11;
4187 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
4188 
4189 	b10 += b9;
4190 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
4191 
4192 	b0 += b15;
4193 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
4194 
4195 	b2 += b11;
4196 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
4197 
4198 	b6 += b13;
4199 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
4200 
4201 	b4 += b9;
4202 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
4203 
4204 	b14 += b1;
4205 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
4206 
4207 	b8 += b5;
4208 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4209 
4210 	b10 += b3;
4211 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4212 
4213 	b12 += b7;
4214 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4215 
4216 	b1 += k9;
4217 	b0 += b1 + k8;
4218 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4219 
4220 	b3 += k11;
4221 	b2 += b3 + k10;
4222 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4223 
4224 	b5 += k13;
4225 	b4 += b5 + k12;
4226 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4227 
4228 	b7 += k15;
4229 	b6 += b7 + k14;
4230 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4231 
4232 	b9 += k0;
4233 	b8 += b9 + k16;
4234 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4235 
4236 	b11 += k2;
4237 	b10 += b11 + k1;
4238 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4239 
4240 	b13 += k4 + t2;
4241 	b12 += b13 + k3;
4242 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4243 
4244 	b15 += k6 + 8;
4245 	b14 += b15 + k5 + t0;
4246 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4247 
4248 	b0 += b9;
4249 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4250 
4251 	b2 += b13;
4252 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4253 
4254 	b6 += b11;
4255 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4256 
4257 	b4 += b15;
4258 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4259 
4260 	b10 += b7;
4261 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4262 
4263 	b12 += b3;
4264 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4265 
4266 	b14 += b5;
4267 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4268 
4269 	b8 += b1;
4270 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4271 
4272 	b0 += b7;
4273 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4274 
4275 	b2 += b5;
4276 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4277 
4278 	b4 += b3;
4279 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4280 
4281 	b6 += b1;
4282 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4283 
4284 	b12 += b15;
4285 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4286 
4287 	b14 += b13;
4288 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4289 
4290 	b8 += b11;
4291 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4292 
4293 	b10 += b9;
4294 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4295 
4296 	b0 += b15;
4297 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4298 
4299 	b2 += b11;
4300 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4301 
4302 	b6 += b13;
4303 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4304 
4305 	b4 += b9;
4306 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4307 
4308 	b14 += b1;
4309 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4310 
4311 	b8 += b5;
4312 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4313 
4314 	b10 += b3;
4315 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4316 
4317 	b12 += b7;
4318 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4319 
4320 	b1 += k10;
4321 	b0 += b1 + k9;
4322 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4323 
4324 	b3 += k12;
4325 	b2 += b3 + k11;
4326 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4327 
4328 	b5 += k14;
4329 	b4 += b5 + k13;
4330 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4331 
4332 	b7 += k16;
4333 	b6 += b7 + k15;
4334 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4335 
4336 	b9 += k1;
4337 	b8 += b9 + k0;
4338 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4339 
4340 	b11 += k3;
4341 	b10 += b11 + k2;
4342 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4343 
4344 	b13 += k5 + t0;
4345 	b12 += b13 + k4;
4346 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4347 
4348 	b15 += k7 + 9;
4349 	b14 += b15 + k6 + t1;
4350 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4351 
4352 	b0 += b9;
4353 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4354 
4355 	b2 += b13;
4356 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4357 
4358 	b6 += b11;
4359 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4360 
4361 	b4 += b15;
4362 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4363 
4364 	b10 += b7;
4365 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4366 
4367 	b12 += b3;
4368 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4369 
4370 	b14 += b5;
4371 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4372 
4373 	b8 += b1;
4374 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4375 
4376 	b0 += b7;
4377 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
4378 
4379 	b2 += b5;
4380 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
4381 
4382 	b4 += b3;
4383 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
4384 
4385 	b6 += b1;
4386 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
4387 
4388 	b12 += b15;
4389 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
4390 
4391 	b14 += b13;
4392 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
4393 
4394 	b8 += b11;
4395 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
4396 
4397 	b10 += b9;
4398 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
4399 
4400 	b0 += b15;
4401 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
4402 
4403 	b2 += b11;
4404 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
4405 
4406 	b6 += b13;
4407 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
4408 
4409 	b4 += b9;
4410 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
4411 
4412 	b14 += b1;
4413 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
4414 
4415 	b8 += b5;
4416 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4417 
4418 	b10 += b3;
4419 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4420 
4421 	b12 += b7;
4422 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4423 
4424 	b1 += k11;
4425 	b0 += b1 + k10;
4426 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4427 
4428 	b3 += k13;
4429 	b2 += b3 + k12;
4430 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4431 
4432 	b5 += k15;
4433 	b4 += b5 + k14;
4434 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4435 
4436 	b7 += k0;
4437 	b6 += b7 + k16;
4438 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4439 
4440 	b9 += k2;
4441 	b8 += b9 + k1;
4442 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4443 
4444 	b11 += k4;
4445 	b10 += b11 + k3;
4446 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4447 
4448 	b13 += k6 + t1;
4449 	b12 += b13 + k5;
4450 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4451 
4452 	b15 += k8 + 10;
4453 	b14 += b15 + k7 + t2;
4454 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4455 
4456 	b0 += b9;
4457 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4458 
4459 	b2 += b13;
4460 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4461 
4462 	b6 += b11;
4463 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4464 
4465 	b4 += b15;
4466 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4467 
4468 	b10 += b7;
4469 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4470 
4471 	b12 += b3;
4472 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4473 
4474 	b14 += b5;
4475 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4476 
4477 	b8 += b1;
4478 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4479 
4480 	b0 += b7;
4481 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4482 
4483 	b2 += b5;
4484 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4485 
4486 	b4 += b3;
4487 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4488 
4489 	b6 += b1;
4490 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4491 
4492 	b12 += b15;
4493 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4494 
4495 	b14 += b13;
4496 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4497 
4498 	b8 += b11;
4499 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4500 
4501 	b10 += b9;
4502 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4503 
4504 	b0 += b15;
4505 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4506 
4507 	b2 += b11;
4508 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4509 
4510 	b6 += b13;
4511 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4512 
4513 	b4 += b9;
4514 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4515 
4516 	b14 += b1;
4517 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4518 
4519 	b8 += b5;
4520 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4521 
4522 	b10 += b3;
4523 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4524 
4525 	b12 += b7;
4526 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4527 
4528 	b1 += k12;
4529 	b0 += b1 + k11;
4530 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4531 
4532 	b3 += k14;
4533 	b2 += b3 + k13;
4534 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4535 
4536 	b5 += k16;
4537 	b4 += b5 + k15;
4538 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4539 
4540 	b7 += k1;
4541 	b6 += b7 + k0;
4542 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4543 
4544 	b9 += k3;
4545 	b8 += b9 + k2;
4546 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4547 
4548 	b11 += k5;
4549 	b10 += b11 + k4;
4550 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4551 
4552 	b13 += k7 + t2;
4553 	b12 += b13 + k6;
4554 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4555 
4556 	b15 += k9 + 11;
4557 	b14 += b15 + k8 + t0;
4558 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4559 
4560 	b0 += b9;
4561 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4562 
4563 	b2 += b13;
4564 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4565 
4566 	b6 += b11;
4567 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4568 
4569 	b4 += b15;
4570 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4571 
4572 	b10 += b7;
4573 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4574 
4575 	b12 += b3;
4576 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4577 
4578 	b14 += b5;
4579 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4580 
4581 	b8 += b1;
4582 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4583 
4584 	b0 += b7;
4585 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
4586 
4587 	b2 += b5;
4588 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
4589 
4590 	b4 += b3;
4591 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
4592 
4593 	b6 += b1;
4594 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
4595 
4596 	b12 += b15;
4597 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
4598 
4599 	b14 += b13;
4600 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
4601 
4602 	b8 += b11;
4603 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
4604 
4605 	b10 += b9;
4606 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
4607 
4608 	b0 += b15;
4609 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
4610 
4611 	b2 += b11;
4612 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
4613 
4614 	b6 += b13;
4615 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
4616 
4617 	b4 += b9;
4618 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
4619 
4620 	b14 += b1;
4621 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
4622 
4623 	b8 += b5;
4624 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4625 
4626 	b10 += b3;
4627 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4628 
4629 	b12 += b7;
4630 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4631 
4632 	b1 += k13;
4633 	b0 += b1 + k12;
4634 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4635 
4636 	b3 += k15;
4637 	b2 += b3 + k14;
4638 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4639 
4640 	b5 += k0;
4641 	b4 += b5 + k16;
4642 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4643 
4644 	b7 += k2;
4645 	b6 += b7 + k1;
4646 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4647 
4648 	b9 += k4;
4649 	b8 += b9 + k3;
4650 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4651 
4652 	b11 += k6;
4653 	b10 += b11 + k5;
4654 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4655 
4656 	b13 += k8 + t0;
4657 	b12 += b13 + k7;
4658 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4659 
4660 	b15 += k10 + 12;
4661 	b14 += b15 + k9 + t1;
4662 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4663 
4664 	b0 += b9;
4665 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4666 
4667 	b2 += b13;
4668 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4669 
4670 	b6 += b11;
4671 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4672 
4673 	b4 += b15;
4674 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4675 
4676 	b10 += b7;
4677 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4678 
4679 	b12 += b3;
4680 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4681 
4682 	b14 += b5;
4683 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4684 
4685 	b8 += b1;
4686 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4687 
4688 	b0 += b7;
4689 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4690 
4691 	b2 += b5;
4692 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4693 
4694 	b4 += b3;
4695 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4696 
4697 	b6 += b1;
4698 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4699 
4700 	b12 += b15;
4701 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4702 
4703 	b14 += b13;
4704 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4705 
4706 	b8 += b11;
4707 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4708 
4709 	b10 += b9;
4710 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4711 
4712 	b0 += b15;
4713 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4714 
4715 	b2 += b11;
4716 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4717 
4718 	b6 += b13;
4719 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4720 
4721 	b4 += b9;
4722 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4723 
4724 	b14 += b1;
4725 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4726 
4727 	b8 += b5;
4728 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4729 
4730 	b10 += b3;
4731 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4732 
4733 	b12 += b7;
4734 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4735 
4736 	b1 += k14;
4737 	b0 += b1 + k13;
4738 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4739 
4740 	b3 += k16;
4741 	b2 += b3 + k15;
4742 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4743 
4744 	b5 += k1;
4745 	b4 += b5 + k0;
4746 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4747 
4748 	b7 += k3;
4749 	b6 += b7 + k2;
4750 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4751 
4752 	b9 += k5;
4753 	b8 += b9 + k4;
4754 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4755 
4756 	b11 += k7;
4757 	b10 += b11 + k6;
4758 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4759 
4760 	b13 += k9 + t1;
4761 	b12 += b13 + k8;
4762 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4763 
4764 	b15 += k11 + 13;
4765 	b14 += b15 + k10 + t2;
4766 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4767 
4768 	b0 += b9;
4769 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4770 
4771 	b2 += b13;
4772 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4773 
4774 	b6 += b11;
4775 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4776 
4777 	b4 += b15;
4778 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4779 
4780 	b10 += b7;
4781 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4782 
4783 	b12 += b3;
4784 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4785 
4786 	b14 += b5;
4787 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4788 
4789 	b8 += b1;
4790 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4791 
4792 	b0 += b7;
4793 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
4794 
4795 	b2 += b5;
4796 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
4797 
4798 	b4 += b3;
4799 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
4800 
4801 	b6 += b1;
4802 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
4803 
4804 	b12 += b15;
4805 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
4806 
4807 	b14 += b13;
4808 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
4809 
4810 	b8 += b11;
4811 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
4812 
4813 	b10 += b9;
4814 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
4815 
4816 	b0 += b15;
4817 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
4818 
4819 	b2 += b11;
4820 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
4821 
4822 	b6 += b13;
4823 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
4824 
4825 	b4 += b9;
4826 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
4827 
4828 	b14 += b1;
4829 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
4830 
4831 	b8 += b5;
4832 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
4833 
4834 	b10 += b3;
4835 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
4836 
4837 	b12 += b7;
4838 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
4839 
4840 	b1 += k15;
4841 	b0 += b1 + k14;
4842 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
4843 
4844 	b3 += k0;
4845 	b2 += b3 + k16;
4846 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
4847 
4848 	b5 += k2;
4849 	b4 += b5 + k1;
4850 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
4851 
4852 	b7 += k4;
4853 	b6 += b7 + k3;
4854 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
4855 
4856 	b9 += k6;
4857 	b8 += b9 + k5;
4858 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
4859 
4860 	b11 += k8;
4861 	b10 += b11 + k7;
4862 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
4863 
4864 	b13 += k10 + t2;
4865 	b12 += b13 + k9;
4866 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
4867 
4868 	b15 += k12 + 14;
4869 	b14 += b15 + k11 + t0;
4870 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
4871 
4872 	b0 += b9;
4873 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
4874 
4875 	b2 += b13;
4876 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
4877 
4878 	b6 += b11;
4879 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
4880 
4881 	b4 += b15;
4882 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
4883 
4884 	b10 += b7;
4885 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
4886 
4887 	b12 += b3;
4888 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
4889 
4890 	b14 += b5;
4891 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
4892 
4893 	b8 += b1;
4894 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
4895 
4896 	b0 += b7;
4897 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
4898 
4899 	b2 += b5;
4900 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
4901 
4902 	b4 += b3;
4903 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
4904 
4905 	b6 += b1;
4906 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
4907 
4908 	b12 += b15;
4909 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
4910 
4911 	b14 += b13;
4912 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
4913 
4914 	b8 += b11;
4915 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
4916 
4917 	b10 += b9;
4918 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
4919 
4920 	b0 += b15;
4921 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
4922 
4923 	b2 += b11;
4924 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
4925 
4926 	b6 += b13;
4927 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
4928 
4929 	b4 += b9;
4930 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
4931 
4932 	b14 += b1;
4933 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
4934 
4935 	b8 += b5;
4936 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
4937 
4938 	b10 += b3;
4939 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
4940 
4941 	b12 += b7;
4942 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
4943 
4944 	b1 += k16;
4945 	b0 += b1 + k15;
4946 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
4947 
4948 	b3 += k1;
4949 	b2 += b3 + k0;
4950 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
4951 
4952 	b5 += k3;
4953 	b4 += b5 + k2;
4954 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
4955 
4956 	b7 += k5;
4957 	b6 += b7 + k4;
4958 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
4959 
4960 	b9 += k7;
4961 	b8 += b9 + k6;
4962 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
4963 
4964 	b11 += k9;
4965 	b10 += b11 + k8;
4966 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
4967 
4968 	b13 += k11 + t0;
4969 	b12 += b13 + k10;
4970 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
4971 
4972 	b15 += k13 + 15;
4973 	b14 += b15 + k12 + t1;
4974 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
4975 
4976 	b0 += b9;
4977 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
4978 
4979 	b2 += b13;
4980 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
4981 
4982 	b6 += b11;
4983 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
4984 
4985 	b4 += b15;
4986 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
4987 
4988 	b10 += b7;
4989 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
4990 
4991 	b12 += b3;
4992 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
4993 
4994 	b14 += b5;
4995 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
4996 
4997 	b8 += b1;
4998 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
4999 
5000 	b0 += b7;
5001 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
5002 
5003 	b2 += b5;
5004 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
5005 
5006 	b4 += b3;
5007 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
5008 
5009 	b6 += b1;
5010 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
5011 
5012 	b12 += b15;
5013 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
5014 
5015 	b14 += b13;
5016 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
5017 
5018 	b8 += b11;
5019 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
5020 
5021 	b10 += b9;
5022 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
5023 
5024 	b0 += b15;
5025 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
5026 
5027 	b2 += b11;
5028 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
5029 
5030 	b6 += b13;
5031 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
5032 
5033 	b4 += b9;
5034 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
5035 
5036 	b14 += b1;
5037 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
5038 
5039 	b8 += b5;
5040 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
5041 
5042 	b10 += b3;
5043 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
5044 
5045 	b12 += b7;
5046 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
5047 
5048 	b1 += k0;
5049 	b0 += b1 + k16;
5050 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
5051 
5052 	b3 += k2;
5053 	b2 += b3 + k1;
5054 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
5055 
5056 	b5 += k4;
5057 	b4 += b5 + k3;
5058 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
5059 
5060 	b7 += k6;
5061 	b6 += b7 + k5;
5062 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
5063 
5064 	b9 += k8;
5065 	b8 += b9 + k7;
5066 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
5067 
5068 	b11 += k10;
5069 	b10 += b11 + k9;
5070 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
5071 
5072 	b13 += k12 + t1;
5073 	b12 += b13 + k11;
5074 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
5075 
5076 	b15 += k14 + 16;
5077 	b14 += b15 + k13 + t2;
5078 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
5079 
5080 	b0 += b9;
5081 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
5082 
5083 	b2 += b13;
5084 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
5085 
5086 	b6 += b11;
5087 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
5088 
5089 	b4 += b15;
5090 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
5091 
5092 	b10 += b7;
5093 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
5094 
5095 	b12 += b3;
5096 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
5097 
5098 	b14 += b5;
5099 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
5100 
5101 	b8 += b1;
5102 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
5103 
5104 	b0 += b7;
5105 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
5106 
5107 	b2 += b5;
5108 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
5109 
5110 	b4 += b3;
5111 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
5112 
5113 	b6 += b1;
5114 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
5115 
5116 	b12 += b15;
5117 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
5118 
5119 	b14 += b13;
5120 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
5121 
5122 	b8 += b11;
5123 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
5124 
5125 	b10 += b9;
5126 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
5127 
5128 	b0 += b15;
5129 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
5130 
5131 	b2 += b11;
5132 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
5133 
5134 	b6 += b13;
5135 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
5136 
5137 	b4 += b9;
5138 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
5139 
5140 	b14 += b1;
5141 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
5142 
5143 	b8 += b5;
5144 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
5145 
5146 	b10 += b3;
5147 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
5148 
5149 	b12 += b7;
5150 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
5151 
5152 	b1 += k1;
5153 	b0 += b1 + k0;
5154 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
5155 
5156 	b3 += k3;
5157 	b2 += b3 + k2;
5158 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
5159 
5160 	b5 += k5;
5161 	b4 += b5 + k4;
5162 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
5163 
5164 	b7 += k7;
5165 	b6 += b7 + k6;
5166 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
5167 
5168 	b9 += k9;
5169 	b8 += b9 + k8;
5170 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
5171 
5172 	b11 += k11;
5173 	b10 += b11 + k10;
5174 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
5175 
5176 	b13 += k13 + t2;
5177 	b12 += b13 + k12;
5178 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
5179 
5180 	b15 += k15 + 17;
5181 	b14 += b15 + k14 + t0;
5182 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
5183 
5184 	b0 += b9;
5185 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
5186 
5187 	b2 += b13;
5188 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
5189 
5190 	b6 += b11;
5191 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
5192 
5193 	b4 += b15;
5194 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
5195 
5196 	b10 += b7;
5197 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
5198 
5199 	b12 += b3;
5200 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
5201 
5202 	b14 += b5;
5203 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
5204 
5205 	b8 += b1;
5206 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
5207 
5208 	b0 += b7;
5209 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
5210 
5211 	b2 += b5;
5212 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
5213 
5214 	b4 += b3;
5215 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
5216 
5217 	b6 += b1;
5218 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
5219 
5220 	b12 += b15;
5221 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
5222 
5223 	b14 += b13;
5224 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
5225 
5226 	b8 += b11;
5227 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
5228 
5229 	b10 += b9;
5230 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
5231 
5232 	b0 += b15;
5233 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
5234 
5235 	b2 += b11;
5236 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
5237 
5238 	b6 += b13;
5239 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
5240 
5241 	b4 += b9;
5242 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
5243 
5244 	b14 += b1;
5245 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
5246 
5247 	b8 += b5;
5248 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
5249 
5250 	b10 += b3;
5251 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
5252 
5253 	b12 += b7;
5254 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
5255 
5256 	b1 += k2;
5257 	b0 += b1 + k1;
5258 	b1 = ((b1 << 24) | (b1 >> (64 - 24))) ^ b0;
5259 
5260 	b3 += k4;
5261 	b2 += b3 + k3;
5262 	b3 = ((b3 << 13) | (b3 >> (64 - 13))) ^ b2;
5263 
5264 	b5 += k6;
5265 	b4 += b5 + k5;
5266 	b5 = ((b5 << 8) | (b5 >> (64 - 8))) ^ b4;
5267 
5268 	b7 += k8;
5269 	b6 += b7 + k7;
5270 	b7 = ((b7 << 47) | (b7 >> (64 - 47))) ^ b6;
5271 
5272 	b9 += k10;
5273 	b8 += b9 + k9;
5274 	b9 = ((b9 << 8) | (b9 >> (64 - 8))) ^ b8;
5275 
5276 	b11 += k12;
5277 	b10 += b11 + k11;
5278 	b11 = ((b11 << 17) | (b11 >> (64 - 17))) ^ b10;
5279 
5280 	b13 += k14 + t0;
5281 	b12 += b13 + k13;
5282 	b13 = ((b13 << 22) | (b13 >> (64 - 22))) ^ b12;
5283 
5284 	b15 += k16 + 18;
5285 	b14 += b15 + k15 + t1;
5286 	b15 = ((b15 << 37) | (b15 >> (64 - 37))) ^ b14;
5287 
5288 	b0 += b9;
5289 	b9 = ((b9 << 38) | (b9 >> (64 - 38))) ^ b0;
5290 
5291 	b2 += b13;
5292 	b13 = ((b13 << 19) | (b13 >> (64 - 19))) ^ b2;
5293 
5294 	b6 += b11;
5295 	b11 = ((b11 << 10) | (b11 >> (64 - 10))) ^ b6;
5296 
5297 	b4 += b15;
5298 	b15 = ((b15 << 55) | (b15 >> (64 - 55))) ^ b4;
5299 
5300 	b10 += b7;
5301 	b7 = ((b7 << 49) | (b7 >> (64 - 49))) ^ b10;
5302 
5303 	b12 += b3;
5304 	b3 = ((b3 << 18) | (b3 >> (64 - 18))) ^ b12;
5305 
5306 	b14 += b5;
5307 	b5 = ((b5 << 23) | (b5 >> (64 - 23))) ^ b14;
5308 
5309 	b8 += b1;
5310 	b1 = ((b1 << 52) | (b1 >> (64 - 52))) ^ b8;
5311 
5312 	b0 += b7;
5313 	b7 = ((b7 << 33) | (b7 >> (64 - 33))) ^ b0;
5314 
5315 	b2 += b5;
5316 	b5 = ((b5 << 4) | (b5 >> (64 - 4))) ^ b2;
5317 
5318 	b4 += b3;
5319 	b3 = ((b3 << 51) | (b3 >> (64 - 51))) ^ b4;
5320 
5321 	b6 += b1;
5322 	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b6;
5323 
5324 	b12 += b15;
5325 	b15 = ((b15 << 34) | (b15 >> (64 - 34))) ^ b12;
5326 
5327 	b14 += b13;
5328 	b13 = ((b13 << 41) | (b13 >> (64 - 41))) ^ b14;
5329 
5330 	b8 += b11;
5331 	b11 = ((b11 << 59) | (b11 >> (64 - 59))) ^ b8;
5332 
5333 	b10 += b9;
5334 	b9 = ((b9 << 17) | (b9 >> (64 - 17))) ^ b10;
5335 
5336 	b0 += b15;
5337 	b15 = ((b15 << 5) | (b15 >> (64 - 5))) ^ b0;
5338 
5339 	b2 += b11;
5340 	b11 = ((b11 << 20) | (b11 >> (64 - 20))) ^ b2;
5341 
5342 	b6 += b13;
5343 	b13 = ((b13 << 48) | (b13 >> (64 - 48))) ^ b6;
5344 
5345 	b4 += b9;
5346 	b9 = ((b9 << 41) | (b9 >> (64 - 41))) ^ b4;
5347 
5348 	b14 += b1;
5349 	b1 = ((b1 << 47) | (b1 >> (64 - 47))) ^ b14;
5350 
5351 	b8 += b5;
5352 	b5 = ((b5 << 28) | (b5 >> (64 - 28))) ^ b8;
5353 
5354 	b10 += b3;
5355 	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b10;
5356 
5357 	b12 += b7;
5358 	b7 = ((b7 << 25) | (b7 >> (64 - 25))) ^ b12;
5359 
5360 	b1 += k3;
5361 	b0 += b1 + k2;
5362 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b0;
5363 
5364 	b3 += k5;
5365 	b2 += b3 + k4;
5366 	b3 = ((b3 << 9) | (b3 >> (64 - 9))) ^ b2;
5367 
5368 	b5 += k7;
5369 	b4 += b5 + k6;
5370 	b5 = ((b5 << 37) | (b5 >> (64 - 37))) ^ b4;
5371 
5372 	b7 += k9;
5373 	b6 += b7 + k8;
5374 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b6;
5375 
5376 	b9 += k11;
5377 	b8 += b9 + k10;
5378 	b9 = ((b9 << 12) | (b9 >> (64 - 12))) ^ b8;
5379 
5380 	b11 += k13;
5381 	b10 += b11 + k12;
5382 	b11 = ((b11 << 47) | (b11 >> (64 - 47))) ^ b10;
5383 
5384 	b13 += k15 + t1;
5385 	b12 += b13 + k14;
5386 	b13 = ((b13 << 44) | (b13 >> (64 - 44))) ^ b12;
5387 
5388 	b15 += k0 + 19;
5389 	b14 += b15 + k16 + t2;
5390 	b15 = ((b15 << 30) | (b15 >> (64 - 30))) ^ b14;
5391 
5392 	b0 += b9;
5393 	b9 = ((b9 << 16) | (b9 >> (64 - 16))) ^ b0;
5394 
5395 	b2 += b13;
5396 	b13 = ((b13 << 34) | (b13 >> (64 - 34))) ^ b2;
5397 
5398 	b6 += b11;
5399 	b11 = ((b11 << 56) | (b11 >> (64 - 56))) ^ b6;
5400 
5401 	b4 += b15;
5402 	b15 = ((b15 << 51) | (b15 >> (64 - 51))) ^ b4;
5403 
5404 	b10 += b7;
5405 	b7 = ((b7 << 4) | (b7 >> (64 - 4))) ^ b10;
5406 
5407 	b12 += b3;
5408 	b3 = ((b3 << 53) | (b3 >> (64 - 53))) ^ b12;
5409 
5410 	b14 += b5;
5411 	b5 = ((b5 << 42) | (b5 >> (64 - 42))) ^ b14;
5412 
5413 	b8 += b1;
5414 	b1 = ((b1 << 41) | (b1 >> (64 - 41))) ^ b8;
5415 
5416 	b0 += b7;
5417 	b7 = ((b7 << 31) | (b7 >> (64 - 31))) ^ b0;
5418 
5419 	b2 += b5;
5420 	b5 = ((b5 << 44) | (b5 >> (64 - 44))) ^ b2;
5421 
5422 	b4 += b3;
5423 	b3 = ((b3 << 47) | (b3 >> (64 - 47))) ^ b4;
5424 
5425 	b6 += b1;
5426 	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b6;
5427 
5428 	b12 += b15;
5429 	b15 = ((b15 << 19) | (b15 >> (64 - 19))) ^ b12;
5430 
5431 	b14 += b13;
5432 	b13 = ((b13 << 42) | (b13 >> (64 - 42))) ^ b14;
5433 
5434 	b8 += b11;
5435 	b11 = ((b11 << 44) | (b11 >> (64 - 44))) ^ b8;
5436 
5437 	b10 += b9;
5438 	b9 = ((b9 << 25) | (b9 >> (64 - 25))) ^ b10;
5439 
5440 	b0 += b15;
5441 	b15 = ((b15 << 9) | (b15 >> (64 - 9))) ^ b0;
5442 
5443 	b2 += b11;
5444 	b11 = ((b11 << 48) | (b11 >> (64 - 48))) ^ b2;
5445 
5446 	b6 += b13;
5447 	b13 = ((b13 << 35) | (b13 >> (64 - 35))) ^ b6;
5448 
5449 	b4 += b9;
5450 	b9 = ((b9 << 52) | (b9 >> (64 - 52))) ^ b4;
5451 
5452 	b14 += b1;
5453 	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b14;
5454 
5455 	b8 += b5;
5456 	b5 = ((b5 << 31) | (b5 >> (64 - 31))) ^ b8;
5457 
5458 	b10 += b3;
5459 	b3 = ((b3 << 37) | (b3 >> (64 - 37))) ^ b10;
5460 
5461 	b12 += b7;
5462 	b7 = ((b7 << 20) | (b7 >> (64 - 20))) ^ b12;
5463 
5464 	output[0] = b0 + k3;
5465 	output[1] = b1 + k4;
5466 	output[2] = b2 + k5;
5467 	output[3] = b3 + k6;
5468 	output[4] = b4 + k7;
5469 	output[5] = b5 + k8;
5470 	output[6] = b6 + k9;
5471 	output[7] = b7 + k10;
5472 	output[8] = b8 + k11;
5473 	output[9] = b9 + k12;
5474 	output[10] = b10 + k13;
5475 	output[11] = b11 + k14;
5476 	output[12] = b12 + k15;
5477 	output[13] = b13 + k16 + t2;
5478 	output[14] = b14 + k0 + t0;
5479 	output[15] = b15 + k1 + 20;
5480 }
5481 
5482 void threefish_decrypt_1024(struct threefish_key *key_ctx, u64 *input,
5483 			    u64 *output)
5484 {
5485 	u64 b0 = input[0], b1 = input[1],
5486 	    b2 = input[2], b3 = input[3],
5487 	    b4 = input[4], b5 = input[5],
5488 	    b6 = input[6], b7 = input[7],
5489 	    b8 = input[8], b9 = input[9],
5490 	    b10 = input[10], b11 = input[11],
5491 	    b12 = input[12], b13 = input[13],
5492 	    b14 = input[14], b15 = input[15];
5493 	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
5494 	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
5495 	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
5496 	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
5497 	    k8 = key_ctx->key[8], k9 = key_ctx->key[9],
5498 	    k10 = key_ctx->key[10], k11 = key_ctx->key[11],
5499 	    k12 = key_ctx->key[12], k13 = key_ctx->key[13],
5500 	    k14 = key_ctx->key[14], k15 = key_ctx->key[15],
5501 	    k16 = key_ctx->key[16];
5502 	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
5503 	    t2 = key_ctx->tweak[2];
5504 	u64 tmp;
5505 
5506 	b0 -= k3;
5507 	b1 -= k4;
5508 	b2 -= k5;
5509 	b3 -= k6;
5510 	b4 -= k7;
5511 	b5 -= k8;
5512 	b6 -= k9;
5513 	b7 -= k10;
5514 	b8 -= k11;
5515 	b9 -= k12;
5516 	b10 -= k13;
5517 	b11 -= k14;
5518 	b12 -= k15;
5519 	b13 -= k16 + t2;
5520 	b14 -= k0 + t0;
5521 	b15 -= k1 + 20;
5522 	tmp = b7 ^ b12;
5523 	b7 = (tmp >> 20) | (tmp << (64 - 20));
5524 	b12 -= b7;
5525 
5526 	tmp = b3 ^ b10;
5527 	b3 = (tmp >> 37) | (tmp << (64 - 37));
5528 	b10 -= b3;
5529 
5530 	tmp = b5 ^ b8;
5531 	b5 = (tmp >> 31) | (tmp << (64 - 31));
5532 	b8 -= b5;
5533 
5534 	tmp = b1 ^ b14;
5535 	b1 = (tmp >> 23) | (tmp << (64 - 23));
5536 	b14 -= b1;
5537 
5538 	tmp = b9 ^ b4;
5539 	b9 = (tmp >> 52) | (tmp << (64 - 52));
5540 	b4 -= b9;
5541 
5542 	tmp = b13 ^ b6;
5543 	b13 = (tmp >> 35) | (tmp << (64 - 35));
5544 	b6 -= b13;
5545 
5546 	tmp = b11 ^ b2;
5547 	b11 = (tmp >> 48) | (tmp << (64 - 48));
5548 	b2 -= b11;
5549 
5550 	tmp = b15 ^ b0;
5551 	b15 = (tmp >> 9) | (tmp << (64 - 9));
5552 	b0 -= b15;
5553 
5554 	tmp = b9 ^ b10;
5555 	b9 = (tmp >> 25) | (tmp << (64 - 25));
5556 	b10 -= b9;
5557 
5558 	tmp = b11 ^ b8;
5559 	b11 = (tmp >> 44) | (tmp << (64 - 44));
5560 	b8 -= b11;
5561 
5562 	tmp = b13 ^ b14;
5563 	b13 = (tmp >> 42) | (tmp << (64 - 42));
5564 	b14 -= b13;
5565 
5566 	tmp = b15 ^ b12;
5567 	b15 = (tmp >> 19) | (tmp << (64 - 19));
5568 	b12 -= b15;
5569 
5570 	tmp = b1 ^ b6;
5571 	b1 = (tmp >> 46) | (tmp << (64 - 46));
5572 	b6 -= b1;
5573 
5574 	tmp = b3 ^ b4;
5575 	b3 = (tmp >> 47) | (tmp << (64 - 47));
5576 	b4 -= b3;
5577 
5578 	tmp = b5 ^ b2;
5579 	b5 = (tmp >> 44) | (tmp << (64 - 44));
5580 	b2 -= b5;
5581 
5582 	tmp = b7 ^ b0;
5583 	b7 = (tmp >> 31) | (tmp << (64 - 31));
5584 	b0 -= b7;
5585 
5586 	tmp = b1 ^ b8;
5587 	b1 = (tmp >> 41) | (tmp << (64 - 41));
5588 	b8 -= b1;
5589 
5590 	tmp = b5 ^ b14;
5591 	b5 = (tmp >> 42) | (tmp << (64 - 42));
5592 	b14 -= b5;
5593 
5594 	tmp = b3 ^ b12;
5595 	b3 = (tmp >> 53) | (tmp << (64 - 53));
5596 	b12 -= b3;
5597 
5598 	tmp = b7 ^ b10;
5599 	b7 = (tmp >> 4) | (tmp << (64 - 4));
5600 	b10 -= b7;
5601 
5602 	tmp = b15 ^ b4;
5603 	b15 = (tmp >> 51) | (tmp << (64 - 51));
5604 	b4 -= b15;
5605 
5606 	tmp = b11 ^ b6;
5607 	b11 = (tmp >> 56) | (tmp << (64 - 56));
5608 	b6 -= b11;
5609 
5610 	tmp = b13 ^ b2;
5611 	b13 = (tmp >> 34) | (tmp << (64 - 34));
5612 	b2 -= b13;
5613 
5614 	tmp = b9 ^ b0;
5615 	b9 = (tmp >> 16) | (tmp << (64 - 16));
5616 	b0 -= b9;
5617 
5618 	tmp = b15 ^ b14;
5619 	b15 = (tmp >> 30) | (tmp << (64 - 30));
5620 	b14 -= b15 + k16 + t2;
5621 	b15 -= k0 + 19;
5622 
5623 	tmp = b13 ^ b12;
5624 	b13 = (tmp >> 44) | (tmp << (64 - 44));
5625 	b12 -= b13 + k14;
5626 	b13 -= k15 + t1;
5627 
5628 	tmp = b11 ^ b10;
5629 	b11 = (tmp >> 47) | (tmp << (64 - 47));
5630 	b10 -= b11 + k12;
5631 	b11 -= k13;
5632 
5633 	tmp = b9 ^ b8;
5634 	b9 = (tmp >> 12) | (tmp << (64 - 12));
5635 	b8 -= b9 + k10;
5636 	b9 -= k11;
5637 
5638 	tmp = b7 ^ b6;
5639 	b7 = (tmp >> 31) | (tmp << (64 - 31));
5640 	b6 -= b7 + k8;
5641 	b7 -= k9;
5642 
5643 	tmp = b5 ^ b4;
5644 	b5 = (tmp >> 37) | (tmp << (64 - 37));
5645 	b4 -= b5 + k6;
5646 	b5 -= k7;
5647 
5648 	tmp = b3 ^ b2;
5649 	b3 = (tmp >> 9) | (tmp << (64 - 9));
5650 	b2 -= b3 + k4;
5651 	b3 -= k5;
5652 
5653 	tmp = b1 ^ b0;
5654 	b1 = (tmp >> 41) | (tmp << (64 - 41));
5655 	b0 -= b1 + k2;
5656 	b1 -= k3;
5657 
5658 	tmp = b7 ^ b12;
5659 	b7 = (tmp >> 25) | (tmp << (64 - 25));
5660 	b12 -= b7;
5661 
5662 	tmp = b3 ^ b10;
5663 	b3 = (tmp >> 16) | (tmp << (64 - 16));
5664 	b10 -= b3;
5665 
5666 	tmp = b5 ^ b8;
5667 	b5 = (tmp >> 28) | (tmp << (64 - 28));
5668 	b8 -= b5;
5669 
5670 	tmp = b1 ^ b14;
5671 	b1 = (tmp >> 47) | (tmp << (64 - 47));
5672 	b14 -= b1;
5673 
5674 	tmp = b9 ^ b4;
5675 	b9 = (tmp >> 41) | (tmp << (64 - 41));
5676 	b4 -= b9;
5677 
5678 	tmp = b13 ^ b6;
5679 	b13 = (tmp >> 48) | (tmp << (64 - 48));
5680 	b6 -= b13;
5681 
5682 	tmp = b11 ^ b2;
5683 	b11 = (tmp >> 20) | (tmp << (64 - 20));
5684 	b2 -= b11;
5685 
5686 	tmp = b15 ^ b0;
5687 	b15 = (tmp >> 5) | (tmp << (64 - 5));
5688 	b0 -= b15;
5689 
5690 	tmp = b9 ^ b10;
5691 	b9 = (tmp >> 17) | (tmp << (64 - 17));
5692 	b10 -= b9;
5693 
5694 	tmp = b11 ^ b8;
5695 	b11 = (tmp >> 59) | (tmp << (64 - 59));
5696 	b8 -= b11;
5697 
5698 	tmp = b13 ^ b14;
5699 	b13 = (tmp >> 41) | (tmp << (64 - 41));
5700 	b14 -= b13;
5701 
5702 	tmp = b15 ^ b12;
5703 	b15 = (tmp >> 34) | (tmp << (64 - 34));
5704 	b12 -= b15;
5705 
5706 	tmp = b1 ^ b6;
5707 	b1 = (tmp >> 13) | (tmp << (64 - 13));
5708 	b6 -= b1;
5709 
5710 	tmp = b3 ^ b4;
5711 	b3 = (tmp >> 51) | (tmp << (64 - 51));
5712 	b4 -= b3;
5713 
5714 	tmp = b5 ^ b2;
5715 	b5 = (tmp >> 4) | (tmp << (64 - 4));
5716 	b2 -= b5;
5717 
5718 	tmp = b7 ^ b0;
5719 	b7 = (tmp >> 33) | (tmp << (64 - 33));
5720 	b0 -= b7;
5721 
5722 	tmp = b1 ^ b8;
5723 	b1 = (tmp >> 52) | (tmp << (64 - 52));
5724 	b8 -= b1;
5725 
5726 	tmp = b5 ^ b14;
5727 	b5 = (tmp >> 23) | (tmp << (64 - 23));
5728 	b14 -= b5;
5729 
5730 	tmp = b3 ^ b12;
5731 	b3 = (tmp >> 18) | (tmp << (64 - 18));
5732 	b12 -= b3;
5733 
5734 	tmp = b7 ^ b10;
5735 	b7 = (tmp >> 49) | (tmp << (64 - 49));
5736 	b10 -= b7;
5737 
5738 	tmp = b15 ^ b4;
5739 	b15 = (tmp >> 55) | (tmp << (64 - 55));
5740 	b4 -= b15;
5741 
5742 	tmp = b11 ^ b6;
5743 	b11 = (tmp >> 10) | (tmp << (64 - 10));
5744 	b6 -= b11;
5745 
5746 	tmp = b13 ^ b2;
5747 	b13 = (tmp >> 19) | (tmp << (64 - 19));
5748 	b2 -= b13;
5749 
5750 	tmp = b9 ^ b0;
5751 	b9 = (tmp >> 38) | (tmp << (64 - 38));
5752 	b0 -= b9;
5753 
5754 	tmp = b15 ^ b14;
5755 	b15 = (tmp >> 37) | (tmp << (64 - 37));
5756 	b14 -= b15 + k15 + t1;
5757 	b15 -= k16 + 18;
5758 
5759 	tmp = b13 ^ b12;
5760 	b13 = (tmp >> 22) | (tmp << (64 - 22));
5761 	b12 -= b13 + k13;
5762 	b13 -= k14 + t0;
5763 
5764 	tmp = b11 ^ b10;
5765 	b11 = (tmp >> 17) | (tmp << (64 - 17));
5766 	b10 -= b11 + k11;
5767 	b11 -= k12;
5768 
5769 	tmp = b9 ^ b8;
5770 	b9 = (tmp >> 8) | (tmp << (64 - 8));
5771 	b8 -= b9 + k9;
5772 	b9 -= k10;
5773 
5774 	tmp = b7 ^ b6;
5775 	b7 = (tmp >> 47) | (tmp << (64 - 47));
5776 	b6 -= b7 + k7;
5777 	b7 -= k8;
5778 
5779 	tmp = b5 ^ b4;
5780 	b5 = (tmp >> 8) | (tmp << (64 - 8));
5781 	b4 -= b5 + k5;
5782 	b5 -= k6;
5783 
5784 	tmp = b3 ^ b2;
5785 	b3 = (tmp >> 13) | (tmp << (64 - 13));
5786 	b2 -= b3 + k3;
5787 	b3 -= k4;
5788 
5789 	tmp = b1 ^ b0;
5790 	b1 = (tmp >> 24) | (tmp << (64 - 24));
5791 	b0 -= b1 + k1;
5792 	b1 -= k2;
5793 
5794 	tmp = b7 ^ b12;
5795 	b7 = (tmp >> 20) | (tmp << (64 - 20));
5796 	b12 -= b7;
5797 
5798 	tmp = b3 ^ b10;
5799 	b3 = (tmp >> 37) | (tmp << (64 - 37));
5800 	b10 -= b3;
5801 
5802 	tmp = b5 ^ b8;
5803 	b5 = (tmp >> 31) | (tmp << (64 - 31));
5804 	b8 -= b5;
5805 
5806 	tmp = b1 ^ b14;
5807 	b1 = (tmp >> 23) | (tmp << (64 - 23));
5808 	b14 -= b1;
5809 
5810 	tmp = b9 ^ b4;
5811 	b9 = (tmp >> 52) | (tmp << (64 - 52));
5812 	b4 -= b9;
5813 
5814 	tmp = b13 ^ b6;
5815 	b13 = (tmp >> 35) | (tmp << (64 - 35));
5816 	b6 -= b13;
5817 
5818 	tmp = b11 ^ b2;
5819 	b11 = (tmp >> 48) | (tmp << (64 - 48));
5820 	b2 -= b11;
5821 
5822 	tmp = b15 ^ b0;
5823 	b15 = (tmp >> 9) | (tmp << (64 - 9));
5824 	b0 -= b15;
5825 
5826 	tmp = b9 ^ b10;
5827 	b9 = (tmp >> 25) | (tmp << (64 - 25));
5828 	b10 -= b9;
5829 
5830 	tmp = b11 ^ b8;
5831 	b11 = (tmp >> 44) | (tmp << (64 - 44));
5832 	b8 -= b11;
5833 
5834 	tmp = b13 ^ b14;
5835 	b13 = (tmp >> 42) | (tmp << (64 - 42));
5836 	b14 -= b13;
5837 
5838 	tmp = b15 ^ b12;
5839 	b15 = (tmp >> 19) | (tmp << (64 - 19));
5840 	b12 -= b15;
5841 
5842 	tmp = b1 ^ b6;
5843 	b1 = (tmp >> 46) | (tmp << (64 - 46));
5844 	b6 -= b1;
5845 
5846 	tmp = b3 ^ b4;
5847 	b3 = (tmp >> 47) | (tmp << (64 - 47));
5848 	b4 -= b3;
5849 
5850 	tmp = b5 ^ b2;
5851 	b5 = (tmp >> 44) | (tmp << (64 - 44));
5852 	b2 -= b5;
5853 
5854 	tmp = b7 ^ b0;
5855 	b7 = (tmp >> 31) | (tmp << (64 - 31));
5856 	b0 -= b7;
5857 
5858 	tmp = b1 ^ b8;
5859 	b1 = (tmp >> 41) | (tmp << (64 - 41));
5860 	b8 -= b1;
5861 
5862 	tmp = b5 ^ b14;
5863 	b5 = (tmp >> 42) | (tmp << (64 - 42));
5864 	b14 -= b5;
5865 
5866 	tmp = b3 ^ b12;
5867 	b3 = (tmp >> 53) | (tmp << (64 - 53));
5868 	b12 -= b3;
5869 
5870 	tmp = b7 ^ b10;
5871 	b7 = (tmp >> 4) | (tmp << (64 - 4));
5872 	b10 -= b7;
5873 
5874 	tmp = b15 ^ b4;
5875 	b15 = (tmp >> 51) | (tmp << (64 - 51));
5876 	b4 -= b15;
5877 
5878 	tmp = b11 ^ b6;
5879 	b11 = (tmp >> 56) | (tmp << (64 - 56));
5880 	b6 -= b11;
5881 
5882 	tmp = b13 ^ b2;
5883 	b13 = (tmp >> 34) | (tmp << (64 - 34));
5884 	b2 -= b13;
5885 
5886 	tmp = b9 ^ b0;
5887 	b9 = (tmp >> 16) | (tmp << (64 - 16));
5888 	b0 -= b9;
5889 
5890 	tmp = b15 ^ b14;
5891 	b15 = (tmp >> 30) | (tmp << (64 - 30));
5892 	b14 -= b15 + k14 + t0;
5893 	b15 -= k15 + 17;
5894 
5895 	tmp = b13 ^ b12;
5896 	b13 = (tmp >> 44) | (tmp << (64 - 44));
5897 	b12 -= b13 + k12;
5898 	b13 -= k13 + t2;
5899 
5900 	tmp = b11 ^ b10;
5901 	b11 = (tmp >> 47) | (tmp << (64 - 47));
5902 	b10 -= b11 + k10;
5903 	b11 -= k11;
5904 
5905 	tmp = b9 ^ b8;
5906 	b9 = (tmp >> 12) | (tmp << (64 - 12));
5907 	b8 -= b9 + k8;
5908 	b9 -= k9;
5909 
5910 	tmp = b7 ^ b6;
5911 	b7 = (tmp >> 31) | (tmp << (64 - 31));
5912 	b6 -= b7 + k6;
5913 	b7 -= k7;
5914 
5915 	tmp = b5 ^ b4;
5916 	b5 = (tmp >> 37) | (tmp << (64 - 37));
5917 	b4 -= b5 + k4;
5918 	b5 -= k5;
5919 
5920 	tmp = b3 ^ b2;
5921 	b3 = (tmp >> 9) | (tmp << (64 - 9));
5922 	b2 -= b3 + k2;
5923 	b3 -= k3;
5924 
5925 	tmp = b1 ^ b0;
5926 	b1 = (tmp >> 41) | (tmp << (64 - 41));
5927 	b0 -= b1 + k0;
5928 	b1 -= k1;
5929 
5930 	tmp = b7 ^ b12;
5931 	b7 = (tmp >> 25) | (tmp << (64 - 25));
5932 	b12 -= b7;
5933 
5934 	tmp = b3 ^ b10;
5935 	b3 = (tmp >> 16) | (tmp << (64 - 16));
5936 	b10 -= b3;
5937 
5938 	tmp = b5 ^ b8;
5939 	b5 = (tmp >> 28) | (tmp << (64 - 28));
5940 	b8 -= b5;
5941 
5942 	tmp = b1 ^ b14;
5943 	b1 = (tmp >> 47) | (tmp << (64 - 47));
5944 	b14 -= b1;
5945 
5946 	tmp = b9 ^ b4;
5947 	b9 = (tmp >> 41) | (tmp << (64 - 41));
5948 	b4 -= b9;
5949 
5950 	tmp = b13 ^ b6;
5951 	b13 = (tmp >> 48) | (tmp << (64 - 48));
5952 	b6 -= b13;
5953 
5954 	tmp = b11 ^ b2;
5955 	b11 = (tmp >> 20) | (tmp << (64 - 20));
5956 	b2 -= b11;
5957 
5958 	tmp = b15 ^ b0;
5959 	b15 = (tmp >> 5) | (tmp << (64 - 5));
5960 	b0 -= b15;
5961 
5962 	tmp = b9 ^ b10;
5963 	b9 = (tmp >> 17) | (tmp << (64 - 17));
5964 	b10 -= b9;
5965 
5966 	tmp = b11 ^ b8;
5967 	b11 = (tmp >> 59) | (tmp << (64 - 59));
5968 	b8 -= b11;
5969 
5970 	tmp = b13 ^ b14;
5971 	b13 = (tmp >> 41) | (tmp << (64 - 41));
5972 	b14 -= b13;
5973 
5974 	tmp = b15 ^ b12;
5975 	b15 = (tmp >> 34) | (tmp << (64 - 34));
5976 	b12 -= b15;
5977 
5978 	tmp = b1 ^ b6;
5979 	b1 = (tmp >> 13) | (tmp << (64 - 13));
5980 	b6 -= b1;
5981 
5982 	tmp = b3 ^ b4;
5983 	b3 = (tmp >> 51) | (tmp << (64 - 51));
5984 	b4 -= b3;
5985 
5986 	tmp = b5 ^ b2;
5987 	b5 = (tmp >> 4) | (tmp << (64 - 4));
5988 	b2 -= b5;
5989 
5990 	tmp = b7 ^ b0;
5991 	b7 = (tmp >> 33) | (tmp << (64 - 33));
5992 	b0 -= b7;
5993 
5994 	tmp = b1 ^ b8;
5995 	b1 = (tmp >> 52) | (tmp << (64 - 52));
5996 	b8 -= b1;
5997 
5998 	tmp = b5 ^ b14;
5999 	b5 = (tmp >> 23) | (tmp << (64 - 23));
6000 	b14 -= b5;
6001 
6002 	tmp = b3 ^ b12;
6003 	b3 = (tmp >> 18) | (tmp << (64 - 18));
6004 	b12 -= b3;
6005 
6006 	tmp = b7 ^ b10;
6007 	b7 = (tmp >> 49) | (tmp << (64 - 49));
6008 	b10 -= b7;
6009 
6010 	tmp = b15 ^ b4;
6011 	b15 = (tmp >> 55) | (tmp << (64 - 55));
6012 	b4 -= b15;
6013 
6014 	tmp = b11 ^ b6;
6015 	b11 = (tmp >> 10) | (tmp << (64 - 10));
6016 	b6 -= b11;
6017 
6018 	tmp = b13 ^ b2;
6019 	b13 = (tmp >> 19) | (tmp << (64 - 19));
6020 	b2 -= b13;
6021 
6022 	tmp = b9 ^ b0;
6023 	b9 = (tmp >> 38) | (tmp << (64 - 38));
6024 	b0 -= b9;
6025 
6026 	tmp = b15 ^ b14;
6027 	b15 = (tmp >> 37) | (tmp << (64 - 37));
6028 	b14 -= b15 + k13 + t2;
6029 	b15 -= k14 + 16;
6030 
6031 	tmp = b13 ^ b12;
6032 	b13 = (tmp >> 22) | (tmp << (64 - 22));
6033 	b12 -= b13 + k11;
6034 	b13 -= k12 + t1;
6035 
6036 	tmp = b11 ^ b10;
6037 	b11 = (tmp >> 17) | (tmp << (64 - 17));
6038 	b10 -= b11 + k9;
6039 	b11 -= k10;
6040 
6041 	tmp = b9 ^ b8;
6042 	b9 = (tmp >> 8) | (tmp << (64 - 8));
6043 	b8 -= b9 + k7;
6044 	b9 -= k8;
6045 
6046 	tmp = b7 ^ b6;
6047 	b7 = (tmp >> 47) | (tmp << (64 - 47));
6048 	b6 -= b7 + k5;
6049 	b7 -= k6;
6050 
6051 	tmp = b5 ^ b4;
6052 	b5 = (tmp >> 8) | (tmp << (64 - 8));
6053 	b4 -= b5 + k3;
6054 	b5 -= k4;
6055 
6056 	tmp = b3 ^ b2;
6057 	b3 = (tmp >> 13) | (tmp << (64 - 13));
6058 	b2 -= b3 + k1;
6059 	b3 -= k2;
6060 
6061 	tmp = b1 ^ b0;
6062 	b1 = (tmp >> 24) | (tmp << (64 - 24));
6063 	b0 -= b1 + k16;
6064 	b1 -= k0;
6065 
6066 	tmp = b7 ^ b12;
6067 	b7 = (tmp >> 20) | (tmp << (64 - 20));
6068 	b12 -= b7;
6069 
6070 	tmp = b3 ^ b10;
6071 	b3 = (tmp >> 37) | (tmp << (64 - 37));
6072 	b10 -= b3;
6073 
6074 	tmp = b5 ^ b8;
6075 	b5 = (tmp >> 31) | (tmp << (64 - 31));
6076 	b8 -= b5;
6077 
6078 	tmp = b1 ^ b14;
6079 	b1 = (tmp >> 23) | (tmp << (64 - 23));
6080 	b14 -= b1;
6081 
6082 	tmp = b9 ^ b4;
6083 	b9 = (tmp >> 52) | (tmp << (64 - 52));
6084 	b4 -= b9;
6085 
6086 	tmp = b13 ^ b6;
6087 	b13 = (tmp >> 35) | (tmp << (64 - 35));
6088 	b6 -= b13;
6089 
6090 	tmp = b11 ^ b2;
6091 	b11 = (tmp >> 48) | (tmp << (64 - 48));
6092 	b2 -= b11;
6093 
6094 	tmp = b15 ^ b0;
6095 	b15 = (tmp >> 9) | (tmp << (64 - 9));
6096 	b0 -= b15;
6097 
6098 	tmp = b9 ^ b10;
6099 	b9 = (tmp >> 25) | (tmp << (64 - 25));
6100 	b10 -= b9;
6101 
6102 	tmp = b11 ^ b8;
6103 	b11 = (tmp >> 44) | (tmp << (64 - 44));
6104 	b8 -= b11;
6105 
6106 	tmp = b13 ^ b14;
6107 	b13 = (tmp >> 42) | (tmp << (64 - 42));
6108 	b14 -= b13;
6109 
6110 	tmp = b15 ^ b12;
6111 	b15 = (tmp >> 19) | (tmp << (64 - 19));
6112 	b12 -= b15;
6113 
6114 	tmp = b1 ^ b6;
6115 	b1 = (tmp >> 46) | (tmp << (64 - 46));
6116 	b6 -= b1;
6117 
6118 	tmp = b3 ^ b4;
6119 	b3 = (tmp >> 47) | (tmp << (64 - 47));
6120 	b4 -= b3;
6121 
6122 	tmp = b5 ^ b2;
6123 	b5 = (tmp >> 44) | (tmp << (64 - 44));
6124 	b2 -= b5;
6125 
6126 	tmp = b7 ^ b0;
6127 	b7 = (tmp >> 31) | (tmp << (64 - 31));
6128 	b0 -= b7;
6129 
6130 	tmp = b1 ^ b8;
6131 	b1 = (tmp >> 41) | (tmp << (64 - 41));
6132 	b8 -= b1;
6133 
6134 	tmp = b5 ^ b14;
6135 	b5 = (tmp >> 42) | (tmp << (64 - 42));
6136 	b14 -= b5;
6137 
6138 	tmp = b3 ^ b12;
6139 	b3 = (tmp >> 53) | (tmp << (64 - 53));
6140 	b12 -= b3;
6141 
6142 	tmp = b7 ^ b10;
6143 	b7 = (tmp >> 4) | (tmp << (64 - 4));
6144 	b10 -= b7;
6145 
6146 	tmp = b15 ^ b4;
6147 	b15 = (tmp >> 51) | (tmp << (64 - 51));
6148 	b4 -= b15;
6149 
6150 	tmp = b11 ^ b6;
6151 	b11 = (tmp >> 56) | (tmp << (64 - 56));
6152 	b6 -= b11;
6153 
6154 	tmp = b13 ^ b2;
6155 	b13 = (tmp >> 34) | (tmp << (64 - 34));
6156 	b2 -= b13;
6157 
6158 	tmp = b9 ^ b0;
6159 	b9 = (tmp >> 16) | (tmp << (64 - 16));
6160 	b0 -= b9;
6161 
6162 	tmp = b15 ^ b14;
6163 	b15 = (tmp >> 30) | (tmp << (64 - 30));
6164 	b14 -= b15 + k12 + t1;
6165 	b15 -= k13 + 15;
6166 
6167 	tmp = b13 ^ b12;
6168 	b13 = (tmp >> 44) | (tmp << (64 - 44));
6169 	b12 -= b13 + k10;
6170 	b13 -= k11 + t0;
6171 
6172 	tmp = b11 ^ b10;
6173 	b11 = (tmp >> 47) | (tmp << (64 - 47));
6174 	b10 -= b11 + k8;
6175 	b11 -= k9;
6176 
6177 	tmp = b9 ^ b8;
6178 	b9 = (tmp >> 12) | (tmp << (64 - 12));
6179 	b8 -= b9 + k6;
6180 	b9 -= k7;
6181 
6182 	tmp = b7 ^ b6;
6183 	b7 = (tmp >> 31) | (tmp << (64 - 31));
6184 	b6 -= b7 + k4;
6185 	b7 -= k5;
6186 
6187 	tmp = b5 ^ b4;
6188 	b5 = (tmp >> 37) | (tmp << (64 - 37));
6189 	b4 -= b5 + k2;
6190 	b5 -= k3;
6191 
6192 	tmp = b3 ^ b2;
6193 	b3 = (tmp >> 9) | (tmp << (64 - 9));
6194 	b2 -= b3 + k0;
6195 	b3 -= k1;
6196 
6197 	tmp = b1 ^ b0;
6198 	b1 = (tmp >> 41) | (tmp << (64 - 41));
6199 	b0 -= b1 + k15;
6200 	b1 -= k16;
6201 
6202 	tmp = b7 ^ b12;
6203 	b7 = (tmp >> 25) | (tmp << (64 - 25));
6204 	b12 -= b7;
6205 
6206 	tmp = b3 ^ b10;
6207 	b3 = (tmp >> 16) | (tmp << (64 - 16));
6208 	b10 -= b3;
6209 
6210 	tmp = b5 ^ b8;
6211 	b5 = (tmp >> 28) | (tmp << (64 - 28));
6212 	b8 -= b5;
6213 
6214 	tmp = b1 ^ b14;
6215 	b1 = (tmp >> 47) | (tmp << (64 - 47));
6216 	b14 -= b1;
6217 
6218 	tmp = b9 ^ b4;
6219 	b9 = (tmp >> 41) | (tmp << (64 - 41));
6220 	b4 -= b9;
6221 
6222 	tmp = b13 ^ b6;
6223 	b13 = (tmp >> 48) | (tmp << (64 - 48));
6224 	b6 -= b13;
6225 
6226 	tmp = b11 ^ b2;
6227 	b11 = (tmp >> 20) | (tmp << (64 - 20));
6228 	b2 -= b11;
6229 
6230 	tmp = b15 ^ b0;
6231 	b15 = (tmp >> 5) | (tmp << (64 - 5));
6232 	b0 -= b15;
6233 
6234 	tmp = b9 ^ b10;
6235 	b9 = (tmp >> 17) | (tmp << (64 - 17));
6236 	b10 -= b9;
6237 
6238 	tmp = b11 ^ b8;
6239 	b11 = (tmp >> 59) | (tmp << (64 - 59));
6240 	b8 -= b11;
6241 
6242 	tmp = b13 ^ b14;
6243 	b13 = (tmp >> 41) | (tmp << (64 - 41));
6244 	b14 -= b13;
6245 
6246 	tmp = b15 ^ b12;
6247 	b15 = (tmp >> 34) | (tmp << (64 - 34));
6248 	b12 -= b15;
6249 
6250 	tmp = b1 ^ b6;
6251 	b1 = (tmp >> 13) | (tmp << (64 - 13));
6252 	b6 -= b1;
6253 
6254 	tmp = b3 ^ b4;
6255 	b3 = (tmp >> 51) | (tmp << (64 - 51));
6256 	b4 -= b3;
6257 
6258 	tmp = b5 ^ b2;
6259 	b5 = (tmp >> 4) | (tmp << (64 - 4));
6260 	b2 -= b5;
6261 
6262 	tmp = b7 ^ b0;
6263 	b7 = (tmp >> 33) | (tmp << (64 - 33));
6264 	b0 -= b7;
6265 
6266 	tmp = b1 ^ b8;
6267 	b1 = (tmp >> 52) | (tmp << (64 - 52));
6268 	b8 -= b1;
6269 
6270 	tmp = b5 ^ b14;
6271 	b5 = (tmp >> 23) | (tmp << (64 - 23));
6272 	b14 -= b5;
6273 
6274 	tmp = b3 ^ b12;
6275 	b3 = (tmp >> 18) | (tmp << (64 - 18));
6276 	b12 -= b3;
6277 
6278 	tmp = b7 ^ b10;
6279 	b7 = (tmp >> 49) | (tmp << (64 - 49));
6280 	b10 -= b7;
6281 
6282 	tmp = b15 ^ b4;
6283 	b15 = (tmp >> 55) | (tmp << (64 - 55));
6284 	b4 -= b15;
6285 
6286 	tmp = b11 ^ b6;
6287 	b11 = (tmp >> 10) | (tmp << (64 - 10));
6288 	b6 -= b11;
6289 
6290 	tmp = b13 ^ b2;
6291 	b13 = (tmp >> 19) | (tmp << (64 - 19));
6292 	b2 -= b13;
6293 
6294 	tmp = b9 ^ b0;
6295 	b9 = (tmp >> 38) | (tmp << (64 - 38));
6296 	b0 -= b9;
6297 
6298 	tmp = b15 ^ b14;
6299 	b15 = (tmp >> 37) | (tmp << (64 - 37));
6300 	b14 -= b15 + k11 + t0;
6301 	b15 -= k12 + 14;
6302 
6303 	tmp = b13 ^ b12;
6304 	b13 = (tmp >> 22) | (tmp << (64 - 22));
6305 	b12 -= b13 + k9;
6306 	b13 -= k10 + t2;
6307 
6308 	tmp = b11 ^ b10;
6309 	b11 = (tmp >> 17) | (tmp << (64 - 17));
6310 	b10 -= b11 + k7;
6311 	b11 -= k8;
6312 
6313 	tmp = b9 ^ b8;
6314 	b9 = (tmp >> 8) | (tmp << (64 - 8));
6315 	b8 -= b9 + k5;
6316 	b9 -= k6;
6317 
6318 	tmp = b7 ^ b6;
6319 	b7 = (tmp >> 47) | (tmp << (64 - 47));
6320 	b6 -= b7 + k3;
6321 	b7 -= k4;
6322 
6323 	tmp = b5 ^ b4;
6324 	b5 = (tmp >> 8) | (tmp << (64 - 8));
6325 	b4 -= b5 + k1;
6326 	b5 -= k2;
6327 
6328 	tmp = b3 ^ b2;
6329 	b3 = (tmp >> 13) | (tmp << (64 - 13));
6330 	b2 -= b3 + k16;
6331 	b3 -= k0;
6332 
6333 	tmp = b1 ^ b0;
6334 	b1 = (tmp >> 24) | (tmp << (64 - 24));
6335 	b0 -= b1 + k14;
6336 	b1 -= k15;
6337 
6338 	tmp = b7 ^ b12;
6339 	b7 = (tmp >> 20) | (tmp << (64 - 20));
6340 	b12 -= b7;
6341 
6342 	tmp = b3 ^ b10;
6343 	b3 = (tmp >> 37) | (tmp << (64 - 37));
6344 	b10 -= b3;
6345 
6346 	tmp = b5 ^ b8;
6347 	b5 = (tmp >> 31) | (tmp << (64 - 31));
6348 	b8 -= b5;
6349 
6350 	tmp = b1 ^ b14;
6351 	b1 = (tmp >> 23) | (tmp << (64 - 23));
6352 	b14 -= b1;
6353 
6354 	tmp = b9 ^ b4;
6355 	b9 = (tmp >> 52) | (tmp << (64 - 52));
6356 	b4 -= b9;
6357 
6358 	tmp = b13 ^ b6;
6359 	b13 = (tmp >> 35) | (tmp << (64 - 35));
6360 	b6 -= b13;
6361 
6362 	tmp = b11 ^ b2;
6363 	b11 = (tmp >> 48) | (tmp << (64 - 48));
6364 	b2 -= b11;
6365 
6366 	tmp = b15 ^ b0;
6367 	b15 = (tmp >> 9) | (tmp << (64 - 9));
6368 	b0 -= b15;
6369 
6370 	tmp = b9 ^ b10;
6371 	b9 = (tmp >> 25) | (tmp << (64 - 25));
6372 	b10 -= b9;
6373 
6374 	tmp = b11 ^ b8;
6375 	b11 = (tmp >> 44) | (tmp << (64 - 44));
6376 	b8 -= b11;
6377 
6378 	tmp = b13 ^ b14;
6379 	b13 = (tmp >> 42) | (tmp << (64 - 42));
6380 	b14 -= b13;
6381 
6382 	tmp = b15 ^ b12;
6383 	b15 = (tmp >> 19) | (tmp << (64 - 19));
6384 	b12 -= b15;
6385 
6386 	tmp = b1 ^ b6;
6387 	b1 = (tmp >> 46) | (tmp << (64 - 46));
6388 	b6 -= b1;
6389 
6390 	tmp = b3 ^ b4;
6391 	b3 = (tmp >> 47) | (tmp << (64 - 47));
6392 	b4 -= b3;
6393 
6394 	tmp = b5 ^ b2;
6395 	b5 = (tmp >> 44) | (tmp << (64 - 44));
6396 	b2 -= b5;
6397 
6398 	tmp = b7 ^ b0;
6399 	b7 = (tmp >> 31) | (tmp << (64 - 31));
6400 	b0 -= b7;
6401 
6402 	tmp = b1 ^ b8;
6403 	b1 = (tmp >> 41) | (tmp << (64 - 41));
6404 	b8 -= b1;
6405 
6406 	tmp = b5 ^ b14;
6407 	b5 = (tmp >> 42) | (tmp << (64 - 42));
6408 	b14 -= b5;
6409 
6410 	tmp = b3 ^ b12;
6411 	b3 = (tmp >> 53) | (tmp << (64 - 53));
6412 	b12 -= b3;
6413 
6414 	tmp = b7 ^ b10;
6415 	b7 = (tmp >> 4) | (tmp << (64 - 4));
6416 	b10 -= b7;
6417 
6418 	tmp = b15 ^ b4;
6419 	b15 = (tmp >> 51) | (tmp << (64 - 51));
6420 	b4 -= b15;
6421 
6422 	tmp = b11 ^ b6;
6423 	b11 = (tmp >> 56) | (tmp << (64 - 56));
6424 	b6 -= b11;
6425 
6426 	tmp = b13 ^ b2;
6427 	b13 = (tmp >> 34) | (tmp << (64 - 34));
6428 	b2 -= b13;
6429 
6430 	tmp = b9 ^ b0;
6431 	b9 = (tmp >> 16) | (tmp << (64 - 16));
6432 	b0 -= b9;
6433 
6434 	tmp = b15 ^ b14;
6435 	b15 = (tmp >> 30) | (tmp << (64 - 30));
6436 	b14 -= b15 + k10 + t2;
6437 	b15 -= k11 + 13;
6438 
6439 	tmp = b13 ^ b12;
6440 	b13 = (tmp >> 44) | (tmp << (64 - 44));
6441 	b12 -= b13 + k8;
6442 	b13 -= k9 + t1;
6443 
6444 	tmp = b11 ^ b10;
6445 	b11 = (tmp >> 47) | (tmp << (64 - 47));
6446 	b10 -= b11 + k6;
6447 	b11 -= k7;
6448 
6449 	tmp = b9 ^ b8;
6450 	b9 = (tmp >> 12) | (tmp << (64 - 12));
6451 	b8 -= b9 + k4;
6452 	b9 -= k5;
6453 
6454 	tmp = b7 ^ b6;
6455 	b7 = (tmp >> 31) | (tmp << (64 - 31));
6456 	b6 -= b7 + k2;
6457 	b7 -= k3;
6458 
6459 	tmp = b5 ^ b4;
6460 	b5 = (tmp >> 37) | (tmp << (64 - 37));
6461 	b4 -= b5 + k0;
6462 	b5 -= k1;
6463 
6464 	tmp = b3 ^ b2;
6465 	b3 = (tmp >> 9) | (tmp << (64 - 9));
6466 	b2 -= b3 + k15;
6467 	b3 -= k16;
6468 
6469 	tmp = b1 ^ b0;
6470 	b1 = (tmp >> 41) | (tmp << (64 - 41));
6471 	b0 -= b1 + k13;
6472 	b1 -= k14;
6473 
6474 	tmp = b7 ^ b12;
6475 	b7 = (tmp >> 25) | (tmp << (64 - 25));
6476 	b12 -= b7;
6477 
6478 	tmp = b3 ^ b10;
6479 	b3 = (tmp >> 16) | (tmp << (64 - 16));
6480 	b10 -= b3;
6481 
6482 	tmp = b5 ^ b8;
6483 	b5 = (tmp >> 28) | (tmp << (64 - 28));
6484 	b8 -= b5;
6485 
6486 	tmp = b1 ^ b14;
6487 	b1 = (tmp >> 47) | (tmp << (64 - 47));
6488 	b14 -= b1;
6489 
6490 	tmp = b9 ^ b4;
6491 	b9 = (tmp >> 41) | (tmp << (64 - 41));
6492 	b4 -= b9;
6493 
6494 	tmp = b13 ^ b6;
6495 	b13 = (tmp >> 48) | (tmp << (64 - 48));
6496 	b6 -= b13;
6497 
6498 	tmp = b11 ^ b2;
6499 	b11 = (tmp >> 20) | (tmp << (64 - 20));
6500 	b2 -= b11;
6501 
6502 	tmp = b15 ^ b0;
6503 	b15 = (tmp >> 5) | (tmp << (64 - 5));
6504 	b0 -= b15;
6505 
6506 	tmp = b9 ^ b10;
6507 	b9 = (tmp >> 17) | (tmp << (64 - 17));
6508 	b10 -= b9;
6509 
6510 	tmp = b11 ^ b8;
6511 	b11 = (tmp >> 59) | (tmp << (64 - 59));
6512 	b8 -= b11;
6513 
6514 	tmp = b13 ^ b14;
6515 	b13 = (tmp >> 41) | (tmp << (64 - 41));
6516 	b14 -= b13;
6517 
6518 	tmp = b15 ^ b12;
6519 	b15 = (tmp >> 34) | (tmp << (64 - 34));
6520 	b12 -= b15;
6521 
6522 	tmp = b1 ^ b6;
6523 	b1 = (tmp >> 13) | (tmp << (64 - 13));
6524 	b6 -= b1;
6525 
6526 	tmp = b3 ^ b4;
6527 	b3 = (tmp >> 51) | (tmp << (64 - 51));
6528 	b4 -= b3;
6529 
6530 	tmp = b5 ^ b2;
6531 	b5 = (tmp >> 4) | (tmp << (64 - 4));
6532 	b2 -= b5;
6533 
6534 	tmp = b7 ^ b0;
6535 	b7 = (tmp >> 33) | (tmp << (64 - 33));
6536 	b0 -= b7;
6537 
6538 	tmp = b1 ^ b8;
6539 	b1 = (tmp >> 52) | (tmp << (64 - 52));
6540 	b8 -= b1;
6541 
6542 	tmp = b5 ^ b14;
6543 	b5 = (tmp >> 23) | (tmp << (64 - 23));
6544 	b14 -= b5;
6545 
6546 	tmp = b3 ^ b12;
6547 	b3 = (tmp >> 18) | (tmp << (64 - 18));
6548 	b12 -= b3;
6549 
6550 	tmp = b7 ^ b10;
6551 	b7 = (tmp >> 49) | (tmp << (64 - 49));
6552 	b10 -= b7;
6553 
6554 	tmp = b15 ^ b4;
6555 	b15 = (tmp >> 55) | (tmp << (64 - 55));
6556 	b4 -= b15;
6557 
6558 	tmp = b11 ^ b6;
6559 	b11 = (tmp >> 10) | (tmp << (64 - 10));
6560 	b6 -= b11;
6561 
6562 	tmp = b13 ^ b2;
6563 	b13 = (tmp >> 19) | (tmp << (64 - 19));
6564 	b2 -= b13;
6565 
6566 	tmp = b9 ^ b0;
6567 	b9 = (tmp >> 38) | (tmp << (64 - 38));
6568 	b0 -= b9;
6569 
6570 	tmp = b15 ^ b14;
6571 	b15 = (tmp >> 37) | (tmp << (64 - 37));
6572 	b14 -= b15 + k9 + t1;
6573 	b15 -= k10 + 12;
6574 
6575 	tmp = b13 ^ b12;
6576 	b13 = (tmp >> 22) | (tmp << (64 - 22));
6577 	b12 -= b13 + k7;
6578 	b13 -= k8 + t0;
6579 
6580 	tmp = b11 ^ b10;
6581 	b11 = (tmp >> 17) | (tmp << (64 - 17));
6582 	b10 -= b11 + k5;
6583 	b11 -= k6;
6584 
6585 	tmp = b9 ^ b8;
6586 	b9 = (tmp >> 8) | (tmp << (64 - 8));
6587 	b8 -= b9 + k3;
6588 	b9 -= k4;
6589 
6590 	tmp = b7 ^ b6;
6591 	b7 = (tmp >> 47) | (tmp << (64 - 47));
6592 	b6 -= b7 + k1;
6593 	b7 -= k2;
6594 
6595 	tmp = b5 ^ b4;
6596 	b5 = (tmp >> 8) | (tmp << (64 - 8));
6597 	b4 -= b5 + k16;
6598 	b5 -= k0;
6599 
6600 	tmp = b3 ^ b2;
6601 	b3 = (tmp >> 13) | (tmp << (64 - 13));
6602 	b2 -= b3 + k14;
6603 	b3 -= k15;
6604 
6605 	tmp = b1 ^ b0;
6606 	b1 = (tmp >> 24) | (tmp << (64 - 24));
6607 	b0 -= b1 + k12;
6608 	b1 -= k13;
6609 
6610 	tmp = b7 ^ b12;
6611 	b7 = (tmp >> 20) | (tmp << (64 - 20));
6612 	b12 -= b7;
6613 
6614 	tmp = b3 ^ b10;
6615 	b3 = (tmp >> 37) | (tmp << (64 - 37));
6616 	b10 -= b3;
6617 
6618 	tmp = b5 ^ b8;
6619 	b5 = (tmp >> 31) | (tmp << (64 - 31));
6620 	b8 -= b5;
6621 
6622 	tmp = b1 ^ b14;
6623 	b1 = (tmp >> 23) | (tmp << (64 - 23));
6624 	b14 -= b1;
6625 
6626 	tmp = b9 ^ b4;
6627 	b9 = (tmp >> 52) | (tmp << (64 - 52));
6628 	b4 -= b9;
6629 
6630 	tmp = b13 ^ b6;
6631 	b13 = (tmp >> 35) | (tmp << (64 - 35));
6632 	b6 -= b13;
6633 
6634 	tmp = b11 ^ b2;
6635 	b11 = (tmp >> 48) | (tmp << (64 - 48));
6636 	b2 -= b11;
6637 
6638 	tmp = b15 ^ b0;
6639 	b15 = (tmp >> 9) | (tmp << (64 - 9));
6640 	b0 -= b15;
6641 
6642 	tmp = b9 ^ b10;
6643 	b9 = (tmp >> 25) | (tmp << (64 - 25));
6644 	b10 -= b9;
6645 
6646 	tmp = b11 ^ b8;
6647 	b11 = (tmp >> 44) | (tmp << (64 - 44));
6648 	b8 -= b11;
6649 
6650 	tmp = b13 ^ b14;
6651 	b13 = (tmp >> 42) | (tmp << (64 - 42));
6652 	b14 -= b13;
6653 
6654 	tmp = b15 ^ b12;
6655 	b15 = (tmp >> 19) | (tmp << (64 - 19));
6656 	b12 -= b15;
6657 
6658 	tmp = b1 ^ b6;
6659 	b1 = (tmp >> 46) | (tmp << (64 - 46));
6660 	b6 -= b1;
6661 
6662 	tmp = b3 ^ b4;
6663 	b3 = (tmp >> 47) | (tmp << (64 - 47));
6664 	b4 -= b3;
6665 
6666 	tmp = b5 ^ b2;
6667 	b5 = (tmp >> 44) | (tmp << (64 - 44));
6668 	b2 -= b5;
6669 
6670 	tmp = b7 ^ b0;
6671 	b7 = (tmp >> 31) | (tmp << (64 - 31));
6672 	b0 -= b7;
6673 
6674 	tmp = b1 ^ b8;
6675 	b1 = (tmp >> 41) | (tmp << (64 - 41));
6676 	b8 -= b1;
6677 
6678 	tmp = b5 ^ b14;
6679 	b5 = (tmp >> 42) | (tmp << (64 - 42));
6680 	b14 -= b5;
6681 
6682 	tmp = b3 ^ b12;
6683 	b3 = (tmp >> 53) | (tmp << (64 - 53));
6684 	b12 -= b3;
6685 
6686 	tmp = b7 ^ b10;
6687 	b7 = (tmp >> 4) | (tmp << (64 - 4));
6688 	b10 -= b7;
6689 
6690 	tmp = b15 ^ b4;
6691 	b15 = (tmp >> 51) | (tmp << (64 - 51));
6692 	b4 -= b15;
6693 
6694 	tmp = b11 ^ b6;
6695 	b11 = (tmp >> 56) | (tmp << (64 - 56));
6696 	b6 -= b11;
6697 
6698 	tmp = b13 ^ b2;
6699 	b13 = (tmp >> 34) | (tmp << (64 - 34));
6700 	b2 -= b13;
6701 
6702 	tmp = b9 ^ b0;
6703 	b9 = (tmp >> 16) | (tmp << (64 - 16));
6704 	b0 -= b9;
6705 
6706 	tmp = b15 ^ b14;
6707 	b15 = (tmp >> 30) | (tmp << (64 - 30));
6708 	b14 -= b15 + k8 + t0;
6709 	b15 -= k9 + 11;
6710 
6711 	tmp = b13 ^ b12;
6712 	b13 = (tmp >> 44) | (tmp << (64 - 44));
6713 	b12 -= b13 + k6;
6714 	b13 -= k7 + t2;
6715 
6716 	tmp = b11 ^ b10;
6717 	b11 = (tmp >> 47) | (tmp << (64 - 47));
6718 	b10 -= b11 + k4;
6719 	b11 -= k5;
6720 
6721 	tmp = b9 ^ b8;
6722 	b9 = (tmp >> 12) | (tmp << (64 - 12));
6723 	b8 -= b9 + k2;
6724 	b9 -= k3;
6725 
6726 	tmp = b7 ^ b6;
6727 	b7 = (tmp >> 31) | (tmp << (64 - 31));
6728 	b6 -= b7 + k0;
6729 	b7 -= k1;
6730 
6731 	tmp = b5 ^ b4;
6732 	b5 = (tmp >> 37) | (tmp << (64 - 37));
6733 	b4 -= b5 + k15;
6734 	b5 -= k16;
6735 
6736 	tmp = b3 ^ b2;
6737 	b3 = (tmp >> 9) | (tmp << (64 - 9));
6738 	b2 -= b3 + k13;
6739 	b3 -= k14;
6740 
6741 	tmp = b1 ^ b0;
6742 	b1 = (tmp >> 41) | (tmp << (64 - 41));
6743 	b0 -= b1 + k11;
6744 	b1 -= k12;
6745 
6746 	tmp = b7 ^ b12;
6747 	b7 = (tmp >> 25) | (tmp << (64 - 25));
6748 	b12 -= b7;
6749 
6750 	tmp = b3 ^ b10;
6751 	b3 = (tmp >> 16) | (tmp << (64 - 16));
6752 	b10 -= b3;
6753 
6754 	tmp = b5 ^ b8;
6755 	b5 = (tmp >> 28) | (tmp << (64 - 28));
6756 	b8 -= b5;
6757 
6758 	tmp = b1 ^ b14;
6759 	b1 = (tmp >> 47) | (tmp << (64 - 47));
6760 	b14 -= b1;
6761 
6762 	tmp = b9 ^ b4;
6763 	b9 = (tmp >> 41) | (tmp << (64 - 41));
6764 	b4 -= b9;
6765 
6766 	tmp = b13 ^ b6;
6767 	b13 = (tmp >> 48) | (tmp << (64 - 48));
6768 	b6 -= b13;
6769 
6770 	tmp = b11 ^ b2;
6771 	b11 = (tmp >> 20) | (tmp << (64 - 20));
6772 	b2 -= b11;
6773 
6774 	tmp = b15 ^ b0;
6775 	b15 = (tmp >> 5) | (tmp << (64 - 5));
6776 	b0 -= b15;
6777 
6778 	tmp = b9 ^ b10;
6779 	b9 = (tmp >> 17) | (tmp << (64 - 17));
6780 	b10 -= b9;
6781 
6782 	tmp = b11 ^ b8;
6783 	b11 = (tmp >> 59) | (tmp << (64 - 59));
6784 	b8 -= b11;
6785 
6786 	tmp = b13 ^ b14;
6787 	b13 = (tmp >> 41) | (tmp << (64 - 41));
6788 	b14 -= b13;
6789 
6790 	tmp = b15 ^ b12;
6791 	b15 = (tmp >> 34) | (tmp << (64 - 34));
6792 	b12 -= b15;
6793 
6794 	tmp = b1 ^ b6;
6795 	b1 = (tmp >> 13) | (tmp << (64 - 13));
6796 	b6 -= b1;
6797 
6798 	tmp = b3 ^ b4;
6799 	b3 = (tmp >> 51) | (tmp << (64 - 51));
6800 	b4 -= b3;
6801 
6802 	tmp = b5 ^ b2;
6803 	b5 = (tmp >> 4) | (tmp << (64 - 4));
6804 	b2 -= b5;
6805 
6806 	tmp = b7 ^ b0;
6807 	b7 = (tmp >> 33) | (tmp << (64 - 33));
6808 	b0 -= b7;
6809 
6810 	tmp = b1 ^ b8;
6811 	b1 = (tmp >> 52) | (tmp << (64 - 52));
6812 	b8 -= b1;
6813 
6814 	tmp = b5 ^ b14;
6815 	b5 = (tmp >> 23) | (tmp << (64 - 23));
6816 	b14 -= b5;
6817 
6818 	tmp = b3 ^ b12;
6819 	b3 = (tmp >> 18) | (tmp << (64 - 18));
6820 	b12 -= b3;
6821 
6822 	tmp = b7 ^ b10;
6823 	b7 = (tmp >> 49) | (tmp << (64 - 49));
6824 	b10 -= b7;
6825 
6826 	tmp = b15 ^ b4;
6827 	b15 = (tmp >> 55) | (tmp << (64 - 55));
6828 	b4 -= b15;
6829 
6830 	tmp = b11 ^ b6;
6831 	b11 = (tmp >> 10) | (tmp << (64 - 10));
6832 	b6 -= b11;
6833 
6834 	tmp = b13 ^ b2;
6835 	b13 = (tmp >> 19) | (tmp << (64 - 19));
6836 	b2 -= b13;
6837 
6838 	tmp = b9 ^ b0;
6839 	b9 = (tmp >> 38) | (tmp << (64 - 38));
6840 	b0 -= b9;
6841 
6842 	tmp = b15 ^ b14;
6843 	b15 = (tmp >> 37) | (tmp << (64 - 37));
6844 	b14 -= b15 + k7 + t2;
6845 	b15 -= k8 + 10;
6846 
6847 	tmp = b13 ^ b12;
6848 	b13 = (tmp >> 22) | (tmp << (64 - 22));
6849 	b12 -= b13 + k5;
6850 	b13 -= k6 + t1;
6851 
6852 	tmp = b11 ^ b10;
6853 	b11 = (tmp >> 17) | (tmp << (64 - 17));
6854 	b10 -= b11 + k3;
6855 	b11 -= k4;
6856 
6857 	tmp = b9 ^ b8;
6858 	b9 = (tmp >> 8) | (tmp << (64 - 8));
6859 	b8 -= b9 + k1;
6860 	b9 -= k2;
6861 
6862 	tmp = b7 ^ b6;
6863 	b7 = (tmp >> 47) | (tmp << (64 - 47));
6864 	b6 -= b7 + k16;
6865 	b7 -= k0;
6866 
6867 	tmp = b5 ^ b4;
6868 	b5 = (tmp >> 8) | (tmp << (64 - 8));
6869 	b4 -= b5 + k14;
6870 	b5 -= k15;
6871 
6872 	tmp = b3 ^ b2;
6873 	b3 = (tmp >> 13) | (tmp << (64 - 13));
6874 	b2 -= b3 + k12;
6875 	b3 -= k13;
6876 
6877 	tmp = b1 ^ b0;
6878 	b1 = (tmp >> 24) | (tmp << (64 - 24));
6879 	b0 -= b1 + k10;
6880 	b1 -= k11;
6881 
6882 	tmp = b7 ^ b12;
6883 	b7 = (tmp >> 20) | (tmp << (64 - 20));
6884 	b12 -= b7;
6885 
6886 	tmp = b3 ^ b10;
6887 	b3 = (tmp >> 37) | (tmp << (64 - 37));
6888 	b10 -= b3;
6889 
6890 	tmp = b5 ^ b8;
6891 	b5 = (tmp >> 31) | (tmp << (64 - 31));
6892 	b8 -= b5;
6893 
6894 	tmp = b1 ^ b14;
6895 	b1 = (tmp >> 23) | (tmp << (64 - 23));
6896 	b14 -= b1;
6897 
6898 	tmp = b9 ^ b4;
6899 	b9 = (tmp >> 52) | (tmp << (64 - 52));
6900 	b4 -= b9;
6901 
6902 	tmp = b13 ^ b6;
6903 	b13 = (tmp >> 35) | (tmp << (64 - 35));
6904 	b6 -= b13;
6905 
6906 	tmp = b11 ^ b2;
6907 	b11 = (tmp >> 48) | (tmp << (64 - 48));
6908 	b2 -= b11;
6909 
6910 	tmp = b15 ^ b0;
6911 	b15 = (tmp >> 9) | (tmp << (64 - 9));
6912 	b0 -= b15;
6913 
6914 	tmp = b9 ^ b10;
6915 	b9 = (tmp >> 25) | (tmp << (64 - 25));
6916 	b10 -= b9;
6917 
6918 	tmp = b11 ^ b8;
6919 	b11 = (tmp >> 44) | (tmp << (64 - 44));
6920 	b8 -= b11;
6921 
6922 	tmp = b13 ^ b14;
6923 	b13 = (tmp >> 42) | (tmp << (64 - 42));
6924 	b14 -= b13;
6925 
6926 	tmp = b15 ^ b12;
6927 	b15 = (tmp >> 19) | (tmp << (64 - 19));
6928 	b12 -= b15;
6929 
6930 	tmp = b1 ^ b6;
6931 	b1 = (tmp >> 46) | (tmp << (64 - 46));
6932 	b6 -= b1;
6933 
6934 	tmp = b3 ^ b4;
6935 	b3 = (tmp >> 47) | (tmp << (64 - 47));
6936 	b4 -= b3;
6937 
6938 	tmp = b5 ^ b2;
6939 	b5 = (tmp >> 44) | (tmp << (64 - 44));
6940 	b2 -= b5;
6941 
6942 	tmp = b7 ^ b0;
6943 	b7 = (tmp >> 31) | (tmp << (64 - 31));
6944 	b0 -= b7;
6945 
6946 	tmp = b1 ^ b8;
6947 	b1 = (tmp >> 41) | (tmp << (64 - 41));
6948 	b8 -= b1;
6949 
6950 	tmp = b5 ^ b14;
6951 	b5 = (tmp >> 42) | (tmp << (64 - 42));
6952 	b14 -= b5;
6953 
6954 	tmp = b3 ^ b12;
6955 	b3 = (tmp >> 53) | (tmp << (64 - 53));
6956 	b12 -= b3;
6957 
6958 	tmp = b7 ^ b10;
6959 	b7 = (tmp >> 4) | (tmp << (64 - 4));
6960 	b10 -= b7;
6961 
6962 	tmp = b15 ^ b4;
6963 	b15 = (tmp >> 51) | (tmp << (64 - 51));
6964 	b4 -= b15;
6965 
6966 	tmp = b11 ^ b6;
6967 	b11 = (tmp >> 56) | (tmp << (64 - 56));
6968 	b6 -= b11;
6969 
6970 	tmp = b13 ^ b2;
6971 	b13 = (tmp >> 34) | (tmp << (64 - 34));
6972 	b2 -= b13;
6973 
6974 	tmp = b9 ^ b0;
6975 	b9 = (tmp >> 16) | (tmp << (64 - 16));
6976 	b0 -= b9;
6977 
6978 	tmp = b15 ^ b14;
6979 	b15 = (tmp >> 30) | (tmp << (64 - 30));
6980 	b14 -= b15 + k6 + t1;
6981 	b15 -= k7 + 9;
6982 
6983 	tmp = b13 ^ b12;
6984 	b13 = (tmp >> 44) | (tmp << (64 - 44));
6985 	b12 -= b13 + k4;
6986 	b13 -= k5 + t0;
6987 
6988 	tmp = b11 ^ b10;
6989 	b11 = (tmp >> 47) | (tmp << (64 - 47));
6990 	b10 -= b11 + k2;
6991 	b11 -= k3;
6992 
6993 	tmp = b9 ^ b8;
6994 	b9 = (tmp >> 12) | (tmp << (64 - 12));
6995 	b8 -= b9 + k0;
6996 	b9 -= k1;
6997 
6998 	tmp = b7 ^ b6;
6999 	b7 = (tmp >> 31) | (tmp << (64 - 31));
7000 	b6 -= b7 + k15;
7001 	b7 -= k16;
7002 
7003 	tmp = b5 ^ b4;
7004 	b5 = (tmp >> 37) | (tmp << (64 - 37));
7005 	b4 -= b5 + k13;
7006 	b5 -= k14;
7007 
7008 	tmp = b3 ^ b2;
7009 	b3 = (tmp >> 9) | (tmp << (64 - 9));
7010 	b2 -= b3 + k11;
7011 	b3 -= k12;
7012 
7013 	tmp = b1 ^ b0;
7014 	b1 = (tmp >> 41) | (tmp << (64 - 41));
7015 	b0 -= b1 + k9;
7016 	b1 -= k10;
7017 
7018 	tmp = b7 ^ b12;
7019 	b7 = (tmp >> 25) | (tmp << (64 - 25));
7020 	b12 -= b7;
7021 
7022 	tmp = b3 ^ b10;
7023 	b3 = (tmp >> 16) | (tmp << (64 - 16));
7024 	b10 -= b3;
7025 
7026 	tmp = b5 ^ b8;
7027 	b5 = (tmp >> 28) | (tmp << (64 - 28));
7028 	b8 -= b5;
7029 
7030 	tmp = b1 ^ b14;
7031 	b1 = (tmp >> 47) | (tmp << (64 - 47));
7032 	b14 -= b1;
7033 
7034 	tmp = b9 ^ b4;
7035 	b9 = (tmp >> 41) | (tmp << (64 - 41));
7036 	b4 -= b9;
7037 
7038 	tmp = b13 ^ b6;
7039 	b13 = (tmp >> 48) | (tmp << (64 - 48));
7040 	b6 -= b13;
7041 
7042 	tmp = b11 ^ b2;
7043 	b11 = (tmp >> 20) | (tmp << (64 - 20));
7044 	b2 -= b11;
7045 
7046 	tmp = b15 ^ b0;
7047 	b15 = (tmp >> 5) | (tmp << (64 - 5));
7048 	b0 -= b15;
7049 
7050 	tmp = b9 ^ b10;
7051 	b9 = (tmp >> 17) | (tmp << (64 - 17));
7052 	b10 -= b9;
7053 
7054 	tmp = b11 ^ b8;
7055 	b11 = (tmp >> 59) | (tmp << (64 - 59));
7056 	b8 -= b11;
7057 
7058 	tmp = b13 ^ b14;
7059 	b13 = (tmp >> 41) | (tmp << (64 - 41));
7060 	b14 -= b13;
7061 
7062 	tmp = b15 ^ b12;
7063 	b15 = (tmp >> 34) | (tmp << (64 - 34));
7064 	b12 -= b15;
7065 
7066 	tmp = b1 ^ b6;
7067 	b1 = (tmp >> 13) | (tmp << (64 - 13));
7068 	b6 -= b1;
7069 
7070 	tmp = b3 ^ b4;
7071 	b3 = (tmp >> 51) | (tmp << (64 - 51));
7072 	b4 -= b3;
7073 
7074 	tmp = b5 ^ b2;
7075 	b5 = (tmp >> 4) | (tmp << (64 - 4));
7076 	b2 -= b5;
7077 
7078 	tmp = b7 ^ b0;
7079 	b7 = (tmp >> 33) | (tmp << (64 - 33));
7080 	b0 -= b7;
7081 
7082 	tmp = b1 ^ b8;
7083 	b1 = (tmp >> 52) | (tmp << (64 - 52));
7084 	b8 -= b1;
7085 
7086 	tmp = b5 ^ b14;
7087 	b5 = (tmp >> 23) | (tmp << (64 - 23));
7088 	b14 -= b5;
7089 
7090 	tmp = b3 ^ b12;
7091 	b3 = (tmp >> 18) | (tmp << (64 - 18));
7092 	b12 -= b3;
7093 
7094 	tmp = b7 ^ b10;
7095 	b7 = (tmp >> 49) | (tmp << (64 - 49));
7096 	b10 -= b7;
7097 
7098 	tmp = b15 ^ b4;
7099 	b15 = (tmp >> 55) | (tmp << (64 - 55));
7100 	b4 -= b15;
7101 
7102 	tmp = b11 ^ b6;
7103 	b11 = (tmp >> 10) | (tmp << (64 - 10));
7104 	b6 -= b11;
7105 
7106 	tmp = b13 ^ b2;
7107 	b13 = (tmp >> 19) | (tmp << (64 - 19));
7108 	b2 -= b13;
7109 
7110 	tmp = b9 ^ b0;
7111 	b9 = (tmp >> 38) | (tmp << (64 - 38));
7112 	b0 -= b9;
7113 
7114 	tmp = b15 ^ b14;
7115 	b15 = (tmp >> 37) | (tmp << (64 - 37));
7116 	b14 -= b15 + k5 + t0;
7117 	b15 -= k6 + 8;
7118 
7119 	tmp = b13 ^ b12;
7120 	b13 = (tmp >> 22) | (tmp << (64 - 22));
7121 	b12 -= b13 + k3;
7122 	b13 -= k4 + t2;
7123 
7124 	tmp = b11 ^ b10;
7125 	b11 = (tmp >> 17) | (tmp << (64 - 17));
7126 	b10 -= b11 + k1;
7127 	b11 -= k2;
7128 
7129 	tmp = b9 ^ b8;
7130 	b9 = (tmp >> 8) | (tmp << (64 - 8));
7131 	b8 -= b9 + k16;
7132 	b9 -= k0;
7133 
7134 	tmp = b7 ^ b6;
7135 	b7 = (tmp >> 47) | (tmp << (64 - 47));
7136 	b6 -= b7 + k14;
7137 	b7 -= k15;
7138 
7139 	tmp = b5 ^ b4;
7140 	b5 = (tmp >> 8) | (tmp << (64 - 8));
7141 	b4 -= b5 + k12;
7142 	b5 -= k13;
7143 
7144 	tmp = b3 ^ b2;
7145 	b3 = (tmp >> 13) | (tmp << (64 - 13));
7146 	b2 -= b3 + k10;
7147 	b3 -= k11;
7148 
7149 	tmp = b1 ^ b0;
7150 	b1 = (tmp >> 24) | (tmp << (64 - 24));
7151 	b0 -= b1 + k8;
7152 	b1 -= k9;
7153 
7154 	tmp = b7 ^ b12;
7155 	b7 = (tmp >> 20) | (tmp << (64 - 20));
7156 	b12 -= b7;
7157 
7158 	tmp = b3 ^ b10;
7159 	b3 = (tmp >> 37) | (tmp << (64 - 37));
7160 	b10 -= b3;
7161 
7162 	tmp = b5 ^ b8;
7163 	b5 = (tmp >> 31) | (tmp << (64 - 31));
7164 	b8 -= b5;
7165 
7166 	tmp = b1 ^ b14;
7167 	b1 = (tmp >> 23) | (tmp << (64 - 23));
7168 	b14 -= b1;
7169 
7170 	tmp = b9 ^ b4;
7171 	b9 = (tmp >> 52) | (tmp << (64 - 52));
7172 	b4 -= b9;
7173 
7174 	tmp = b13 ^ b6;
7175 	b13 = (tmp >> 35) | (tmp << (64 - 35));
7176 	b6 -= b13;
7177 
7178 	tmp = b11 ^ b2;
7179 	b11 = (tmp >> 48) | (tmp << (64 - 48));
7180 	b2 -= b11;
7181 
7182 	tmp = b15 ^ b0;
7183 	b15 = (tmp >> 9) | (tmp << (64 - 9));
7184 	b0 -= b15;
7185 
7186 	tmp = b9 ^ b10;
7187 	b9 = (tmp >> 25) | (tmp << (64 - 25));
7188 	b10 -= b9;
7189 
7190 	tmp = b11 ^ b8;
7191 	b11 = (tmp >> 44) | (tmp << (64 - 44));
7192 	b8 -= b11;
7193 
7194 	tmp = b13 ^ b14;
7195 	b13 = (tmp >> 42) | (tmp << (64 - 42));
7196 	b14 -= b13;
7197 
7198 	tmp = b15 ^ b12;
7199 	b15 = (tmp >> 19) | (tmp << (64 - 19));
7200 	b12 -= b15;
7201 
7202 	tmp = b1 ^ b6;
7203 	b1 = (tmp >> 46) | (tmp << (64 - 46));
7204 	b6 -= b1;
7205 
7206 	tmp = b3 ^ b4;
7207 	b3 = (tmp >> 47) | (tmp << (64 - 47));
7208 	b4 -= b3;
7209 
7210 	tmp = b5 ^ b2;
7211 	b5 = (tmp >> 44) | (tmp << (64 - 44));
7212 	b2 -= b5;
7213 
7214 	tmp = b7 ^ b0;
7215 	b7 = (tmp >> 31) | (tmp << (64 - 31));
7216 	b0 -= b7;
7217 
7218 	tmp = b1 ^ b8;
7219 	b1 = (tmp >> 41) | (tmp << (64 - 41));
7220 	b8 -= b1;
7221 
7222 	tmp = b5 ^ b14;
7223 	b5 = (tmp >> 42) | (tmp << (64 - 42));
7224 	b14 -= b5;
7225 
7226 	tmp = b3 ^ b12;
7227 	b3 = (tmp >> 53) | (tmp << (64 - 53));
7228 	b12 -= b3;
7229 
7230 	tmp = b7 ^ b10;
7231 	b7 = (tmp >> 4) | (tmp << (64 - 4));
7232 	b10 -= b7;
7233 
7234 	tmp = b15 ^ b4;
7235 	b15 = (tmp >> 51) | (tmp << (64 - 51));
7236 	b4 -= b15;
7237 
7238 	tmp = b11 ^ b6;
7239 	b11 = (tmp >> 56) | (tmp << (64 - 56));
7240 	b6 -= b11;
7241 
7242 	tmp = b13 ^ b2;
7243 	b13 = (tmp >> 34) | (tmp << (64 - 34));
7244 	b2 -= b13;
7245 
7246 	tmp = b9 ^ b0;
7247 	b9 = (tmp >> 16) | (tmp << (64 - 16));
7248 	b0 -= b9;
7249 
7250 	tmp = b15 ^ b14;
7251 	b15 = (tmp >> 30) | (tmp << (64 - 30));
7252 	b14 -= b15 + k4 + t2;
7253 	b15 -= k5 + 7;
7254 
7255 	tmp = b13 ^ b12;
7256 	b13 = (tmp >> 44) | (tmp << (64 - 44));
7257 	b12 -= b13 + k2;
7258 	b13 -= k3 + t1;
7259 
7260 	tmp = b11 ^ b10;
7261 	b11 = (tmp >> 47) | (tmp << (64 - 47));
7262 	b10 -= b11 + k0;
7263 	b11 -= k1;
7264 
7265 	tmp = b9 ^ b8;
7266 	b9 = (tmp >> 12) | (tmp << (64 - 12));
7267 	b8 -= b9 + k15;
7268 	b9 -= k16;
7269 
7270 	tmp = b7 ^ b6;
7271 	b7 = (tmp >> 31) | (tmp << (64 - 31));
7272 	b6 -= b7 + k13;
7273 	b7 -= k14;
7274 
7275 	tmp = b5 ^ b4;
7276 	b5 = (tmp >> 37) | (tmp << (64 - 37));
7277 	b4 -= b5 + k11;
7278 	b5 -= k12;
7279 
7280 	tmp = b3 ^ b2;
7281 	b3 = (tmp >> 9) | (tmp << (64 - 9));
7282 	b2 -= b3 + k9;
7283 	b3 -= k10;
7284 
7285 	tmp = b1 ^ b0;
7286 	b1 = (tmp >> 41) | (tmp << (64 - 41));
7287 	b0 -= b1 + k7;
7288 	b1 -= k8;
7289 
7290 	tmp = b7 ^ b12;
7291 	b7 = (tmp >> 25) | (tmp << (64 - 25));
7292 	b12 -= b7;
7293 
7294 	tmp = b3 ^ b10;
7295 	b3 = (tmp >> 16) | (tmp << (64 - 16));
7296 	b10 -= b3;
7297 
7298 	tmp = b5 ^ b8;
7299 	b5 = (tmp >> 28) | (tmp << (64 - 28));
7300 	b8 -= b5;
7301 
7302 	tmp = b1 ^ b14;
7303 	b1 = (tmp >> 47) | (tmp << (64 - 47));
7304 	b14 -= b1;
7305 
7306 	tmp = b9 ^ b4;
7307 	b9 = (tmp >> 41) | (tmp << (64 - 41));
7308 	b4 -= b9;
7309 
7310 	tmp = b13 ^ b6;
7311 	b13 = (tmp >> 48) | (tmp << (64 - 48));
7312 	b6 -= b13;
7313 
7314 	tmp = b11 ^ b2;
7315 	b11 = (tmp >> 20) | (tmp << (64 - 20));
7316 	b2 -= b11;
7317 
7318 	tmp = b15 ^ b0;
7319 	b15 = (tmp >> 5) | (tmp << (64 - 5));
7320 	b0 -= b15;
7321 
7322 	tmp = b9 ^ b10;
7323 	b9 = (tmp >> 17) | (tmp << (64 - 17));
7324 	b10 -= b9;
7325 
7326 	tmp = b11 ^ b8;
7327 	b11 = (tmp >> 59) | (tmp << (64 - 59));
7328 	b8 -= b11;
7329 
7330 	tmp = b13 ^ b14;
7331 	b13 = (tmp >> 41) | (tmp << (64 - 41));
7332 	b14 -= b13;
7333 
7334 	tmp = b15 ^ b12;
7335 	b15 = (tmp >> 34) | (tmp << (64 - 34));
7336 	b12 -= b15;
7337 
7338 	tmp = b1 ^ b6;
7339 	b1 = (tmp >> 13) | (tmp << (64 - 13));
7340 	b6 -= b1;
7341 
7342 	tmp = b3 ^ b4;
7343 	b3 = (tmp >> 51) | (tmp << (64 - 51));
7344 	b4 -= b3;
7345 
7346 	tmp = b5 ^ b2;
7347 	b5 = (tmp >> 4) | (tmp << (64 - 4));
7348 	b2 -= b5;
7349 
7350 	tmp = b7 ^ b0;
7351 	b7 = (tmp >> 33) | (tmp << (64 - 33));
7352 	b0 -= b7;
7353 
7354 	tmp = b1 ^ b8;
7355 	b1 = (tmp >> 52) | (tmp << (64 - 52));
7356 	b8 -= b1;
7357 
7358 	tmp = b5 ^ b14;
7359 	b5 = (tmp >> 23) | (tmp << (64 - 23));
7360 	b14 -= b5;
7361 
7362 	tmp = b3 ^ b12;
7363 	b3 = (tmp >> 18) | (tmp << (64 - 18));
7364 	b12 -= b3;
7365 
7366 	tmp = b7 ^ b10;
7367 	b7 = (tmp >> 49) | (tmp << (64 - 49));
7368 	b10 -= b7;
7369 
7370 	tmp = b15 ^ b4;
7371 	b15 = (tmp >> 55) | (tmp << (64 - 55));
7372 	b4 -= b15;
7373 
7374 	tmp = b11 ^ b6;
7375 	b11 = (tmp >> 10) | (tmp << (64 - 10));
7376 	b6 -= b11;
7377 
7378 	tmp = b13 ^ b2;
7379 	b13 = (tmp >> 19) | (tmp << (64 - 19));
7380 	b2 -= b13;
7381 
7382 	tmp = b9 ^ b0;
7383 	b9 = (tmp >> 38) | (tmp << (64 - 38));
7384 	b0 -= b9;
7385 
7386 	tmp = b15 ^ b14;
7387 	b15 = (tmp >> 37) | (tmp << (64 - 37));
7388 	b14 -= b15 + k3 + t1;
7389 	b15 -= k4 + 6;
7390 
7391 	tmp = b13 ^ b12;
7392 	b13 = (tmp >> 22) | (tmp << (64 - 22));
7393 	b12 -= b13 + k1;
7394 	b13 -= k2 + t0;
7395 
7396 	tmp = b11 ^ b10;
7397 	b11 = (tmp >> 17) | (tmp << (64 - 17));
7398 	b10 -= b11 + k16;
7399 	b11 -= k0;
7400 
7401 	tmp = b9 ^ b8;
7402 	b9 = (tmp >> 8) | (tmp << (64 - 8));
7403 	b8 -= b9 + k14;
7404 	b9 -= k15;
7405 
7406 	tmp = b7 ^ b6;
7407 	b7 = (tmp >> 47) | (tmp << (64 - 47));
7408 	b6 -= b7 + k12;
7409 	b7 -= k13;
7410 
7411 	tmp = b5 ^ b4;
7412 	b5 = (tmp >> 8) | (tmp << (64 - 8));
7413 	b4 -= b5 + k10;
7414 	b5 -= k11;
7415 
7416 	tmp = b3 ^ b2;
7417 	b3 = (tmp >> 13) | (tmp << (64 - 13));
7418 	b2 -= b3 + k8;
7419 	b3 -= k9;
7420 
7421 	tmp = b1 ^ b0;
7422 	b1 = (tmp >> 24) | (tmp << (64 - 24));
7423 	b0 -= b1 + k6;
7424 	b1 -= k7;
7425 
7426 	tmp = b7 ^ b12;
7427 	b7 = (tmp >> 20) | (tmp << (64 - 20));
7428 	b12 -= b7;
7429 
7430 	tmp = b3 ^ b10;
7431 	b3 = (tmp >> 37) | (tmp << (64 - 37));
7432 	b10 -= b3;
7433 
7434 	tmp = b5 ^ b8;
7435 	b5 = (tmp >> 31) | (tmp << (64 - 31));
7436 	b8 -= b5;
7437 
7438 	tmp = b1 ^ b14;
7439 	b1 = (tmp >> 23) | (tmp << (64 - 23));
7440 	b14 -= b1;
7441 
7442 	tmp = b9 ^ b4;
7443 	b9 = (tmp >> 52) | (tmp << (64 - 52));
7444 	b4 -= b9;
7445 
7446 	tmp = b13 ^ b6;
7447 	b13 = (tmp >> 35) | (tmp << (64 - 35));
7448 	b6 -= b13;
7449 
7450 	tmp = b11 ^ b2;
7451 	b11 = (tmp >> 48) | (tmp << (64 - 48));
7452 	b2 -= b11;
7453 
7454 	tmp = b15 ^ b0;
7455 	b15 = (tmp >> 9) | (tmp << (64 - 9));
7456 	b0 -= b15;
7457 
7458 	tmp = b9 ^ b10;
7459 	b9 = (tmp >> 25) | (tmp << (64 - 25));
7460 	b10 -= b9;
7461 
7462 	tmp = b11 ^ b8;
7463 	b11 = (tmp >> 44) | (tmp << (64 - 44));
7464 	b8 -= b11;
7465 
7466 	tmp = b13 ^ b14;
7467 	b13 = (tmp >> 42) | (tmp << (64 - 42));
7468 	b14 -= b13;
7469 
7470 	tmp = b15 ^ b12;
7471 	b15 = (tmp >> 19) | (tmp << (64 - 19));
7472 	b12 -= b15;
7473 
7474 	tmp = b1 ^ b6;
7475 	b1 = (tmp >> 46) | (tmp << (64 - 46));
7476 	b6 -= b1;
7477 
7478 	tmp = b3 ^ b4;
7479 	b3 = (tmp >> 47) | (tmp << (64 - 47));
7480 	b4 -= b3;
7481 
7482 	tmp = b5 ^ b2;
7483 	b5 = (tmp >> 44) | (tmp << (64 - 44));
7484 	b2 -= b5;
7485 
7486 	tmp = b7 ^ b0;
7487 	b7 = (tmp >> 31) | (tmp << (64 - 31));
7488 	b0 -= b7;
7489 
7490 	tmp = b1 ^ b8;
7491 	b1 = (tmp >> 41) | (tmp << (64 - 41));
7492 	b8 -= b1;
7493 
7494 	tmp = b5 ^ b14;
7495 	b5 = (tmp >> 42) | (tmp << (64 - 42));
7496 	b14 -= b5;
7497 
7498 	tmp = b3 ^ b12;
7499 	b3 = (tmp >> 53) | (tmp << (64 - 53));
7500 	b12 -= b3;
7501 
7502 	tmp = b7 ^ b10;
7503 	b7 = (tmp >> 4) | (tmp << (64 - 4));
7504 	b10 -= b7;
7505 
7506 	tmp = b15 ^ b4;
7507 	b15 = (tmp >> 51) | (tmp << (64 - 51));
7508 	b4 -= b15;
7509 
7510 	tmp = b11 ^ b6;
7511 	b11 = (tmp >> 56) | (tmp << (64 - 56));
7512 	b6 -= b11;
7513 
7514 	tmp = b13 ^ b2;
7515 	b13 = (tmp >> 34) | (tmp << (64 - 34));
7516 	b2 -= b13;
7517 
7518 	tmp = b9 ^ b0;
7519 	b9 = (tmp >> 16) | (tmp << (64 - 16));
7520 	b0 -= b9;
7521 
7522 	tmp = b15 ^ b14;
7523 	b15 = (tmp >> 30) | (tmp << (64 - 30));
7524 	b14 -= b15 + k2 + t0;
7525 	b15 -= k3 + 5;
7526 
7527 	tmp = b13 ^ b12;
7528 	b13 = (tmp >> 44) | (tmp << (64 - 44));
7529 	b12 -= b13 + k0;
7530 	b13 -= k1 + t2;
7531 
7532 	tmp = b11 ^ b10;
7533 	b11 = (tmp >> 47) | (tmp << (64 - 47));
7534 	b10 -= b11 + k15;
7535 	b11 -= k16;
7536 
7537 	tmp = b9 ^ b8;
7538 	b9 = (tmp >> 12) | (tmp << (64 - 12));
7539 	b8 -= b9 + k13;
7540 	b9 -= k14;
7541 
7542 	tmp = b7 ^ b6;
7543 	b7 = (tmp >> 31) | (tmp << (64 - 31));
7544 	b6 -= b7 + k11;
7545 	b7 -= k12;
7546 
7547 	tmp = b5 ^ b4;
7548 	b5 = (tmp >> 37) | (tmp << (64 - 37));
7549 	b4 -= b5 + k9;
7550 	b5 -= k10;
7551 
7552 	tmp = b3 ^ b2;
7553 	b3 = (tmp >> 9) | (tmp << (64 - 9));
7554 	b2 -= b3 + k7;
7555 	b3 -= k8;
7556 
7557 	tmp = b1 ^ b0;
7558 	b1 = (tmp >> 41) | (tmp << (64 - 41));
7559 	b0 -= b1 + k5;
7560 	b1 -= k6;
7561 
7562 	tmp = b7 ^ b12;
7563 	b7 = (tmp >> 25) | (tmp << (64 - 25));
7564 	b12 -= b7;
7565 
7566 	tmp = b3 ^ b10;
7567 	b3 = (tmp >> 16) | (tmp << (64 - 16));
7568 	b10 -= b3;
7569 
7570 	tmp = b5 ^ b8;
7571 	b5 = (tmp >> 28) | (tmp << (64 - 28));
7572 	b8 -= b5;
7573 
7574 	tmp = b1 ^ b14;
7575 	b1 = (tmp >> 47) | (tmp << (64 - 47));
7576 	b14 -= b1;
7577 
7578 	tmp = b9 ^ b4;
7579 	b9 = (tmp >> 41) | (tmp << (64 - 41));
7580 	b4 -= b9;
7581 
7582 	tmp = b13 ^ b6;
7583 	b13 = (tmp >> 48) | (tmp << (64 - 48));
7584 	b6 -= b13;
7585 
7586 	tmp = b11 ^ b2;
7587 	b11 = (tmp >> 20) | (tmp << (64 - 20));
7588 	b2 -= b11;
7589 
7590 	tmp = b15 ^ b0;
7591 	b15 = (tmp >> 5) | (tmp << (64 - 5));
7592 	b0 -= b15;
7593 
7594 	tmp = b9 ^ b10;
7595 	b9 = (tmp >> 17) | (tmp << (64 - 17));
7596 	b10 -= b9;
7597 
7598 	tmp = b11 ^ b8;
7599 	b11 = (tmp >> 59) | (tmp << (64 - 59));
7600 	b8 -= b11;
7601 
7602 	tmp = b13 ^ b14;
7603 	b13 = (tmp >> 41) | (tmp << (64 - 41));
7604 	b14 -= b13;
7605 
7606 	tmp = b15 ^ b12;
7607 	b15 = (tmp >> 34) | (tmp << (64 - 34));
7608 	b12 -= b15;
7609 
7610 	tmp = b1 ^ b6;
7611 	b1 = (tmp >> 13) | (tmp << (64 - 13));
7612 	b6 -= b1;
7613 
7614 	tmp = b3 ^ b4;
7615 	b3 = (tmp >> 51) | (tmp << (64 - 51));
7616 	b4 -= b3;
7617 
7618 	tmp = b5 ^ b2;
7619 	b5 = (tmp >> 4) | (tmp << (64 - 4));
7620 	b2 -= b5;
7621 
7622 	tmp = b7 ^ b0;
7623 	b7 = (tmp >> 33) | (tmp << (64 - 33));
7624 	b0 -= b7;
7625 
7626 	tmp = b1 ^ b8;
7627 	b1 = (tmp >> 52) | (tmp << (64 - 52));
7628 	b8 -= b1;
7629 
7630 	tmp = b5 ^ b14;
7631 	b5 = (tmp >> 23) | (tmp << (64 - 23));
7632 	b14 -= b5;
7633 
7634 	tmp = b3 ^ b12;
7635 	b3 = (tmp >> 18) | (tmp << (64 - 18));
7636 	b12 -= b3;
7637 
7638 	tmp = b7 ^ b10;
7639 	b7 = (tmp >> 49) | (tmp << (64 - 49));
7640 	b10 -= b7;
7641 
7642 	tmp = b15 ^ b4;
7643 	b15 = (tmp >> 55) | (tmp << (64 - 55));
7644 	b4 -= b15;
7645 
7646 	tmp = b11 ^ b6;
7647 	b11 = (tmp >> 10) | (tmp << (64 - 10));
7648 	b6 -= b11;
7649 
7650 	tmp = b13 ^ b2;
7651 	b13 = (tmp >> 19) | (tmp << (64 - 19));
7652 	b2 -= b13;
7653 
7654 	tmp = b9 ^ b0;
7655 	b9 = (tmp >> 38) | (tmp << (64 - 38));
7656 	b0 -= b9;
7657 
7658 	tmp = b15 ^ b14;
7659 	b15 = (tmp >> 37) | (tmp << (64 - 37));
7660 	b14 -= b15 + k1 + t2;
7661 	b15 -= k2 + 4;
7662 
7663 	tmp = b13 ^ b12;
7664 	b13 = (tmp >> 22) | (tmp << (64 - 22));
7665 	b12 -= b13 + k16;
7666 	b13 -= k0 + t1;
7667 
7668 	tmp = b11 ^ b10;
7669 	b11 = (tmp >> 17) | (tmp << (64 - 17));
7670 	b10 -= b11 + k14;
7671 	b11 -= k15;
7672 
7673 	tmp = b9 ^ b8;
7674 	b9 = (tmp >> 8) | (tmp << (64 - 8));
7675 	b8 -= b9 + k12;
7676 	b9 -= k13;
7677 
7678 	tmp = b7 ^ b6;
7679 	b7 = (tmp >> 47) | (tmp << (64 - 47));
7680 	b6 -= b7 + k10;
7681 	b7 -= k11;
7682 
7683 	tmp = b5 ^ b4;
7684 	b5 = (tmp >> 8) | (tmp << (64 - 8));
7685 	b4 -= b5 + k8;
7686 	b5 -= k9;
7687 
7688 	tmp = b3 ^ b2;
7689 	b3 = (tmp >> 13) | (tmp << (64 - 13));
7690 	b2 -= b3 + k6;
7691 	b3 -= k7;
7692 
7693 	tmp = b1 ^ b0;
7694 	b1 = (tmp >> 24) | (tmp << (64 - 24));
7695 	b0 -= b1 + k4;
7696 	b1 -= k5;
7697 
7698 	tmp = b7 ^ b12;
7699 	b7 = (tmp >> 20) | (tmp << (64 - 20));
7700 	b12 -= b7;
7701 
7702 	tmp = b3 ^ b10;
7703 	b3 = (tmp >> 37) | (tmp << (64 - 37));
7704 	b10 -= b3;
7705 
7706 	tmp = b5 ^ b8;
7707 	b5 = (tmp >> 31) | (tmp << (64 - 31));
7708 	b8 -= b5;
7709 
7710 	tmp = b1 ^ b14;
7711 	b1 = (tmp >> 23) | (tmp << (64 - 23));
7712 	b14 -= b1;
7713 
7714 	tmp = b9 ^ b4;
7715 	b9 = (tmp >> 52) | (tmp << (64 - 52));
7716 	b4 -= b9;
7717 
7718 	tmp = b13 ^ b6;
7719 	b13 = (tmp >> 35) | (tmp << (64 - 35));
7720 	b6 -= b13;
7721 
7722 	tmp = b11 ^ b2;
7723 	b11 = (tmp >> 48) | (tmp << (64 - 48));
7724 	b2 -= b11;
7725 
7726 	tmp = b15 ^ b0;
7727 	b15 = (tmp >> 9) | (tmp << (64 - 9));
7728 	b0 -= b15;
7729 
7730 	tmp = b9 ^ b10;
7731 	b9 = (tmp >> 25) | (tmp << (64 - 25));
7732 	b10 -= b9;
7733 
7734 	tmp = b11 ^ b8;
7735 	b11 = (tmp >> 44) | (tmp << (64 - 44));
7736 	b8 -= b11;
7737 
7738 	tmp = b13 ^ b14;
7739 	b13 = (tmp >> 42) | (tmp << (64 - 42));
7740 	b14 -= b13;
7741 
7742 	tmp = b15 ^ b12;
7743 	b15 = (tmp >> 19) | (tmp << (64 - 19));
7744 	b12 -= b15;
7745 
7746 	tmp = b1 ^ b6;
7747 	b1 = (tmp >> 46) | (tmp << (64 - 46));
7748 	b6 -= b1;
7749 
7750 	tmp = b3 ^ b4;
7751 	b3 = (tmp >> 47) | (tmp << (64 - 47));
7752 	b4 -= b3;
7753 
7754 	tmp = b5 ^ b2;
7755 	b5 = (tmp >> 44) | (tmp << (64 - 44));
7756 	b2 -= b5;
7757 
7758 	tmp = b7 ^ b0;
7759 	b7 = (tmp >> 31) | (tmp << (64 - 31));
7760 	b0 -= b7;
7761 
7762 	tmp = b1 ^ b8;
7763 	b1 = (tmp >> 41) | (tmp << (64 - 41));
7764 	b8 -= b1;
7765 
7766 	tmp = b5 ^ b14;
7767 	b5 = (tmp >> 42) | (tmp << (64 - 42));
7768 	b14 -= b5;
7769 
7770 	tmp = b3 ^ b12;
7771 	b3 = (tmp >> 53) | (tmp << (64 - 53));
7772 	b12 -= b3;
7773 
7774 	tmp = b7 ^ b10;
7775 	b7 = (tmp >> 4) | (tmp << (64 - 4));
7776 	b10 -= b7;
7777 
7778 	tmp = b15 ^ b4;
7779 	b15 = (tmp >> 51) | (tmp << (64 - 51));
7780 	b4 -= b15;
7781 
7782 	tmp = b11 ^ b6;
7783 	b11 = (tmp >> 56) | (tmp << (64 - 56));
7784 	b6 -= b11;
7785 
7786 	tmp = b13 ^ b2;
7787 	b13 = (tmp >> 34) | (tmp << (64 - 34));
7788 	b2 -= b13;
7789 
7790 	tmp = b9 ^ b0;
7791 	b9 = (tmp >> 16) | (tmp << (64 - 16));
7792 	b0 -= b9;
7793 
7794 	tmp = b15 ^ b14;
7795 	b15 = (tmp >> 30) | (tmp << (64 - 30));
7796 	b14 -= b15 + k0 + t1;
7797 	b15 -= k1 + 3;
7798 
7799 	tmp = b13 ^ b12;
7800 	b13 = (tmp >> 44) | (tmp << (64 - 44));
7801 	b12 -= b13 + k15;
7802 	b13 -= k16 + t0;
7803 
7804 	tmp = b11 ^ b10;
7805 	b11 = (tmp >> 47) | (tmp << (64 - 47));
7806 	b10 -= b11 + k13;
7807 	b11 -= k14;
7808 
7809 	tmp = b9 ^ b8;
7810 	b9 = (tmp >> 12) | (tmp << (64 - 12));
7811 	b8 -= b9 + k11;
7812 	b9 -= k12;
7813 
7814 	tmp = b7 ^ b6;
7815 	b7 = (tmp >> 31) | (tmp << (64 - 31));
7816 	b6 -= b7 + k9;
7817 	b7 -= k10;
7818 
7819 	tmp = b5 ^ b4;
7820 	b5 = (tmp >> 37) | (tmp << (64 - 37));
7821 	b4 -= b5 + k7;
7822 	b5 -= k8;
7823 
7824 	tmp = b3 ^ b2;
7825 	b3 = (tmp >> 9) | (tmp << (64 - 9));
7826 	b2 -= b3 + k5;
7827 	b3 -= k6;
7828 
7829 	tmp = b1 ^ b0;
7830 	b1 = (tmp >> 41) | (tmp << (64 - 41));
7831 	b0 -= b1 + k3;
7832 	b1 -= k4;
7833 
7834 	tmp = b7 ^ b12;
7835 	b7 = (tmp >> 25) | (tmp << (64 - 25));
7836 	b12 -= b7;
7837 
7838 	tmp = b3 ^ b10;
7839 	b3 = (tmp >> 16) | (tmp << (64 - 16));
7840 	b10 -= b3;
7841 
7842 	tmp = b5 ^ b8;
7843 	b5 = (tmp >> 28) | (tmp << (64 - 28));
7844 	b8 -= b5;
7845 
7846 	tmp = b1 ^ b14;
7847 	b1 = (tmp >> 47) | (tmp << (64 - 47));
7848 	b14 -= b1;
7849 
7850 	tmp = b9 ^ b4;
7851 	b9 = (tmp >> 41) | (tmp << (64 - 41));
7852 	b4 -= b9;
7853 
7854 	tmp = b13 ^ b6;
7855 	b13 = (tmp >> 48) | (tmp << (64 - 48));
7856 	b6 -= b13;
7857 
7858 	tmp = b11 ^ b2;
7859 	b11 = (tmp >> 20) | (tmp << (64 - 20));
7860 	b2 -= b11;
7861 
7862 	tmp = b15 ^ b0;
7863 	b15 = (tmp >> 5) | (tmp << (64 - 5));
7864 	b0 -= b15;
7865 
7866 	tmp = b9 ^ b10;
7867 	b9 = (tmp >> 17) | (tmp << (64 - 17));
7868 	b10 -= b9;
7869 
7870 	tmp = b11 ^ b8;
7871 	b11 = (tmp >> 59) | (tmp << (64 - 59));
7872 	b8 -= b11;
7873 
7874 	tmp = b13 ^ b14;
7875 	b13 = (tmp >> 41) | (tmp << (64 - 41));
7876 	b14 -= b13;
7877 
7878 	tmp = b15 ^ b12;
7879 	b15 = (tmp >> 34) | (tmp << (64 - 34));
7880 	b12 -= b15;
7881 
7882 	tmp = b1 ^ b6;
7883 	b1 = (tmp >> 13) | (tmp << (64 - 13));
7884 	b6 -= b1;
7885 
7886 	tmp = b3 ^ b4;
7887 	b3 = (tmp >> 51) | (tmp << (64 - 51));
7888 	b4 -= b3;
7889 
7890 	tmp = b5 ^ b2;
7891 	b5 = (tmp >> 4) | (tmp << (64 - 4));
7892 	b2 -= b5;
7893 
7894 	tmp = b7 ^ b0;
7895 	b7 = (tmp >> 33) | (tmp << (64 - 33));
7896 	b0 -= b7;
7897 
7898 	tmp = b1 ^ b8;
7899 	b1 = (tmp >> 52) | (tmp << (64 - 52));
7900 	b8 -= b1;
7901 
7902 	tmp = b5 ^ b14;
7903 	b5 = (tmp >> 23) | (tmp << (64 - 23));
7904 	b14 -= b5;
7905 
7906 	tmp = b3 ^ b12;
7907 	b3 = (tmp >> 18) | (tmp << (64 - 18));
7908 	b12 -= b3;
7909 
7910 	tmp = b7 ^ b10;
7911 	b7 = (tmp >> 49) | (tmp << (64 - 49));
7912 	b10 -= b7;
7913 
7914 	tmp = b15 ^ b4;
7915 	b15 = (tmp >> 55) | (tmp << (64 - 55));
7916 	b4 -= b15;
7917 
7918 	tmp = b11 ^ b6;
7919 	b11 = (tmp >> 10) | (tmp << (64 - 10));
7920 	b6 -= b11;
7921 
7922 	tmp = b13 ^ b2;
7923 	b13 = (tmp >> 19) | (tmp << (64 - 19));
7924 	b2 -= b13;
7925 
7926 	tmp = b9 ^ b0;
7927 	b9 = (tmp >> 38) | (tmp << (64 - 38));
7928 	b0 -= b9;
7929 
7930 	tmp = b15 ^ b14;
7931 	b15 = (tmp >> 37) | (tmp << (64 - 37));
7932 	b14 -= b15 + k16 + t0;
7933 	b15 -= k0 + 2;
7934 
7935 	tmp = b13 ^ b12;
7936 	b13 = (tmp >> 22) | (tmp << (64 - 22));
7937 	b12 -= b13 + k14;
7938 	b13 -= k15 + t2;
7939 
7940 	tmp = b11 ^ b10;
7941 	b11 = (tmp >> 17) | (tmp << (64 - 17));
7942 	b10 -= b11 + k12;
7943 	b11 -= k13;
7944 
7945 	tmp = b9 ^ b8;
7946 	b9 = (tmp >> 8) | (tmp << (64 - 8));
7947 	b8 -= b9 + k10;
7948 	b9 -= k11;
7949 
7950 	tmp = b7 ^ b6;
7951 	b7 = (tmp >> 47) | (tmp << (64 - 47));
7952 	b6 -= b7 + k8;
7953 	b7 -= k9;
7954 
7955 	tmp = b5 ^ b4;
7956 	b5 = (tmp >> 8) | (tmp << (64 - 8));
7957 	b4 -= b5 + k6;
7958 	b5 -= k7;
7959 
7960 	tmp = b3 ^ b2;
7961 	b3 = (tmp >> 13) | (tmp << (64 - 13));
7962 	b2 -= b3 + k4;
7963 	b3 -= k5;
7964 
7965 	tmp = b1 ^ b0;
7966 	b1 = (tmp >> 24) | (tmp << (64 - 24));
7967 	b0 -= b1 + k2;
7968 	b1 -= k3;
7969 
7970 	tmp = b7 ^ b12;
7971 	b7 = (tmp >> 20) | (tmp << (64 - 20));
7972 	b12 -= b7;
7973 
7974 	tmp = b3 ^ b10;
7975 	b3 = (tmp >> 37) | (tmp << (64 - 37));
7976 	b10 -= b3;
7977 
7978 	tmp = b5 ^ b8;
7979 	b5 = (tmp >> 31) | (tmp << (64 - 31));
7980 	b8 -= b5;
7981 
7982 	tmp = b1 ^ b14;
7983 	b1 = (tmp >> 23) | (tmp << (64 - 23));
7984 	b14 -= b1;
7985 
7986 	tmp = b9 ^ b4;
7987 	b9 = (tmp >> 52) | (tmp << (64 - 52));
7988 	b4 -= b9;
7989 
7990 	tmp = b13 ^ b6;
7991 	b13 = (tmp >> 35) | (tmp << (64 - 35));
7992 	b6 -= b13;
7993 
7994 	tmp = b11 ^ b2;
7995 	b11 = (tmp >> 48) | (tmp << (64 - 48));
7996 	b2 -= b11;
7997 
7998 	tmp = b15 ^ b0;
7999 	b15 = (tmp >> 9) | (tmp << (64 - 9));
8000 	b0 -= b15;
8001 
8002 	tmp = b9 ^ b10;
8003 	b9 = (tmp >> 25) | (tmp << (64 - 25));
8004 	b10 -= b9;
8005 
8006 	tmp = b11 ^ b8;
8007 	b11 = (tmp >> 44) | (tmp << (64 - 44));
8008 	b8 -= b11;
8009 
8010 	tmp = b13 ^ b14;
8011 	b13 = (tmp >> 42) | (tmp << (64 - 42));
8012 	b14 -= b13;
8013 
8014 	tmp = b15 ^ b12;
8015 	b15 = (tmp >> 19) | (tmp << (64 - 19));
8016 	b12 -= b15;
8017 
8018 	tmp = b1 ^ b6;
8019 	b1 = (tmp >> 46) | (tmp << (64 - 46));
8020 	b6 -= b1;
8021 
8022 	tmp = b3 ^ b4;
8023 	b3 = (tmp >> 47) | (tmp << (64 - 47));
8024 	b4 -= b3;
8025 
8026 	tmp = b5 ^ b2;
8027 	b5 = (tmp >> 44) | (tmp << (64 - 44));
8028 	b2 -= b5;
8029 
8030 	tmp = b7 ^ b0;
8031 	b7 = (tmp >> 31) | (tmp << (64 - 31));
8032 	b0 -= b7;
8033 
8034 	tmp = b1 ^ b8;
8035 	b1 = (tmp >> 41) | (tmp << (64 - 41));
8036 	b8 -= b1;
8037 
8038 	tmp = b5 ^ b14;
8039 	b5 = (tmp >> 42) | (tmp << (64 - 42));
8040 	b14 -= b5;
8041 
8042 	tmp = b3 ^ b12;
8043 	b3 = (tmp >> 53) | (tmp << (64 - 53));
8044 	b12 -= b3;
8045 
8046 	tmp = b7 ^ b10;
8047 	b7 = (tmp >> 4) | (tmp << (64 - 4));
8048 	b10 -= b7;
8049 
8050 	tmp = b15 ^ b4;
8051 	b15 = (tmp >> 51) | (tmp << (64 - 51));
8052 	b4 -= b15;
8053 
8054 	tmp = b11 ^ b6;
8055 	b11 = (tmp >> 56) | (tmp << (64 - 56));
8056 	b6 -= b11;
8057 
8058 	tmp = b13 ^ b2;
8059 	b13 = (tmp >> 34) | (tmp << (64 - 34));
8060 	b2 -= b13;
8061 
8062 	tmp = b9 ^ b0;
8063 	b9 = (tmp >> 16) | (tmp << (64 - 16));
8064 	b0 -= b9;
8065 
8066 	tmp = b15 ^ b14;
8067 	b15 = (tmp >> 30) | (tmp << (64 - 30));
8068 	b14 -= b15 + k15 + t2;
8069 	b15 -= k16 + 1;
8070 
8071 	tmp = b13 ^ b12;
8072 	b13 = (tmp >> 44) | (tmp << (64 - 44));
8073 	b12 -= b13 + k13;
8074 	b13 -= k14 + t1;
8075 
8076 	tmp = b11 ^ b10;
8077 	b11 = (tmp >> 47) | (tmp << (64 - 47));
8078 	b10 -= b11 + k11;
8079 	b11 -= k12;
8080 
8081 	tmp = b9 ^ b8;
8082 	b9 = (tmp >> 12) | (tmp << (64 - 12));
8083 	b8 -= b9 + k9;
8084 	b9 -= k10;
8085 
8086 	tmp = b7 ^ b6;
8087 	b7 = (tmp >> 31) | (tmp << (64 - 31));
8088 	b6 -= b7 + k7;
8089 	b7 -= k8;
8090 
8091 	tmp = b5 ^ b4;
8092 	b5 = (tmp >> 37) | (tmp << (64 - 37));
8093 	b4 -= b5 + k5;
8094 	b5 -= k6;
8095 
8096 	tmp = b3 ^ b2;
8097 	b3 = (tmp >> 9) | (tmp << (64 - 9));
8098 	b2 -= b3 + k3;
8099 	b3 -= k4;
8100 
8101 	tmp = b1 ^ b0;
8102 	b1 = (tmp >> 41) | (tmp << (64 - 41));
8103 	b0 -= b1 + k1;
8104 	b1 -= k2;
8105 
8106 	tmp = b7 ^ b12;
8107 	b7 = (tmp >> 25) | (tmp << (64 - 25));
8108 	b12 -= b7;
8109 
8110 	tmp = b3 ^ b10;
8111 	b3 = (tmp >> 16) | (tmp << (64 - 16));
8112 	b10 -= b3;
8113 
8114 	tmp = b5 ^ b8;
8115 	b5 = (tmp >> 28) | (tmp << (64 - 28));
8116 	b8 -= b5;
8117 
8118 	tmp = b1 ^ b14;
8119 	b1 = (tmp >> 47) | (tmp << (64 - 47));
8120 	b14 -= b1;
8121 
8122 	tmp = b9 ^ b4;
8123 	b9 = (tmp >> 41) | (tmp << (64 - 41));
8124 	b4 -= b9;
8125 
8126 	tmp = b13 ^ b6;
8127 	b13 = (tmp >> 48) | (tmp << (64 - 48));
8128 	b6 -= b13;
8129 
8130 	tmp = b11 ^ b2;
8131 	b11 = (tmp >> 20) | (tmp << (64 - 20));
8132 	b2 -= b11;
8133 
8134 	tmp = b15 ^ b0;
8135 	b15 = (tmp >> 5) | (tmp << (64 - 5));
8136 	b0 -= b15;
8137 
8138 	tmp = b9 ^ b10;
8139 	b9 = (tmp >> 17) | (tmp << (64 - 17));
8140 	b10 -= b9;
8141 
8142 	tmp = b11 ^ b8;
8143 	b11 = (tmp >> 59) | (tmp << (64 - 59));
8144 	b8 -= b11;
8145 
8146 	tmp = b13 ^ b14;
8147 	b13 = (tmp >> 41) | (tmp << (64 - 41));
8148 	b14 -= b13;
8149 
8150 	tmp = b15 ^ b12;
8151 	b15 = (tmp >> 34) | (tmp << (64 - 34));
8152 	b12 -= b15;
8153 
8154 	tmp = b1 ^ b6;
8155 	b1 = (tmp >> 13) | (tmp << (64 - 13));
8156 	b6 -= b1;
8157 
8158 	tmp = b3 ^ b4;
8159 	b3 = (tmp >> 51) | (tmp << (64 - 51));
8160 	b4 -= b3;
8161 
8162 	tmp = b5 ^ b2;
8163 	b5 = (tmp >> 4) | (tmp << (64 - 4));
8164 	b2 -= b5;
8165 
8166 	tmp = b7 ^ b0;
8167 	b7 = (tmp >> 33) | (tmp << (64 - 33));
8168 	b0 -= b7;
8169 
8170 	tmp = b1 ^ b8;
8171 	b1 = (tmp >> 52) | (tmp << (64 - 52));
8172 	b8 -= b1;
8173 
8174 	tmp = b5 ^ b14;
8175 	b5 = (tmp >> 23) | (tmp << (64 - 23));
8176 	b14 -= b5;
8177 
8178 	tmp = b3 ^ b12;
8179 	b3 = (tmp >> 18) | (tmp << (64 - 18));
8180 	b12 -= b3;
8181 
8182 	tmp = b7 ^ b10;
8183 	b7 = (tmp >> 49) | (tmp << (64 - 49));
8184 	b10 -= b7;
8185 
8186 	tmp = b15 ^ b4;
8187 	b15 = (tmp >> 55) | (tmp << (64 - 55));
8188 	b4 -= b15;
8189 
8190 	tmp = b11 ^ b6;
8191 	b11 = (tmp >> 10) | (tmp << (64 - 10));
8192 	b6 -= b11;
8193 
8194 	tmp = b13 ^ b2;
8195 	b13 = (tmp >> 19) | (tmp << (64 - 19));
8196 	b2 -= b13;
8197 
8198 	tmp = b9 ^ b0;
8199 	b9 = (tmp >> 38) | (tmp << (64 - 38));
8200 	b0 -= b9;
8201 
8202 	tmp = b15 ^ b14;
8203 	b15 = (tmp >> 37) | (tmp << (64 - 37));
8204 	b14 -= b15 + k14 + t1;
8205 	b15 -= k15;
8206 
8207 	tmp = b13 ^ b12;
8208 	b13 = (tmp >> 22) | (tmp << (64 - 22));
8209 	b12 -= b13 + k12;
8210 	b13 -= k13 + t0;
8211 
8212 	tmp = b11 ^ b10;
8213 	b11 = (tmp >> 17) | (tmp << (64 - 17));
8214 	b10 -= b11 + k10;
8215 	b11 -= k11;
8216 
8217 	tmp = b9 ^ b8;
8218 	b9 = (tmp >> 8) | (tmp << (64 - 8));
8219 	b8 -= b9 + k8;
8220 	b9 -= k9;
8221 
8222 	tmp = b7 ^ b6;
8223 	b7 = (tmp >> 47) | (tmp << (64 - 47));
8224 	b6 -= b7 + k6;
8225 	b7 -= k7;
8226 
8227 	tmp = b5 ^ b4;
8228 	b5 = (tmp >> 8) | (tmp << (64 - 8));
8229 	b4 -= b5 + k4;
8230 	b5 -= k5;
8231 
8232 	tmp = b3 ^ b2;
8233 	b3 = (tmp >> 13) | (tmp << (64 - 13));
8234 	b2 -= b3 + k2;
8235 	b3 -= k3;
8236 
8237 	tmp = b1 ^ b0;
8238 	b1 = (tmp >> 24) | (tmp << (64 - 24));
8239 	b0 -= b1 + k0;
8240 	b1 -= k1;
8241 
8242 	output[15] = b15;
8243 	output[14] = b14;
8244 	output[13] = b13;
8245 	output[12] = b12;
8246 	output[11] = b11;
8247 	output[10] = b10;
8248 	output[9] = b9;
8249 	output[8] = b8;
8250 	output[7] = b7;
8251 	output[6] = b6;
8252 	output[5] = b5;
8253 	output[4] = b4;
8254 	output[3] = b3;
8255 	output[2] = b2;
8256 	output[1] = b1;
8257 	output[0] = b0;
8258 }
8259