• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".

For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

/*
 * declareABCDE
 *
 * Declares every UINT64 working variable that the macros in this file name
 * directly:
 *   A??, E??  two sets of 25 lane variables (ba..su); the round macros receive
 *             these prefixes through their A and E parameters
 *   B??       25 temporaries assigned by the round macros
 *   C?        five column values (see prepareTheta)
 *   D?        five values derived from C? at the start of each round
 *
 * The expansion is a list of declarations that already end in ';', so the
 * macro is used on a line of its own, where declarations are allowed, and
 * needs no trailing semicolon.  UINT64 must be defined by the including file.
 */
#define declareABCDE \
    UINT64 Aba, Abe, Abi, Abo, Abu; \
    UINT64 Aga, Age, Agi, Ago, Agu; \
    UINT64 Aka, Ake, Aki, Ako, Aku; \
    UINT64 Ama, Ame, Ami, Amo, Amu; \
    UINT64 Asa, Ase, Asi, Aso, Asu; \
    UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
    UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
    UINT64 Bka, Bke, Bki, Bko, Bku; \
    UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
    UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
    UINT64 Ca, Ce, Ci, Co, Cu; \
    UINT64 Da, De, Di, Do, Du; \
    UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
    UINT64 Ega, Ege, Egi, Ego, Egu; \
    UINT64 Eka, Eke, Eki, Eko, Eku; \
    UINT64 Ema, Eme, Emi, Emo, Emu; \
    UINT64 Esa, Ese, Esi, Eso, Esu;
/*
 * prepareTheta
 *
 * Sets each of Ca, Ce, Ci, Co, Cu to the XOR of the five A lanes that share
 * its final letter (for example Ca = Aba^Aga^Aka^Ama^Asa).  The round macros
 * read Ca..Cu as their first step, so this runs once before the first round.
 *
 * Works on the fixed names A?? and C? (see declareABCDE); takes no arguments
 * and needs no trailing semicolon.
 */
#define prepareTheta \
    Ca = Aba^Aga^Aka^Ama^Asa; \
    Ce = Abe^Age^Ake^Ame^Ase; \
    Ci = Abi^Agi^Aki^Ami^Asi; \
    Co = Abo^Ago^Ako^Amo^Aso; \
    Cu = Abu^Agu^Aku^Amu^Asu;
#ifdef UseBebigokimisa
/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */

/* --- 64-bit lanes mapped to 64-bit words */

/*
 * thetaRhoPiChiIotaPrepareTheta(i, A, E)   [UseBebigokimisa variant]
 *
 * One fully unrolled round: reads the 25 lanes A##ba..A##su and writes the 25
 * lanes E##ba..E##su.
 *
 *   i  index into KeccakF1600RoundConstants[]; that entry is XORed into E##ba
 *   A  name prefix of the input lanes.  They are modified: each one is XORed
 *      in place with the D value of its column before being rotated into B??
 *   E  name prefix of the output lanes
 *
 * On entry Ca..Cu must hold the XOR of each column of the A lanes (see
 * prepareTheta).  On exit Ca..Cu hold the XOR of each column of the E lanes,
 * so the next round can be invoked directly with A and E swapped.
 *
 * The step that combines the B?? values uses a per-lane mix of |, & and ~
 * rather than the uniform x ^ ((~y) & z) of the #else variant.
 * NOTE(review): this presumes the caller keeps the state in the lane
 * complementing representation named in the comment above; which lanes are
 * complemented is maintained outside this macro - confirm against the callers.
 *
 * Needs ROL64(), KeccakF1600RoundConstants[] and the B??, C?, D? variables of
 * declareABCDE in scope.  Expands to a plain statement sequence (no enclosing
 * braces, no trailing ';' required), so do not use it as the body of a
 * brace-less if/for/while.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    Da = Cu^ROL64(Ce, 1); \
    De = Ca^ROL64(Ci, 1); \
    Di = Ce^ROL64(Co, 1); \
    Do = Ci^ROL64(Cu, 1); \
    Du = Co^ROL64(Ca, 1); \
    /* E##b? lanes; Ca..Cu are restarted from them */ \
    A##ba ^= Da; \
    Bba = A##ba; \
    A##ge ^= De; \
    Bbe = ROL64(A##ge, 44); \
    A##ki ^= Di; \
    Bbi = ROL64(A##ki, 43); \
    A##mo ^= Do; \
    Bbo = ROL64(A##mo, 21); \
    A##su ^= Du; \
    Bbu = ROL64(A##su, 14); \
    E##ba =   Bba ^(  Bbe |  Bbi ); \
    E##ba ^= KeccakF1600RoundConstants[i]; \
    Ca = E##ba; \
    E##be =   Bbe ^((~Bbi)|  Bbo ); \
    Ce = E##be; \
    E##bi =   Bbi ^(  Bbo &  Bbu ); \
    Ci = E##bi; \
    E##bo =   Bbo ^(  Bbu |  Bba ); \
    Co = E##bo; \
    E##bu =   Bbu ^(  Bba &  Bbe ); \
    Cu = E##bu; \
    /* E##g? lanes, accumulated into Ca..Cu */ \
    A##bo ^= Do; \
    Bga = ROL64(A##bo, 28); \
    A##gu ^= Du; \
    Bge = ROL64(A##gu, 20); \
    A##ka ^= Da; \
    Bgi = ROL64(A##ka, 3); \
    A##me ^= De; \
    Bgo = ROL64(A##me, 45); \
    A##si ^= Di; \
    Bgu = ROL64(A##si, 61); \
    E##ga =   Bga ^(  Bge |  Bgi ); \
    Ca ^= E##ga; \
    E##ge =   Bge ^(  Bgi &  Bgo ); \
    Ce ^= E##ge; \
    E##gi =   Bgi ^(  Bgo |(~Bgu)); \
    Ci ^= E##gi; \
    E##go =   Bgo ^(  Bgu |  Bga ); \
    Co ^= E##go; \
    E##gu =   Bgu ^(  Bga &  Bge ); \
    Cu ^= E##gu; \
    /* E##k? lanes */ \
    A##be ^= De; \
    Bka = ROL64(A##be, 1); \
    A##gi ^= Di; \
    Bke = ROL64(A##gi, 6); \
    A##ko ^= Do; \
    Bki = ROL64(A##ko, 25); \
    A##mu ^= Du; \
    Bko = ROL64(A##mu, 8); \
    A##sa ^= Da; \
    Bku = ROL64(A##sa, 18); \
    E##ka =   Bka ^(  Bke |  Bki ); \
    Ca ^= E##ka; \
    E##ke =   Bke ^(  Bki &  Bko ); \
    Ce ^= E##ke; \
    E##ki =   Bki ^((~Bko)&  Bku ); \
    Ci ^= E##ki; \
    E##ko = (~Bko)^(  Bku |  Bka ); \
    Co ^= E##ko; \
    E##ku =   Bku ^(  Bka &  Bke ); \
    Cu ^= E##ku; \
    /* E##m? lanes */ \
    A##bu ^= Du; \
    Bma = ROL64(A##bu, 27); \
    A##ga ^= Da; \
    Bme = ROL64(A##ga, 36); \
    A##ke ^= De; \
    Bmi = ROL64(A##ke, 10); \
    A##mi ^= Di; \
    Bmo = ROL64(A##mi, 15); \
    A##so ^= Do; \
    Bmu = ROL64(A##so, 56); \
    E##ma =   Bma ^(  Bme &  Bmi ); \
    Ca ^= E##ma; \
    E##me =   Bme ^(  Bmi |  Bmo ); \
    Ce ^= E##me; \
    E##mi =   Bmi ^((~Bmo)|  Bmu ); \
    Ci ^= E##mi; \
    E##mo = (~Bmo)^(  Bmu &  Bma ); \
    Co ^= E##mo; \
    E##mu =   Bmu ^(  Bma |  Bme ); \
    Cu ^= E##mu; \
    /* E##s? lanes */ \
    A##bi ^= Di; \
    Bsa = ROL64(A##bi, 62); \
    A##go ^= Do; \
    Bse = ROL64(A##go, 55); \
    A##ku ^= Du; \
    Bsi = ROL64(A##ku, 39); \
    A##ma ^= Da; \
    Bso = ROL64(A##ma, 41); \
    A##se ^= De; \
    Bsu = ROL64(A##se, 2); \
    E##sa =   Bsa ^((~Bse)&  Bsi ); \
    Ca ^= E##sa; \
    E##se = (~Bse)^(  Bsi |  Bso ); \
    Ce ^= E##se; \
    E##si =   Bsi ^(  Bso &  Bsu ); \
    Ci ^= E##si; \
    E##so =   Bso ^(  Bsu |  Bsa ); \
    Co ^= E##so; \
    E##su =   Bsu ^(  Bsa &  Bse ); \
    Cu ^= E##su;
/* --- Code for round (lane complementing pattern 'bebigokimisa') */

/* --- 64-bit lanes mapped to 64-bit words */

/*
 * thetaRhoPiChiIota(i, A, E)   [UseBebigokimisa variant]
 *
 * Same round as thetaRhoPiChiIotaPrepareTheta for this variant, except that
 * Ca..Cu are only read, never rewritten.  Use it when no further round
 * follows, or when the caller recomputes Ca..Cu itself.
 *
 *   i  index into KeccakF1600RoundConstants[]; that entry is XORed into E##ba
 *   A  name prefix of the input lanes (each is XORed in place with the D value
 *      of its column, so the A lanes do not keep their entry values)
 *   E  name prefix of the output lanes
 *
 * On entry Ca..Cu must hold the XOR of each column of the A lanes.
 * NOTE(review): the |/&/~ mix in the combining step presumes the lane
 * complementing state representation kept by the callers - confirm there.
 *
 * Needs ROL64(), KeccakF1600RoundConstants[] and the B??, C?, D? variables of
 * declareABCDE in scope.  Expands to a plain statement sequence; do not use it
 * as the body of a brace-less if/for/while.
 */
#define thetaRhoPiChiIota(i, A, E) \
    Da = Cu^ROL64(Ce, 1); \
    De = Ca^ROL64(Ci, 1); \
    Di = Ce^ROL64(Co, 1); \
    Do = Ci^ROL64(Cu, 1); \
    Du = Co^ROL64(Ca, 1); \
    /* E##b? lanes */ \
    A##ba ^= Da; \
    Bba = A##ba; \
    A##ge ^= De; \
    Bbe = ROL64(A##ge, 44); \
    A##ki ^= Di; \
    Bbi = ROL64(A##ki, 43); \
    A##mo ^= Do; \
    Bbo = ROL64(A##mo, 21); \
    A##su ^= Du; \
    Bbu = ROL64(A##su, 14); \
    E##ba =   Bba ^(  Bbe |  Bbi ); \
    E##ba ^= KeccakF1600RoundConstants[i]; \
    E##be =   Bbe ^((~Bbi)|  Bbo ); \
    E##bi =   Bbi ^(  Bbo &  Bbu ); \
    E##bo =   Bbo ^(  Bbu |  Bba ); \
    E##bu =   Bbu ^(  Bba &  Bbe ); \
    /* E##g? lanes */ \
    A##bo ^= Do; \
    Bga = ROL64(A##bo, 28); \
    A##gu ^= Du; \
    Bge = ROL64(A##gu, 20); \
    A##ka ^= Da; \
    Bgi = ROL64(A##ka, 3); \
    A##me ^= De; \
    Bgo = ROL64(A##me, 45); \
    A##si ^= Di; \
    Bgu = ROL64(A##si, 61); \
    E##ga =   Bga ^(  Bge |  Bgi ); \
    E##ge =   Bge ^(  Bgi &  Bgo ); \
    E##gi =   Bgi ^(  Bgo |(~Bgu)); \
    E##go =   Bgo ^(  Bgu |  Bga ); \
    E##gu =   Bgu ^(  Bga &  Bge ); \
    /* E##k? lanes */ \
    A##be ^= De; \
    Bka = ROL64(A##be, 1); \
    A##gi ^= Di; \
    Bke = ROL64(A##gi, 6); \
    A##ko ^= Do; \
    Bki = ROL64(A##ko, 25); \
    A##mu ^= Du; \
    Bko = ROL64(A##mu, 8); \
    A##sa ^= Da; \
    Bku = ROL64(A##sa, 18); \
    E##ka =   Bka ^(  Bke |  Bki ); \
    E##ke =   Bke ^(  Bki &  Bko ); \
    E##ki =   Bki ^((~Bko)&  Bku ); \
    E##ko = (~Bko)^(  Bku |  Bka ); \
    E##ku =   Bku ^(  Bka &  Bke ); \
    /* E##m? lanes */ \
    A##bu ^= Du; \
    Bma = ROL64(A##bu, 27); \
    A##ga ^= Da; \
    Bme = ROL64(A##ga, 36); \
    A##ke ^= De; \
    Bmi = ROL64(A##ke, 10); \
    A##mi ^= Di; \
    Bmo = ROL64(A##mi, 15); \
    A##so ^= Do; \
    Bmu = ROL64(A##so, 56); \
    E##ma =   Bma ^(  Bme &  Bmi ); \
    E##me =   Bme ^(  Bmi |  Bmo ); \
    E##mi =   Bmi ^((~Bmo)|  Bmu ); \
    E##mo = (~Bmo)^(  Bmu &  Bma ); \
    E##mu =   Bmu ^(  Bma |  Bme ); \
    /* E##s? lanes */ \
    A##bi ^= Di; \
    Bsa = ROL64(A##bi, 62); \
    A##go ^= Do; \
    Bse = ROL64(A##go, 55); \
    A##ku ^= Du; \
    Bsi = ROL64(A##ku, 39); \
    A##ma ^= Da; \
    Bso = ROL64(A##ma, 41); \
    A##se ^= De; \
    Bsu = ROL64(A##se, 2); \
    E##sa =   Bsa ^((~Bse)&  Bsi ); \
    E##se = (~Bse)^(  Bsi |  Bso ); \
    E##si =   Bsi ^(  Bso &  Bsu ); \
    E##so =   Bso ^(  Bsu |  Bsa ); \
    E##su =   Bsu ^(  Bsa &  Bse );
#else /* UseBebigokimisa */

/* --- Code for round, with prepare-theta */

/* --- 64-bit lanes mapped to 64-bit words */

/*
 * thetaRhoPiChiIotaPrepareTheta(i, A, E)   [variant without UseBebigokimisa]
 *
 * One fully unrolled round: reads the 25 lanes A##ba..A##su and writes the 25
 * lanes E##ba..E##su.
 *
 *   i  index into KeccakF1600RoundConstants[]; that entry is XORed into E##ba
 *   A  name prefix of the input lanes.  They are modified: each one is XORed
 *      in place with the D value of its column before being rotated into B??
 *   E  name prefix of the output lanes
 *
 * Steps, in source order:
 *   1. Da..Du are derived from Ca..Cu (each is one C value XOR another C value
 *      rotated left by one bit).
 *   2. For each group of five output lanes, five A lanes are XORed with their
 *      D value and rotated by a fixed amount into B?? temporaries.
 *   3. Each output lane is B[x] ^ ((~B[x+1]) & B[x+2]) within its group.
 *   4. Ca..Cu are rebuilt as the XOR of each column of the E lanes.
 *
 * On entry Ca..Cu must hold the XOR of each column of the A lanes (see
 * prepareTheta); on exit they are ready for the next round, which can be
 * invoked directly with A and E swapped.
 *
 * Needs ROL64(), KeccakF1600RoundConstants[] and the B??, C?, D? variables of
 * declareABCDE in scope.  Expands to a plain statement sequence (no enclosing
 * braces, no trailing ';' required), so do not use it as the body of a
 * brace-less if/for/while.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    Da = Cu^ROL64(Ce, 1); \
    De = Ca^ROL64(Ci, 1); \
    Di = Ce^ROL64(Co, 1); \
    Do = Ci^ROL64(Cu, 1); \
    Du = Co^ROL64(Ca, 1); \
    /* E##b? lanes; Ca..Cu are restarted from them */ \
    A##ba ^= Da; \
    Bba = A##ba; \
    A##ge ^= De; \
    Bbe = ROL64(A##ge, 44); \
    A##ki ^= Di; \
    Bbi = ROL64(A##ki, 43); \
    A##mo ^= Do; \
    Bbo = ROL64(A##mo, 21); \
    A##su ^= Du; \
    Bbu = ROL64(A##su, 14); \
    E##ba =   Bba ^((~Bbe)&  Bbi ); \
    E##ba ^= KeccakF1600RoundConstants[i]; \
    Ca = E##ba; \
    E##be =   Bbe ^((~Bbi)&  Bbo ); \
    Ce = E##be; \
    E##bi =   Bbi ^((~Bbo)&  Bbu ); \
    Ci = E##bi; \
    E##bo =   Bbo ^((~Bbu)&  Bba ); \
    Co = E##bo; \
    E##bu =   Bbu ^((~Bba)&  Bbe ); \
    Cu = E##bu; \
    /* E##g? lanes, accumulated into Ca..Cu */ \
    A##bo ^= Do; \
    Bga = ROL64(A##bo, 28); \
    A##gu ^= Du; \
    Bge = ROL64(A##gu, 20); \
    A##ka ^= Da; \
    Bgi = ROL64(A##ka, 3); \
    A##me ^= De; \
    Bgo = ROL64(A##me, 45); \
    A##si ^= Di; \
    Bgu = ROL64(A##si, 61); \
    E##ga =   Bga ^((~Bge)&  Bgi ); \
    Ca ^= E##ga; \
    E##ge =   Bge ^((~Bgi)&  Bgo ); \
    Ce ^= E##ge; \
    E##gi =   Bgi ^((~Bgo)&  Bgu ); \
    Ci ^= E##gi; \
    E##go =   Bgo ^((~Bgu)&  Bga ); \
    Co ^= E##go; \
    E##gu =   Bgu ^((~Bga)&  Bge ); \
    Cu ^= E##gu; \
    /* E##k? lanes */ \
    A##be ^= De; \
    Bka = ROL64(A##be, 1); \
    A##gi ^= Di; \
    Bke = ROL64(A##gi, 6); \
    A##ko ^= Do; \
    Bki = ROL64(A##ko, 25); \
    A##mu ^= Du; \
    Bko = ROL64(A##mu, 8); \
    A##sa ^= Da; \
    Bku = ROL64(A##sa, 18); \
    E##ka =   Bka ^((~Bke)&  Bki ); \
    Ca ^= E##ka; \
    E##ke =   Bke ^((~Bki)&  Bko ); \
    Ce ^= E##ke; \
    E##ki =   Bki ^((~Bko)&  Bku ); \
    Ci ^= E##ki; \
    E##ko =   Bko ^((~Bku)&  Bka ); \
    Co ^= E##ko; \
    E##ku =   Bku ^((~Bka)&  Bke ); \
    Cu ^= E##ku; \
    /* E##m? lanes */ \
    A##bu ^= Du; \
    Bma = ROL64(A##bu, 27); \
    A##ga ^= Da; \
    Bme = ROL64(A##ga, 36); \
    A##ke ^= De; \
    Bmi = ROL64(A##ke, 10); \
    A##mi ^= Di; \
    Bmo = ROL64(A##mi, 15); \
    A##so ^= Do; \
    Bmu = ROL64(A##so, 56); \
    E##ma =   Bma ^((~Bme)&  Bmi ); \
    Ca ^= E##ma; \
    E##me =   Bme ^((~Bmi)&  Bmo ); \
    Ce ^= E##me; \
    E##mi =   Bmi ^((~Bmo)&  Bmu ); \
    Ci ^= E##mi; \
    E##mo =   Bmo ^((~Bmu)&  Bma ); \
    Co ^= E##mo; \
    E##mu =   Bmu ^((~Bma)&  Bme ); \
    Cu ^= E##mu; \
    /* E##s? lanes */ \
    A##bi ^= Di; \
    Bsa = ROL64(A##bi, 62); \
    A##go ^= Do; \
    Bse = ROL64(A##go, 55); \
    A##ku ^= Du; \
    Bsi = ROL64(A##ku, 39); \
    A##ma ^= Da; \
    Bso = ROL64(A##ma, 41); \
    A##se ^= De; \
    Bsu = ROL64(A##se, 2); \
    E##sa =   Bsa ^((~Bse)&  Bsi ); \
    Ca ^= E##sa; \
    E##se =   Bse ^((~Bsi)&  Bso ); \
    Ce ^= E##se; \
    E##si =   Bsi ^((~Bso)&  Bsu ); \
    Ci ^= E##si; \
    E##so =   Bso ^((~Bsu)&  Bsa ); \
    Co ^= E##so; \
    E##su =   Bsu ^((~Bsa)&  Bse ); \
    Cu ^= E##su;
/* --- Code for round */

/* --- 64-bit lanes mapped to 64-bit words */

/*
 * thetaRhoPiChiIota(i, A, E)   [variant without UseBebigokimisa]
 *
 * Same round as thetaRhoPiChiIotaPrepareTheta for this variant, except that
 * Ca..Cu are only read, never rewritten.  Use it when no further round
 * follows, or when the caller recomputes Ca..Cu itself.
 *
 *   i  index into KeccakF1600RoundConstants[]; that entry is XORed into E##ba
 *   A  name prefix of the input lanes (each is XORed in place with the D value
 *      of its column, so the A lanes do not keep their entry values)
 *   E  name prefix of the output lanes; every lane is
 *      B[x] ^ ((~B[x+1]) & B[x+2]) over the rotated B?? temporaries
 *
 * On entry Ca..Cu must hold the XOR of each column of the A lanes.
 *
 * Needs ROL64(), KeccakF1600RoundConstants[] and the B??, C?, D? variables of
 * declareABCDE in scope.  Expands to a plain statement sequence; do not use it
 * as the body of a brace-less if/for/while.
 */
#define thetaRhoPiChiIota(i, A, E) \
    Da = Cu^ROL64(Ce, 1); \
    De = Ca^ROL64(Ci, 1); \
    Di = Ce^ROL64(Co, 1); \
    Do = Ci^ROL64(Cu, 1); \
    Du = Co^ROL64(Ca, 1); \
    /* E##b? lanes */ \
    A##ba ^= Da; \
    Bba = A##ba; \
    A##ge ^= De; \
    Bbe = ROL64(A##ge, 44); \
    A##ki ^= Di; \
    Bbi = ROL64(A##ki, 43); \
    A##mo ^= Do; \
    Bbo = ROL64(A##mo, 21); \
    A##su ^= Du; \
    Bbu = ROL64(A##su, 14); \
    E##ba =   Bba ^((~Bbe)&  Bbi ); \
    E##ba ^= KeccakF1600RoundConstants[i]; \
    E##be =   Bbe ^((~Bbi)&  Bbo ); \
    E##bi =   Bbi ^((~Bbo)&  Bbu ); \
    E##bo =   Bbo ^((~Bbu)&  Bba ); \
    E##bu =   Bbu ^((~Bba)&  Bbe ); \
    /* E##g? lanes */ \
    A##bo ^= Do; \
    Bga = ROL64(A##bo, 28); \
    A##gu ^= Du; \
    Bge = ROL64(A##gu, 20); \
    A##ka ^= Da; \
    Bgi = ROL64(A##ka, 3); \
    A##me ^= De; \
    Bgo = ROL64(A##me, 45); \
    A##si ^= Di; \
    Bgu = ROL64(A##si, 61); \
    E##ga =   Bga ^((~Bge)&  Bgi ); \
    E##ge =   Bge ^((~Bgi)&  Bgo ); \
    E##gi =   Bgi ^((~Bgo)&  Bgu ); \
    E##go =   Bgo ^((~Bgu)&  Bga ); \
    E##gu =   Bgu ^((~Bga)&  Bge ); \
    /* E##k? lanes */ \
    A##be ^= De; \
    Bka = ROL64(A##be, 1); \
    A##gi ^= Di; \
    Bke = ROL64(A##gi, 6); \
    A##ko ^= Do; \
    Bki = ROL64(A##ko, 25); \
    A##mu ^= Du; \
    Bko = ROL64(A##mu, 8); \
    A##sa ^= Da; \
    Bku = ROL64(A##sa, 18); \
    E##ka =   Bka ^((~Bke)&  Bki ); \
    E##ke =   Bke ^((~Bki)&  Bko ); \
    E##ki =   Bki ^((~Bko)&  Bku ); \
    E##ko =   Bko ^((~Bku)&  Bka ); \
    E##ku =   Bku ^((~Bka)&  Bke ); \
    /* E##m? lanes */ \
    A##bu ^= Du; \
    Bma = ROL64(A##bu, 27); \
    A##ga ^= Da; \
    Bme = ROL64(A##ga, 36); \
    A##ke ^= De; \
    Bmi = ROL64(A##ke, 10); \
    A##mi ^= Di; \
    Bmo = ROL64(A##mi, 15); \
    A##so ^= Do; \
    Bmu = ROL64(A##so, 56); \
    E##ma =   Bma ^((~Bme)&  Bmi ); \
    E##me =   Bme ^((~Bmi)&  Bmo ); \
    E##mi =   Bmi ^((~Bmo)&  Bmu ); \
    E##mo =   Bmo ^((~Bmu)&  Bma ); \
    E##mu =   Bmu ^((~Bma)&  Bme ); \
    /* E##s? lanes */ \
    A##bi ^= Di; \
    Bsa = ROL64(A##bi, 62); \
    A##go ^= Do; \
    Bse = ROL64(A##go, 55); \
    A##ku ^= Du; \
    Bsi = ROL64(A##ku, 39); \
    A##ma ^= Da; \
    Bso = ROL64(A##ma, 41); \
    A##se ^= De; \
    Bsu = ROL64(A##se, 2); \
    E##sa =   Bsa ^((~Bse)&  Bsi ); \
    E##se =   Bse ^((~Bsi)&  Bso ); \
    E##si =   Bsi ^((~Bso)&  Bsu ); \
    E##so =   Bso ^((~Bsu)&  Bsa ); \
    E##su =   Bsu ^((~Bsa)&  Bse );
#endif /* UseBebigokimisa */

/*
 * copyFromState(X, state)
 *
 * Loads state[0] .. state[24] into the 25 variables X##ba .. X##su.
 *
 *   X      name prefix of the destination variables (e.g. A)
 *   state  array or pointer expression with at least 25 readable elements;
 *          it is parenthesized in the expansion, so a cast expression is safe,
 *          but it is expanded 25 times and must have no side effects
 *
 * Index order: the consonant (b, g, k, m, s) advances every five elements and
 * the vowel (a, e, i, o, u) advances every element.
 * Expands to a statement sequence; no trailing ';' is needed.
 */
#define copyFromState(X, state) \
    X##ba = (state)[ 0]; \
    X##be = (state)[ 1]; \
    X##bi = (state)[ 2]; \
    X##bo = (state)[ 3]; \
    X##bu = (state)[ 4]; \
    X##ga = (state)[ 5]; \
    X##ge = (state)[ 6]; \
    X##gi = (state)[ 7]; \
    X##go = (state)[ 8]; \
    X##gu = (state)[ 9]; \
    X##ka = (state)[10]; \
    X##ke = (state)[11]; \
    X##ki = (state)[12]; \
    X##ko = (state)[13]; \
    X##ku = (state)[14]; \
    X##ma = (state)[15]; \
    X##me = (state)[16]; \
    X##mi = (state)[17]; \
    X##mo = (state)[18]; \
    X##mu = (state)[19]; \
    X##sa = (state)[20]; \
    X##se = (state)[21]; \
    X##si = (state)[22]; \
    X##so = (state)[23]; \
    X##su = (state)[24];
/*
 * copyToState(state, X)
 *
 * Stores the 25 variables X##ba .. X##su into state[0] .. state[24], using
 * the same index order as copyFromState (consonant advances every five
 * elements, vowel every element).
 *
 *   state  array or pointer expression with at least 25 writable elements;
 *          it is parenthesized in the expansion, so a cast expression is safe,
 *          but it is expanded 25 times and must have no side effects
 *   X      name prefix of the source variables (e.g. A)
 *
 * Expands to a statement sequence; no trailing ';' is needed.
 */
#define copyToState(state, X) \
    (state)[ 0] = X##ba; \
    (state)[ 1] = X##be; \
    (state)[ 2] = X##bi; \
    (state)[ 3] = X##bo; \
    (state)[ 4] = X##bu; \
    (state)[ 5] = X##ga; \
    (state)[ 6] = X##ge; \
    (state)[ 7] = X##gi; \
    (state)[ 8] = X##go; \
    (state)[ 9] = X##gu; \
    (state)[10] = X##ka; \
    (state)[11] = X##ke; \
    (state)[12] = X##ki; \
    (state)[13] = X##ko; \
    (state)[14] = X##ku; \
    (state)[15] = X##ma; \
    (state)[16] = X##me; \
    (state)[17] = X##mi; \
    (state)[18] = X##mo; \
    (state)[19] = X##mu; \
    (state)[20] = X##sa; \
    (state)[21] = X##se; \
    (state)[22] = X##si; \
    (state)[23] = X##so; \
    (state)[24] = X##su;
/*
 * copyStateVariables(X, Y)
 *
 * Assigns each of the 25 variables Y##ba .. Y##su to the variable with the
 * same suffix under prefix X (X##ba = Y##ba, ..., X##su = Y##su).
 *
 *   X  name prefix of the destination variables
 *   Y  name prefix of the source variables
 *
 * Both arguments are only token-pasted, so they must be bare identifier
 * prefixes.  Expands to a statement sequence; no trailing ';' is needed.
 */
#define copyStateVariables(X, Y) \
    X##ba = Y##ba; \
    X##be = Y##be; \
    X##bi = Y##bi; \
    X##bo = Y##bo; \
    X##bu = Y##bu; \
    X##ga = Y##ga; \
    X##ge = Y##ge; \
    X##gi = Y##gi; \
    X##go = Y##go; \
    X##gu = Y##gu; \
    X##ka = Y##ka; \
    X##ke = Y##ke; \
    X##ki = Y##ki; \
    X##ko = Y##ko; \
    X##ku = Y##ku; \
    X##ma = Y##ma; \
    X##me = Y##me; \
    X##mi = Y##mi; \
    X##mo = Y##mo; \
    X##mu = Y##mu; \
    X##sa = Y##sa; \
    X##se = Y##se; \
    X##si = Y##si; \
    X##so = Y##so; \
    X##su = Y##su;
/*
 * copyFromStateAndAdd(X, state, input, laneCount)
 *
 * Loads the 25 lanes of state into X##ba .. X##su and XORs input[k] into lane
 * k for every k < laneCount:
 *
 *     lane k = state[k] ^ input[k]    for 0 <= k < laneCount
 *     lane k = state[k]               for laneCount <= k < 25
 *
 * input[k] is never read for k >= laneCount.  A laneCount of 25 or more adds
 * all 25 input lanes; a laneCount below 1 adds none.
 *
 *   X          name prefix of the destination variables (e.g. A)
 *   state      array/pointer expression, 25 readable elements
 *   input      array/pointer expression, at least laneCount readable elements
 *   laneCount  integer expression
 *
 * The nested comparisons form a binary search on laneCount, so at most five
 * comparisons are evaluated on any path.  state, input and laneCount are
 * parenthesized in the expansion (cast or ?: expressions are safe) but each
 * is expanded many times, so none may have side effects.
 *
 * Expands to a single if/else statement; no trailing ';' is needed.
 */
#define copyFromStateAndAdd(X, state, input, laneCount) \
    if ((laneCount) < 16) { \
        if ((laneCount) < 8) { \
            if ((laneCount) < 4) { \
                if ((laneCount) < 2) { \
                    if ((laneCount) < 1) { \
                        X##ba = (state)[ 0]; \
                    } \
                    else { \
                        X##ba = (state)[ 0]^(input)[ 0]; \
                    } \
                    X##be = (state)[ 1]; \
                    X##bi = (state)[ 2]; \
                } \
                else { \
                    X##ba = (state)[ 0]^(input)[ 0]; \
                    X##be = (state)[ 1]^(input)[ 1]; \
                    if ((laneCount) < 3) { \
                        X##bi = (state)[ 2]; \
                    } \
                    else { \
                        X##bi = (state)[ 2]^(input)[ 2]; \
                    } \
                } \
                X##bo = (state)[ 3]; \
                X##bu = (state)[ 4]; \
                X##ga = (state)[ 5]; \
                X##ge = (state)[ 6]; \
            } \
            else { \
                X##ba = (state)[ 0]^(input)[ 0]; \
                X##be = (state)[ 1]^(input)[ 1]; \
                X##bi = (state)[ 2]^(input)[ 2]; \
                X##bo = (state)[ 3]^(input)[ 3]; \
                if ((laneCount) < 6) { \
                    if ((laneCount) < 5) { \
                        X##bu = (state)[ 4]; \
                    } \
                    else { \
                        X##bu = (state)[ 4]^(input)[ 4]; \
                    } \
                    X##ga = (state)[ 5]; \
                    X##ge = (state)[ 6]; \
                } \
                else { \
                    X##bu = (state)[ 4]^(input)[ 4]; \
                    X##ga = (state)[ 5]^(input)[ 5]; \
                    if ((laneCount) < 7) { \
                        X##ge = (state)[ 6]; \
                    } \
                    else { \
                        X##ge = (state)[ 6]^(input)[ 6]; \
                    } \
                } \
            } \
            X##gi = (state)[ 7]; \
            X##go = (state)[ 8]; \
            X##gu = (state)[ 9]; \
            X##ka = (state)[10]; \
            X##ke = (state)[11]; \
            X##ki = (state)[12]; \
            X##ko = (state)[13]; \
            X##ku = (state)[14]; \
        } \
        else { \
            X##ba = (state)[ 0]^(input)[ 0]; \
            X##be = (state)[ 1]^(input)[ 1]; \
            X##bi = (state)[ 2]^(input)[ 2]; \
            X##bo = (state)[ 3]^(input)[ 3]; \
            X##bu = (state)[ 4]^(input)[ 4]; \
            X##ga = (state)[ 5]^(input)[ 5]; \
            X##ge = (state)[ 6]^(input)[ 6]; \
            X##gi = (state)[ 7]^(input)[ 7]; \
            if ((laneCount) < 12) { \
                if ((laneCount) < 10) { \
                    if ((laneCount) < 9) { \
                        X##go = (state)[ 8]; \
                    } \
                    else { \
                        X##go = (state)[ 8]^(input)[ 8]; \
                    } \
                    X##gu = (state)[ 9]; \
                    X##ka = (state)[10]; \
                } \
                else { \
                    X##go = (state)[ 8]^(input)[ 8]; \
                    X##gu = (state)[ 9]^(input)[ 9]; \
                    if ((laneCount) < 11) { \
                        X##ka = (state)[10]; \
                    } \
                    else { \
                        X##ka = (state)[10]^(input)[10]; \
                    } \
                } \
                X##ke = (state)[11]; \
                X##ki = (state)[12]; \
                X##ko = (state)[13]; \
                X##ku = (state)[14]; \
            } \
            else { \
                X##go = (state)[ 8]^(input)[ 8]; \
                X##gu = (state)[ 9]^(input)[ 9]; \
                X##ka = (state)[10]^(input)[10]; \
                X##ke = (state)[11]^(input)[11]; \
                if ((laneCount) < 14) { \
                    if ((laneCount) < 13) { \
                        X##ki = (state)[12]; \
                    } \
                    else { \
                        X##ki = (state)[12]^(input)[12]; \
                    } \
                    X##ko = (state)[13]; \
                    X##ku = (state)[14]; \
                } \
                else { \
                    X##ki = (state)[12]^(input)[12]; \
                    X##ko = (state)[13]^(input)[13]; \
                    if ((laneCount) < 15) { \
                        X##ku = (state)[14]; \
                    } \
                    else { \
                        X##ku = (state)[14]^(input)[14]; \
                    } \
                } \
            } \
        } \
        X##ma = (state)[15]; \
        X##me = (state)[16]; \
        X##mi = (state)[17]; \
        X##mo = (state)[18]; \
        X##mu = (state)[19]; \
        X##sa = (state)[20]; \
        X##se = (state)[21]; \
        X##si = (state)[22]; \
        X##so = (state)[23]; \
        X##su = (state)[24]; \
    } \
    else { \
        X##ba = (state)[ 0]^(input)[ 0]; \
        X##be = (state)[ 1]^(input)[ 1]; \
        X##bi = (state)[ 2]^(input)[ 2]; \
        X##bo = (state)[ 3]^(input)[ 3]; \
        X##bu = (state)[ 4]^(input)[ 4]; \
        X##ga = (state)[ 5]^(input)[ 5]; \
        X##ge = (state)[ 6]^(input)[ 6]; \
        X##gi = (state)[ 7]^(input)[ 7]; \
        X##go = (state)[ 8]^(input)[ 8]; \
        X##gu = (state)[ 9]^(input)[ 9]; \
        X##ka = (state)[10]^(input)[10]; \
        X##ke = (state)[11]^(input)[11]; \
        X##ki = (state)[12]^(input)[12]; \
        X##ko = (state)[13]^(input)[13]; \
        X##ku = (state)[14]^(input)[14]; \
        X##ma = (state)[15]^(input)[15]; \
        if ((laneCount) < 24) { \
            if ((laneCount) < 20) { \
                if ((laneCount) < 18) { \
                    if ((laneCount) < 17) { \
                        X##me = (state)[16]; \
                    } \
                    else { \
                        X##me = (state)[16]^(input)[16]; \
                    } \
                    X##mi = (state)[17]; \
                    X##mo = (state)[18]; \
                } \
                else { \
                    X##me = (state)[16]^(input)[16]; \
                    X##mi = (state)[17]^(input)[17]; \
                    if ((laneCount) < 19) { \
                        X##mo = (state)[18]; \
                    } \
                    else { \
                        X##mo = (state)[18]^(input)[18]; \
                    } \
                } \
                X##mu = (state)[19]; \
                X##sa = (state)[20]; \
                X##se = (state)[21]; \
                X##si = (state)[22]; \
            } \
            else { \
                X##me = (state)[16]^(input)[16]; \
                X##mi = (state)[17]^(input)[17]; \
                X##mo = (state)[18]^(input)[18]; \
                X##mu = (state)[19]^(input)[19]; \
                if ((laneCount) < 22) { \
                    if ((laneCount) < 21) { \
                        X##sa = (state)[20]; \
                    } \
                    else { \
                        X##sa = (state)[20]^(input)[20]; \
                    } \
                    X##se = (state)[21]; \
                    X##si = (state)[22]; \
                } \
                else { \
                    X##sa = (state)[20]^(input)[20]; \
                    X##se = (state)[21]^(input)[21]; \
                    if ((laneCount) < 23) { \
                        X##si = (state)[22]; \
                    } \
                    else { \
                        X##si = (state)[22]^(input)[22]; \
                    } \
                } \
            } \
            X##so = (state)[23]; \
            X##su = (state)[24]; \
        } \
        else { \
            X##me = (state)[16]^(input)[16]; \
            X##mi = (state)[17]^(input)[17]; \
            X##mo = (state)[18]^(input)[18]; \
            X##mu = (state)[19]^(input)[19]; \
            X##sa = (state)[20]^(input)[20]; \
            X##se = (state)[21]^(input)[21]; \
            X##si = (state)[22]^(input)[22]; \
            X##so = (state)[23]^(input)[23]; \
            if ((laneCount) < 25) { \
                X##su = (state)[24]; \
            } \
            else { \
                X##su = (state)[24]^(input)[24]; \
            } \
        } \
    }
/*
 * addInput(X, input, laneCount)
 *
 * XORs input[k] into the lane variable X##.. of index k, for every
 * k < laneCount (lane order as in copyFromState: ba, be, bi, bo, bu, ga, ...).
 * Lanes with k >= laneCount are left untouched and input[k] is not read for
 * them.  A laneCount of 25 or more adds all 25 lanes; below 1 adds none.
 *
 *   X          name prefix of the lane variables to update (e.g. A)
 *   input      array/pointer expression, at least laneCount readable elements
 *   laneCount  integer expression
 *
 * laneCount == 21 is tested first and handled by a flat run of 21 XORs
 * (presumably a fast path for a frequently used block size - the result is
 * the same as the general path).  Every other value goes through a binary
 * search on laneCount.
 *
 * input and laneCount are parenthesized in the expansion (cast or ?:
 * expressions are safe) but each is expanded many times, so neither may have
 * side effects.  Expands to a single if/else statement; no trailing ';' is
 * needed.
 */
#define addInput(X, input, laneCount) \
    if ((laneCount) == 21) { \
        X##ba ^= (input)[ 0]; \
        X##be ^= (input)[ 1]; \
        X##bi ^= (input)[ 2]; \
        X##bo ^= (input)[ 3]; \
        X##bu ^= (input)[ 4]; \
        X##ga ^= (input)[ 5]; \
        X##ge ^= (input)[ 6]; \
        X##gi ^= (input)[ 7]; \
        X##go ^= (input)[ 8]; \
        X##gu ^= (input)[ 9]; \
        X##ka ^= (input)[10]; \
        X##ke ^= (input)[11]; \
        X##ki ^= (input)[12]; \
        X##ko ^= (input)[13]; \
        X##ku ^= (input)[14]; \
        X##ma ^= (input)[15]; \
        X##me ^= (input)[16]; \
        X##mi ^= (input)[17]; \
        X##mo ^= (input)[18]; \
        X##mu ^= (input)[19]; \
        X##sa ^= (input)[20]; \
    } \
    else if ((laneCount) < 16) { \
        if ((laneCount) < 8) { \
            if ((laneCount) < 4) { \
                if ((laneCount) < 2) { \
                    if ((laneCount) >= 1) { \
                        X##ba ^= (input)[ 0]; \
                    } \
                } \
                else { \
                    X##ba ^= (input)[ 0]; \
                    X##be ^= (input)[ 1]; \
                    if ((laneCount) >= 3) { \
                        X##bi ^= (input)[ 2]; \
                    } \
                } \
            } \
            else { \
                X##ba ^= (input)[ 0]; \
                X##be ^= (input)[ 1]; \
                X##bi ^= (input)[ 2]; \
                X##bo ^= (input)[ 3]; \
                if ((laneCount) < 6) { \
                    if ((laneCount) >= 5) { \
                        X##bu ^= (input)[ 4]; \
                    } \
                } \
                else { \
                    X##bu ^= (input)[ 4]; \
                    X##ga ^= (input)[ 5]; \
                    if ((laneCount) >= 7) { \
                        X##ge ^= (input)[ 6]; \
                    } \
                } \
            } \
        } \
        else { \
            X##ba ^= (input)[ 0]; \
            X##be ^= (input)[ 1]; \
            X##bi ^= (input)[ 2]; \
            X##bo ^= (input)[ 3]; \
            X##bu ^= (input)[ 4]; \
            X##ga ^= (input)[ 5]; \
            X##ge ^= (input)[ 6]; \
            X##gi ^= (input)[ 7]; \
            if ((laneCount) < 12) { \
                if ((laneCount) < 10) { \
                    if ((laneCount) >= 9) { \
                        X##go ^= (input)[ 8]; \
                    } \
                } \
                else { \
                    X##go ^= (input)[ 8]; \
                    X##gu ^= (input)[ 9]; \
                    if ((laneCount) >= 11) { \
                        X##ka ^= (input)[10]; \
                    } \
                } \
            } \
            else { \
                X##go ^= (input)[ 8]; \
                X##gu ^= (input)[ 9]; \
                X##ka ^= (input)[10]; \
                X##ke ^= (input)[11]; \
                if ((laneCount) < 14) { \
                    if ((laneCount) >= 13) { \
                        X##ki ^= (input)[12]; \
                    } \
                } \
                else { \
                    X##ki ^= (input)[12]; \
                    X##ko ^= (input)[13]; \
                    if ((laneCount) >= 15) { \
                        X##ku ^= (input)[14]; \
                    } \
                } \
            } \
        } \
    } \
    else { \
        X##ba ^= (input)[ 0]; \
        X##be ^= (input)[ 1]; \
        X##bi ^= (input)[ 2]; \
        X##bo ^= (input)[ 3]; \
        X##bu ^= (input)[ 4]; \
        X##ga ^= (input)[ 5]; \
        X##ge ^= (input)[ 6]; \
        X##gi ^= (input)[ 7]; \
        X##go ^= (input)[ 8]; \
        X##gu ^= (input)[ 9]; \
        X##ka ^= (input)[10]; \
        X##ke ^= (input)[11]; \
        X##ki ^= (input)[12]; \
        X##ko ^= (input)[13]; \
        X##ku ^= (input)[14]; \
        X##ma ^= (input)[15]; \
        if ((laneCount) < 24) { \
            if ((laneCount) < 20) { \
                if ((laneCount) < 18) { \
                    if ((laneCount) >= 17) { \
                        X##me ^= (input)[16]; \
                    } \
                } \
                else { \
                    X##me ^= (input)[16]; \
                    X##mi ^= (input)[17]; \
                    if ((laneCount) >= 19) { \
                        X##mo ^= (input)[18]; \
                    } \
                } \
            } \
            else { \
                X##me ^= (input)[16]; \
                X##mi ^= (input)[17]; \
                X##mo ^= (input)[18]; \
                X##mu ^= (input)[19]; \
                if ((laneCount) < 22) { \
                    if ((laneCount) >= 21) { \
                        X##sa ^= (input)[20]; \
                    } \
                } \
                else { \
                    X##sa ^= (input)[20]; \
                    X##se ^= (input)[21]; \
                    if ((laneCount) >= 23) { \
                        X##si ^= (input)[22]; \
                    } \
                } \
            } \
        } \
        else { \
            X##me ^= (input)[16]; \
            X##mi ^= (input)[17]; \
            X##mo ^= (input)[18]; \
            X##mu ^= (input)[19]; \
            X##sa ^= (input)[20]; \
            X##se ^= (input)[21]; \
            X##si ^= (input)[22]; \
            X##so ^= (input)[23]; \
            if ((laneCount) >= 25) { \
                X##su ^= (input)[24]; \
            } \
        } \
    }
973#ifdef UseBebigokimisa
974
/*
 * copyToStateAndOutput(X, state, output, laneCount)
 * --- variant used when UseBebigokimisa is defined (lane complementing).
 *
 * Stores the 25 lane variables X##ba .. X##su into state[0..24] unchanged and,
 * in addition, writes the first laneCount lanes to output[0..laneCount-1].
 * A laneCount below 1 writes nothing to output; 25 or more writes all 25 lanes.
 *
 * Lanes be, bi, go, ki, mi and sa (indices 1, 2, 8, 12, 17 and 20) are written
 * to output bitwise-complemented, whereas state always receives the raw value.
 * NOTE(review): presumably these are the lanes held inverted by the
 * 'bebigokimisa' lane complementing pattern -- confirm against the round code.
 *
 *   X          prefix of the lane variables (token-pasted, e.g. A -> Aba)
 *   state      expression indexable with [0..24]; receives every lane
 *   output     expression indexable with [0..laneCount-1]
 *   laneCount  number of lanes to write to output; expanded many times, so it
 *              must not have side effects
 *
 * Stores are issued in increasing lane order, state[i] before output[i].
 * All array/count arguments are parenthesized so that expressions such as
 * "buf + 1" or "c ? a : b" expand correctly.  The expansion is one compound
 * statement and, as before, needs no trailing semicolon.
 */
#define copyToStateAndOutput(X, state, output, laneCount) \
    { \
        (state)[ 0] = X##ba; if ((laneCount) >=  1) { (output)[ 0] =  X##ba; } \
        (state)[ 1] = X##be; if ((laneCount) >=  2) { (output)[ 1] = ~X##be; } \
        (state)[ 2] = X##bi; if ((laneCount) >=  3) { (output)[ 2] = ~X##bi; } \
        (state)[ 3] = X##bo; if ((laneCount) >=  4) { (output)[ 3] =  X##bo; } \
        (state)[ 4] = X##bu; if ((laneCount) >=  5) { (output)[ 4] =  X##bu; } \
        (state)[ 5] = X##ga; if ((laneCount) >=  6) { (output)[ 5] =  X##ga; } \
        (state)[ 6] = X##ge; if ((laneCount) >=  7) { (output)[ 6] =  X##ge; } \
        (state)[ 7] = X##gi; if ((laneCount) >=  8) { (output)[ 7] =  X##gi; } \
        (state)[ 8] = X##go; if ((laneCount) >=  9) { (output)[ 8] = ~X##go; } \
        (state)[ 9] = X##gu; if ((laneCount) >= 10) { (output)[ 9] =  X##gu; } \
        (state)[10] = X##ka; if ((laneCount) >= 11) { (output)[10] =  X##ka; } \
        (state)[11] = X##ke; if ((laneCount) >= 12) { (output)[11] =  X##ke; } \
        (state)[12] = X##ki; if ((laneCount) >= 13) { (output)[12] = ~X##ki; } \
        (state)[13] = X##ko; if ((laneCount) >= 14) { (output)[13] =  X##ko; } \
        (state)[14] = X##ku; if ((laneCount) >= 15) { (output)[14] =  X##ku; } \
        (state)[15] = X##ma; if ((laneCount) >= 16) { (output)[15] =  X##ma; } \
        (state)[16] = X##me; if ((laneCount) >= 17) { (output)[16] =  X##me; } \
        (state)[17] = X##mi; if ((laneCount) >= 18) { (output)[17] = ~X##mi; } \
        (state)[18] = X##mo; if ((laneCount) >= 19) { (output)[18] =  X##mo; } \
        (state)[19] = X##mu; if ((laneCount) >= 20) { (output)[19] =  X##mu; } \
        (state)[20] = X##sa; if ((laneCount) >= 21) { (output)[20] = ~X##sa; } \
        (state)[21] = X##se; if ((laneCount) >= 22) { (output)[21] =  X##se; } \
        (state)[22] = X##si; if ((laneCount) >= 23) { (output)[22] =  X##si; } \
        (state)[23] = X##so; if ((laneCount) >= 24) { (output)[23] =  X##so; } \
        (state)[24] = X##su; if ((laneCount) >= 25) { (output)[24] =  X##su; } \
    }
1232
/*
 * output(X, output, laneCount)
 * --- variant used when UseBebigokimisa is defined (lane complementing).
 *
 * Writes the first laneCount lane variables (X##ba, X##be, ... in lane order)
 * to output[0..laneCount-1].  A laneCount below 1 writes nothing; 25 or more
 * writes all 25 lanes.  The lane variables themselves are not modified.
 *
 * Lanes be, bi, go, ki, mi and sa (indices 1, 2, 8, 12, 17 and 20) are written
 * bitwise-complemented.
 * NOTE(review): presumably these are the lanes held inverted by the
 * 'bebigokimisa' lane complementing pattern -- confirm against the round code.
 *
 *   X          prefix of the lane variables (token-pasted, e.g. A -> Aba)
 *   output     expression indexable with [0..laneCount-1]
 *   laneCount  number of lanes to write; expanded many times, so it must not
 *              have side effects
 *
 * Both output and laneCount are parenthesized so that expression arguments
 * expand correctly.  The expansion is one compound statement and, as before,
 * needs no trailing semicolon.
 */
#define output(X, output, laneCount) \
    { \
        if ((laneCount) >=  1) { (output)[ 0] =  X##ba; } \
        if ((laneCount) >=  2) { (output)[ 1] = ~X##be; } \
        if ((laneCount) >=  3) { (output)[ 2] = ~X##bi; } \
        if ((laneCount) >=  4) { (output)[ 3] =  X##bo; } \
        if ((laneCount) >=  5) { (output)[ 4] =  X##bu; } \
        if ((laneCount) >=  6) { (output)[ 5] =  X##ga; } \
        if ((laneCount) >=  7) { (output)[ 6] =  X##ge; } \
        if ((laneCount) >=  8) { (output)[ 7] =  X##gi; } \
        if ((laneCount) >=  9) { (output)[ 8] = ~X##go; } \
        if ((laneCount) >= 10) { (output)[ 9] =  X##gu; } \
        if ((laneCount) >= 11) { (output)[10] =  X##ka; } \
        if ((laneCount) >= 12) { (output)[11] =  X##ke; } \
        if ((laneCount) >= 13) { (output)[12] = ~X##ki; } \
        if ((laneCount) >= 14) { (output)[13] =  X##ko; } \
        if ((laneCount) >= 15) { (output)[14] =  X##ku; } \
        if ((laneCount) >= 16) { (output)[15] =  X##ma; } \
        if ((laneCount) >= 17) { (output)[16] =  X##me; } \
        if ((laneCount) >= 18) { (output)[17] = ~X##mi; } \
        if ((laneCount) >= 19) { (output)[18] =  X##mo; } \
        if ((laneCount) >= 20) { (output)[19] =  X##mu; } \
        if ((laneCount) >= 21) { (output)[20] = ~X##sa; } \
        if ((laneCount) >= 22) { (output)[21] =  X##se; } \
        if ((laneCount) >= 23) { (output)[22] =  X##si; } \
        if ((laneCount) >= 24) { (output)[23] =  X##so; } \
        if ((laneCount) >= 25) { (output)[24] =  X##su; } \
    }
1377
/*
 * wrapOne(X, input, output, index, name)
 * XORs input[index] into the lane variable X##name, then writes the updated
 * lane to output[index].
 *
 * input and output are parenthesized so pointer expressions such as "buf + 1"
 * expand correctly.  The two statements are wrapped in braces so the macro
 * behaves as a single statement (e.g. under an unbraced "if"); callers may
 * still invoke it without a trailing semicolon.
 */
#define wrapOne(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = X##name; \
    }
1381
/*
 * wrapOneInvert(X, input, output, index, name)
 * --- variant used when UseBebigokimisa is defined.
 * XORs input[index] into the lane variable X##name, then writes the bitwise
 * complement of the updated lane to output[index].  The lane variable itself
 * is stored uncomplemented.
 *
 * input and output are parenthesized so pointer expressions expand correctly.
 * The statements are wrapped in braces so the macro behaves as a single
 * statement; callers may still invoke it without a trailing semicolon.
 */
#define wrapOneInvert(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = ~X##name; \
    }
1385
/*
 * unwrapOne(X, input, output, index, name)
 * Writes input[index] XOR X##name to output[index], then XORs that freshly
 * written output word back into the lane variable X##name.  Algebraically the
 * lane ends up equal to the original input word.
 *
 * The output word is written before the lane is updated and is re-read from
 * output[index], exactly as in the original statement order.
 *
 * input and output are parenthesized so pointer expressions expand correctly.
 * The statements are wrapped in braces so the macro behaves as a single
 * statement; callers may still invoke it without a trailing semicolon.
 */
#define unwrapOne(X, input, output, index, name) \
    { \
        (output)[index] = (input)[index] ^ X##name; \
        X##name ^= (output)[index]; \
    }
1389
/*
 * unwrapOneInvert(X, input, output, index, name)
 * --- variant used when UseBebigokimisa is defined.
 * Writes the bitwise complement of (input[index] XOR X##name) to
 * output[index], then XORs that freshly written output word back into the
 * lane variable X##name.  Algebraically the lane ends up equal to the
 * complement of the original input word.
 *
 * input and output are parenthesized so pointer expressions expand correctly.
 * The statements are wrapped in braces so the macro behaves as a single
 * statement; callers may still invoke it without a trailing semicolon.
 * The definition no longer ends in a line-continuation backslash, so the line
 * following the macro can never be spliced into it.
 */
#define unwrapOneInvert(X, input, output, index, name) \
    { \
        (output)[index] = ~((input)[index] ^ X##name); \
        X##name ^= (output)[index]; \
    }
1393
1394#else /* UseBebigokimisa */
1395
1396
/*
 * copyToStateAndOutput(X, state, output, laneCount)
 * --- variant used when UseBebigokimisa is NOT defined (no lane complementing).
 *
 * Stores the 25 lane variables X##ba .. X##su into state[0..24] and, in
 * addition, writes the first laneCount lanes, unmodified, to
 * output[0..laneCount-1].  A laneCount below 1 writes nothing to output;
 * 25 or more writes all 25 lanes.
 *
 *   X          prefix of the lane variables (token-pasted, e.g. A -> Aba)
 *   state      expression indexable with [0..24]; receives every lane
 *   output     expression indexable with [0..laneCount-1]
 *   laneCount  number of lanes to write to output; expanded many times, so it
 *              must not have side effects
 *
 * Stores are issued in increasing lane order, state[i] before output[i].
 * All array/count arguments are parenthesized so that expressions such as
 * "buf + 1" or "c ? a : b" expand correctly.  The expansion is one compound
 * statement and, as before, needs no trailing semicolon.
 */
#define copyToStateAndOutput(X, state, output, laneCount) \
    { \
        (state)[ 0] = X##ba; if ((laneCount) >=  1) { (output)[ 0] = X##ba; } \
        (state)[ 1] = X##be; if ((laneCount) >=  2) { (output)[ 1] = X##be; } \
        (state)[ 2] = X##bi; if ((laneCount) >=  3) { (output)[ 2] = X##bi; } \
        (state)[ 3] = X##bo; if ((laneCount) >=  4) { (output)[ 3] = X##bo; } \
        (state)[ 4] = X##bu; if ((laneCount) >=  5) { (output)[ 4] = X##bu; } \
        (state)[ 5] = X##ga; if ((laneCount) >=  6) { (output)[ 5] = X##ga; } \
        (state)[ 6] = X##ge; if ((laneCount) >=  7) { (output)[ 6] = X##ge; } \
        (state)[ 7] = X##gi; if ((laneCount) >=  8) { (output)[ 7] = X##gi; } \
        (state)[ 8] = X##go; if ((laneCount) >=  9) { (output)[ 8] = X##go; } \
        (state)[ 9] = X##gu; if ((laneCount) >= 10) { (output)[ 9] = X##gu; } \
        (state)[10] = X##ka; if ((laneCount) >= 11) { (output)[10] = X##ka; } \
        (state)[11] = X##ke; if ((laneCount) >= 12) { (output)[11] = X##ke; } \
        (state)[12] = X##ki; if ((laneCount) >= 13) { (output)[12] = X##ki; } \
        (state)[13] = X##ko; if ((laneCount) >= 14) { (output)[13] = X##ko; } \
        (state)[14] = X##ku; if ((laneCount) >= 15) { (output)[14] = X##ku; } \
        (state)[15] = X##ma; if ((laneCount) >= 16) { (output)[15] = X##ma; } \
        (state)[16] = X##me; if ((laneCount) >= 17) { (output)[16] = X##me; } \
        (state)[17] = X##mi; if ((laneCount) >= 18) { (output)[17] = X##mi; } \
        (state)[18] = X##mo; if ((laneCount) >= 19) { (output)[18] = X##mo; } \
        (state)[19] = X##mu; if ((laneCount) >= 20) { (output)[19] = X##mu; } \
        (state)[20] = X##sa; if ((laneCount) >= 21) { (output)[20] = X##sa; } \
        (state)[21] = X##se; if ((laneCount) >= 22) { (output)[21] = X##se; } \
        (state)[22] = X##si; if ((laneCount) >= 23) { (output)[22] = X##si; } \
        (state)[23] = X##so; if ((laneCount) >= 24) { (output)[23] = X##so; } \
        (state)[24] = X##su; if ((laneCount) >= 25) { (output)[24] = X##su; } \
    }
1654
/*
 * output(X, output, laneCount)
 * --- variant used when UseBebigokimisa is NOT defined (no lane complementing).
 *
 * Writes the first laneCount lane variables (X##ba, X##be, ... in lane order),
 * unmodified, to output[0..laneCount-1].  A laneCount below 1 writes nothing;
 * 25 or more writes all 25 lanes.  The lane variables are not modified.
 *
 *   X          prefix of the lane variables (token-pasted, e.g. A -> Aba)
 *   output     expression indexable with [0..laneCount-1]
 *   laneCount  number of lanes to write; expanded many times, so it must not
 *              have side effects
 *
 * Both output and laneCount are parenthesized so that expression arguments
 * expand correctly.  The expansion is one compound statement and, as before,
 * needs no trailing semicolon.
 */
#define output(X, output, laneCount) \
    { \
        if ((laneCount) >=  1) { (output)[ 0] = X##ba; } \
        if ((laneCount) >=  2) { (output)[ 1] = X##be; } \
        if ((laneCount) >=  3) { (output)[ 2] = X##bi; } \
        if ((laneCount) >=  4) { (output)[ 3] = X##bo; } \
        if ((laneCount) >=  5) { (output)[ 4] = X##bu; } \
        if ((laneCount) >=  6) { (output)[ 5] = X##ga; } \
        if ((laneCount) >=  7) { (output)[ 6] = X##ge; } \
        if ((laneCount) >=  8) { (output)[ 7] = X##gi; } \
        if ((laneCount) >=  9) { (output)[ 8] = X##go; } \
        if ((laneCount) >= 10) { (output)[ 9] = X##gu; } \
        if ((laneCount) >= 11) { (output)[10] = X##ka; } \
        if ((laneCount) >= 12) { (output)[11] = X##ke; } \
        if ((laneCount) >= 13) { (output)[12] = X##ki; } \
        if ((laneCount) >= 14) { (output)[13] = X##ko; } \
        if ((laneCount) >= 15) { (output)[14] = X##ku; } \
        if ((laneCount) >= 16) { (output)[15] = X##ma; } \
        if ((laneCount) >= 17) { (output)[16] = X##me; } \
        if ((laneCount) >= 18) { (output)[17] = X##mi; } \
        if ((laneCount) >= 19) { (output)[18] = X##mo; } \
        if ((laneCount) >= 20) { (output)[19] = X##mu; } \
        if ((laneCount) >= 21) { (output)[20] = X##sa; } \
        if ((laneCount) >= 22) { (output)[21] = X##se; } \
        if ((laneCount) >= 23) { (output)[22] = X##si; } \
        if ((laneCount) >= 24) { (output)[23] = X##so; } \
        if ((laneCount) >= 25) { (output)[24] = X##su; } \
    }
1799
/*
 * wrapOne(X, input, output, index, name)
 * XORs input[index] into the lane variable X##name, then writes the updated
 * lane to output[index].
 *
 * input and output are parenthesized so pointer expressions such as "buf + 1"
 * expand correctly.  The two statements are wrapped in braces so the macro
 * behaves as a single statement (e.g. under an unbraced "if"); callers may
 * still invoke it without a trailing semicolon.
 */
#define wrapOne(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = X##name; \
    }
1803
/*
 * wrapOneInvert(X, input, output, index, name)
 * --- variant used when UseBebigokimisa is NOT defined.
 * Without lane complementing no inversion is applied: the macro XORs
 * input[index] into the lane variable X##name and writes the updated lane,
 * unmodified, to output[index].
 *
 * input and output are parenthesized so pointer expressions expand correctly.
 * The statements are wrapped in braces so the macro behaves as a single
 * statement; callers may still invoke it without a trailing semicolon.
 */
#define wrapOneInvert(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = X##name; \
    }
1807
/*
 * unwrapOne(X, input, output, index, name)
 * Writes input[index] XOR X##name to output[index], then XORs that freshly
 * written output word back into the lane variable X##name.  Algebraically the
 * lane ends up equal to the original input word.
 *
 * The output word is written before the lane is updated and is re-read from
 * output[index], exactly as in the original statement order.
 *
 * input and output are parenthesized so pointer expressions expand correctly.
 * The statements are wrapped in braces so the macro behaves as a single
 * statement; callers may still invoke it without a trailing semicolon.
 */
#define unwrapOne(X, input, output, index, name) \
    { \
        (output)[index] = (input)[index] ^ X##name; \
        X##name ^= (output)[index]; \
    }
1811
/*
 * unwrapOneInvert(X, input, output, index, name)
 * --- variant used when UseBebigokimisa is NOT defined.
 * Without lane complementing no inversion is applied: the macro writes
 * input[index] XOR X##name to output[index], then XORs that freshly written
 * output word back into the lane variable X##name.  Algebraically the lane
 * ends up equal to the original input word.
 *
 * input and output are parenthesized so pointer expressions expand correctly.
 * The statements are wrapped in braces so the macro behaves as a single
 * statement; callers may still invoke it without a trailing semicolon.
 */
#define unwrapOneInvert(X, input, output, index, name) \
    { \
        (output)[index] = (input)[index] ^ X##name; \
        X##name ^= (output)[index]; \
    }
1815
1816#endif
1817
1818#define wrap(X, input, output, laneCount, trailingBits) \
1819    if (laneCount < 16) { \
1820        if (laneCount < 8) { \
1821            if (laneCount < 4) { \
1822                if (laneCount < 2) { \
1823                    if (laneCount < 1) { \
1824                        X##ba ^= trailingBits; \
1825                    } \
1826                    else { \
1827                        wrapOne(X, input, output, 0, ba) \
1828                        X##be ^= trailingBits; \
1829                    } \
1830                } \
1831                else { \
1832                    wrapOne(X, input, output, 0, ba) \
1833                    wrapOneInvert(X, input, output, 1, be) \
1834                    if (laneCount < 3) { \
1835                        X##bi ^= trailingBits; \
1836                    } \
1837                    else { \
1838                        wrapOneInvert(X, input, output, 2, bi) \
1839                        X##bo ^= trailingBits; \
1840                    } \
1841                } \
1842            } \
1843            else { \
1844                wrapOne(X, input, output, 0, ba) \
1845                wrapOneInvert(X, input, output, 1, be) \
1846                wrapOneInvert(X, input, output, 2, bi) \
1847                wrapOne(X, input, output, 3, bo) \
1848                if (laneCount < 6) { \
1849                    if (laneCount < 5) { \
1850                        X##bu ^= trailingBits; \
1851                    } \
1852                    else { \
1853                        wrapOne(X, input, output, 4, bu) \
1854                        X##ga ^= trailingBits; \
1855                    } \
1856                } \
1857                else { \
1858                    wrapOne(X, input, output, 4, bu) \
1859                    wrapOne(X, input, output, 5, ga) \
1860                    if (laneCount < 7) { \
1861                        X##ge ^= trailingBits; \
1862                    } \
1863                    else { \
1864                        wrapOne(X, input, output, 6, ge) \
1865                        X##gi ^= trailingBits; \
1866                    } \
1867                } \
1868            } \
1869        } \
1870        else { \
1871            wrapOne(X, input, output, 0, ba) \
1872            wrapOneInvert(X, input, output, 1, be) \
1873            wrapOneInvert(X, input, output, 2, bi) \
1874            wrapOne(X, input, output, 3, bo) \
1875            wrapOne(X, input, output, 4, bu) \
1876            wrapOne(X, input, output, 5, ga) \
1877            wrapOne(X, input, output, 6, ge) \
1878            wrapOne(X, input, output, 7, gi) \
1879            if (laneCount < 12) { \
1880                if (laneCount < 10) { \
1881                    if (laneCount < 9) { \
1882                        X##go ^= trailingBits; \
1883                    } \
1884                    else { \
1885                        wrapOneInvert(X, input, output, 8, go) \
1886                        X##gu ^= trailingBits; \
1887                    } \
1888                } \
1889                else { \
1890                    wrapOneInvert(X, input, output, 8, go) \
1891                    wrapOne(X, input, output, 9, gu) \
1892                    if (laneCount < 11) { \
1893                        X##ka ^= trailingBits; \
1894                    } \
1895                    else { \
1896                        wrapOne(X, input, output, 10, ka) \
1897                        X##ke ^= trailingBits; \
1898                    } \
1899                } \
1900            } \
1901            else { \
1902                wrapOneInvert(X, input, output, 8, go) \
1903                wrapOne(X, input, output, 9, gu) \
1904                wrapOne(X, input, output, 10, ka) \
1905                wrapOne(X, input, output, 11, ke) \
1906                if (laneCount < 14) { \
1907                    if (laneCount < 13) { \
1908                        X##ki ^= trailingBits; \
1909                    } \
1910                    else { \
1911                        wrapOneInvert(X, input, output, 12, ki) \
1912                        X##ko ^= trailingBits; \
1913                    } \
1914                } \
1915                else { \
1916                    wrapOneInvert(X, input, output, 12, ki) \
1917                    wrapOne(X, input, output, 13, ko) \
1918                    if (laneCount < 15) { \
1919                        X##ku ^= trailingBits; \
1920                    } \
1921                    else { \
1922                        wrapOne(X, input, output, 14, ku) \
1923                        X##ma ^= trailingBits; \
1924                    } \
1925                } \
1926            } \
1927        } \
1928    } \
1929    else { \
1930        wrapOne(X, input, output, 0, ba) \
1931        wrapOneInvert(X, input, output, 1, be) \
1932        wrapOneInvert(X, input, output, 2, bi) \
1933        wrapOne(X, input, output, 3, bo) \
1934        wrapOne(X, input, output, 4, bu) \
1935        wrapOne(X, input, output, 5, ga) \
1936        wrapOne(X, input, output, 6, ge) \
1937        wrapOne(X, input, output, 7, gi) \
1938        wrapOneInvert(X, input, output, 8, go) \
1939        wrapOne(X, input, output, 9, gu) \
1940        wrapOne(X, input, output, 10, ka) \
1941        wrapOne(X, input, output, 11, ke) \
1942        wrapOneInvert(X, input, output, 12, ki) \
1943        wrapOne(X, input, output, 13, ko) \
1944        wrapOne(X, input, output, 14, ku) \
1945        wrapOne(X, input, output, 15, ma) \
1946        if (laneCount < 24) { \
1947            if (laneCount < 20) { \
1948                if (laneCount < 18) { \
1949                    if (laneCount < 17) { \
1950                        X##me ^= trailingBits; \
1951                    } \
1952                    else { \
1953                        wrapOne(X, input, output, 16, me) \
1954                        X##mi ^= trailingBits; \
1955                    } \
1956                } \
1957                else { \
1958                    wrapOne(X, input, output, 16, me) \
1959                    wrapOneInvert(X, input, output, 17, mi) \
1960                    if (laneCount < 19) { \
1961                        X##mo ^= trailingBits; \
1962                    } \
1963                    else { \
1964                        wrapOne(X, input, output, 18, mo) \
1965                        X##mu ^= trailingBits; \
1966                    } \
1967                } \
1968            } \
1969            else { \
1970                wrapOne(X, input, output, 16, me) \
1971                wrapOneInvert(X, input, output, 17, mi) \
1972                wrapOne(X, input, output, 18, mo) \
1973                wrapOne(X, input, output, 19, mu) \
1974                if (laneCount < 22) { \
1975                    if (laneCount < 21) { \
1976                        X##sa ^= trailingBits; \
1977                    } \
1978                    else { \
1979                        wrapOneInvert(X, input, output, 20, sa) \
1980                        X##se ^= trailingBits; \
1981                    } \
1982                } \
1983                else { \
1984                    wrapOneInvert(X, input, output, 20, sa) \
1985                    wrapOne(X, input, output, 21, se) \
1986                    if (laneCount < 23) { \
1987                        X##si ^= trailingBits; \
1988                    } \
1989                    else { \
1990                        wrapOne(X, input, output, 22, si) \
1991                        X##so ^= trailingBits; \
1992                    } \
1993                } \
1994            } \
1995        } \
1996        else { \
1997            wrapOne(X, input, output, 16, me) \
1998            wrapOneInvert(X, input, output, 17, mi) \
1999            wrapOne(X, input, output, 18, mo) \
2000            wrapOne(X, input, output, 19, mu) \
2001            wrapOneInvert(X, input, output, 20, sa) \
2002            wrapOne(X, input, output, 21, se) \
2003            wrapOne(X, input, output, 22, si) \
2004            wrapOne(X, input, output, 23, so) \
2005            if (laneCount < 25) { \
2006                X##su ^= trailingBits; \
2007            } \
2008            else { \
2009                wrapOne(X, input, output, 24, su) \
2010            } \
2011        } \
2012    }
2013
/*
 * Private helpers for unwrap(): each one applies the per-lane unwrap step to
 * four consecutive lanes, in increasing index order.  Like unwrapOne and
 * unwrapOneInvert themselves, they are invoked without a trailing semicolon.
 */

/* Lanes 0..3 (ba, be, bi, bo); be and bi use the inverting variant. */
#define unwrapLanes0to3(X, input, output) \
    unwrapOne(X, input, output, 0, ba) \
    unwrapOneInvert(X, input, output, 1, be) \
    unwrapOneInvert(X, input, output, 2, bi) \
    unwrapOne(X, input, output, 3, bo)

/* Lanes 4..7 (bu, ga, ge, gi); none inverted. */
#define unwrapLanes4to7(X, input, output) \
    unwrapOne(X, input, output, 4, bu) \
    unwrapOne(X, input, output, 5, ga) \
    unwrapOne(X, input, output, 6, ge) \
    unwrapOne(X, input, output, 7, gi)

/* Lanes 8..11 (go, gu, ka, ke); go uses the inverting variant. */
#define unwrapLanes8to11(X, input, output) \
    unwrapOneInvert(X, input, output, 8, go) \
    unwrapOne(X, input, output, 9, gu) \
    unwrapOne(X, input, output, 10, ka) \
    unwrapOne(X, input, output, 11, ke)

/* Lanes 12..15 (ki, ko, ku, ma); ki uses the inverting variant. */
#define unwrapLanes12to15(X, input, output) \
    unwrapOneInvert(X, input, output, 12, ki) \
    unwrapOne(X, input, output, 13, ko) \
    unwrapOne(X, input, output, 14, ku) \
    unwrapOne(X, input, output, 15, ma)

/* Lanes 16..19 (me, mi, mo, mu); mi uses the inverting variant. */
#define unwrapLanes16to19(X, input, output) \
    unwrapOne(X, input, output, 16, me) \
    unwrapOneInvert(X, input, output, 17, mi) \
    unwrapOne(X, input, output, 18, mo) \
    unwrapOne(X, input, output, 19, mu)

/* Lanes 20..23 (sa, se, si, so); sa uses the inverting variant. */
#define unwrapLanes20to23(X, input, output) \
    unwrapOneInvert(X, input, output, 20, sa) \
    unwrapOne(X, input, output, 21, se) \
    unwrapOne(X, input, output, 22, si) \
    unwrapOne(X, input, output, 23, so)

/*
 * unwrap(X, input, output, laneCount, trailingBits)
 *
 * Expands to a single if/else statement that dispatches on laneCount through
 * a binary decision tree (comparisons against 16, 8/24, 4/12/20, ...):
 *
 *   - For every lane index i in 0 .. laneCount-1 (25 lanes at most), in
 *     increasing order, it invokes unwrapOne(X, input, output, i, <lane>),
 *     or unwrapOneInvert(...) for lanes 1 (be), 2 (bi), 8 (go), 12 (ki),
 *     17 (mi) and 20 (sa).
 *   - When laneCount < 25, it then XORs trailingBits into the state word
 *     X##<lane> of the lane at index laneCount (ba when laneCount < 1).
 *   - When laneCount >= 25, all 25 lanes are processed and trailingBits is
 *     not used.
 *
 * Lane names by index: ba be bi bo bu, ga ge gi go gu, ka ke ki ko ku,
 * ma me mi mo mu, sa se si so su.
 *
 * laneCount is expanded in several comparisons, so pass an expression without
 * side effects.  unwrapOne/unwrapOneInvert are not defined in this block; they
 * are used without a trailing semicolon and are therefore expected to expand
 * to complete statements.
 * NOTE(review): the inverted lanes presumably match the 'bebigokimisa' lane
 * complementing pattern named at the top of the file -- confirm against the
 * definition of unwrapOneInvert.
 */
#define unwrap(X, input, output, laneCount, trailingBits) \
    if (laneCount < 16) { \
        if (laneCount < 8) { \
            if (laneCount < 4) { \
                /* laneCount below 4: trailing bits land in ba, be, bi or bo */ \
                if (laneCount < 2) { \
                    if (laneCount < 1) { \
                        X##ba ^= trailingBits; \
                    } else { \
                        unwrapOne(X, input, output, 0, ba) \
                        X##be ^= trailingBits; \
                    } \
                } else { \
                    unwrapOne(X, input, output, 0, ba) \
                    unwrapOneInvert(X, input, output, 1, be) \
                    if (laneCount < 3) { \
                        X##bi ^= trailingBits; \
                    } else { \
                        unwrapOneInvert(X, input, output, 2, bi) \
                        X##bo ^= trailingBits; \
                    } \
                } \
            } else { \
                /* laneCount in 4..7: trailing bits land in bu, ga, ge or gi */ \
                unwrapLanes0to3(X, input, output) \
                if (laneCount < 6) { \
                    if (laneCount < 5) { \
                        X##bu ^= trailingBits; \
                    } else { \
                        unwrapOne(X, input, output, 4, bu) \
                        X##ga ^= trailingBits; \
                    } \
                } else { \
                    unwrapOne(X, input, output, 4, bu) \
                    unwrapOne(X, input, output, 5, ga) \
                    if (laneCount < 7) { \
                        X##ge ^= trailingBits; \
                    } else { \
                        unwrapOne(X, input, output, 6, ge) \
                        X##gi ^= trailingBits; \
                    } \
                } \
            } \
        } else { \
            unwrapLanes0to3(X, input, output) \
            unwrapLanes4to7(X, input, output) \
            if (laneCount < 12) { \
                /* laneCount in 8..11: trailing bits land in go, gu, ka or ke */ \
                if (laneCount < 10) { \
                    if (laneCount < 9) { \
                        X##go ^= trailingBits; \
                    } else { \
                        unwrapOneInvert(X, input, output, 8, go) \
                        X##gu ^= trailingBits; \
                    } \
                } else { \
                    unwrapOneInvert(X, input, output, 8, go) \
                    unwrapOne(X, input, output, 9, gu) \
                    if (laneCount < 11) { \
                        X##ka ^= trailingBits; \
                    } else { \
                        unwrapOne(X, input, output, 10, ka) \
                        X##ke ^= trailingBits; \
                    } \
                } \
            } else { \
                /* laneCount in 12..15: trailing bits land in ki, ko, ku or ma */ \
                unwrapLanes8to11(X, input, output) \
                if (laneCount < 14) { \
                    if (laneCount < 13) { \
                        X##ki ^= trailingBits; \
                    } else { \
                        unwrapOneInvert(X, input, output, 12, ki) \
                        X##ko ^= trailingBits; \
                    } \
                } else { \
                    unwrapOneInvert(X, input, output, 12, ki) \
                    unwrapOne(X, input, output, 13, ko) \
                    if (laneCount < 15) { \
                        X##ku ^= trailingBits; \
                    } else { \
                        unwrapOne(X, input, output, 14, ku) \
                        X##ma ^= trailingBits; \
                    } \
                } \
            } \
        } \
    } else { \
        unwrapLanes0to3(X, input, output) \
        unwrapLanes4to7(X, input, output) \
        unwrapLanes8to11(X, input, output) \
        unwrapLanes12to15(X, input, output) \
        if (laneCount < 24) { \
            if (laneCount < 20) { \
                /* laneCount in 16..19: trailing bits land in me, mi, mo or mu */ \
                if (laneCount < 18) { \
                    if (laneCount < 17) { \
                        X##me ^= trailingBits; \
                    } else { \
                        unwrapOne(X, input, output, 16, me) \
                        X##mi ^= trailingBits; \
                    } \
                } else { \
                    unwrapOne(X, input, output, 16, me) \
                    unwrapOneInvert(X, input, output, 17, mi) \
                    if (laneCount < 19) { \
                        X##mo ^= trailingBits; \
                    } else { \
                        unwrapOne(X, input, output, 18, mo) \
                        X##mu ^= trailingBits; \
                    } \
                } \
            } else { \
                /* laneCount in 20..23: trailing bits land in sa, se, si or so */ \
                unwrapLanes16to19(X, input, output) \
                if (laneCount < 22) { \
                    if (laneCount < 21) { \
                        X##sa ^= trailingBits; \
                    } else { \
                        unwrapOneInvert(X, input, output, 20, sa) \
                        X##se ^= trailingBits; \
                    } \
                } else { \
                    unwrapOneInvert(X, input, output, 20, sa) \
                    unwrapOne(X, input, output, 21, se) \
                    if (laneCount < 23) { \
                        X##si ^= trailingBits; \
                    } else { \
                        unwrapOne(X, input, output, 22, si) \
                        X##so ^= trailingBits; \
                    } \
                } \
            } \
        } else { \
            /* laneCount of 24 or more: su takes the trailing bits, or is */ \
            /* unwrapped as the 25th lane with trailingBits left unused.  */ \
            unwrapLanes16to19(X, input, output) \
            unwrapLanes20to23(X, input, output) \
            if (laneCount < 25) { \
                X##su ^= trailingBits; \
            } else { \
                unwrapOne(X, input, output, 24, su) \
            } \
        } \
    }
2209