1/* 2Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, 3Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby 4denoted as "the implementer". 5 6For more information, feedback or questions, please refer to our websites: 7http://keccak.noekeon.org/ 8http://keyak.noekeon.org/ 9http://ketje.noekeon.org/ 10 11To the extent possible under law, the implementer has waived all copyright 12and related or neighboring rights to the source code in this file. 13http://creativecommons.org/publicdomain/zero/1.0/ 14*/ 15 16#define declareABCDE \ 17 UINT64 Aba, Abe, Abi, Abo, Abu; \ 18 UINT64 Aga, Age, Agi, Ago, Agu; \ 19 UINT64 Aka, Ake, Aki, Ako, Aku; \ 20 UINT64 Ama, Ame, Ami, Amo, Amu; \ 21 UINT64 Asa, Ase, Asi, Aso, Asu; \ 22 UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \ 23 UINT64 Bga, Bge, Bgi, Bgo, Bgu; \ 24 UINT64 Bka, Bke, Bki, Bko, Bku; \ 25 UINT64 Bma, Bme, Bmi, Bmo, Bmu; \ 26 UINT64 Bsa, Bse, Bsi, Bso, Bsu; \ 27 UINT64 Ca, Ce, Ci, Co, Cu; \ 28 UINT64 Da, De, Di, Do, Du; \ 29 UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \ 30 UINT64 Ega, Ege, Egi, Ego, Egu; \ 31 UINT64 Eka, Eke, Eki, Eko, Eku; \ 32 UINT64 Ema, Eme, Emi, Emo, Emu; \ 33 UINT64 Esa, Ese, Esi, Eso, Esu; \ 34 35#define prepareTheta \ 36 Ca = Aba^Aga^Aka^Ama^Asa; \ 37 Ce = Abe^Age^Ake^Ame^Ase; \ 38 Ci = Abi^Agi^Aki^Ami^Asi; \ 39 Co = Abo^Ago^Ako^Amo^Aso; \ 40 Cu = Abu^Agu^Aku^Amu^Asu; \ 41 42#ifdef UseBebigokimisa 43/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ 44 45/* --- 64-bit lanes mapped to 64-bit words */ 46 47#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ 48 Da = Cu^ROL64(Ce, 1); \ 49 De = Ca^ROL64(Ci, 1); \ 50 Di = Ce^ROL64(Co, 1); \ 51 Do = Ci^ROL64(Cu, 1); \ 52 Du = Co^ROL64(Ca, 1); \ 53\ 54 A##ba ^= Da; \ 55 Bba = A##ba; \ 56 A##ge ^= De; \ 57 Bbe = ROL64(A##ge, 44); \ 58 A##ki ^= Di; \ 59 Bbi = ROL64(A##ki, 43); \ 60 A##mo ^= Do; \ 61 Bbo = ROL64(A##mo, 21); \ 62 A##su ^= Du; \ 63 Bbu = ROL64(A##su, 14); \ 64 E##ba = Bba ^( Bbe | Bbi ); \ 65 E##ba ^= KeccakF1600RoundConstants[i]; \ 66 Ca = E##ba; \ 67 E##be = Bbe ^((~Bbi)| Bbo ); \ 68 Ce = E##be; \ 69 E##bi = Bbi ^( Bbo & Bbu ); \ 70 Ci = E##bi; \ 71 E##bo = Bbo ^( Bbu | Bba ); \ 72 Co = E##bo; \ 73 E##bu = Bbu ^( Bba & Bbe ); \ 74 Cu = E##bu; \ 75\ 76 A##bo ^= Do; \ 77 Bga = ROL64(A##bo, 28); \ 78 A##gu ^= Du; \ 79 Bge = ROL64(A##gu, 20); \ 80 A##ka ^= Da; \ 81 Bgi = ROL64(A##ka, 3); \ 82 A##me ^= De; \ 83 Bgo = ROL64(A##me, 45); \ 84 A##si ^= Di; \ 85 Bgu = ROL64(A##si, 61); \ 86 E##ga = Bga ^( Bge | Bgi ); \ 87 Ca ^= E##ga; \ 88 E##ge = Bge ^( Bgi & Bgo ); \ 89 Ce ^= E##ge; \ 90 E##gi = Bgi ^( Bgo |(~Bgu)); \ 91 Ci ^= E##gi; \ 92 E##go = Bgo ^( Bgu | Bga ); \ 93 Co ^= E##go; \ 94 E##gu = Bgu ^( Bga & Bge ); \ 95 Cu ^= E##gu; \ 96\ 97 A##be ^= De; \ 98 Bka = ROL64(A##be, 1); \ 99 A##gi ^= Di; \ 100 Bke = ROL64(A##gi, 6); \ 101 A##ko ^= Do; \ 102 Bki = ROL64(A##ko, 25); \ 103 A##mu ^= Du; \ 104 Bko = ROL64(A##mu, 8); \ 105 A##sa ^= Da; \ 106 Bku = ROL64(A##sa, 18); \ 107 E##ka = Bka ^( Bke | Bki ); \ 108 Ca ^= E##ka; \ 109 E##ke = Bke ^( Bki & Bko ); \ 110 Ce ^= E##ke; \ 111 E##ki = Bki ^((~Bko)& Bku ); \ 112 Ci ^= E##ki; \ 113 E##ko = (~Bko)^( Bku | Bka ); \ 114 Co ^= E##ko; \ 115 E##ku = Bku ^( Bka & Bke ); \ 116 Cu ^= E##ku; \ 117\ 118 A##bu ^= Du; \ 119 Bma = ROL64(A##bu, 27); \ 120 A##ga ^= Da; \ 121 Bme = ROL64(A##ga, 36); \ 122 A##ke ^= De; \ 123 Bmi = ROL64(A##ke, 10); \ 124 A##mi ^= Di; \ 125 Bmo = ROL64(A##mi, 15); \ 126 A##so ^= Do; \ 127 Bmu = ROL64(A##so, 56); \ 128 E##ma = Bma ^( Bme & Bmi ); \ 129 Ca ^= E##ma; \ 130 E##me = Bme ^( Bmi | Bmo ); \ 131 Ce ^= E##me; \ 132 E##mi = Bmi ^((~Bmo)| Bmu ); \ 133 Ci ^= E##mi; \ 134 E##mo = (~Bmo)^( Bmu & Bma ); \ 135 Co ^= E##mo; \ 136 E##mu = Bmu ^( Bma | Bme ); \ 137 Cu ^= E##mu; \ 138\ 139 A##bi ^= Di; \ 140 Bsa = ROL64(A##bi, 62); \ 141 A##go ^= Do; \ 142 Bse = ROL64(A##go, 55); \ 143 A##ku ^= Du; \ 144 Bsi = ROL64(A##ku, 39); \ 145 A##ma ^= Da; \ 146 Bso = ROL64(A##ma, 41); \ 147 A##se ^= De; \ 148 Bsu = ROL64(A##se, 2); \ 149 E##sa = Bsa ^((~Bse)& Bsi ); \ 150 Ca ^= E##sa; \ 151 E##se = (~Bse)^( Bsi | Bso ); \ 152 Ce ^= E##se; \ 153 E##si = Bsi ^( Bso & Bsu ); \ 154 Ci ^= E##si; \ 155 E##so = Bso ^( Bsu | Bsa ); \ 156 Co ^= E##so; \ 157 E##su = Bsu ^( Bsa & Bse ); \ 158 Cu ^= E##su; \ 159\ 160 161/* --- Code for round (lane complementing pattern 'bebigokimisa') */ 162 163/* --- 64-bit lanes mapped to 64-bit words */ 164 165#define thetaRhoPiChiIota(i, A, E) \ 166 Da = Cu^ROL64(Ce, 1); \ 167 De = Ca^ROL64(Ci, 1); \ 168 Di = Ce^ROL64(Co, 1); \ 169 Do = Ci^ROL64(Cu, 1); \ 170 Du = Co^ROL64(Ca, 1); \ 171\ 172 A##ba ^= Da; \ 173 Bba = A##ba; \ 174 A##ge ^= De; \ 175 Bbe = ROL64(A##ge, 44); \ 176 A##ki ^= Di; \ 177 Bbi = ROL64(A##ki, 43); \ 178 A##mo ^= Do; \ 179 Bbo = ROL64(A##mo, 21); \ 180 A##su ^= Du; \ 181 Bbu = ROL64(A##su, 14); \ 182 E##ba = Bba ^( Bbe | Bbi ); \ 183 E##ba ^= KeccakF1600RoundConstants[i]; \ 184 E##be = Bbe ^((~Bbi)| Bbo ); \ 185 E##bi = Bbi ^( Bbo & Bbu ); \ 186 E##bo = Bbo ^( Bbu | Bba ); \ 187 E##bu = Bbu ^( Bba & Bbe ); \ 188\ 189 A##bo ^= Do; \ 190 Bga = ROL64(A##bo, 28); \ 191 A##gu ^= Du; \ 192 Bge = ROL64(A##gu, 20); \ 193 A##ka ^= Da; \ 194 Bgi = ROL64(A##ka, 3); \ 195 A##me ^= De; \ 196 Bgo = ROL64(A##me, 45); \ 197 A##si ^= Di; \ 198 Bgu = ROL64(A##si, 61); \ 199 E##ga = Bga ^( Bge | Bgi ); \ 200 E##ge = Bge ^( Bgi & Bgo ); \ 201 E##gi = Bgi ^( Bgo |(~Bgu)); \ 202 E##go = Bgo ^( Bgu | Bga ); \ 203 E##gu = Bgu ^( Bga & Bge ); \ 204\ 205 A##be ^= De; \ 206 Bka = ROL64(A##be, 1); \ 207 A##gi ^= Di; \ 208 Bke = ROL64(A##gi, 6); \ 209 A##ko ^= Do; \ 210 Bki = ROL64(A##ko, 25); \ 211 A##mu ^= Du; \ 212 Bko = ROL64(A##mu, 8); \ 213 A##sa ^= Da; \ 214 Bku = ROL64(A##sa, 18); \ 215 E##ka = Bka ^( Bke | Bki ); \ 216 E##ke = Bke ^( Bki & Bko ); \ 217 E##ki = Bki ^((~Bko)& Bku ); \ 218 E##ko = (~Bko)^( Bku | Bka ); \ 219 E##ku = Bku ^( Bka & Bke ); \ 220\ 221 A##bu ^= Du; \ 222 Bma = ROL64(A##bu, 27); \ 223 A##ga ^= Da; \ 224 Bme = ROL64(A##ga, 36); \ 225 A##ke ^= De; \ 226 Bmi = ROL64(A##ke, 10); \ 227 A##mi ^= Di; \ 228 Bmo = ROL64(A##mi, 15); \ 229 A##so ^= Do; \ 230 Bmu = ROL64(A##so, 56); \ 231 E##ma = Bma ^( Bme & Bmi ); \ 232 E##me = Bme ^( Bmi | Bmo ); \ 233 E##mi = Bmi ^((~Bmo)| Bmu ); \ 234 E##mo = (~Bmo)^( Bmu & Bma ); \ 235 E##mu = Bmu ^( Bma | Bme ); \ 236\ 237 A##bi ^= Di; \ 238 Bsa = ROL64(A##bi, 62); \ 239 A##go ^= Do; \ 240 Bse = ROL64(A##go, 55); \ 241 A##ku ^= Du; \ 242 Bsi = ROL64(A##ku, 39); \ 243 A##ma ^= Da; \ 244 Bso = ROL64(A##ma, 41); \ 245 A##se ^= De; \ 246 Bsu = ROL64(A##se, 2); \ 247 E##sa = Bsa ^((~Bse)& Bsi ); \ 248 E##se = (~Bse)^( Bsi | Bso ); \ 249 E##si = Bsi ^( Bso & Bsu ); \ 250 E##so = Bso ^( Bsu | Bsa ); \ 251 E##su = Bsu ^( Bsa & Bse ); \ 252\ 253 254#else /* UseBebigokimisa */ 255 256/* --- Code for round, with prepare-theta */ 257 258/* --- 64-bit lanes mapped to 64-bit words */ 259 260#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ 261 Da = Cu^ROL64(Ce, 1); \ 262 De = Ca^ROL64(Ci, 1); \ 263 Di = Ce^ROL64(Co, 1); \ 264 Do = Ci^ROL64(Cu, 1); \ 265 Du = Co^ROL64(Ca, 1); \ 266\ 267 A##ba ^= Da; \ 268 Bba = A##ba; \ 269 A##ge ^= De; \ 270 Bbe = ROL64(A##ge, 44); \ 271 A##ki ^= Di; \ 272 Bbi = ROL64(A##ki, 43); \ 273 A##mo ^= Do; \ 274 Bbo = ROL64(A##mo, 21); \ 275 A##su ^= Du; \ 276 Bbu = ROL64(A##su, 14); \ 277 E##ba = Bba ^((~Bbe)& Bbi ); \ 278 E##ba ^= KeccakF1600RoundConstants[i]; \ 279 Ca = E##ba; \ 280 E##be = Bbe ^((~Bbi)& Bbo ); \ 281 Ce = E##be; \ 282 E##bi = Bbi ^((~Bbo)& Bbu ); \ 283 Ci = E##bi; \ 284 E##bo = Bbo ^((~Bbu)& Bba ); \ 285 Co = E##bo; \ 286 E##bu = Bbu ^((~Bba)& Bbe ); \ 287 Cu = E##bu; \ 288\ 289 A##bo ^= Do; \ 290 Bga = ROL64(A##bo, 28); \ 291 A##gu ^= Du; \ 292 Bge = ROL64(A##gu, 20); \ 293 A##ka ^= Da; \ 294 Bgi = ROL64(A##ka, 3); \ 295 A##me ^= De; \ 296 Bgo = ROL64(A##me, 45); \ 297 A##si ^= Di; \ 298 Bgu = ROL64(A##si, 61); \ 299 E##ga = Bga ^((~Bge)& Bgi ); \ 300 Ca ^= E##ga; \ 301 E##ge = Bge ^((~Bgi)& Bgo ); \ 302 Ce ^= E##ge; \ 303 E##gi = Bgi ^((~Bgo)& Bgu ); \ 304 Ci ^= E##gi; \ 305 E##go = Bgo ^((~Bgu)& Bga ); \ 306 Co ^= E##go; \ 307 E##gu = Bgu ^((~Bga)& Bge ); \ 308 Cu ^= E##gu; \ 309\ 310 A##be ^= De; \ 311 Bka = ROL64(A##be, 1); \ 312 A##gi ^= Di; \ 313 Bke = ROL64(A##gi, 6); \ 314 A##ko ^= Do; \ 315 Bki = ROL64(A##ko, 25); \ 316 A##mu ^= Du; \ 317 Bko = ROL64(A##mu, 8); \ 318 A##sa ^= Da; \ 319 Bku = ROL64(A##sa, 18); \ 320 E##ka = Bka ^((~Bke)& Bki ); \ 321 Ca ^= E##ka; \ 322 E##ke = Bke ^((~Bki)& Bko ); \ 323 Ce ^= E##ke; \ 324 E##ki = Bki ^((~Bko)& Bku ); \ 325 Ci ^= E##ki; \ 326 E##ko = Bko ^((~Bku)& Bka ); \ 327 Co ^= E##ko; \ 328 E##ku = Bku ^((~Bka)& Bke ); \ 329 Cu ^= E##ku; \ 330\ 331 A##bu ^= Du; \ 332 Bma = ROL64(A##bu, 27); \ 333 A##ga ^= Da; \ 334 Bme = ROL64(A##ga, 36); \ 335 A##ke ^= De; \ 336 Bmi = ROL64(A##ke, 10); \ 337 A##mi ^= Di; \ 338 Bmo = ROL64(A##mi, 15); \ 339 A##so ^= Do; \ 340 Bmu = ROL64(A##so, 56); \ 341 E##ma = Bma ^((~Bme)& Bmi ); \ 342 Ca ^= E##ma; \ 343 E##me = Bme ^((~Bmi)& Bmo ); \ 344 Ce ^= E##me; \ 345 E##mi = Bmi ^((~Bmo)& Bmu ); \ 346 Ci ^= E##mi; \ 347 E##mo = Bmo ^((~Bmu)& Bma ); \ 348 Co ^= E##mo; \ 349 E##mu = Bmu ^((~Bma)& Bme ); \ 350 Cu ^= E##mu; \ 351\ 352 A##bi ^= Di; \ 353 Bsa = ROL64(A##bi, 62); \ 354 A##go ^= Do; \ 355 Bse = ROL64(A##go, 55); \ 356 A##ku ^= Du; \ 357 Bsi = ROL64(A##ku, 39); \ 358 A##ma ^= Da; \ 359 Bso = ROL64(A##ma, 41); \ 360 A##se ^= De; \ 361 Bsu = ROL64(A##se, 2); \ 362 E##sa = Bsa ^((~Bse)& Bsi ); \ 363 Ca ^= E##sa; \ 364 E##se = Bse ^((~Bsi)& Bso ); \ 365 Ce ^= E##se; \ 366 E##si = Bsi ^((~Bso)& Bsu ); \ 367 Ci ^= E##si; \ 368 E##so = Bso ^((~Bsu)& Bsa ); \ 369 Co ^= E##so; \ 370 E##su = Bsu ^((~Bsa)& Bse ); \ 371 Cu ^= E##su; \ 372\ 373 374/* --- Code for round */ 375 376/* --- 64-bit lanes mapped to 64-bit words */ 377 378#define thetaRhoPiChiIota(i, A, E) \ 379 Da = Cu^ROL64(Ce, 1); \ 380 De = Ca^ROL64(Ci, 1); \ 381 Di = Ce^ROL64(Co, 1); \ 382 Do = Ci^ROL64(Cu, 1); \ 383 Du = Co^ROL64(Ca, 1); \ 384\ 385 A##ba ^= Da; \ 386 Bba = A##ba; \ 387 A##ge ^= De; \ 388 Bbe = ROL64(A##ge, 44); \ 389 A##ki ^= Di; \ 390 Bbi = ROL64(A##ki, 43); \ 391 A##mo ^= Do; \ 392 Bbo = ROL64(A##mo, 21); \ 393 A##su ^= Du; \ 394 Bbu = ROL64(A##su, 14); \ 395 E##ba = Bba ^((~Bbe)& Bbi ); \ 396 E##ba ^= KeccakF1600RoundConstants[i]; \ 397 E##be = Bbe ^((~Bbi)& Bbo ); \ 398 E##bi = Bbi ^((~Bbo)& Bbu ); \ 399 E##bo = Bbo ^((~Bbu)& Bba ); \ 400 E##bu = Bbu ^((~Bba)& Bbe ); \ 401\ 402 A##bo ^= Do; \ 403 Bga = ROL64(A##bo, 28); \ 404 A##gu ^= Du; \ 405 Bge = ROL64(A##gu, 20); \ 406 A##ka ^= Da; \ 407 Bgi = ROL64(A##ka, 3); \ 408 A##me ^= De; \ 409 Bgo = ROL64(A##me, 45); \ 410 A##si ^= Di; \ 411 Bgu = ROL64(A##si, 61); \ 412 E##ga = Bga ^((~Bge)& Bgi ); \ 413 E##ge = Bge ^((~Bgi)& Bgo ); \ 414 E##gi = Bgi ^((~Bgo)& Bgu ); \ 415 E##go = Bgo ^((~Bgu)& Bga ); \ 416 E##gu = Bgu ^((~Bga)& Bge ); \ 417\ 418 A##be ^= De; \ 419 Bka = ROL64(A##be, 1); \ 420 A##gi ^= Di; \ 421 Bke = ROL64(A##gi, 6); \ 422 A##ko ^= Do; \ 423 Bki = ROL64(A##ko, 25); \ 424 A##mu ^= Du; \ 425 Bko = ROL64(A##mu, 8); \ 426 A##sa ^= Da; \ 427 Bku = ROL64(A##sa, 18); \ 428 E##ka = Bka ^((~Bke)& Bki ); \ 429 E##ke = Bke ^((~Bki)& Bko ); \ 430 E##ki = Bki ^((~Bko)& Bku ); \ 431 E##ko = Bko ^((~Bku)& Bka ); \ 432 E##ku = Bku ^((~Bka)& Bke ); \ 433\ 434 A##bu ^= Du; \ 435 Bma = ROL64(A##bu, 27); \ 436 A##ga ^= Da; \ 437 Bme = ROL64(A##ga, 36); \ 438 A##ke ^= De; \ 439 Bmi = ROL64(A##ke, 10); \ 440 A##mi ^= Di; \ 441 Bmo = ROL64(A##mi, 15); \ 442 A##so ^= Do; \ 443 Bmu = ROL64(A##so, 56); \ 444 E##ma = Bma ^((~Bme)& Bmi ); \ 445 E##me = Bme ^((~Bmi)& Bmo ); \ 446 E##mi = Bmi ^((~Bmo)& Bmu ); \ 447 E##mo = Bmo ^((~Bmu)& Bma ); \ 448 E##mu = Bmu ^((~Bma)& Bme ); \ 449\ 450 A##bi ^= Di; \ 451 Bsa = ROL64(A##bi, 62); \ 452 A##go ^= Do; \ 453 Bse = ROL64(A##go, 55); \ 454 A##ku ^= Du; \ 455 Bsi = ROL64(A##ku, 39); \ 456 A##ma ^= Da; \ 457 Bso = ROL64(A##ma, 41); \ 458 A##se ^= De; \ 459 Bsu = ROL64(A##se, 2); \ 460 E##sa = Bsa ^((~Bse)& Bsi ); \ 461 E##se = Bse ^((~Bsi)& Bso ); \ 462 E##si = Bsi ^((~Bso)& Bsu ); \ 463 E##so = Bso ^((~Bsu)& Bsa ); \ 464 E##su = Bsu ^((~Bsa)& Bse ); \ 465\ 466 467#endif /* UseBebigokimisa */ 468 469 470#define copyFromState(X, state) \ 471 X##ba = state[ 0]; \ 472 X##be = state[ 1]; \ 473 X##bi = state[ 2]; \ 474 X##bo = state[ 3]; \ 475 X##bu = state[ 4]; \ 476 X##ga = state[ 5]; \ 477 X##ge = state[ 6]; \ 478 X##gi = state[ 7]; \ 479 X##go = state[ 8]; \ 480 X##gu = state[ 9]; \ 481 X##ka = state[10]; \ 482 X##ke = state[11]; \ 483 X##ki = state[12]; \ 484 X##ko = state[13]; \ 485 X##ku = state[14]; \ 486 X##ma = state[15]; \ 487 X##me = state[16]; \ 488 X##mi = state[17]; \ 489 X##mo = state[18]; \ 490 X##mu = state[19]; \ 491 X##sa = state[20]; \ 492 X##se = state[21]; \ 493 X##si = state[22]; \ 494 X##so = state[23]; \ 495 X##su = state[24]; \ 496 497#define copyToState(state, X) \ 498 state[ 0] = X##ba; \ 499 state[ 1] = X##be; \ 500 state[ 2] = X##bi; \ 501 state[ 3] = X##bo; \ 502 state[ 4] = X##bu; \ 503 state[ 5] = X##ga; \ 504 state[ 6] = X##ge; \ 505 state[ 7] = X##gi; \ 506 state[ 8] = X##go; \ 507 state[ 9] = X##gu; \ 508 state[10] = X##ka; \ 509 state[11] = X##ke; \ 510 state[12] = X##ki; \ 511 state[13] = X##ko; \ 512 state[14] = X##ku; \ 513 state[15] = X##ma; \ 514 state[16] = X##me; \ 515 state[17] = X##mi; \ 516 state[18] = X##mo; \ 517 state[19] = X##mu; \ 518 state[20] = X##sa; \ 519 state[21] = X##se; \ 520 state[22] = X##si; \ 521 state[23] = X##so; \ 522 state[24] = X##su; \ 523 524#define copyStateVariables(X, Y) \ 525 X##ba = Y##ba; \ 526 X##be = Y##be; \ 527 X##bi = Y##bi; \ 528 X##bo = Y##bo; \ 529 X##bu = Y##bu; \ 530 X##ga = Y##ga; \ 531 X##ge = Y##ge; \ 532 X##gi = Y##gi; \ 533 X##go = Y##go; \ 534 X##gu = Y##gu; \ 535 X##ka = Y##ka; \ 536 X##ke = Y##ke; \ 537 X##ki = Y##ki; \ 538 X##ko = Y##ko; \ 539 X##ku = Y##ku; \ 540 X##ma = Y##ma; \ 541 X##me = Y##me; \ 542 X##mi = Y##mi; \ 543 X##mo = Y##mo; \ 544 X##mu = Y##mu; \ 545 X##sa = Y##sa; \ 546 X##se = Y##se; \ 547 X##si = Y##si; \ 548 X##so = Y##so; \ 549 X##su = Y##su; \ 550 551#define copyFromStateAndAdd(X, state, input, laneCount) \ 552 if (laneCount < 16) { \ 553 if (laneCount < 8) { \ 554 if (laneCount < 4) { \ 555 if (laneCount < 2) { \ 556 if (laneCount < 1) { \ 557 X##ba = state[ 0]; \ 558 } \ 559 else { \ 560 X##ba = state[ 0]^input[ 0]; \ 561 } \ 562 X##be = state[ 1]; \ 563 X##bi = state[ 2]; \ 564 } \ 565 else { \ 566 X##ba = state[ 0]^input[ 0]; \ 567 X##be = state[ 1]^input[ 1]; \ 568 if (laneCount < 3) { \ 569 X##bi = state[ 2]; \ 570 } \ 571 else { \ 572 X##bi = state[ 2]^input[ 2]; \ 573 } \ 574 } \ 575 X##bo = state[ 3]; \ 576 X##bu = state[ 4]; \ 577 X##ga = state[ 5]; \ 578 X##ge = state[ 6]; \ 579 } \ 580 else { \ 581 X##ba = state[ 0]^input[ 0]; \ 582 X##be = state[ 1]^input[ 1]; \ 583 X##bi = state[ 2]^input[ 2]; \ 584 X##bo = state[ 3]^input[ 3]; \ 585 if (laneCount < 6) { \ 586 if (laneCount < 5) { \ 587 X##bu = state[ 4]; \ 588 } \ 589 else { \ 590 X##bu = state[ 4]^input[ 4]; \ 591 } \ 592 X##ga = state[ 5]; \ 593 X##ge = state[ 6]; \ 594 } \ 595 else { \ 596 X##bu = state[ 4]^input[ 4]; \ 597 X##ga = state[ 5]^input[ 5]; \ 598 if (laneCount < 7) { \ 599 X##ge = state[ 6]; \ 600 } \ 601 else { \ 602 X##ge = state[ 6]^input[ 6]; \ 603 } \ 604 } \ 605 } \ 606 X##gi = state[ 7]; \ 607 X##go = state[ 8]; \ 608 X##gu = state[ 9]; \ 609 X##ka = state[10]; \ 610 X##ke = state[11]; \ 611 X##ki = state[12]; \ 612 X##ko = state[13]; \ 613 X##ku = state[14]; \ 614 } \ 615 else { \ 616 X##ba = state[ 0]^input[ 0]; \ 617 X##be = state[ 1]^input[ 1]; \ 618 X##bi = state[ 2]^input[ 2]; \ 619 X##bo = state[ 3]^input[ 3]; \ 620 X##bu = state[ 4]^input[ 4]; \ 621 X##ga = state[ 5]^input[ 5]; \ 622 X##ge = state[ 6]^input[ 6]; \ 623 X##gi = state[ 7]^input[ 7]; \ 624 if (laneCount < 12) { \ 625 if (laneCount < 10) { \ 626 if (laneCount < 9) { \ 627 X##go = state[ 8]; \ 628 } \ 629 else { \ 630 X##go = state[ 8]^input[ 8]; \ 631 } \ 632 X##gu = state[ 9]; \ 633 X##ka = state[10]; \ 634 } \ 635 else { \ 636 X##go = state[ 8]^input[ 8]; \ 637 X##gu = state[ 9]^input[ 9]; \ 638 if (laneCount < 11) { \ 639 X##ka = state[10]; \ 640 } \ 641 else { \ 642 X##ka = state[10]^input[10]; \ 643 } \ 644 } \ 645 X##ke = state[11]; \ 646 X##ki = state[12]; \ 647 X##ko = state[13]; \ 648 X##ku = state[14]; \ 649 } \ 650 else { \ 651 X##go = state[ 8]^input[ 8]; \ 652 X##gu = state[ 9]^input[ 9]; \ 653 X##ka = state[10]^input[10]; \ 654 X##ke = state[11]^input[11]; \ 655 if (laneCount < 14) { \ 656 if (laneCount < 13) { \ 657 X##ki = state[12]; \ 658 } \ 659 else { \ 660 X##ki = state[12]^input[12]; \ 661 } \ 662 X##ko = state[13]; \ 663 X##ku = state[14]; \ 664 } \ 665 else { \ 666 X##ki = state[12]^input[12]; \ 667 X##ko = state[13]^input[13]; \ 668 if (laneCount < 15) { \ 669 X##ku = state[14]; \ 670 } \ 671 else { \ 672 X##ku = state[14]^input[14]; \ 673 } \ 674 } \ 675 } \ 676 } \ 677 X##ma = state[15]; \ 678 X##me = state[16]; \ 679 X##mi = state[17]; \ 680 X##mo = state[18]; \ 681 X##mu = state[19]; \ 682 X##sa = state[20]; \ 683 X##se = state[21]; \ 684 X##si = state[22]; \ 685 X##so = state[23]; \ 686 X##su = state[24]; \ 687 } \ 688 else { \ 689 X##ba = state[ 0]^input[ 0]; \ 690 X##be = state[ 1]^input[ 1]; \ 691 X##bi = state[ 2]^input[ 2]; \ 692 X##bo = state[ 3]^input[ 3]; \ 693 X##bu = state[ 4]^input[ 4]; \ 694 X##ga = state[ 5]^input[ 5]; \ 695 X##ge = state[ 6]^input[ 6]; \ 696 X##gi = state[ 7]^input[ 7]; \ 697 X##go = state[ 8]^input[ 8]; \ 698 X##gu = state[ 9]^input[ 9]; \ 699 X##ka = state[10]^input[10]; \ 700 X##ke = state[11]^input[11]; \ 701 X##ki = state[12]^input[12]; \ 702 X##ko = state[13]^input[13]; \ 703 X##ku = state[14]^input[14]; \ 704 X##ma = state[15]^input[15]; \ 705 if (laneCount < 24) { \ 706 if (laneCount < 20) { \ 707 if (laneCount < 18) { \ 708 if (laneCount < 17) { \ 709 X##me = state[16]; \ 710 } \ 711 else { \ 712 X##me = state[16]^input[16]; \ 713 } \ 714 X##mi = state[17]; \ 715 X##mo = state[18]; \ 716 } \ 717 else { \ 718 X##me = state[16]^input[16]; \ 719 X##mi = state[17]^input[17]; \ 720 if (laneCount < 19) { \ 721 X##mo = state[18]; \ 722 } \ 723 else { \ 724 X##mo = state[18]^input[18]; \ 725 } \ 726 } \ 727 X##mu = state[19]; \ 728 X##sa = state[20]; \ 729 X##se = state[21]; \ 730 X##si = state[22]; \ 731 } \ 732 else { \ 733 X##me = state[16]^input[16]; \ 734 X##mi = state[17]^input[17]; \ 735 X##mo = state[18]^input[18]; \ 736 X##mu = state[19]^input[19]; \ 737 if (laneCount < 22) { \ 738 if (laneCount < 21) { \ 739 X##sa = state[20]; \ 740 } \ 741 else { \ 742 X##sa = state[20]^input[20]; \ 743 } \ 744 X##se = state[21]; \ 745 X##si = state[22]; \ 746 } \ 747 else { \ 748 X##sa = state[20]^input[20]; \ 749 X##se = state[21]^input[21]; \ 750 if (laneCount < 23) { \ 751 X##si = state[22]; \ 752 } \ 753 else { \ 754 X##si = state[22]^input[22]; \ 755 } \ 756 } \ 757 } \ 758 X##so = state[23]; \ 759 X##su = state[24]; \ 760 } \ 761 else { \ 762 X##me = state[16]^input[16]; \ 763 X##mi = state[17]^input[17]; \ 764 X##mo = state[18]^input[18]; \ 765 X##mu = state[19]^input[19]; \ 766 X##sa = state[20]^input[20]; \ 767 X##se = state[21]^input[21]; \ 768 X##si = state[22]^input[22]; \ 769 X##so = state[23]^input[23]; \ 770 if (laneCount < 25) { \ 771 X##su = state[24]; \ 772 } \ 773 else { \ 774 X##su = state[24]^input[24]; \ 775 } \ 776 } \ 777 } 778 779#define addInput(X, input, laneCount) \ 780 if (laneCount == 21) { \ 781 X##ba ^= input[ 0]; \ 782 X##be ^= input[ 1]; \ 783 X##bi ^= input[ 2]; \ 784 X##bo ^= input[ 3]; \ 785 X##bu ^= input[ 4]; \ 786 X##ga ^= input[ 5]; \ 787 X##ge ^= input[ 6]; \ 788 X##gi ^= input[ 7]; \ 789 X##go ^= input[ 8]; \ 790 X##gu ^= input[ 9]; \ 791 X##ka ^= input[10]; \ 792 X##ke ^= input[11]; \ 793 X##ki ^= input[12]; \ 794 X##ko ^= input[13]; \ 795 X##ku ^= input[14]; \ 796 X##ma ^= input[15]; \ 797 X##me ^= input[16]; \ 798 X##mi ^= input[17]; \ 799 X##mo ^= input[18]; \ 800 X##mu ^= input[19]; \ 801 X##sa ^= input[20]; \ 802 } \ 803 else if (laneCount < 16) { \ 804 if (laneCount < 8) { \ 805 if (laneCount < 4) { \ 806 if (laneCount < 2) { \ 807 if (laneCount < 1) { \ 808 } \ 809 else { \ 810 X##ba ^= input[ 0]; \ 811 } \ 812 } \ 813 else { \ 814 X##ba ^= input[ 0]; \ 815 X##be ^= input[ 1]; \ 816 if (laneCount < 3) { \ 817 } \ 818 else { \ 819 X##bi ^= input[ 2]; \ 820 } \ 821 } \ 822 } \ 823 else { \ 824 X##ba ^= input[ 0]; \ 825 X##be ^= input[ 1]; \ 826 X##bi ^= input[ 2]; \ 827 X##bo ^= input[ 3]; \ 828 if (laneCount < 6) { \ 829 if (laneCount < 5) { \ 830 } \ 831 else { \ 832 X##bu ^= input[ 4]; \ 833 } \ 834 } \ 835 else { \ 836 X##bu ^= input[ 4]; \ 837 X##ga ^= input[ 5]; \ 838 if (laneCount < 7) { \ 839 } \ 840 else { \ 841 X##ge ^= input[ 6]; \ 842 } \ 843 } \ 844 } \ 845 } \ 846 else { \ 847 X##ba ^= input[ 0]; \ 848 X##be ^= input[ 1]; \ 849 X##bi ^= input[ 2]; \ 850 X##bo ^= input[ 3]; \ 851 X##bu ^= input[ 4]; \ 852 X##ga ^= input[ 5]; \ 853 X##ge ^= input[ 6]; \ 854 X##gi ^= input[ 7]; \ 855 if (laneCount < 12) { \ 856 if (laneCount < 10) { \ 857 if (laneCount < 9) { \ 858 } \ 859 else { \ 860 X##go ^= input[ 8]; \ 861 } \ 862 } \ 863 else { \ 864 X##go ^= input[ 8]; \ 865 X##gu ^= input[ 9]; \ 866 if (laneCount < 11) { \ 867 } \ 868 else { \ 869 X##ka ^= input[10]; \ 870 } \ 871 } \ 872 } \ 873 else { \ 874 X##go ^= input[ 8]; \ 875 X##gu ^= input[ 9]; \ 876 X##ka ^= input[10]; \ 877 X##ke ^= input[11]; \ 878 if (laneCount < 14) { \ 879 if (laneCount < 13) { \ 880 } \ 881 else { \ 882 X##ki ^= input[12]; \ 883 } \ 884 } \ 885 else { \ 886 X##ki ^= input[12]; \ 887 X##ko ^= input[13]; \ 888 if (laneCount < 15) { \ 889 } \ 890 else { \ 891 X##ku ^= input[14]; \ 892 } \ 893 } \ 894 } \ 895 } \ 896 } \ 897 else { \ 898 X##ba ^= input[ 0]; \ 899 X##be ^= input[ 1]; \ 900 X##bi ^= input[ 2]; \ 901 X##bo ^= input[ 3]; \ 902 X##bu ^= input[ 4]; \ 903 X##ga ^= input[ 5]; \ 904 X##ge ^= input[ 6]; \ 905 X##gi ^= input[ 7]; \ 906 X##go ^= input[ 8]; \ 907 X##gu ^= input[ 9]; \ 908 X##ka ^= input[10]; \ 909 X##ke ^= input[11]; \ 910 X##ki ^= input[12]; \ 911 X##ko ^= input[13]; \ 912 X##ku ^= input[14]; \ 913 X##ma ^= input[15]; \ 914 if (laneCount < 24) { \ 915 if (laneCount < 20) { \ 916 if (laneCount < 18) { \ 917 if (laneCount < 17) { \ 918 } \ 919 else { \ 920 X##me ^= input[16]; \ 921 } \ 922 } \ 923 else { \ 924 X##me ^= input[16]; \ 925 X##mi ^= input[17]; \ 926 if (laneCount < 19) { \ 927 } \ 928 else { \ 929 X##mo ^= input[18]; \ 930 } \ 931 } \ 932 } \ 933 else { \ 934 X##me ^= input[16]; \ 935 X##mi ^= input[17]; \ 936 X##mo ^= input[18]; \ 937 X##mu ^= input[19]; \ 938 if (laneCount < 22) { \ 939 if (laneCount < 21) { \ 940 } \ 941 else { \ 942 X##sa ^= input[20]; \ 943 } \ 944 } \ 945 else { \ 946 X##sa ^= input[20]; \ 947 X##se ^= input[21]; \ 948 if (laneCount < 23) { \ 949 } \ 950 else { \ 951 X##si ^= input[22]; \ 952 } \ 953 } \ 954 } \ 955 } \ 956 else { \ 957 X##me ^= input[16]; \ 958 X##mi ^= input[17]; \ 959 X##mo ^= input[18]; \ 960 X##mu ^= input[19]; \ 961 X##sa ^= input[20]; \ 962 X##se ^= input[21]; \ 963 X##si ^= input[22]; \ 964 X##so ^= input[23]; \ 965 if (laneCount < 25) { \ 966 } \ 967 else { \ 968 X##su ^= input[24]; \ 969 } \ 970 } \ 971 } 972 973#ifdef UseBebigokimisa 974 975#define copyToStateAndOutput(X, state, output, laneCount) \ 976 if (laneCount < 16) { \ 977 if (laneCount < 8) { \ 978 if (laneCount < 4) { \ 979 if (laneCount < 2) { \ 980 state[ 0] = X##ba; \ 981 if (laneCount >= 1) { \ 982 output[ 0] = X##ba; \ 983 } \ 984 state[ 1] = X##be; \ 985 state[ 2] = X##bi; \ 986 } \ 987 else { \ 988 state[ 0] = X##ba; \ 989 output[ 0] = X##ba; \ 990 state[ 1] = X##be; \ 991 output[ 1] = ~X##be; \ 992 state[ 2] = X##bi; \ 993 if (laneCount >= 3) { \ 994 output[ 2] = ~X##bi; \ 995 } \ 996 } \ 997 state[ 3] = X##bo; \ 998 state[ 4] = X##bu; \ 999 state[ 5] = X##ga; \ 1000 state[ 6] = X##ge; \ 1001 } \ 1002 else { \ 1003 state[ 0] = X##ba; \ 1004 output[ 0] = X##ba; \ 1005 state[ 1] = X##be; \ 1006 output[ 1] = ~X##be; \ 1007 state[ 2] = X##bi; \ 1008 output[ 2] = ~X##bi; \ 1009 state[ 3] = X##bo; \ 1010 output[ 3] = X##bo; \ 1011 if (laneCount < 6) { \ 1012 state[ 4] = X##bu; \ 1013 if (laneCount >= 5) { \ 1014 output[ 4] = X##bu; \ 1015 } \ 1016 state[ 5] = X##ga; \ 1017 state[ 6] = X##ge; \ 1018 } \ 1019 else { \ 1020 state[ 4] = X##bu; \ 1021 output[ 4] = X##bu; \ 1022 state[ 5] = X##ga; \ 1023 output[ 5] = X##ga; \ 1024 state[ 6] = X##ge; \ 1025 if (laneCount >= 7) { \ 1026 output[ 6] = X##ge; \ 1027 } \ 1028 } \ 1029 } \ 1030 state[ 7] = X##gi; \ 1031 state[ 8] = X##go; \ 1032 state[ 9] = X##gu; \ 1033 state[10] = X##ka; \ 1034 state[11] = X##ke; \ 1035 state[12] = X##ki; \ 1036 state[13] = X##ko; \ 1037 state[14] = X##ku; \ 1038 } \ 1039 else { \ 1040 state[ 0] = X##ba; \ 1041 output[ 0] = X##ba; \ 1042 state[ 1] = X##be; \ 1043 output[ 1] = ~X##be; \ 1044 state[ 2] = X##bi; \ 1045 output[ 2] = ~X##bi; \ 1046 state[ 3] = X##bo; \ 1047 output[ 3] = X##bo; \ 1048 state[ 4] = X##bu; \ 1049 output[ 4] = X##bu; \ 1050 state[ 5] = X##ga; \ 1051 output[ 5] = X##ga; \ 1052 state[ 6] = X##ge; \ 1053 output[ 6] = X##ge; \ 1054 state[ 7] = X##gi; \ 1055 output[ 7] = X##gi; \ 1056 if (laneCount < 12) { \ 1057 if (laneCount < 10) { \ 1058 state[ 8] = X##go; \ 1059 if (laneCount >= 9) { \ 1060 output[ 8] = ~X##go; \ 1061 } \ 1062 state[ 9] = X##gu; \ 1063 state[10] = X##ka; \ 1064 } \ 1065 else { \ 1066 state[ 8] = X##go; \ 1067 output[ 8] = ~X##go; \ 1068 state[ 9] = X##gu; \ 1069 output[ 9] = X##gu; \ 1070 state[10] = X##ka; \ 1071 if (laneCount >= 11) { \ 1072 output[10] = X##ka; \ 1073 } \ 1074 } \ 1075 state[11] = X##ke; \ 1076 state[12] = X##ki; \ 1077 state[13] = X##ko; \ 1078 state[14] = X##ku; \ 1079 } \ 1080 else { \ 1081 state[ 8] = X##go; \ 1082 output[ 8] = ~X##go; \ 1083 state[ 9] = X##gu; \ 1084 output[ 9] = X##gu; \ 1085 state[10] = X##ka; \ 1086 output[10] = X##ka; \ 1087 state[11] = X##ke; \ 1088 output[11] = X##ke; \ 1089 if (laneCount < 14) { \ 1090 state[12] = X##ki; \ 1091 if (laneCount >= 13) { \ 1092 output[12] = ~X##ki; \ 1093 } \ 1094 state[13] = X##ko; \ 1095 state[14] = X##ku; \ 1096 } \ 1097 else { \ 1098 state[12] = X##ki; \ 1099 output[12] = ~X##ki; \ 1100 state[13] = X##ko; \ 1101 output[13] = X##ko; \ 1102 state[14] = X##ku; \ 1103 if (laneCount >= 15) { \ 1104 output[14] = X##ku; \ 1105 } \ 1106 } \ 1107 } \ 1108 } \ 1109 state[15] = X##ma; \ 1110 state[16] = X##me; \ 1111 state[17] = X##mi; \ 1112 state[18] = X##mo; \ 1113 state[19] = X##mu; \ 1114 state[20] = X##sa; \ 1115 state[21] = X##se; \ 1116 state[22] = X##si; \ 1117 state[23] = X##so; \ 1118 state[24] = X##su; \ 1119 } \ 1120 else { \ 1121 state[ 0] = X##ba; \ 1122 output[ 0] = X##ba; \ 1123 state[ 1] = X##be; \ 1124 output[ 1] = ~X##be; \ 1125 state[ 2] = X##bi; \ 1126 output[ 2] = ~X##bi; \ 1127 state[ 3] = X##bo; \ 1128 output[ 3] = X##bo; \ 1129 state[ 4] = X##bu; \ 1130 output[ 4] = X##bu; \ 1131 state[ 5] = X##ga; \ 1132 output[ 5] = X##ga; \ 1133 state[ 6] = X##ge; \ 1134 output[ 6] = X##ge; \ 1135 state[ 7] = X##gi; \ 1136 output[ 7] = X##gi; \ 1137 state[ 8] = X##go; \ 1138 output[ 8] = ~X##go; \ 1139 state[ 9] = X##gu; \ 1140 output[ 9] = X##gu; \ 1141 state[10] = X##ka; \ 1142 output[10] = X##ka; \ 1143 state[11] = X##ke; \ 1144 output[11] = X##ke; \ 1145 state[12] = X##ki; \ 1146 output[12] = ~X##ki; \ 1147 state[13] = X##ko; \ 1148 output[13] = X##ko; \ 1149 state[14] = X##ku; \ 1150 output[14] = X##ku; \ 1151 state[15] = X##ma; \ 1152 output[15] = X##ma; \ 1153 if (laneCount < 24) { \ 1154 if (laneCount < 20) { \ 1155 if (laneCount < 18) { \ 1156 state[16] = X##me; \ 1157 if (laneCount >= 17) { \ 1158 output[16] = X##me; \ 1159 } \ 1160 state[17] = X##mi; \ 1161 state[18] = X##mo; \ 1162 } \ 1163 else { \ 1164 state[16] = X##me; \ 1165 output[16] = X##me; \ 1166 state[17] = X##mi; \ 1167 output[17] = ~X##mi; \ 1168 state[18] = X##mo; \ 1169 if (laneCount >= 19) { \ 1170 output[18] = X##mo; \ 1171 } \ 1172 } \ 1173 state[19] = X##mu; \ 1174 state[20] = X##sa; \ 1175 state[21] = X##se; \ 1176 state[22] = X##si; \ 1177 } \ 1178 else { \ 1179 state[16] = X##me; \ 1180 output[16] = X##me; \ 1181 state[17] = X##mi; \ 1182 output[17] = ~X##mi; \ 1183 state[18] = X##mo; \ 1184 output[18] = X##mo; \ 1185 state[19] = X##mu; \ 1186 output[19] = X##mu; \ 1187 if (laneCount < 22) { \ 1188 state[20] = X##sa; \ 1189 if (laneCount >= 21) { \ 1190 output[20] = ~X##sa; \ 1191 } \ 1192 state[21] = X##se; \ 1193 state[22] = X##si; \ 1194 } \ 1195 else { \ 1196 state[20] = X##sa; \ 1197 output[20] = ~X##sa; \ 1198 state[21] = X##se; \ 1199 output[21] = X##se; \ 1200 state[22] = X##si; \ 1201 if (laneCount >= 23) { \ 1202 output[22] = X##si; \ 1203 } \ 1204 } \ 1205 } \ 1206 state[23] = X##so; \ 1207 state[24] = X##su; \ 1208 } \ 1209 else { \ 1210 state[16] = X##me; \ 1211 output[16] = X##me; \ 1212 state[17] = X##mi; \ 1213 output[17] = ~X##mi; \ 1214 state[18] = X##mo; \ 1215 output[18] = X##mo; \ 1216 state[19] = X##mu; \ 1217 output[19] = X##mu; \ 1218 state[20] = X##sa; \ 1219 output[20] = ~X##sa; \ 1220 state[21] = X##se; \ 1221 output[21] = X##se; \ 1222 state[22] = X##si; \ 1223 output[22] = X##si; \ 1224 state[23] = X##so; \ 1225 output[23] = X##so; \ 1226 state[24] = X##su; \ 1227 if (laneCount >= 25) { \ 1228 output[24] = X##su; \ 1229 } \ 1230 } \ 1231 } 1232 1233#define output(X, output, laneCount) \ 1234 if (laneCount < 16) { \ 1235 if (laneCount < 8) { \ 1236 if (laneCount < 4) { \ 1237 if (laneCount < 2) { \ 1238 if (laneCount >= 1) { \ 1239 output[ 0] = X##ba; \ 1240 } \ 1241 } \ 1242 else { \ 1243 output[ 0] = X##ba; \ 1244 output[ 1] = ~X##be; \ 1245 if (laneCount >= 3) { \ 1246 output[ 2] = ~X##bi; \ 1247 } \ 1248 } \ 1249 } \ 1250 else { \ 1251 output[ 0] = X##ba; \ 1252 output[ 1] = ~X##be; \ 1253 output[ 2] = ~X##bi; \ 1254 output[ 3] = X##bo; \ 1255 if (laneCount < 6) { \ 1256 if (laneCount >= 5) { \ 1257 output[ 4] = X##bu; \ 1258 } \ 1259 } \ 1260 else { \ 1261 output[ 4] = X##bu; \ 1262 output[ 5] = X##ga; \ 1263 if (laneCount >= 7) { \ 1264 output[ 6] = X##ge; \ 1265 } \ 1266 } \ 1267 } \ 1268 } \ 1269 else { \ 1270 output[ 0] = X##ba; \ 1271 output[ 1] = ~X##be; \ 1272 output[ 2] = ~X##bi; \ 1273 output[ 3] = X##bo; \ 1274 output[ 4] = X##bu; \ 1275 output[ 5] = X##ga; \ 1276 output[ 6] = X##ge; \ 1277 output[ 7] = X##gi; \ 1278 if (laneCount < 12) { \ 1279 if (laneCount < 10) { \ 1280 if (laneCount >= 9) { \ 1281 output[ 8] = ~X##go; \ 1282 } \ 1283 } \ 1284 else { \ 1285 output[ 8] = ~X##go; \ 1286 output[ 9] = X##gu; \ 1287 if (laneCount >= 11) { \ 1288 output[10] = X##ka; \ 1289 } \ 1290 } \ 1291 } \ 1292 else { \ 1293 output[ 8] = ~X##go; \ 1294 output[ 9] = X##gu; \ 1295 output[10] = X##ka; \ 1296 output[11] = X##ke; \ 1297 if (laneCount < 14) { \ 1298 if (laneCount >= 13) { \ 1299 output[12] = ~X##ki; \ 1300 } \ 1301 } \ 1302 else { \ 1303 output[12] = ~X##ki; \ 1304 output[13] = X##ko; \ 1305 if (laneCount >= 15) { \ 1306 output[14] = X##ku; \ 1307 } \ 1308 } \ 1309 } \ 1310 } \ 1311 } \ 1312 else { \ 1313 output[ 0] = X##ba; \ 1314 output[ 1] = ~X##be; \ 1315 output[ 2] = ~X##bi; \ 1316 output[ 3] = X##bo; \ 1317 output[ 4] = X##bu; \ 1318 output[ 5] = X##ga; \ 1319 output[ 6] = X##ge; \ 1320 output[ 7] = X##gi; \ 1321 output[ 8] = ~X##go; \ 1322 output[ 9] = X##gu; \ 1323 output[10] = X##ka; \ 1324 output[11] = X##ke; \ 1325 output[12] = ~X##ki; \ 1326 output[13] = X##ko; \ 1327 output[14] = X##ku; \ 1328 output[15] = X##ma; \ 1329 if (laneCount < 24) { \ 1330 if (laneCount < 20) { \ 1331 if (laneCount < 18) { \ 1332 if (laneCount >= 17) { \ 1333 output[16] = X##me; \ 1334 } \ 1335 } \ 1336 else { \ 1337 output[16] = X##me; \ 1338 output[17] = ~X##mi; \ 1339 if (laneCount >= 19) { \ 1340 output[18] = X##mo; \ 1341 } \ 1342 } \ 1343 } \ 1344 else { \ 1345 output[16] = X##me; \ 1346 output[17] = ~X##mi; \ 1347 output[18] = X##mo; \ 1348 output[19] = X##mu; \ 1349 if (laneCount < 22) { \ 1350 if (laneCount >= 21) { \ 1351 output[20] = ~X##sa; \ 1352 } \ 1353 } \ 1354 else { \ 1355 output[20] = ~X##sa; \ 1356 output[21] = X##se; \ 1357 if (laneCount >= 23) { \ 1358 output[22] = X##si; \ 1359 } \ 1360 } \ 1361 } \ 1362 } \ 1363 else { \ 1364 output[16] = X##me; \ 1365 output[17] = ~X##mi; \ 1366 output[18] = X##mo; \ 1367 output[19] = X##mu; \ 1368 output[20] = ~X##sa; \ 1369 output[21] = X##se; \ 1370 output[22] = X##si; \ 1371 output[23] = X##so; \ 1372 if (laneCount >= 25) { \ 1373 output[24] = X##su; \ 1374 } \ 1375 } \ 1376 } 1377 1378#define wrapOne(X, input, output, index, name) \ 1379 X##name ^= input[index]; \ 1380 output[index] = X##name; 1381 1382#define wrapOneInvert(X, input, output, index, name) \ 1383 X##name ^= input[index]; \ 1384 output[index] = ~X##name; 1385 1386#define unwrapOne(X, input, output, index, name) \ 1387 output[index] = input[index] ^ X##name; \ 1388 X##name ^= output[index]; 1389 1390#define unwrapOneInvert(X, input, output, index, name) \ 1391 output[index] = ~(input[index] ^ X##name); \ 1392 X##name ^= output[index]; \ 1393 1394#else /* UseBebigokimisa */ 1395 1396 1397#define copyToStateAndOutput(X, state, output, laneCount) \ 1398 if (laneCount < 16) { \ 1399 if (laneCount < 8) { \ 1400 if (laneCount < 4) { \ 1401 if (laneCount < 2) { \ 1402 state[ 0] = X##ba; \ 1403 if (laneCount >= 1) { \ 1404 output[ 0] = X##ba; \ 1405 } \ 1406 state[ 1] = X##be; \ 1407 state[ 2] = X##bi; \ 1408 } \ 1409 else { \ 1410 state[ 0] = X##ba; \ 1411 output[ 0] = X##ba; \ 1412 state[ 1] = X##be; \ 1413 output[ 1] = X##be; \ 1414 state[ 2] = X##bi; \ 1415 if (laneCount >= 3) { \ 1416 output[ 2] = X##bi; \ 1417 } \ 1418 } \ 1419 state[ 3] = X##bo; \ 1420 state[ 4] = X##bu; \ 1421 state[ 5] = X##ga; \ 1422 state[ 6] = X##ge; \ 1423 } \ 1424 else { \ 1425 state[ 0] = X##ba; \ 1426 output[ 0] = X##ba; \ 1427 state[ 1] = X##be; \ 1428 output[ 1] = X##be; \ 1429 state[ 2] = X##bi; \ 1430 output[ 2] = X##bi; \ 1431 state[ 3] = X##bo; \ 1432 output[ 3] = X##bo; \ 1433 if (laneCount < 6) { \ 1434 state[ 4] = X##bu; \ 1435 if (laneCount >= 5) { \ 1436 output[ 4] = X##bu; \ 1437 } \ 1438 state[ 5] = X##ga; \ 1439 state[ 6] = X##ge; \ 1440 } \ 1441 else { \ 1442 state[ 4] = X##bu; \ 1443 output[ 4] = X##bu; \ 1444 state[ 5] = X##ga; \ 1445 output[ 5] = X##ga; \ 1446 state[ 6] = X##ge; \ 1447 if (laneCount >= 7) { \ 1448 output[ 6] = X##ge; \ 1449 } \ 1450 } \ 1451 } \ 1452 state[ 7] = X##gi; \ 1453 state[ 8] = X##go; \ 1454 state[ 9] = X##gu; \ 1455 state[10] = X##ka; \ 1456 state[11] = X##ke; \ 1457 state[12] = X##ki; \ 1458 state[13] = X##ko; \ 1459 state[14] = X##ku; \ 1460 } \ 1461 else { \ 1462 state[ 0] = X##ba; \ 1463 output[ 0] = X##ba; \ 1464 state[ 1] = X##be; \ 1465 output[ 1] = X##be; \ 1466 state[ 2] = X##bi; \ 1467 output[ 2] = X##bi; \ 1468 state[ 3] = X##bo; \ 1469 output[ 3] = X##bo; \ 1470 state[ 4] = X##bu; \ 1471 output[ 4] = X##bu; \ 1472 state[ 5] = X##ga; \ 1473 output[ 5] = X##ga; \ 1474 state[ 6] = X##ge; \ 1475 output[ 6] = X##ge; \ 1476 state[ 7] = X##gi; \ 1477 output[ 7] = X##gi; \ 1478 if (laneCount < 12) { \ 1479 if (laneCount < 10) { \ 1480 state[ 8] = X##go; \ 1481 if (laneCount >= 9) { \ 1482 output[ 8] = X##go; \ 1483 } \ 1484 state[ 9] = X##gu; \ 1485 state[10] = X##ka; \ 1486 } \ 1487 else { \ 1488 state[ 8] = X##go; \ 1489 output[ 8] = X##go; \ 1490 state[ 9] = X##gu; \ 1491 output[ 9] = X##gu; \ 1492 state[10] = X##ka; \ 1493 if (laneCount >= 11) { \ 1494 output[10] = X##ka; \ 1495 } \ 1496 } \ 1497 state[11] = X##ke; \ 1498 state[12] = X##ki; \ 1499 state[13] = X##ko; \ 1500 state[14] = X##ku; \ 1501 } \ 1502 else { \ 1503 state[ 8] = X##go; \ 1504 output[ 8] = X##go; \ 1505 state[ 9] = X##gu; \ 1506 output[ 9] = X##gu; \ 1507 state[10] = X##ka; \ 1508 output[10] = X##ka; \ 1509 state[11] = X##ke; \ 1510 output[11] = X##ke; \ 1511 if (laneCount < 14) { \ 1512 state[12] = X##ki; \ 1513 if (laneCount >= 13) { \ 1514 output[12]= X##ki; \ 1515 } \ 1516 state[13] = X##ko; \ 1517 state[14] = X##ku; \ 1518 } \ 1519 else { \ 1520 state[12] = X##ki; \ 1521 output[12]= X##ki; \ 1522 state[13] = X##ko; \ 1523 output[13] = X##ko; \ 1524 state[14] = X##ku; \ 1525 if (laneCount >= 15) { \ 1526 output[14] = X##ku; \ 1527 } \ 1528 } \ 1529 } \ 1530 } \ 1531 state[15] = X##ma; \ 1532 state[16] = X##me; \ 1533 state[17] = X##mi; \ 1534 state[18] = X##mo; \ 1535 state[19] = X##mu; \ 1536 state[20] = X##sa; \ 1537 state[21] = X##se; \ 1538 state[22] = X##si; \ 1539 state[23] = X##so; \ 1540 state[24] = X##su; \ 1541 } \ 1542 else { \ 1543 state[ 0] = X##ba; \ 1544 output[ 0] = X##ba; \ 1545 state[ 1] = X##be; \ 1546 output[ 1] = X##be; \ 1547 state[ 2] = X##bi; \ 1548 output[ 2] = X##bi; \ 1549 state[ 3] = X##bo; \ 1550 output[ 3] = X##bo; \ 1551 state[ 4] = X##bu; \ 1552 output[ 4] = X##bu; \ 1553 state[ 5] = X##ga; \ 1554 output[ 5] = X##ga; \ 1555 state[ 6] = X##ge; \ 1556 output[ 6] = X##ge; \ 1557 state[ 7] = X##gi; \ 1558 output[ 7] = X##gi; \ 1559 state[ 8] = X##go; \ 1560 output[ 8] = X##go; \ 1561 state[ 9] = X##gu; \ 1562 output[ 9] = X##gu; \ 1563 state[10] = X##ka; \ 1564 output[10] = X##ka; \ 1565 state[11] = X##ke; \ 1566 output[11] = X##ke; \ 1567 state[12] = X##ki; \ 1568 output[12]= X##ki; \ 1569 state[13] = X##ko; \ 1570 output[13] = X##ko; \ 1571 state[14] = X##ku; \ 1572 output[14] = X##ku; \ 1573 state[15] = X##ma; \ 1574 output[15] = X##ma; \ 1575 if (laneCount < 24) { \ 1576 if (laneCount < 20) { \ 1577 if (laneCount < 18) { \ 1578 state[16] = X##me; \ 1579 if (laneCount >= 17) { \ 1580 output[16] = X##me; \ 1581 } \ 1582 state[17] = X##mi; \ 1583 state[18] = X##mo; \ 1584 } \ 1585 else { \ 1586 state[16] = X##me; \ 1587 output[16] = X##me; \ 1588 state[17] = X##mi; \ 1589 output[17] = X##mi; \ 1590 state[18] = X##mo; \ 1591 if (laneCount >= 19) { \ 1592 output[18] = X##mo; \ 1593 } \ 1594 } \ 1595 state[19] = X##mu; \ 1596 state[20] = X##sa; \ 1597 state[21] = X##se; \ 1598 state[22] = X##si; \ 1599 } \ 1600 else { \ 1601 state[16] = X##me; \ 1602 output[16] = X##me; \ 1603 state[17] = X##mi; \ 1604 output[17] = X##mi; \ 1605 state[18] = X##mo; \ 1606 output[18] = X##mo; \ 1607 state[19] = X##mu; \ 1608 output[19] = X##mu; \ 1609 if (laneCount < 22) { \ 1610 state[20] = X##sa; \ 1611 if (laneCount >= 21) { \ 1612 output[20] = X##sa; \ 1613 } \ 1614 state[21] = X##se; \ 1615 state[22] = X##si; \ 1616 } \ 1617 else { \ 1618 state[20] = X##sa; \ 1619 output[20] = X##sa; \ 1620 state[21] = X##se; \ 1621 output[21] = X##se; \ 1622 state[22] = X##si; \ 1623 if (laneCount >= 23) { \ 1624 output[22] = X##si; \ 1625 } \ 1626 } \ 1627 } \ 1628 state[23] = X##so; \ 1629 state[24] = X##su; \ 1630 } \ 1631 else { \ 1632 state[16] = X##me; \ 1633 output[16] = X##me; \ 1634 state[17] = X##mi; \ 1635 output[17] = X##mi; \ 1636 state[18] = X##mo; \ 1637 output[18] = X##mo; \ 1638 state[19] = X##mu; \ 1639 output[19] = X##mu; \ 1640 state[20] = X##sa; \ 1641 output[20] = X##sa; \ 1642 state[21] = X##se; \ 1643 output[21] = X##se; \ 1644 state[22] = X##si; \ 1645 output[22] = X##si; \ 1646 state[23] = X##so; \ 1647 output[23] = X##so; \ 1648 state[24] = X##su; \ 1649 if (laneCount >= 25) { \ 1650 output[24] = X##su; \ 1651 } \ 1652 } \ 1653 } 1654 1655#define output(X, output, laneCount) \ 1656 if (laneCount < 16) { \ 1657 if (laneCount < 8) { \ 1658 if (laneCount < 4) { \ 1659 if (laneCount < 2) { \ 1660 if (laneCount >= 1) { \ 1661 output[ 0] = X##ba; \ 1662 } \ 1663 } \ 1664 else { \ 1665 output[ 0] = X##ba; \ 1666 output[ 1] = X##be; \ 1667 if (laneCount >= 3) { \ 1668 output[ 2] = X##bi; \ 1669 } \ 1670 } \ 1671 } \ 1672 else { \ 1673 output[ 0] = X##ba; \ 1674 output[ 1] = X##be; \ 1675 output[ 2] = X##bi; \ 1676 output[ 3] = X##bo; \ 1677 if (laneCount < 6) { \ 1678 if (laneCount >= 5) { \ 1679 output[ 4] = X##bu; \ 1680 } \ 1681 } \ 1682 else { \ 1683 output[ 4] = X##bu; \ 1684 output[ 5] = X##ga; \ 1685 if (laneCount >= 7) { \ 1686 output[ 6] = X##ge; \ 1687 } \ 1688 } \ 1689 } \ 1690 } \ 1691 else { \ 1692 output[ 0] = X##ba; \ 1693 output[ 1] = X##be; \ 1694 output[ 2] = X##bi; \ 1695 output[ 3] = X##bo; \ 1696 output[ 4] = X##bu; \ 1697 output[ 5] = X##ga; \ 1698 output[ 6] = X##ge; \ 1699 output[ 7] = X##gi; \ 1700 if (laneCount < 12) { \ 1701 if (laneCount < 10) { \ 1702 if (laneCount >= 9) { \ 1703 output[ 8] = X##go; \ 1704 } \ 1705 } \ 1706 else { \ 1707 output[ 8] = X##go; \ 1708 output[ 9] = X##gu; \ 1709 if (laneCount >= 11) { \ 1710 output[10] = X##ka; \ 1711 } \ 1712 } \ 1713 } \ 1714 else { \ 1715 output[ 8] = X##go; \ 1716 output[ 9] = X##gu; \ 1717 output[10] = X##ka; \ 1718 output[11] = X##ke; \ 1719 if (laneCount < 14) { \ 1720 if (laneCount >= 13) { \ 1721 output[12] = X##ki; \ 1722 } \ 1723 } \ 1724 else { \ 1725 output[12] = X##ki; \ 1726 output[13] = X##ko; \ 1727 if (laneCount >= 15) { \ 1728 output[14] = X##ku; \ 1729 } \ 1730 } \ 1731 } \ 1732 } \ 1733 } \ 1734 else { \ 1735 output[ 0] = X##ba; \ 1736 output[ 1] = X##be; \ 1737 output[ 2] = X##bi; \ 1738 output[ 3] = X##bo; \ 1739 output[ 4] = X##bu; \ 1740 output[ 5] = X##ga; \ 1741 output[ 6] = X##ge; \ 1742 output[ 7] = X##gi; \ 1743 output[ 8] = X##go; \ 1744 output[ 9] = X##gu; \ 1745 output[10] = X##ka; \ 1746 output[11] = X##ke; \ 1747 output[12] = X##ki; \ 1748 output[13] = X##ko; \ 1749 output[14] = X##ku; \ 1750 output[15] = X##ma; \ 1751 if (laneCount < 24) { \ 1752 if (laneCount < 20) { \ 1753 if (laneCount < 18) { \ 1754 if (laneCount >= 17) { \ 1755 output[16] = X##me; \ 1756 } \ 1757 } \ 1758 else { \ 1759 output[16] = X##me; \ 1760 output[17] = X##mi; \ 1761 if (laneCount >= 19) { \ 1762 output[18] = X##mo; \ 1763 } \ 1764 } \ 1765 } \ 1766 else { \ 1767 output[16] = X##me; \ 1768 output[17] = X##mi; \ 1769 output[18] = X##mo; \ 1770 output[19] = X##mu; \ 1771 if (laneCount < 22) { \ 1772 if (laneCount >= 21) { \ 1773 output[20] = X##sa; \ 1774 } \ 1775 } \ 1776 else { \ 1777 output[20] = X##sa; \ 1778 output[21] = X##se; \ 1779 if (laneCount >= 23) { \ 1780 output[22] = X##si; \ 1781 } \ 1782 } \ 1783 } \ 1784 } \ 1785 else { \ 1786 output[16] = X##me; \ 1787 output[17] = X##mi; \ 1788 output[18] = X##mo; \ 1789 output[19] = X##mu; \ 1790 output[20] = X##sa; \ 1791 output[21] = X##se; \ 1792 output[22] = X##si; \ 1793 output[23] = X##so; \ 1794 if (laneCount >= 25) { \ 1795 output[24] = X##su; \ 1796 } \ 1797 } \ 1798 } 1799 1800#define wrapOne(X, input, output, index, name) \ 1801 X##name ^= input[index]; \ 1802 output[index] = X##name; 1803 1804#define wrapOneInvert(X, input, output, index, name) \ 1805 X##name ^= input[index]; \ 1806 output[index] = X##name; 1807 1808#define unwrapOne(X, input, output, index, name) \ 1809 output[index] = input[index] ^ X##name; \ 1810 X##name ^= output[index]; 1811 1812#define unwrapOneInvert(X, input, output, index, name) \ 1813 output[index] = input[index] ^ X##name; \ 1814 X##name ^= output[index]; 1815 1816#endif 1817 1818#define wrap(X, input, output, laneCount, trailingBits) \ 1819 if (laneCount < 16) { \ 1820 if (laneCount < 8) { \ 1821 if (laneCount < 4) { \ 1822 if (laneCount < 2) { \ 1823 if (laneCount < 1) { \ 1824 X##ba ^= trailingBits; \ 1825 } \ 1826 else { \ 1827 wrapOne(X, input, output, 0, ba) \ 1828 X##be ^= trailingBits; \ 1829 } \ 1830 } \ 1831 else { \ 1832 wrapOne(X, input, output, 0, ba) \ 1833 wrapOneInvert(X, input, output, 1, be) \ 1834 if (laneCount < 3) { \ 1835 X##bi ^= trailingBits; \ 1836 } \ 1837 else { \ 1838 wrapOneInvert(X, input, output, 2, bi) \ 1839 X##bo ^= trailingBits; \ 1840 } \ 1841 } \ 1842 } \ 1843 else { \ 1844 wrapOne(X, input, output, 0, ba) \ 1845 wrapOneInvert(X, input, output, 1, be) \ 1846 wrapOneInvert(X, input, output, 2, bi) \ 1847 wrapOne(X, input, output, 3, bo) \ 1848 if (laneCount < 6) { \ 1849 if (laneCount < 5) { \ 1850 X##bu ^= trailingBits; \ 1851 } \ 1852 else { \ 1853 wrapOne(X, input, output, 4, bu) \ 1854 X##ga ^= trailingBits; \ 1855 } \ 1856 } \ 1857 else { \ 1858 wrapOne(X, input, output, 4, bu) \ 1859 wrapOne(X, input, output, 5, ga) \ 1860 if (laneCount < 7) { \ 1861 X##ge ^= trailingBits; \ 1862 } \ 1863 else { \ 1864 wrapOne(X, input, output, 6, ge) \ 1865 X##gi ^= trailingBits; \ 1866 } \ 1867 } \ 1868 } \ 1869 } \ 1870 else { \ 1871 wrapOne(X, input, output, 0, ba) \ 1872 wrapOneInvert(X, input, output, 1, be) \ 1873 wrapOneInvert(X, input, output, 2, bi) \ 1874 wrapOne(X, input, output, 3, bo) \ 1875 wrapOne(X, input, output, 4, bu) \ 1876 wrapOne(X, input, output, 5, ga) \ 1877 wrapOne(X, input, output, 6, ge) \ 1878 wrapOne(X, input, output, 7, gi) \ 1879 if (laneCount < 12) { \ 1880 if (laneCount < 10) { \ 1881 if (laneCount < 9) { \ 1882 X##go ^= trailingBits; \ 1883 } \ 1884 else { \ 1885 wrapOneInvert(X, input, output, 8, go) \ 1886 X##gu ^= trailingBits; \ 1887 } \ 1888 } \ 1889 else { \ 1890 wrapOneInvert(X, input, output, 8, go) \ 1891 wrapOne(X, input, output, 9, gu) \ 1892 if (laneCount < 11) { \ 1893 X##ka ^= trailingBits; \ 1894 } \ 1895 else { \ 1896 wrapOne(X, input, output, 10, ka) \ 1897 X##ke ^= trailingBits; \ 1898 } \ 1899 } \ 1900 } \ 1901 else { \ 1902 wrapOneInvert(X, input, output, 8, go) \ 1903 wrapOne(X, input, output, 9, gu) \ 1904 wrapOne(X, input, output, 10, ka) \ 1905 wrapOne(X, input, output, 11, ke) \ 1906 if (laneCount < 14) { \ 1907 if (laneCount < 13) { \ 1908 X##ki ^= trailingBits; \ 1909 } \ 1910 else { \ 1911 wrapOneInvert(X, input, output, 12, ki) \ 1912 X##ko ^= trailingBits; \ 1913 } \ 1914 } \ 1915 else { \ 1916 wrapOneInvert(X, input, output, 12, ki) \ 1917 wrapOne(X, input, output, 13, ko) \ 1918 if (laneCount < 15) { \ 1919 X##ku ^= trailingBits; \ 1920 } \ 1921 else { \ 1922 wrapOne(X, input, output, 14, ku) \ 1923 X##ma ^= trailingBits; \ 1924 } \ 1925 } \ 1926 } \ 1927 } \ 1928 } \ 1929 else { \ 1930 wrapOne(X, input, output, 0, ba) \ 1931 wrapOneInvert(X, input, output, 1, be) \ 1932 wrapOneInvert(X, input, output, 2, bi) \ 1933 wrapOne(X, input, output, 3, bo) \ 1934 wrapOne(X, input, output, 4, bu) \ 1935 wrapOne(X, input, output, 5, ga) \ 1936 wrapOne(X, input, output, 6, ge) \ 1937 wrapOne(X, input, output, 7, gi) \ 1938 wrapOneInvert(X, input, output, 8, go) \ 1939 wrapOne(X, input, output, 9, gu) \ 1940 wrapOne(X, input, output, 10, ka) \ 1941 wrapOne(X, input, output, 11, ke) \ 1942 wrapOneInvert(X, input, output, 12, ki) \ 1943 wrapOne(X, input, output, 13, ko) \ 1944 wrapOne(X, input, output, 14, ku) \ 1945 wrapOne(X, input, output, 15, ma) \ 1946 if (laneCount < 24) { \ 1947 if (laneCount < 20) { \ 1948 if (laneCount < 18) { \ 1949 if (laneCount < 17) { \ 1950 X##me ^= trailingBits; \ 1951 } \ 1952 else { \ 1953 wrapOne(X, input, output, 16, me) \ 1954 X##mi ^= trailingBits; \ 1955 } \ 1956 } \ 1957 else { \ 1958 wrapOne(X, input, output, 16, me) \ 1959 wrapOneInvert(X, input, output, 17, mi) \ 1960 if (laneCount < 19) { \ 1961 X##mo ^= trailingBits; \ 1962 } \ 1963 else { \ 1964 wrapOne(X, input, output, 18, mo) \ 1965 X##mu ^= trailingBits; \ 1966 } \ 1967 } \ 1968 } \ 1969 else { \ 1970 wrapOne(X, input, output, 16, me) \ 1971 wrapOneInvert(X, input, output, 17, mi) \ 1972 wrapOne(X, input, output, 18, mo) \ 1973 wrapOne(X, input, output, 19, mu) \ 1974 if (laneCount < 22) { \ 1975 if (laneCount < 21) { \ 1976 X##sa ^= trailingBits; \ 1977 } \ 1978 else { \ 1979 wrapOneInvert(X, input, output, 20, sa) \ 1980 X##se ^= trailingBits; \ 1981 } \ 1982 } \ 1983 else { \ 1984 wrapOneInvert(X, input, output, 20, sa) \ 1985 wrapOne(X, input, output, 21, se) \ 1986 if (laneCount < 23) { \ 1987 X##si ^= trailingBits; \ 1988 } \ 1989 else { \ 1990 wrapOne(X, input, output, 22, si) \ 1991 X##so ^= trailingBits; \ 1992 } \ 1993 } \ 1994 } \ 1995 } \ 1996 else { \ 1997 wrapOne(X, input, output, 16, me) \ 1998 wrapOneInvert(X, input, output, 17, mi) \ 1999 wrapOne(X, input, output, 18, mo) \ 2000 wrapOne(X, input, output, 19, mu) \ 2001 wrapOneInvert(X, input, output, 20, sa) \ 2002 wrapOne(X, input, output, 21, se) \ 2003 wrapOne(X, input, output, 22, si) \ 2004 wrapOne(X, input, output, 23, so) \ 2005 if (laneCount < 25) { \ 2006 X##su ^= trailingBits; \ 2007 } \ 2008 else { \ 2009 wrapOne(X, input, output, 24, su) \ 2010 } \ 2011 } \ 2012 } 2013 2014#define unwrap(X, input, output, laneCount, trailingBits) \ 2015 if (laneCount < 16) { \ 2016 if (laneCount < 8) { \ 2017 if (laneCount < 4) { \ 2018 if (laneCount < 2) { \ 2019 if (laneCount < 1) { \ 2020 X##ba ^= trailingBits; \ 2021 } \ 2022 else { \ 2023 unwrapOne(X, input, output, 0, ba) \ 2024 X##be ^= trailingBits; \ 2025 } \ 2026 } \ 2027 else { \ 2028 unwrapOne(X, input, output, 0, ba) \ 2029 unwrapOneInvert(X, input, output, 1, be) \ 2030 if (laneCount < 3) { \ 2031 X##bi ^= trailingBits; \ 2032 } \ 2033 else { \ 2034 unwrapOneInvert(X, input, output, 2, bi) \ 2035 X##bo ^= trailingBits; \ 2036 } \ 2037 } \ 2038 } \ 2039 else { \ 2040 unwrapOne(X, input, output, 0, ba) \ 2041 unwrapOneInvert(X, input, output, 1, be) \ 2042 unwrapOneInvert(X, input, output, 2, bi) \ 2043 unwrapOne(X, input, output, 3, bo) \ 2044 if (laneCount < 6) { \ 2045 if (laneCount < 5) { \ 2046 X##bu ^= trailingBits; \ 2047 } \ 2048 else { \ 2049 unwrapOne(X, input, output, 4, bu) \ 2050 X##ga ^= trailingBits; \ 2051 } \ 2052 } \ 2053 else { \ 2054 unwrapOne(X, input, output, 4, bu) \ 2055 unwrapOne(X, input, output, 5, ga) \ 2056 if (laneCount < 7) { \ 2057 X##ge ^= trailingBits; \ 2058 } \ 2059 else { \ 2060 unwrapOne(X, input, output, 6, ge) \ 2061 X##gi ^= trailingBits; \ 2062 } \ 2063 } \ 2064 } \ 2065 } \ 2066 else { \ 2067 unwrapOne(X, input, output, 0, ba) \ 2068 unwrapOneInvert(X, input, output, 1, be) \ 2069 unwrapOneInvert(X, input, output, 2, bi) \ 2070 unwrapOne(X, input, output, 3, bo) \ 2071 unwrapOne(X, input, output, 4, bu) \ 2072 unwrapOne(X, input, output, 5, ga) \ 2073 unwrapOne(X, input, output, 6, ge) \ 2074 unwrapOne(X, input, output, 7, gi) \ 2075 if (laneCount < 12) { \ 2076 if (laneCount < 10) { \ 2077 if (laneCount < 9) { \ 2078 X##go ^= trailingBits; \ 2079 } \ 2080 else { \ 2081 unwrapOneInvert(X, input, output, 8, go) \ 2082 X##gu ^= trailingBits; \ 2083 } \ 2084 } \ 2085 else { \ 2086 unwrapOneInvert(X, input, output, 8, go) \ 2087 unwrapOne(X, input, output, 9, gu) \ 2088 if (laneCount < 11) { \ 2089 X##ka ^= trailingBits; \ 2090 } \ 2091 else { \ 2092 unwrapOne(X, input, output, 10, ka) \ 2093 X##ke ^= trailingBits; \ 2094 } \ 2095 } \ 2096 } \ 2097 else { \ 2098 unwrapOneInvert(X, input, output, 8, go) \ 2099 unwrapOne(X, input, output, 9, gu) \ 2100 unwrapOne(X, input, output, 10, ka) \ 2101 unwrapOne(X, input, output, 11, ke) \ 2102 if (laneCount < 14) { \ 2103 if (laneCount < 13) { \ 2104 X##ki ^= trailingBits; \ 2105 } \ 2106 else { \ 2107 unwrapOneInvert(X, input, output, 12, ki) \ 2108 X##ko ^= trailingBits; \ 2109 } \ 2110 } \ 2111 else { \ 2112 unwrapOneInvert(X, input, output, 12, ki) \ 2113 unwrapOne(X, input, output, 13, ko) \ 2114 if (laneCount < 15) { \ 2115 X##ku ^= trailingBits; \ 2116 } \ 2117 else { \ 2118 unwrapOne(X, input, output, 14, ku) \ 2119 X##ma ^= trailingBits; \ 2120 } \ 2121 } \ 2122 } \ 2123 } \ 2124 } \ 2125 else { \ 2126 unwrapOne(X, input, output, 0, ba) \ 2127 unwrapOneInvert(X, input, output, 1, be) \ 2128 unwrapOneInvert(X, input, output, 2, bi) \ 2129 unwrapOne(X, input, output, 3, bo) \ 2130 unwrapOne(X, input, output, 4, bu) \ 2131 unwrapOne(X, input, output, 5, ga) \ 2132 unwrapOne(X, input, output, 6, ge) \ 2133 unwrapOne(X, input, output, 7, gi) \ 2134 unwrapOneInvert(X, input, output, 8, go) \ 2135 unwrapOne(X, input, output, 9, gu) \ 2136 unwrapOne(X, input, output, 10, ka) \ 2137 unwrapOne(X, input, output, 11, ke) \ 2138 unwrapOneInvert(X, input, output, 12, ki) \ 2139 unwrapOne(X, input, output, 13, ko) \ 2140 unwrapOne(X, input, output, 14, ku) \ 2141 unwrapOne(X, input, output, 15, ma) \ 2142 if (laneCount < 24) { \ 2143 if (laneCount < 20) { \ 2144 if (laneCount < 18) { \ 2145 if (laneCount < 17) { \ 2146 X##me ^= trailingBits; \ 2147 } \ 2148 else { \ 2149 unwrapOne(X, input, output, 16, me) \ 2150 X##mi ^= trailingBits; \ 2151 } \ 2152 } \ 2153 else { \ 2154 unwrapOne(X, input, output, 16, me) \ 2155 unwrapOneInvert(X, input, output, 17, mi) \ 2156 if (laneCount < 19) { \ 2157 X##mo ^= trailingBits; \ 2158 } \ 2159 else { \ 2160 unwrapOne(X, input, output, 18, mo) \ 2161 X##mu ^= trailingBits; \ 2162 } \ 2163 } \ 2164 } \ 2165 else { \ 2166 unwrapOne(X, input, output, 16, me) \ 2167 unwrapOneInvert(X, input, output, 17, mi) \ 2168 unwrapOne(X, input, output, 18, mo) \ 2169 unwrapOne(X, input, output, 19, mu) \ 2170 if (laneCount < 22) { \ 2171 if (laneCount < 21) { \ 2172 X##sa ^= trailingBits; \ 2173 } \ 2174 else { \ 2175 unwrapOneInvert(X, input, output, 20, sa) \ 2176 X##se ^= trailingBits; \ 2177 } \ 2178 } \ 2179 else { \ 2180 unwrapOneInvert(X, input, output, 20, sa) \ 2181 unwrapOne(X, input, output, 21, se) \ 2182 if (laneCount < 23) { \ 2183 X##si ^= trailingBits; \ 2184 } \ 2185 else { \ 2186 unwrapOne(X, input, output, 22, si) \ 2187 X##so ^= trailingBits; \ 2188 } \ 2189 } \ 2190 } \ 2191 } \ 2192 else { \ 2193 unwrapOne(X, input, output, 16, me) \ 2194 unwrapOneInvert(X, input, output, 17, mi) \ 2195 unwrapOne(X, input, output, 18, mo) \ 2196 unwrapOne(X, input, output, 19, mu) \ 2197 unwrapOneInvert(X, input, output, 20, sa) \ 2198 unwrapOne(X, input, output, 21, se) \ 2199 unwrapOne(X, input, output, 22, si) \ 2200 unwrapOne(X, input, output, 23, so) \ 2201 if (laneCount < 25) { \ 2202 X##su ^= trailingBits; \ 2203 } \ 2204 else { \ 2205 unwrapOne(X, input, output, 24, su) \ 2206 } \ 2207 } \ 2208 } 2209