#include "arm_arch.h"

@ Layout restored to one statement per line.  The stray source line numbers
@ that had been fused into the text are removed; no instruction, operand or
@ constant differs from the collapsed original.

.text

#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

@ ----------------------------------------------------------------------
@ iotas32 - round-constant table for the iota step.
@
@ 24 entries of 8 bytes, written as two 32-bit words per entry.  Each
@ 64-bit Keccak round constant is stored bit-interleaved: the first word
@ collects the even-numbered bits, the second word the odd-numbered bits
@ (for example 0x0000000000008082 is stored as 0x00000000, 0x00000089).
@ This matches how the round code keeps every lane as an even/odd pair
@ of 32-bit registers.
@
@ The round loop addresses the table with "adr r10,iotas32" plus the byte
@ counter kept at [sp,#444]; the counter advances by 16 (two entries) per
@ pass and is compared against 192 = 24*8.  The table sits in .text next
@ to the code so that the pc-relative adr can reach it.
@ .align 5 requests 32-byte alignment.
@ ----------------------------------------------------------------------
.type	iotas32, %object
.align	5
iotas32:
	.long	0x00000001, 0x00000000
	.long	0x00000000, 0x00000089
	.long	0x00000000, 0x8000008b
	.long	0x00000000, 0x80008080
	.long	0x00000001, 0x0000008b
	.long	0x00000001, 0x00008000
	.long	0x00000001, 0x80008088
	.long	0x00000001, 0x80000082
	.long	0x00000000, 0x0000000b
	.long	0x00000000, 0x0000000a
	.long	0x00000001, 0x00008082
	.long	0x00000000, 0x00008003
	.long	0x00000001, 0x0000808b
	.long	0x00000001, 0x8000000b
	.long	0x00000001, 0x8000008a
	.long	0x00000001, 0x80000081
	.long	0x00000000, 0x80000081
	.long	0x00000000, 0x80000008
	.long	0x00000000, 0x00000083
	.long	0x00000000, 0x80008003
	.long	0x00000001, 0x80008088
	.long	0x00000000, 0x80000088
	.long	0x00000001, 0x00008000
	.long	0x00000000, 0x80008082
.size	iotas32,.-iotas32

@ ----------------------------------------------------------------------
@ KeccakF1600_int - permutation core working on a state held on the stack.
@
@ Stack frame as used by the visible code (offsets from sp):
@   0   .. 199   state A[5][5], lane A[y][x] at 8*(5*y+x), two words each
@   200 .. 239   D[0..4] scratch written by the theta step
@   240 .. 439   second copy of the state, written by the first half of
@                each pass and read back by the second half
@   440          saved lr
@   444          byte offset into iotas32 (round counter)
@
@ On entry through KeccakF1600_int the three working pointers are set up
@ and r4-r9 are preloaded with A[4][2..4].
@ NOTE(review): KeccakF1600_enter is a second entry label; presumably its
@ callers arrive with r4-r9, r10 and r12 already set up the same way.
@ Confirm against the callers, which are outside this view.
@ ----------------------------------------------------------------------
.type	KeccakF1600_int, %function
.align	5
KeccakF1600_int:
	add	r9,sp,#176		@ r9  = &A[4][2]
	add	r12,sp,#0		@ r12 = &A[0][0]
	add	r10,sp,#40		@ r10 = &A[1][0]
	ldmia	r9,{r4-r9}		@ A[4][2..4]
KeccakF1600_enter:
	str	lr,[sp,#440]		@ keep return address in the frame
	eor	r11,r11,r11		@ r11 = 0
	str	r11,[sp,#444]		@ round counter starts at 0
	b	.Lround2x

.align	4
.Lround2x:
	@ Theta, first part: fold every row of A into the five column
	@ parities, accumulated in r0-r9 (two registers per lane).
	@ r10-r12,r14 serve as the load buffer for the next lanes.
	ldmia	r12,{r0-r3}		@ A[0][0..1]
	ldmia	r10,{r10-r12,r14}	@ A[1][0..1]
#ifdef	__thumb2__
	@ Thumb-2 path: lanes are fetched with ldrd straight off sp.
	eor	r0,r0,r10
	eor	r1,r1,r11
	eor	r2,r2,r12
	ldrd	r10,r11,[sp,#56]	@ A[1][2]
	eor	r3,r3,r14
	ldrd	r12,r14,[sp,#64]	@ A[1][3]
	eor	r4,r4,r10
	eor	r5,r5,r11
	eor	r6,r6,r12
	ldrd	r10,r11,[sp,#72]	@ A[1][4]
	eor	r7,r7,r14
	ldrd	r12,r14,[sp,#80]	@ A[2][0]
	eor	r8,r8,r10
	eor	r9,r9,r11
	eor	r0,r0,r12
	ldrd	r10,r11,[sp,#88]	@ A[2][1]
	eor	r1,r1,r14
	ldrd	r12,r14,[sp,#96]	@ A[2][2]
	eor	r2,r2,r10
	eor	r3,r3,r11
	eor	r4,r4,r12
	ldrd	r10,r11,[sp,#104]	@ A[2][3]
	eor	r5,r5,r14
	ldrd	r12,r14,[sp,#112]	@ A[2][4]
	eor	r6,r6,r10
	eor	r7,r7,r11
	eor	r8,r8,r12
	ldrd	r10,r11,[sp,#120]	@ A[3][0]
	eor	r9,r9,r14
	ldrd	r12,r14,[sp,#128]	@ A[3][1]
	eor	r0,r0,r10
	eor	r1,r1,r11
	eor	r2,r2,r12
	ldrd	r10,r11,[sp,#136]	@ A[3][2]
eor r3,r3,r14 94 ldrd r12,r14,[sp,#144] 95 eor r4,r4,r10 96 eor r5,r5,r11 97 eor r6,r6,r12 98 ldrd r10,r11,[sp,#152] 99 eor r7,r7,r14 100 ldrd r12,r14,[sp,#160] 101 eor r8,r8,r10 102 eor r9,r9,r11 103 eor r0,r0,r12 104 ldrd r10,r11,[sp,#168] 105 eor r1,r1,r14 106 ldrd r12,r14,[sp,#16] 107 eor r2,r2,r10 108 eor r3,r3,r11 109 eor r4,r4,r12 110 ldrd r10,r11,[sp,#24] 111 eor r5,r5,r14 112 ldrd r12,r14,[sp,#32] 113#else 114 eor r0,r0,r10 115 add r10,sp,#56 116 eor r1,r1,r11 117 eor r2,r2,r12 118 eor r3,r3,r14 119 ldmia r10,{r10-r12,r14} @ A[1][2..3] 120 eor r4,r4,r10 121 add r10,sp,#72 122 eor r5,r5,r11 123 eor r6,r6,r12 124 eor r7,r7,r14 125 ldmia r10,{r10-r12,r14} @ A[1][4]..A[2][0] 126 eor r8,r8,r10 127 add r10,sp,#88 128 eor r9,r9,r11 129 eor r0,r0,r12 130 eor r1,r1,r14 131 ldmia r10,{r10-r12,r14} @ A[2][1..2] 132 eor r2,r2,r10 133 add r10,sp,#104 134 eor r3,r3,r11 135 eor r4,r4,r12 136 eor r5,r5,r14 137 ldmia r10,{r10-r12,r14} @ A[2][3..4] 138 eor r6,r6,r10 139 add r10,sp,#120 140 eor r7,r7,r11 141 eor r8,r8,r12 142 eor r9,r9,r14 143 ldmia r10,{r10-r12,r14} @ A[3][0..1] 144 eor r0,r0,r10 145 add r10,sp,#136 146 eor r1,r1,r11 147 eor r2,r2,r12 148 eor r3,r3,r14 149 ldmia r10,{r10-r12,r14} @ A[3][2..3] 150 eor r4,r4,r10 151 add r10,sp,#152 152 eor r5,r5,r11 153 eor r6,r6,r12 154 eor r7,r7,r14 155 ldmia r10,{r10-r12,r14} @ A[3][4]..A[4][0] 156 eor r8,r8,r10 157 ldr r10,[sp,#168] @ A[4][1] 158 eor r9,r9,r11 159 ldr r11,[sp,#168+4] 160 eor r0,r0,r12 161 ldr r12,[sp,#16] @ A[0][2] 162 eor r1,r1,r14 163 ldr r14,[sp,#16+4] 164 eor r2,r2,r10 165 add r10,sp,#24 166 eor r3,r3,r11 167 eor r4,r4,r12 168 eor r5,r5,r14 169 ldmia r10,{r10-r12,r14} @ A[0][3..4] 170#endif 171 eor r6,r6,r10 172 eor r7,r7,r11 173 eor r8,r8,r12 174 eor r9,r9,r14 175 176 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 177#ifndef __thumb2__ 178 str r10,[sp,#208] @ D[1] = E[0] 179#endif 180 eor r11,r1,r4 181#ifndef __thumb2__ 182 str r11,[sp,#208+4] 183#else 184 strd r10,r11,[sp,#208] @ D[1] = E[0] 
185#endif 186 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 187 eor r14,r7,r0 188#ifndef __thumb2__ 189 str r12,[sp,#232] @ D[4] = E[1] 190#endif 191 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 192#ifndef __thumb2__ 193 str r14,[sp,#232+4] 194#else 195 strd r12,r14,[sp,#232] @ D[4] = E[1] 196#endif 197 eor r1,r9,r2 198#ifndef __thumb2__ 199 str r0,[sp,#200] @ D[0] = C[0] 200#endif 201 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 202#ifndef __thumb2__ 203 ldr r7,[sp,#144] 204#endif 205 eor r3,r3,r6 206#ifndef __thumb2__ 207 str r1,[sp,#200+4] 208#else 209 strd r0,r1,[sp,#200] @ D[0] = C[0] 210#endif 211#ifndef __thumb2__ 212 ldr r6,[sp,#144+4] 213#else 214 ldrd r7,r6,[sp,#144] 215#endif 216#ifndef __thumb2__ 217 str r2,[sp,#216] @ D[2] = C[1] 218#endif 219 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 220#ifndef __thumb2__ 221 str r3,[sp,#216+4] 222#else 223 strd r2,r3,[sp,#216] @ D[2] = C[1] 224#endif 225 eor r5,r5,r8 226 227#ifndef __thumb2__ 228 ldr r8,[sp,#192] 229#endif 230#ifndef __thumb2__ 231 ldr r9,[sp,#192+4] 232#else 233 ldrd r8,r9,[sp,#192] 234#endif 235#ifndef __thumb2__ 236 str r4,[sp,#224] @ D[3] = C[2] 237#endif 238 eor r7,r7,r4 239#ifndef __thumb2__ 240 str r5,[sp,#224+4] 241#else 242 strd r4,r5,[sp,#224] @ D[3] = C[2] 243#endif 244 eor r6,r6,r5 245#ifndef __thumb2__ 246 ldr r4,[sp,#0] 247#endif 248 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 249 @ mov r6,r6,ror#32-11 250#ifndef __thumb2__ 251 ldr r5,[sp,#0+4] 252#else 253 ldrd r4,r5,[sp,#0] 254#endif 255 eor r8,r8,r12 256 eor r9,r9,r14 257#ifndef __thumb2__ 258 ldr r12,[sp,#96] 259#endif 260 eor r0,r0,r4 261#ifndef __thumb2__ 262 ldr r14,[sp,#96+4] 263#else 264 ldrd r12,r14,[sp,#96] 265#endif 266 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 267 @ mov r9,r9,ror#32-7 268 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */ 269 eor r12,r12,r2 270#ifndef __thumb2__ 271 ldr 
r2,[sp,#48] 272#endif 273 eor r14,r14,r3 274#ifndef __thumb2__ 275 ldr r3,[sp,#48+4] 276#else 277 ldrd r2,r3,[sp,#48] 278#endif 279 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */ 280 ldr r12,[sp,#444] @ load counter 281 eor r2,r2,r10 282 adr r10,iotas32 283 mov r4,r14,ror#32-22 284 add r14,r10,r12 285 eor r3,r3,r11 286 ldmia r14,{r10,r11} @ iotas[i] 287 bic r12,r4,r2,ror#32-22 288 bic r14,r5,r3,ror#32-22 289 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */ 290 mov r3,r3,ror#32-22 291 eor r12,r12,r0 292 eor r14,r14,r1 293 eor r10,r10,r12 294 eor r11,r11,r14 295#ifndef __thumb2__ 296 str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 297#endif 298 bic r12,r6,r4,ror#11 299#ifndef __thumb2__ 300 str r11,[sp,#240+4] 301#else 302 strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 303#endif 304 bic r14,r7,r5,ror#10 305 bic r10,r8,r6,ror#32-(11-7) 306 bic r11,r9,r7,ror#32-(10-7) 307 eor r12,r2,r12,ror#32-11 308#ifndef __thumb2__ 309 str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 310#endif 311 eor r14,r3,r14,ror#32-10 312#ifndef __thumb2__ 313 str r14,[sp,#248+4] 314#else 315 strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 316#endif 317 eor r10,r4,r10,ror#32-7 318 eor r11,r5,r11,ror#32-7 319#ifndef __thumb2__ 320 str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 321#endif 322 bic r12,r0,r8,ror#32-7 323#ifndef __thumb2__ 324 str r11,[sp,#256+4] 325#else 326 strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 327#endif 328 bic r14,r1,r9,ror#32-7 329 eor r12,r12,r6,ror#32-11 330#ifndef __thumb2__ 331 str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 332#endif 333 eor r14,r14,r7,ror#32-10 334#ifndef __thumb2__ 335 str r14,[sp,#264+4] 336#else 337 strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 338#endif 339 bic r10,r2,r0 340 add r14,sp,#224 341#ifndef __thumb2__ 342 ldr r0,[sp,#24] @ A[0][3] 343#endif 344 bic r11,r3,r1 345#ifndef __thumb2__ 346 ldr 
r1,[sp,#24+4] 347#else 348 ldrd r0,r1,[sp,#24] @ A[0][3] 349#endif 350 eor r10,r10,r8,ror#32-7 351 eor r11,r11,r9,ror#32-7 352#ifndef __thumb2__ 353 str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 354#endif 355 add r9,sp,#200 356#ifndef __thumb2__ 357 str r11,[sp,#272+4] 358#else 359 strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 360#endif 361 362 ldmia r14,{r10-r12,r14} @ D[3..4] 363 ldmia r9,{r6-r9} @ D[0..1] 364 365#ifndef __thumb2__ 366 ldr r2,[sp,#72] @ A[1][4] 367#endif 368 eor r0,r0,r10 369#ifndef __thumb2__ 370 ldr r3,[sp,#72+4] 371#else 372 ldrd r2,r3,[sp,#72] @ A[1][4] 373#endif 374 eor r1,r1,r11 375 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 376#ifndef __thumb2__ 377 ldr r10,[sp,#128] @ A[3][1] 378#endif 379 @ mov r1,r1,ror#32-14 380#ifndef __thumb2__ 381 ldr r11,[sp,#128+4] 382#else 383 ldrd r10,r11,[sp,#128] @ A[3][1] 384#endif 385 386 eor r2,r2,r12 387#ifndef __thumb2__ 388 ldr r4,[sp,#80] @ A[2][0] 389#endif 390 eor r3,r3,r14 391#ifndef __thumb2__ 392 ldr r5,[sp,#80+4] 393#else 394 ldrd r4,r5,[sp,#80] @ A[2][0] 395#endif 396 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 397 @ mov r3,r3,ror#32-10 398 399 eor r6,r6,r4 400#ifndef __thumb2__ 401 ldr r12,[sp,#216] @ D[2] 402#endif 403 eor r7,r7,r5 404#ifndef __thumb2__ 405 ldr r14,[sp,#216+4] 406#else 407 ldrd r12,r14,[sp,#216] @ D[2] 408#endif 409 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 410 mov r4,r7,ror#32-2 411 412 eor r10,r10,r8 413#ifndef __thumb2__ 414 ldr r8,[sp,#176] @ A[4][2] 415#endif 416 eor r11,r11,r9 417#ifndef __thumb2__ 418 ldr r9,[sp,#176+4] 419#else 420 ldrd r8,r9,[sp,#176] @ A[4][2] 421#endif 422 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 423 mov r6,r11,ror#32-23 424 425 bic r10,r4,r2,ror#32-10 426 bic r11,r5,r3,ror#32-10 427 eor r12,r12,r8 428 eor r14,r14,r9 429 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 430 mov r8,r14,ror#32-31 431 eor 
r10,r10,r0,ror#32-14 432 eor r11,r11,r1,ror#32-14 433#ifndef __thumb2__ 434 str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 435#endif 436 bic r12,r6,r4 437#ifndef __thumb2__ 438 str r11,[sp,#280+4] 439#else 440 strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 441#endif 442 bic r14,r7,r5 443 eor r12,r12,r2,ror#32-10 444#ifndef __thumb2__ 445 str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 446#endif 447 eor r14,r14,r3,ror#32-10 448#ifndef __thumb2__ 449 str r14,[sp,#288+4] 450#else 451 strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 452#endif 453 bic r10,r8,r6 454 bic r11,r9,r7 455 bic r12,r0,r8,ror#14 456 bic r14,r1,r9,ror#14 457 eor r10,r10,r4 458 eor r11,r11,r5 459#ifndef __thumb2__ 460 str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 461#endif 462 bic r2,r2,r0,ror#32-(14-10) 463#ifndef __thumb2__ 464 str r11,[sp,#296+4] 465#else 466 strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 467#endif 468 eor r12,r6,r12,ror#32-14 469 bic r11,r3,r1,ror#32-(14-10) 470#ifndef __thumb2__ 471 str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 472#endif 473 eor r14,r7,r14,ror#32-14 474#ifndef __thumb2__ 475 str r14,[sp,#304+4] 476#else 477 strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 478#endif 479 add r12,sp,#208 480#ifndef __thumb2__ 481 ldr r1,[sp,#8] @ A[0][1] 482#endif 483 eor r10,r8,r2,ror#32-10 484#ifndef __thumb2__ 485 ldr r0,[sp,#8+4] 486#else 487 ldrd r1,r0,[sp,#8] @ A[0][1] 488#endif 489 eor r11,r9,r11,ror#32-10 490#ifndef __thumb2__ 491 str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 492#endif 493#ifndef __thumb2__ 494 str r11,[sp,#312+4] 495#else 496 strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 497#endif 498 499 add r9,sp,#224 500 ldmia r12,{r10-r12,r14} @ D[1..2] 501#ifndef __thumb2__ 502 ldr r2,[sp,#56] @ A[1][2] 503#endif 504#ifndef __thumb2__ 505 ldr r3,[sp,#56+4] 506#else 507 ldrd r2,r3,[sp,#56] @ A[1][2] 508#endif 509 ldmia r9,{r6-r9} @ D[3..4] 510 511 eor r1,r1,r10 512#ifndef __thumb2__ 
513 ldr r4,[sp,#104] @ A[2][3] 514#endif 515 eor r0,r0,r11 516#ifndef __thumb2__ 517 ldr r5,[sp,#104+4] 518#else 519 ldrd r4,r5,[sp,#104] @ A[2][3] 520#endif 521 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 522 523 eor r2,r2,r12 524#ifndef __thumb2__ 525 ldr r10,[sp,#152] @ A[3][4] 526#endif 527 eor r3,r3,r14 528#ifndef __thumb2__ 529 ldr r11,[sp,#152+4] 530#else 531 ldrd r10,r11,[sp,#152] @ A[3][4] 532#endif 533 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 534#ifndef __thumb2__ 535 ldr r12,[sp,#200] @ D[0] 536#endif 537 @ mov r3,r3,ror#32-3 538#ifndef __thumb2__ 539 ldr r14,[sp,#200+4] 540#else 541 ldrd r12,r14,[sp,#200] @ D[0] 542#endif 543 544 eor r4,r4,r6 545 eor r5,r5,r7 546 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 547 @ mov r4,r7,ror#32-13 @ [track reverse order below] 548 549 eor r10,r10,r8 550#ifndef __thumb2__ 551 ldr r8,[sp,#160] @ A[4][0] 552#endif 553 eor r11,r11,r9 554#ifndef __thumb2__ 555 ldr r9,[sp,#160+4] 556#else 557 ldrd r8,r9,[sp,#160] @ A[4][0] 558#endif 559 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 560 mov r7,r11,ror#32-4 561 562 eor r12,r12,r8 563 eor r14,r14,r9 564 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 565 mov r9,r14,ror#32-9 566 567 bic r10,r5,r2,ror#13-3 568 bic r11,r4,r3,ror#12-3 569 bic r12,r6,r5,ror#32-13 570 bic r14,r7,r4,ror#32-12 571 eor r10,r0,r10,ror#32-13 572 eor r11,r1,r11,ror#32-12 573#ifndef __thumb2__ 574 str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 575#endif 576 eor r12,r12,r2,ror#32-3 577#ifndef __thumb2__ 578 str r11,[sp,#320+4] 579#else 580 strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 581#endif 582 eor r14,r14,r3,ror#32-3 583#ifndef __thumb2__ 584 str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 585#endif 586 bic r10,r8,r6 587 bic r11,r9,r7 588#ifndef __thumb2__ 589 str r14,[sp,#328+4] 590#else 591 strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 592#endif 593 
eor r10,r10,r5,ror#32-13 594 eor r11,r11,r4,ror#32-12 595#ifndef __thumb2__ 596 str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 597#endif 598 bic r12,r0,r8 599#ifndef __thumb2__ 600 str r11,[sp,#336+4] 601#else 602 strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 603#endif 604 bic r14,r1,r9 605 eor r12,r12,r6 606 eor r14,r14,r7 607#ifndef __thumb2__ 608 str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 609#endif 610 bic r10,r2,r0,ror#3 611#ifndef __thumb2__ 612 str r14,[sp,#344+4] 613#else 614 strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 615#endif 616 bic r11,r3,r1,ror#3 617#ifndef __thumb2__ 618 ldr r1,[sp,#32] @ A[0][4] [in reverse order] 619#endif 620 eor r10,r8,r10,ror#32-3 621#ifndef __thumb2__ 622 ldr r0,[sp,#32+4] 623#else 624 ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order] 625#endif 626 eor r11,r9,r11,ror#32-3 627#ifndef __thumb2__ 628 str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 629#endif 630 add r9,sp,#208 631#ifndef __thumb2__ 632 str r11,[sp,#352+4] 633#else 634 strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 635#endif 636 637#ifndef __thumb2__ 638 ldr r10,[sp,#232] @ D[4] 639#endif 640#ifndef __thumb2__ 641 ldr r11,[sp,#232+4] 642#else 643 ldrd r10,r11,[sp,#232] @ D[4] 644#endif 645#ifndef __thumb2__ 646 ldr r12,[sp,#200] @ D[0] 647#endif 648#ifndef __thumb2__ 649 ldr r14,[sp,#200+4] 650#else 651 ldrd r12,r14,[sp,#200] @ D[0] 652#endif 653 654 ldmia r9,{r6-r9} @ D[1..2] 655 656 eor r1,r1,r10 657#ifndef __thumb2__ 658 ldr r2,[sp,#40] @ A[1][0] 659#endif 660 eor r0,r0,r11 661#ifndef __thumb2__ 662 ldr r3,[sp,#40+4] 663#else 664 ldrd r2,r3,[sp,#40] @ A[1][0] 665#endif 666 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 667#ifndef __thumb2__ 668 ldr r4,[sp,#88] @ A[2][1] 669#endif 670 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 671#ifndef __thumb2__ 672 ldr r5,[sp,#88+4] 673#else 674 ldrd r4,r5,[sp,#88] @ A[2][1] 675#endif 676 677 eor r2,r2,r12 678#ifndef __thumb2__ 679 ldr 
r10,[sp,#136] @ A[3][2] 680#endif 681 eor r3,r3,r14 682#ifndef __thumb2__ 683 ldr r11,[sp,#136+4] 684#else 685 ldrd r10,r11,[sp,#136] @ A[3][2] 686#endif 687 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 688#ifndef __thumb2__ 689 ldr r12,[sp,#224] @ D[3] 690#endif 691 @ mov r3,r3,ror#32-18 692#ifndef __thumb2__ 693 ldr r14,[sp,#224+4] 694#else 695 ldrd r12,r14,[sp,#224] @ D[3] 696#endif 697 698 eor r6,r6,r4 699 eor r7,r7,r5 700 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 701 mov r5,r7,ror#32-5 702 703 eor r10,r10,r8 704#ifndef __thumb2__ 705 ldr r8,[sp,#184] @ A[4][3] 706#endif 707 eor r11,r11,r9 708#ifndef __thumb2__ 709 ldr r9,[sp,#184+4] 710#else 711 ldrd r8,r9,[sp,#184] @ A[4][3] 712#endif 713 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 714 mov r6,r11,ror#32-8 715 716 eor r12,r12,r8 717 eor r14,r14,r9 718 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 719 mov r9,r14,ror#32-28 720 721 bic r10,r4,r2,ror#32-18 722 bic r11,r5,r3,ror#32-18 723 eor r10,r10,r0,ror#32-14 724 eor r11,r11,r1,ror#32-13 725#ifndef __thumb2__ 726 str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 727#endif 728 bic r12,r6,r4 729#ifndef __thumb2__ 730 str r11,[sp,#360+4] 731#else 732 strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 733#endif 734 bic r14,r7,r5 735 eor r12,r12,r2,ror#32-18 736#ifndef __thumb2__ 737 str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 738#endif 739 eor r14,r14,r3,ror#32-18 740#ifndef __thumb2__ 741 str r14,[sp,#368+4] 742#else 743 strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 744#endif 745 bic r10,r8,r6 746 bic r11,r9,r7 747 bic r12,r0,r8,ror#14 748 bic r14,r1,r9,ror#13 749 eor r10,r10,r4 750 eor r11,r11,r5 751#ifndef __thumb2__ 752 str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 753#endif 754 bic r2,r2,r0,ror#18-14 755#ifndef __thumb2__ 756 str r11,[sp,#376+4] 757#else 758 strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 759#endif 760 
eor r12,r6,r12,ror#32-14 761 bic r11,r3,r1,ror#18-13 762 eor r14,r7,r14,ror#32-13 763#ifndef __thumb2__ 764 str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 765#endif 766#ifndef __thumb2__ 767 str r14,[sp,#384+4] 768#else 769 strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 770#endif 771 add r14,sp,#216 772#ifndef __thumb2__ 773 ldr r0,[sp,#16] @ A[0][2] 774#endif 775 eor r10,r8,r2,ror#32-18 776#ifndef __thumb2__ 777 ldr r1,[sp,#16+4] 778#else 779 ldrd r0,r1,[sp,#16] @ A[0][2] 780#endif 781 eor r11,r9,r11,ror#32-18 782#ifndef __thumb2__ 783 str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 784#endif 785#ifndef __thumb2__ 786 str r11,[sp,#392+4] 787#else 788 strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 789#endif 790 791 ldmia r14,{r10-r12,r14} @ D[2..3] 792#ifndef __thumb2__ 793 ldr r2,[sp,#64] @ A[1][3] 794#endif 795#ifndef __thumb2__ 796 ldr r3,[sp,#64+4] 797#else 798 ldrd r2,r3,[sp,#64] @ A[1][3] 799#endif 800#ifndef __thumb2__ 801 ldr r6,[sp,#232] @ D[4] 802#endif 803#ifndef __thumb2__ 804 ldr r7,[sp,#232+4] 805#else 806 ldrd r6,r7,[sp,#232] @ D[4] 807#endif 808 809 eor r0,r0,r10 810#ifndef __thumb2__ 811 ldr r4,[sp,#112] @ A[2][4] 812#endif 813 eor r1,r1,r11 814#ifndef __thumb2__ 815 ldr r5,[sp,#112+4] 816#else 817 ldrd r4,r5,[sp,#112] @ A[2][4] 818#endif 819 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 820#ifndef __thumb2__ 821 ldr r8,[sp,#200] @ D[0] 822#endif 823 @ mov r1,r1,ror#32-31 824#ifndef __thumb2__ 825 ldr r9,[sp,#200+4] 826#else 827 ldrd r8,r9,[sp,#200] @ D[0] 828#endif 829 830 eor r12,r12,r2 831#ifndef __thumb2__ 832 ldr r10,[sp,#120] @ A[3][0] 833#endif 834 eor r14,r14,r3 835#ifndef __thumb2__ 836 ldr r11,[sp,#120+4] 837#else 838 ldrd r10,r11,[sp,#120] @ A[3][0] 839#endif 840 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 841#ifndef __thumb2__ 842 ldr r12,[sp,#208] @ D[1] 843#endif 844 mov r2,r14,ror#32-28 845#ifndef __thumb2__ 846 ldr r14,[sp,#208+4] 847#else 848 ldrd 
r12,r14,[sp,#208] @ D[1] 849#endif 850 851 eor r6,r6,r4 852 eor r7,r7,r5 853 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 854 mov r4,r7,ror#32-20 855 856 eor r10,r10,r8 857#ifndef __thumb2__ 858 ldr r8,[sp,#168] @ A[4][1] 859#endif 860 eor r11,r11,r9 861#ifndef __thumb2__ 862 ldr r9,[sp,#168+4] 863#else 864 ldrd r8,r9,[sp,#168] @ A[4][1] 865#endif 866 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 867 mov r6,r11,ror#32-21 868 869 eor r8,r8,r12 870 eor r9,r9,r14 871 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 872 @ mov r9,r3,ror#32-1 873 874 bic r10,r4,r2 875 bic r11,r5,r3 876 eor r10,r10,r0,ror#32-31 877#ifndef __thumb2__ 878 str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 879#endif 880 eor r11,r11,r1,ror#32-31 881#ifndef __thumb2__ 882 str r11,[sp,#400+4] 883#else 884 strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 885#endif 886 bic r12,r6,r4 887 bic r14,r7,r5 888 eor r12,r12,r2 889 eor r14,r14,r3 890#ifndef __thumb2__ 891 str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 892#endif 893 bic r10,r8,r6,ror#1 894#ifndef __thumb2__ 895 str r14,[sp,#408+4] 896#else 897 strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 898#endif 899 bic r11,r9,r7,ror#1 900 bic r12,r0,r8,ror#31-1 901 bic r14,r1,r9,ror#31-1 902 eor r4,r4,r10,ror#32-1 903#ifndef __thumb2__ 904 str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 905#endif 906 eor r5,r5,r11,ror#32-1 907#ifndef __thumb2__ 908 str r5,[sp,#416+4] 909#else 910 strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 911#endif 912 eor r6,r6,r12,ror#32-31 913 eor r7,r7,r14,ror#32-31 914#ifndef __thumb2__ 915 str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 916#endif 917 bic r10,r2,r0,ror#32-31 918#ifndef __thumb2__ 919 str r7,[sp,#424+4] 920#else 921 strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 922#endif 923 bic r11,r3,r1,ror#32-31 924 add r12,sp,#240 925 eor r8,r10,r8,ror#32-1 926 add r10,sp,#280 927 eor 
r9,r11,r9,ror#32-1 928#ifndef __thumb2__ 929 str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 930#endif 931#ifndef __thumb2__ 932 str r9,[sp,#432+4] 933#else 934 strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 935#endif 936 ldmia r12,{r0-r3} @ A[0][0..1] 937 ldmia r10,{r10-r12,r14} @ A[1][0..1] 938#ifdef __thumb2__ 939 eor r0,r0,r10 940 eor r1,r1,r11 941 eor r2,r2,r12 942 ldrd r10,r11,[sp,#296] 943 eor r3,r3,r14 944 ldrd r12,r14,[sp,#304] 945 eor r4,r4,r10 946 eor r5,r5,r11 947 eor r6,r6,r12 948 ldrd r10,r11,[sp,#312] 949 eor r7,r7,r14 950 ldrd r12,r14,[sp,#320] 951 eor r8,r8,r10 952 eor r9,r9,r11 953 eor r0,r0,r12 954 ldrd r10,r11,[sp,#328] 955 eor r1,r1,r14 956 ldrd r12,r14,[sp,#336] 957 eor r2,r2,r10 958 eor r3,r3,r11 959 eor r4,r4,r12 960 ldrd r10,r11,[sp,#344] 961 eor r5,r5,r14 962 ldrd r12,r14,[sp,#352] 963 eor r6,r6,r10 964 eor r7,r7,r11 965 eor r8,r8,r12 966 ldrd r10,r11,[sp,#360] 967 eor r9,r9,r14 968 ldrd r12,r14,[sp,#368] 969 eor r0,r0,r10 970 eor r1,r1,r11 971 eor r2,r2,r12 972 ldrd r10,r11,[sp,#376] 973 eor r3,r3,r14 974 ldrd r12,r14,[sp,#384] 975 eor r4,r4,r10 976 eor r5,r5,r11 977 eor r6,r6,r12 978 ldrd r10,r11,[sp,#392] 979 eor r7,r7,r14 980 ldrd r12,r14,[sp,#400] 981 eor r8,r8,r10 982 eor r9,r9,r11 983 eor r0,r0,r12 984 ldrd r10,r11,[sp,#408] 985 eor r1,r1,r14 986 ldrd r12,r14,[sp,#256] 987 eor r2,r2,r10 988 eor r3,r3,r11 989 eor r4,r4,r12 990 ldrd r10,r11,[sp,#264] 991 eor r5,r5,r14 992 ldrd r12,r14,[sp,#272] 993#else 994 eor r0,r0,r10 995 add r10,sp,#296 996 eor r1,r1,r11 997 eor r2,r2,r12 998 eor r3,r3,r14 999 ldmia r10,{r10-r12,r14} @ A[1][2..3] 1000 eor r4,r4,r10 1001 add r10,sp,#312 1002 eor r5,r5,r11 1003 eor r6,r6,r12 1004 eor r7,r7,r14 1005 ldmia r10,{r10-r12,r14} @ A[1][4]..A[2][0] 1006 eor r8,r8,r10 1007 add r10,sp,#328 1008 eor r9,r9,r11 1009 eor r0,r0,r12 1010 eor r1,r1,r14 1011 ldmia r10,{r10-r12,r14} @ A[2][1..2] 1012 eor r2,r2,r10 1013 add r10,sp,#344 1014 eor r3,r3,r11 1015 eor r4,r4,r12 1016 eor r5,r5,r14 1017 ldmia 
r10,{r10-r12,r14} @ A[2][3..4] 1018 eor r6,r6,r10 1019 add r10,sp,#360 1020 eor r7,r7,r11 1021 eor r8,r8,r12 1022 eor r9,r9,r14 1023 ldmia r10,{r10-r12,r14} @ A[3][0..1] 1024 eor r0,r0,r10 1025 add r10,sp,#376 1026 eor r1,r1,r11 1027 eor r2,r2,r12 1028 eor r3,r3,r14 1029 ldmia r10,{r10-r12,r14} @ A[3][2..3] 1030 eor r4,r4,r10 1031 add r10,sp,#392 1032 eor r5,r5,r11 1033 eor r6,r6,r12 1034 eor r7,r7,r14 1035 ldmia r10,{r10-r12,r14} @ A[3][4]..A[4][0] 1036 eor r8,r8,r10 1037 ldr r10,[sp,#408] @ A[4][1] 1038 eor r9,r9,r11 1039 ldr r11,[sp,#408+4] 1040 eor r0,r0,r12 1041 ldr r12,[sp,#256] @ A[0][2] 1042 eor r1,r1,r14 1043 ldr r14,[sp,#256+4] 1044 eor r2,r2,r10 1045 add r10,sp,#264 1046 eor r3,r3,r11 1047 eor r4,r4,r12 1048 eor r5,r5,r14 1049 ldmia r10,{r10-r12,r14} @ A[0][3..4] 1050#endif 1051 eor r6,r6,r10 1052 eor r7,r7,r11 1053 eor r8,r8,r12 1054 eor r9,r9,r14 1055 1056 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 1057#ifndef __thumb2__ 1058 str r10,[sp,#208] @ D[1] = E[0] 1059#endif 1060 eor r11,r1,r4 1061#ifndef __thumb2__ 1062 str r11,[sp,#208+4] 1063#else 1064 strd r10,r11,[sp,#208] @ D[1] = E[0] 1065#endif 1066 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 1067 eor r14,r7,r0 1068#ifndef __thumb2__ 1069 str r12,[sp,#232] @ D[4] = E[1] 1070#endif 1071 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 1072#ifndef __thumb2__ 1073 str r14,[sp,#232+4] 1074#else 1075 strd r12,r14,[sp,#232] @ D[4] = E[1] 1076#endif 1077 eor r1,r9,r2 1078#ifndef __thumb2__ 1079 str r0,[sp,#200] @ D[0] = C[0] 1080#endif 1081 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 1082#ifndef __thumb2__ 1083 ldr r7,[sp,#384] 1084#endif 1085 eor r3,r3,r6 1086#ifndef __thumb2__ 1087 str r1,[sp,#200+4] 1088#else 1089 strd r0,r1,[sp,#200] @ D[0] = C[0] 1090#endif 1091#ifndef __thumb2__ 1092 ldr r6,[sp,#384+4] 1093#else 1094 ldrd r7,r6,[sp,#384] 1095#endif 1096#ifndef __thumb2__ 1097 str r2,[sp,#216] @ D[2] = C[1] 1098#endif 1099 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 
1) ^ C[2]; 1100#ifndef __thumb2__ 1101 str r3,[sp,#216+4] 1102#else 1103 strd r2,r3,[sp,#216] @ D[2] = C[1] 1104#endif 1105 eor r5,r5,r8 1106 1107#ifndef __thumb2__ 1108 ldr r8,[sp,#432] 1109#endif 1110#ifndef __thumb2__ 1111 ldr r9,[sp,#432+4] 1112#else 1113 ldrd r8,r9,[sp,#432] 1114#endif 1115#ifndef __thumb2__ 1116 str r4,[sp,#224] @ D[3] = C[2] 1117#endif 1118 eor r7,r7,r4 1119#ifndef __thumb2__ 1120 str r5,[sp,#224+4] 1121#else 1122 strd r4,r5,[sp,#224] @ D[3] = C[2] 1123#endif 1124 eor r6,r6,r5 1125#ifndef __thumb2__ 1126 ldr r4,[sp,#240] 1127#endif 1128 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 1129 @ mov r6,r6,ror#32-11 1130#ifndef __thumb2__ 1131 ldr r5,[sp,#240+4] 1132#else 1133 ldrd r4,r5,[sp,#240] 1134#endif 1135 eor r8,r8,r12 1136 eor r9,r9,r14 1137#ifndef __thumb2__ 1138 ldr r12,[sp,#336] 1139#endif 1140 eor r0,r0,r4 1141#ifndef __thumb2__ 1142 ldr r14,[sp,#336+4] 1143#else 1144 ldrd r12,r14,[sp,#336] 1145#endif 1146 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 1147 @ mov r9,r9,ror#32-7 1148 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */ 1149 eor r12,r12,r2 1150#ifndef __thumb2__ 1151 ldr r2,[sp,#288] 1152#endif 1153 eor r14,r14,r3 1154#ifndef __thumb2__ 1155 ldr r3,[sp,#288+4] 1156#else 1157 ldrd r2,r3,[sp,#288] 1158#endif 1159 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */ 1160 ldr r12,[sp,#444] @ load counter 1161 eor r2,r2,r10 1162 adr r10,iotas32 1163 mov r4,r14,ror#32-22 1164 add r14,r10,r12 1165 eor r3,r3,r11 1166#ifndef __thumb2__ 1167 ldr r10,[r14,#8] @ iotas[i].lo 1168#endif 1169 add r12,r12,#16 1170#ifndef __thumb2__ 1171 ldr r11,[r14,#12] @ iotas[i].hi 1172#else 1173 ldrd r10,r11,[r14,#8] @ iotas[i].lo 1174#endif 1175 cmp r12,#192 1176 str r12,[sp,#444] @ store counter 1177 bic r12,r4,r2,ror#32-22 1178 bic r14,r5,r3,ror#32-22 1179 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */ 1180 
mov r3,r3,ror#32-22 1181 eor r12,r12,r0 1182 eor r14,r14,r1 1183 eor r10,r10,r12 1184 eor r11,r11,r14 1185#ifndef __thumb2__ 1186 str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1187#endif 1188 bic r12,r6,r4,ror#11 1189#ifndef __thumb2__ 1190 str r11,[sp,#0+4] 1191#else 1192 strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1193#endif 1194 bic r14,r7,r5,ror#10 1195 bic r10,r8,r6,ror#32-(11-7) 1196 bic r11,r9,r7,ror#32-(10-7) 1197 eor r12,r2,r12,ror#32-11 1198#ifndef __thumb2__ 1199 str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1200#endif 1201 eor r14,r3,r14,ror#32-10 1202#ifndef __thumb2__ 1203 str r14,[sp,#8+4] 1204#else 1205 strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1206#endif 1207 eor r10,r4,r10,ror#32-7 1208 eor r11,r5,r11,ror#32-7 1209#ifndef __thumb2__ 1210 str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1211#endif 1212 bic r12,r0,r8,ror#32-7 1213#ifndef __thumb2__ 1214 str r11,[sp,#16+4] 1215#else 1216 strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1217#endif 1218 bic r14,r1,r9,ror#32-7 1219 eor r12,r12,r6,ror#32-11 1220#ifndef __thumb2__ 1221 str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1222#endif 1223 eor r14,r14,r7,ror#32-10 1224#ifndef __thumb2__ 1225 str r14,[sp,#24+4] 1226#else 1227 strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1228#endif 1229 bic r10,r2,r0 1230 add r14,sp,#224 1231#ifndef __thumb2__ 1232 ldr r0,[sp,#264] @ A[0][3] 1233#endif 1234 bic r11,r3,r1 1235#ifndef __thumb2__ 1236 ldr r1,[sp,#264+4] 1237#else 1238 ldrd r0,r1,[sp,#264] @ A[0][3] 1239#endif 1240 eor r10,r10,r8,ror#32-7 1241 eor r11,r11,r9,ror#32-7 1242#ifndef __thumb2__ 1243 str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1244#endif 1245 add r9,sp,#200 1246#ifndef __thumb2__ 1247 str r11,[sp,#32+4] 1248#else 1249 strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1250#endif 1251 1252 ldmia r14,{r10-r12,r14} @ D[3..4] 1253 ldmia r9,{r6-r9} @ D[0..1] 1254 1255#ifndef __thumb2__ 1256 ldr 
r2,[sp,#312] @ A[1][4] 1257#endif 1258 eor r0,r0,r10 1259#ifndef __thumb2__ 1260 ldr r3,[sp,#312+4] 1261#else 1262 ldrd r2,r3,[sp,#312] @ A[1][4] 1263#endif 1264 eor r1,r1,r11 1265 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 1266#ifndef __thumb2__ 1267 ldr r10,[sp,#368] @ A[3][1] 1268#endif 1269 @ mov r1,r1,ror#32-14 1270#ifndef __thumb2__ 1271 ldr r11,[sp,#368+4] 1272#else 1273 ldrd r10,r11,[sp,#368] @ A[3][1] 1274#endif 1275 1276 eor r2,r2,r12 1277#ifndef __thumb2__ 1278 ldr r4,[sp,#320] @ A[2][0] 1279#endif 1280 eor r3,r3,r14 1281#ifndef __thumb2__ 1282 ldr r5,[sp,#320+4] 1283#else 1284 ldrd r4,r5,[sp,#320] @ A[2][0] 1285#endif 1286 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 1287 @ mov r3,r3,ror#32-10 1288 1289 eor r6,r6,r4 1290#ifndef __thumb2__ 1291 ldr r12,[sp,#216] @ D[2] 1292#endif 1293 eor r7,r7,r5 1294#ifndef __thumb2__ 1295 ldr r14,[sp,#216+4] 1296#else 1297 ldrd r12,r14,[sp,#216] @ D[2] 1298#endif 1299 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 1300 mov r4,r7,ror#32-2 1301 1302 eor r10,r10,r8 1303#ifndef __thumb2__ 1304 ldr r8,[sp,#416] @ A[4][2] 1305#endif 1306 eor r11,r11,r9 1307#ifndef __thumb2__ 1308 ldr r9,[sp,#416+4] 1309#else 1310 ldrd r8,r9,[sp,#416] @ A[4][2] 1311#endif 1312 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 1313 mov r6,r11,ror#32-23 1314 1315 bic r10,r4,r2,ror#32-10 1316 bic r11,r5,r3,ror#32-10 1317 eor r12,r12,r8 1318 eor r14,r14,r9 1319 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 1320 mov r8,r14,ror#32-31 1321 eor r10,r10,r0,ror#32-14 1322 eor r11,r11,r1,ror#32-14 1323#ifndef __thumb2__ 1324 str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1325#endif 1326 bic r12,r6,r4 1327#ifndef __thumb2__ 1328 str r11,[sp,#40+4] 1329#else 1330 strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1331#endif 1332 bic r14,r7,r5 1333 eor r12,r12,r2,ror#32-10 1334#ifndef __thumb2__ 1335 str r12,[sp,#48] @ R[1][1] = C[1] 
^ (~C[2] & C[3]); 1336#endif 1337 eor r14,r14,r3,ror#32-10 1338#ifndef __thumb2__ 1339 str r14,[sp,#48+4] 1340#else 1341 strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1342#endif 1343 bic r10,r8,r6 1344 bic r11,r9,r7 1345 bic r12,r0,r8,ror#14 1346 bic r14,r1,r9,ror#14 1347 eor r10,r10,r4 1348 eor r11,r11,r5 1349#ifndef __thumb2__ 1350 str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1351#endif 1352 bic r2,r2,r0,ror#32-(14-10) 1353#ifndef __thumb2__ 1354 str r11,[sp,#56+4] 1355#else 1356 strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1357#endif 1358 eor r12,r6,r12,ror#32-14 1359 bic r11,r3,r1,ror#32-(14-10) 1360#ifndef __thumb2__ 1361 str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1362#endif 1363 eor r14,r7,r14,ror#32-14 1364#ifndef __thumb2__ 1365 str r14,[sp,#64+4] 1366#else 1367 strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1368#endif 1369 add r12,sp,#208 1370#ifndef __thumb2__ 1371 ldr r1,[sp,#248] @ A[0][1] 1372#endif 1373 eor r10,r8,r2,ror#32-10 1374#ifndef __thumb2__ 1375 ldr r0,[sp,#248+4] 1376#else 1377 ldrd r1,r0,[sp,#248] @ A[0][1] 1378#endif 1379 eor r11,r9,r11,ror#32-10 1380#ifndef __thumb2__ 1381 str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1382#endif 1383#ifndef __thumb2__ 1384 str r11,[sp,#72+4] 1385#else 1386 strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1387#endif 1388 1389 add r9,sp,#224 1390 ldmia r12,{r10-r12,r14} @ D[1..2] 1391#ifndef __thumb2__ 1392 ldr r2,[sp,#296] @ A[1][2] 1393#endif 1394#ifndef __thumb2__ 1395 ldr r3,[sp,#296+4] 1396#else 1397 ldrd r2,r3,[sp,#296] @ A[1][2] 1398#endif 1399 ldmia r9,{r6-r9} @ D[3..4] 1400 1401 eor r1,r1,r10 1402#ifndef __thumb2__ 1403 ldr r4,[sp,#344] @ A[2][3] 1404#endif 1405 eor r0,r0,r11 1406#ifndef __thumb2__ 1407 ldr r5,[sp,#344+4] 1408#else 1409 ldrd r4,r5,[sp,#344] @ A[2][3] 1410#endif 1411 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 1412 1413 eor r2,r2,r12 1414#ifndef __thumb2__ 1415 ldr r10,[sp,#392] @ A[3][4] 
1416#endif 1417 eor r3,r3,r14 1418#ifndef __thumb2__ 1419 ldr r11,[sp,#392+4] 1420#else 1421 ldrd r10,r11,[sp,#392] @ A[3][4] 1422#endif 1423 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 1424#ifndef __thumb2__ 1425 ldr r12,[sp,#200] @ D[0] 1426#endif 1427 @ mov r3,r3,ror#32-3 1428#ifndef __thumb2__ 1429 ldr r14,[sp,#200+4] 1430#else 1431 ldrd r12,r14,[sp,#200] @ D[0] 1432#endif 1433 1434 eor r4,r4,r6 1435 eor r5,r5,r7 1436 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 1437 @ mov r4,r7,ror#32-13 @ [track reverse order below] 1438 1439 eor r10,r10,r8 1440#ifndef __thumb2__ 1441 ldr r8,[sp,#400] @ A[4][0] 1442#endif 1443 eor r11,r11,r9 1444#ifndef __thumb2__ 1445 ldr r9,[sp,#400+4] 1446#else 1447 ldrd r8,r9,[sp,#400] @ A[4][0] 1448#endif 1449 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 1450 mov r7,r11,ror#32-4 1451 1452 eor r12,r12,r8 1453 eor r14,r14,r9 1454 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 1455 mov r9,r14,ror#32-9 1456 1457 bic r10,r5,r2,ror#13-3 1458 bic r11,r4,r3,ror#12-3 1459 bic r12,r6,r5,ror#32-13 1460 bic r14,r7,r4,ror#32-12 1461 eor r10,r0,r10,ror#32-13 1462 eor r11,r1,r11,ror#32-12 1463#ifndef __thumb2__ 1464 str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1465#endif 1466 eor r12,r12,r2,ror#32-3 1467#ifndef __thumb2__ 1468 str r11,[sp,#80+4] 1469#else 1470 strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1471#endif 1472 eor r14,r14,r3,ror#32-3 1473#ifndef __thumb2__ 1474 str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1475#endif 1476 bic r10,r8,r6 1477 bic r11,r9,r7 1478#ifndef __thumb2__ 1479 str r14,[sp,#88+4] 1480#else 1481 strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1482#endif 1483 eor r10,r10,r5,ror#32-13 1484 eor r11,r11,r4,ror#32-12 1485#ifndef __thumb2__ 1486 str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1487#endif 1488 bic r12,r0,r8 1489#ifndef __thumb2__ 1490 str r11,[sp,#96+4] 1491#else 1492 strd 
r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1493#endif 1494 bic r14,r1,r9 1495 eor r12,r12,r6 1496 eor r14,r14,r7 1497#ifndef __thumb2__ 1498 str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1499#endif 1500 bic r10,r2,r0,ror#3 1501#ifndef __thumb2__ 1502 str r14,[sp,#104+4] 1503#else 1504 strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1505#endif 1506 bic r11,r3,r1,ror#3 1507#ifndef __thumb2__ 1508 ldr r1,[sp,#272] @ A[0][4] [in reverse order] 1509#endif 1510 eor r10,r8,r10,ror#32-3 1511#ifndef __thumb2__ 1512 ldr r0,[sp,#272+4] 1513#else 1514 ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order] 1515#endif 1516 eor r11,r9,r11,ror#32-3 1517#ifndef __thumb2__ 1518 str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1519#endif 1520 add r9,sp,#208 1521#ifndef __thumb2__ 1522 str r11,[sp,#112+4] 1523#else 1524 strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1525#endif 1526 1527#ifndef __thumb2__ 1528 ldr r10,[sp,#232] @ D[4] 1529#endif 1530#ifndef __thumb2__ 1531 ldr r11,[sp,#232+4] 1532#else 1533 ldrd r10,r11,[sp,#232] @ D[4] 1534#endif 1535#ifndef __thumb2__ 1536 ldr r12,[sp,#200] @ D[0] 1537#endif 1538#ifndef __thumb2__ 1539 ldr r14,[sp,#200+4] 1540#else 1541 ldrd r12,r14,[sp,#200] @ D[0] 1542#endif 1543 1544 ldmia r9,{r6-r9} @ D[1..2] 1545 1546 eor r1,r1,r10 1547#ifndef __thumb2__ 1548 ldr r2,[sp,#280] @ A[1][0] 1549#endif 1550 eor r0,r0,r11 1551#ifndef __thumb2__ 1552 ldr r3,[sp,#280+4] 1553#else 1554 ldrd r2,r3,[sp,#280] @ A[1][0] 1555#endif 1556 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 1557#ifndef __thumb2__ 1558 ldr r4,[sp,#328] @ A[2][1] 1559#endif 1560 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 1561#ifndef __thumb2__ 1562 ldr r5,[sp,#328+4] 1563#else 1564 ldrd r4,r5,[sp,#328] @ A[2][1] 1565#endif 1566 1567 eor r2,r2,r12 1568#ifndef __thumb2__ 1569 ldr r10,[sp,#376] @ A[3][2] 1570#endif 1571 eor r3,r3,r14 1572#ifndef __thumb2__ 1573 ldr r11,[sp,#376+4] 1574#else 1575 ldrd r10,r11,[sp,#376] 
@ A[3][2] 1576#endif 1577 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 1578#ifndef __thumb2__ 1579 ldr r12,[sp,#224] @ D[3] 1580#endif 1581 @ mov r3,r3,ror#32-18 1582#ifndef __thumb2__ 1583 ldr r14,[sp,#224+4] 1584#else 1585 ldrd r12,r14,[sp,#224] @ D[3] 1586#endif 1587 1588 eor r6,r6,r4 1589 eor r7,r7,r5 1590 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 1591 mov r5,r7,ror#32-5 1592 1593 eor r10,r10,r8 1594#ifndef __thumb2__ 1595 ldr r8,[sp,#424] @ A[4][3] 1596#endif 1597 eor r11,r11,r9 1598#ifndef __thumb2__ 1599 ldr r9,[sp,#424+4] 1600#else 1601 ldrd r8,r9,[sp,#424] @ A[4][3] 1602#endif 1603 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 1604 mov r6,r11,ror#32-8 1605 1606 eor r12,r12,r8 1607 eor r14,r14,r9 1608 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 1609 mov r9,r14,ror#32-28 1610 1611 bic r10,r4,r2,ror#32-18 1612 bic r11,r5,r3,ror#32-18 1613 eor r10,r10,r0,ror#32-14 1614 eor r11,r11,r1,ror#32-13 1615#ifndef __thumb2__ 1616 str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1617#endif 1618 bic r12,r6,r4 1619#ifndef __thumb2__ 1620 str r11,[sp,#120+4] 1621#else 1622 strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1623#endif 1624 bic r14,r7,r5 1625 eor r12,r12,r2,ror#32-18 1626#ifndef __thumb2__ 1627 str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1628#endif 1629 eor r14,r14,r3,ror#32-18 1630#ifndef __thumb2__ 1631 str r14,[sp,#128+4] 1632#else 1633 strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1634#endif 1635 bic r10,r8,r6 1636 bic r11,r9,r7 1637 bic r12,r0,r8,ror#14 1638 bic r14,r1,r9,ror#13 1639 eor r10,r10,r4 1640 eor r11,r11,r5 1641#ifndef __thumb2__ 1642 str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1643#endif 1644 bic r2,r2,r0,ror#18-14 1645#ifndef __thumb2__ 1646 str r11,[sp,#136+4] 1647#else 1648 strd r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1649#endif 1650 eor r12,r6,r12,ror#32-14 1651 bic r11,r3,r1,ror#18-13 1652 
eor r14,r7,r14,ror#32-13 1653#ifndef __thumb2__ 1654 str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1655#endif 1656#ifndef __thumb2__ 1657 str r14,[sp,#144+4] 1658#else 1659 strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1660#endif 1661 add r14,sp,#216 1662#ifndef __thumb2__ 1663 ldr r0,[sp,#256] @ A[0][2] 1664#endif 1665 eor r10,r8,r2,ror#32-18 1666#ifndef __thumb2__ 1667 ldr r1,[sp,#256+4] 1668#else 1669 ldrd r0,r1,[sp,#256] @ A[0][2] 1670#endif 1671 eor r11,r9,r11,ror#32-18 1672#ifndef __thumb2__ 1673 str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1674#endif 1675#ifndef __thumb2__ 1676 str r11,[sp,#152+4] 1677#else 1678 strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1679#endif 1680 1681 ldmia r14,{r10-r12,r14} @ D[2..3] 1682#ifndef __thumb2__ 1683 ldr r2,[sp,#304] @ A[1][3] 1684#endif 1685#ifndef __thumb2__ 1686 ldr r3,[sp,#304+4] 1687#else 1688 ldrd r2,r3,[sp,#304] @ A[1][3] 1689#endif 1690#ifndef __thumb2__ 1691 ldr r6,[sp,#232] @ D[4] 1692#endif 1693#ifndef __thumb2__ 1694 ldr r7,[sp,#232+4] 1695#else 1696 ldrd r6,r7,[sp,#232] @ D[4] 1697#endif 1698 1699 eor r0,r0,r10 1700#ifndef __thumb2__ 1701 ldr r4,[sp,#352] @ A[2][4] 1702#endif 1703 eor r1,r1,r11 1704#ifndef __thumb2__ 1705 ldr r5,[sp,#352+4] 1706#else 1707 ldrd r4,r5,[sp,#352] @ A[2][4] 1708#endif 1709 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 1710#ifndef __thumb2__ 1711 ldr r8,[sp,#200] @ D[0] 1712#endif 1713 @ mov r1,r1,ror#32-31 1714#ifndef __thumb2__ 1715 ldr r9,[sp,#200+4] 1716#else 1717 ldrd r8,r9,[sp,#200] @ D[0] 1718#endif 1719 1720 eor r12,r12,r2 1721#ifndef __thumb2__ 1722 ldr r10,[sp,#360] @ A[3][0] 1723#endif 1724 eor r14,r14,r3 1725#ifndef __thumb2__ 1726 ldr r11,[sp,#360+4] 1727#else 1728 ldrd r10,r11,[sp,#360] @ A[3][0] 1729#endif 1730 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 1731#ifndef __thumb2__ 1732 ldr r12,[sp,#208] @ D[1] 1733#endif 1734 mov r2,r14,ror#32-28 1735#ifndef __thumb2__ 1736 ldr 
r14,[sp,#208+4] 1737#else 1738 ldrd r12,r14,[sp,#208] @ D[1] 1739#endif 1740 1741 eor r6,r6,r4 1742 eor r7,r7,r5 1743 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 1744 mov r4,r7,ror#32-20 1745 1746 eor r10,r10,r8 1747#ifndef __thumb2__ 1748 ldr r8,[sp,#408] @ A[4][1] 1749#endif 1750 eor r11,r11,r9 1751#ifndef __thumb2__ 1752 ldr r9,[sp,#408+4] 1753#else 1754 ldrd r8,r9,[sp,#408] @ A[4][1] 1755#endif 1756 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 1757 mov r6,r11,ror#32-21 1758 1759 eor r8,r8,r12 1760 eor r9,r9,r14 1761 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 1762 @ mov r9,r3,ror#32-1 1763 1764 bic r10,r4,r2 1765 bic r11,r5,r3 1766 eor r10,r10,r0,ror#32-31 1767#ifndef __thumb2__ 1768 str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1769#endif 1770 eor r11,r11,r1,ror#32-31 1771#ifndef __thumb2__ 1772 str r11,[sp,#160+4] 1773#else 1774 strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1775#endif 1776 bic r12,r6,r4 1777 bic r14,r7,r5 1778 eor r12,r12,r2 1779 eor r14,r14,r3 1780#ifndef __thumb2__ 1781 str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1782#endif 1783 bic r10,r8,r6,ror#1 1784#ifndef __thumb2__ 1785 str r14,[sp,#168+4] 1786#else 1787 strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1788#endif 1789 bic r11,r9,r7,ror#1 1790 bic r12,r0,r8,ror#31-1 1791 bic r14,r1,r9,ror#31-1 1792 eor r4,r4,r10,ror#32-1 1793#ifndef __thumb2__ 1794 str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1795#endif 1796 eor r5,r5,r11,ror#32-1 1797#ifndef __thumb2__ 1798 str r5,[sp,#176+4] 1799#else 1800 strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1801#endif 1802 eor r6,r6,r12,ror#32-31 1803 eor r7,r7,r14,ror#32-31 1804#ifndef __thumb2__ 1805 str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 1806#endif 1807 bic r10,r2,r0,ror#32-31 1808#ifndef __thumb2__ 1809 str r7,[sp,#184+4] 1810#else 1811 strd r6,r7,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 1812#endif 1813 bic 
r11,r3,r1,ror#32-31
	add	r12,sp,#0
	eor	r8,r10,r8,ror#32-1
	add	r10,sp,#40
	eor	r9,r11,r9,ror#32-1
#ifndef __thumb2__
	str	r8,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
#ifndef __thumb2__
	str	r9,[sp,#192+4]
#else
	strd	r8,r9,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
#endif
	blo	.Lround2x

	ldr	pc,[sp,#440]
.size	KeccakF1600_int,.-KeccakF1600_int

@ ----------------------------------------------------------------------
@ KeccakF1600: permute a state that lives in caller memory.
@ In:	r0 = pointer to A[5][5] (copied below as 5 groups of 10 words,
@	     i.e. 200 bytes)
@ Out:	the memory at the original r0 holds the permuted state
@ r4-r11 are saved and restored; r0-r3, r12 and the flags are not.
@ Frame after the sub: state copy at sp+0..199, the remainder of the
@ 440+16 bytes is work space for KeccakF1600_enter (which keeps its
@ return address at sp+440 and a counter at sp+444), and the pushed r0
@ is found at sp+440+16.
@ ----------------------------------------------------------------------
.type	KeccakF1600, %function
.align	5
KeccakF1600:
	stmdb	sp!,{r0,r4-r11,lr}
	sub	sp,sp,#440+16		@ space for A[5][5],D[5],T[5][5],...

	add	r10,r0,#40
	add	r11,sp,#40
	ldmia	r0, {r0-r9}		@ copy A[5][5] to stack
	stmia	sp, {r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10, {r0-r9}		@ last row; r4-r9 keep A[4][2..4]
	add	r12,sp,#0		@ same r12/r10 values that
	add	r10,sp,#40		@ KeccakF1600_int sets up for itself
	stmia	r11, {r0-r9}

	bl	KeccakF1600_enter	@ entry that skips the register setup

	ldr	r11, [sp,#440+16]	@ restore pointer to A
	ldmia	sp, {r0-r9}
	stmia	r11!,{r0-r9}		@ return A[5][5]
	ldmia	r10!,{r0-r9}		@ r10 is sp+40 again on return
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!,{r0-r9}
	ldmia	r10, {r0-r9}
	stmia	r11, {r0-r9}

	add	sp,sp,#440+20		@ drop frame and the saved r0
	ldmia	sp!,{r4-r11,pc}
.size	KeccakF1600,.-KeccakF1600

@ ----------------------------------------------------------------------
@ SHA3_absorb: XOR whole bsz-byte blocks of input into the state (in
@ bit-interleaved form) and run KeccakF1600_int after each block.
@ In:	r0 = pointer to A[5][5], r1 = inp, r2 = len, r3 = bsz
@ Out:	r0 = number of input bytes left over (len itself when
@	     len < bsz, in which case the state is not touched)
@ NOTE(review): the block loop consumes 8 bytes per pass, so bsz is
@ presumably a non-zero multiple of 8 - confirm against the callers.
@ Frame after the sub (456+16 bytes below 14 pushed registers):
@	sp+0..199	working copy of the state
@	sp+440,444	used by KeccakF1600_int (return address, counter)
@	sp+456..468	masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@	sp+472		pushed r0 (pointer to A)
@	sp+476		pushed r1, rewritten with the current inp
@	sp+480		pushed r2, rewritten with len - bsz
@	sp+484		pushed r3 (bsz)
@ ----------------------------------------------------------------------
.global	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	stmdb	sp!,{r0-r12,lr}
	sub	sp,sp,#456+16

	add	r10,r0,#40
	@ mov	r11,r1
	mov	r12,r2
	mov	r14,r3
	cmp	r2,r3
	blo	.Labsorb_abort		@ less than one block: return len

	add	r11,sp,#0
	ldmia	r0, {r0-r9}		@ copy A[5][5] to stack
	stmia	r11!, {r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!, {r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!, {r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11!, {r0-r9}
	ldmia	r10!,{r0-r9}
	stmia	r11, {r0-r9}

	ldr	r11,[sp,#476]		@ restore r11
#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	str	r9,[sp,#468]
	str	r8,[sp,#464]
	str	r7,[sp,#460]
	str	r6,[sp,#456]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	r0,r12,r14
	blo	.Labsorbed
	add	r10,sp,#0
	str	r0,[sp,#480]		@ save len - bsz

.align	4
.Loop_block:
	ldrb	r0,[r11],#1
	ldrb	r1,[r11],#1
	ldrb	r2,[r11],#1
	ldrb	r3,[r11],#1
	ldrb	r4,[r11],#1
	orr	r0,r0,r1,lsl#8
	ldrb	r1,[r11],#1
	orr	r0,r0,r2,lsl#16
	ldrb	r2,[r11],#1
	orr	r0,r0,r3,lsl#24		@ lo
	ldrb	r3,[r11],#1
	orr	r1,r4,r1,lsl#8
	orr	r1,r1,r2,lsl#16
	orr	r1,r1,r3,lsl#24		@ hi

	and	r2,r0,r6		@ &=0x55555555
	and	r0,r0,r6,lsl#1		@ &=0xaaaaaaaa
	and	r3,r1,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa
	orr	r2,r2,r2,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r1,r1,r1,lsl#1
	and	r2,r2,r7		@ &=0x33333333
	and	r0,r0,r7,lsl#2		@ &=0xcccccccc
	and	r3,r3,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r1,r1,r1,lsl#2
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r0,r0,r8,lsl#4		@ &=0xf0f0f0f0
	and	r3,r3,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	ldmia	r10,{r4-r5}		@ A_flat[i]
	orr	r2,r2,r2,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r1,r1,r1,lsl#4
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r0,r0,r9,lsl#8		@ &=0xff00ff00
	and	r3,r3,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r1,r1,r1,lsl#8

	mov	r2,r2,lsl#16
	mov	r1,r1,lsr#16
	eor	r4,r4,r3,lsl#16
	eor	r5,r5,r0,lsr#16
	eor	r4,r4,r2,lsr#16
	eor	r5,r5,r1,lsl#16
	stmia	r10!,{r4-r5}		@ A_flat[i++] ^= BitInterleave(inp[0..7])

	subs	r14,r14,#8
	bhi	.Loop_block

	str	r11,[sp,#476]		@ remember how far inp got

	bl	KeccakF1600_int

	add	r14,sp,#456
	ldmia	r14,{r6-r12,r14}	@ restore constants and variables
	b	.Loop_absorb

.align	4
.Labsorbed:
	@ r10 was reloaded from sp+472 above, i.e. it is the pointer to A
	add	r11,sp,#40
	ldmia	sp, {r0-r9}
	stmia	r10!,{r0-r9}		@ return A[5][5]
	ldmia	r11!, {r0-r9}
	stmia	r10!,{r0-r9}
	ldmia	r11!, {r0-r9}
	stmia	r10!,{r0-r9}
	ldmia	r11!, {r0-r9}
	stmia	r10!,{r0-r9}
	ldmia	r11, {r0-r9}
	stmia	r10, {r0-r9}

.Labsorb_abort:
	add	sp,sp,#456+32		@ drop frame and the pushed r0-r3
	mov	r0,r12			@ return value
	ldmia	sp!,{r4-r12,pc}
.size	SHA3_absorb,.-SHA3_absorb

@ ----------------------------------------------------------------------
@ SHA3_squeeze: undo the bit interleaving lane by lane and store len
@ bytes to out, calling KeccakF1600 each time bsz bytes were produced
@ and more output is still wanted.
@ In:	r0 = pointer to A[5][5], r1 = out, r2 = len, r3 = bsz
@ Out:	nothing is returned in a register
@ len == 0 returns at once without storing anything. Without that
@ check the tail code would store a byte before testing the count.
@ Stack while looping: sp+0..12 masks (r6-r9), sp+16 pushed r0,
@ sp+20 pushed r3, then the saved r4-r10 and lr.
@ ----------------------------------------------------------------------
.global	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	stmdb	sp!,{r0,r3-r10,lr}

	mov	r10,r0
	mov	r4,r1
	mov	r5,r2
	mov	r12,r3

#ifdef __thumb2__
	mov	r9,#0x00ff00ff
	mov	r8,#0x0f0f0f0f
	mov	r7,#0x33333333
	mov	r6,#0x55555555
#else
	mov	r6,#0x11		@ compose constants
	mov	r8,#0x0f
	mov	r9,#0xff
	orr	r6,r6,r6,lsl#8
	orr	r8,r8,r8,lsl#8
	orr	r6,r6,r6,lsl#16		@ 0x11111111
	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
	orr	r7,r6,r6,lsl#1		@ 0x33333333
	orr	r6,r6,r6,lsl#2		@ 0x55555555
#endif
	stmdb	sp!,{r6-r9}

	cmp	r5,#0			@ zero-length request:
	beq	.Lsqueeze_done		@ leave through the common epilogue
	mov	r14,r10
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldmia	r10!,{r0,r1}		@ A_flat[i++]

	mov	r2,r0,lsl#16
	mov	r3,r1,lsl#16		@ r3 = r1 << 16
	mov	r2,r2,lsr#16		@ r2 = r0 & 0x0000ffff
	mov	r1,r1,lsr#16
	mov	r0,r0,lsr#16		@ r0 = r0 >> 16
	mov	r1,r1,lsl#16		@ r1 = r1 & 0xffff0000

	orr	r2,r2,r2,lsl#8
	orr	r3,r3,r3,lsr#8
	orr	r0,r0,r0,lsl#8
	orr	r1,r1,r1,lsr#8
	and	r2,r2,r9		@ &=0x00ff00ff
	and	r3,r3,r9,lsl#8		@ &=0xff00ff00
	and	r0,r0,r9		@ &=0x00ff00ff
	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
	orr	r2,r2,r2,lsl#4
	orr	r3,r3,r3,lsr#4
	orr	r0,r0,r0,lsl#4
	orr	r1,r1,r1,lsr#4
	and	r2,r2,r8		@ &=0x0f0f0f0f
	and	r3,r3,r8,lsl#4		@ &=0xf0f0f0f0
	and	r0,r0,r8		@ &=0x0f0f0f0f
	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	orr	r2,r2,r2,lsl#2
	orr	r3,r3,r3,lsr#2
	orr	r0,r0,r0,lsl#2
	orr	r1,r1,r1,lsr#2
	and	r2,r2,r7		@ &=0x33333333
	and	r3,r3,r7,lsl#2		@ &=0xcccccccc
	and	r0,r0,r7		@ &=0x33333333
	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
	orr	r2,r2,r2,lsl#1
	orr	r3,r3,r3,lsr#1
	orr	r0,r0,r0,lsl#1
	orr	r1,r1,r1,lsr#1
	and	r2,r2,r6		@ &=0x55555555
	and	r3,r3,r6,lsl#1		@ &=0xaaaaaaaa
	and	r0,r0,r6		@ &=0x55555555
	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa

	orr	r2,r2,r3
	orr	r0,r0,r1

	cmp	r5,#8
	blo	.Lsqueeze_tail
	mov	r1,r2,lsr#8
	strb	r2,[r4],#1
	mov	r3,r2,lsr#16
	strb	r1,[r4],#1
	mov	r2,r2,lsr#24
	strb	r3,[r4],#1
	strb	r2,[r4],#1

	mov	r1,r0,lsr#8
	strb	r0,[r4],#1
	mov	r3,r0,lsr#16
	strb	r1,[r4],#1
	mov	r0,r0,lsr#24
	strb	r3,[r4],#1
	strb	r0,[r4],#1
	subs	r5,r5,#8
	beq	.Lsqueeze_done

	subs	r12,r12,#8		@ bsz -= 8
	bhi	.Loop_squeeze

	mov	r0,r14			@ original r10

	bl	KeccakF1600		@ keeps r4 (out) and r5 (len) intact

	ldmia	sp,{r6-r10,r12}		@ restore constants and variables
	mov	r14,r10
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	@ reached only with 1..7 bytes left
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	mov	r2,r2,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r2,[r4],#1
	subs	r5,r5,#1
	beq	.Lsqueeze_done

	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4],#1
	mov	r0,r0,lsr#8
	subs	r5,r5,#1
	beq	.Lsqueeze_done
	strb	r0,[r4]
	b	.Lsqueeze_done

.align	4
.Lsqueeze_done:
	add	sp,sp,#24		@ drop the 4 masks and pushed r0,r3
	ldmia	sp!,{r4-r10,pc}
.size	SHA3_squeeze,.-SHA3_squeeze
#if __ARM_MAX_ARCH__>=7
.fpu	neon

@ The 24 round constants, one 64-bit word per round, read in order by
@ KeccakF1600_neon.
.type	iotas64, %object
.align	5
iotas64:
	.quad	0x0000000000000001
	.quad	0x0000000000008082
	.quad	0x800000000000808a
	.quad	0x8000000080008000
	.quad	0x000000000000808b
	.quad	0x0000000080000001
	.quad	0x8000000080008081
	.quad	0x8000000000008009
	.quad	0x000000000000008a
	.quad	0x0000000000000088
	.quad	0x0000000080008009
	.quad	0x000000008000000a
	.quad	0x000000008000808b
	.quad	0x800000000000008b
	.quad	0x8000000000008089
	.quad	0x8000000000008003
	.quad	0x8000000000008002
	.quad	0x8000000000000080
	.quad	0x000000000000800a
	.quad	0x800000008000000a
	.quad	0x8000000080008081
	.quad	0x8000000000008080
	.quad	0x0000000080000001
	.quad	0x8000000080008008
.size	iotas64,.-iotas64

@ ----------------------------------------------------------------------
@ KeccakF1600_neon: 24 rounds on a state held in NEON registers.
@ In:	state in d0-d24, laid out as the callers load it:
@	  q0..q4   = A[0..1][0..4]  (even d = row 0, odd d = row 1)
@	  q5..q9   = A[2..3][0..4]  (even d = row 2, odd d = row 3)
@	  d20..d24 = A[4][0..4]
@	r0 = 64-bit aligned scratch memory; 16 bytes at r0 and 8 bytes at
@	     r0+16 are overwritten (both callers pass the pointer to A
@	     and store the whole state back afterwards)
@ Out:	permuted state in d0-d24; r0 unchanged
@ Clobbers r1-r3, d25-d31 and the flags.
@ ----------------------------------------------------------------------
.type	KeccakF1600_neon, %function
.align	5
KeccakF1600_neon:
	add	r1, r0, #16
	adr	r2, iotas64
	mov	r3, #24			@ loop counter
	b	.Loop_neon

.align	4
.Loop_neon:
	@ Theta
	vst1.64	{q4}, [r0,:64]		@ offload A[0..1][4]
	veor	q13, q0, q5		@ A[0..1][0]^A[2..3][0]
	vst1.64	{d18}, [r1,:64]		@ offload A[2][4]
	veor	q14, q1, q6		@ A[0..1][1]^A[2..3][1]
	veor	q15, q2, q7		@ A[0..1][2]^A[2..3][2]
	veor	d26, d26, d27		@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
	veor	d27, d28, d29		@ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
	veor	q14, q3, q8		@ A[0..1][3]^A[2..3][3]
	veor	q4, q4, q9		@ A[0..1][4]^A[2..3][4]
	veor	d30, d30, d31		@ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
	veor	d31, d28, d29		@ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
	veor	d25, d8, d9		@ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
	veor	q13, q13, q10		@ C[0..1]^=A[4][0..1]
	veor	q14, q15, q11		@ C[2..3]^=A[4][2..3]
	veor	d25, d25, d24		@ C[4]^=A[4][4]

	vadd.u64	q4, q13, q13		@ C[0..1]<<1
	vadd.u64	q15, q14, q14		@ C[2..3]<<1
	vadd.u64	d18, d25, d25		@ C[4]<<1
	vsri.u64	q4, q13, #63		@ ROL64(C[0..1],1)
	vsri.u64	q15, q14, #63		@ ROL64(C[2..3],1)
	vsri.u64	d18, d25, #63		@ ROL64(C[4],1)
	veor	d25, d25, d9		@ D[0] = C[4] ^= ROL64(C[1],1)
	veor	q13, q13, q15		@ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
	veor	d28, d28, d18		@ D[3] = C[2] ^= ROL64(C[4],1)
	veor	d29, d29, d8		@ D[4] = C[3] ^= ROL64(C[0],1)

	veor	d0, d0, d25		@ A[0][0] ^= C[4]
	veor	d1, d1, d25		@ A[1][0] ^= C[4]
	veor	d10, d10, d25		@ A[2][0] ^= C[4]
	veor	d11, d11, d25		@ A[3][0] ^= C[4]
	veor	d20, d20, d25		@ A[4][0] ^= C[4]

	veor	d2, d2, d26		@ A[0][1] ^= D[1]
	veor	d3, d3, d26		@ A[1][1] ^= D[1]
	veor	d12, d12, d26		@ A[2][1] ^= D[1]
	veor	d13, d13, d26		@ A[3][1] ^= D[1]
	veor	d21, d21, d26		@ A[4][1] ^= D[1]
	vmov	d26, d27

	veor	d6, d6, d28		@ A[0][3] ^= C[2]
	veor	d7, d7, d28		@ A[1][3] ^= C[2]
	veor	d16, d16, d28		@ A[2][3] ^= C[2]
	veor	d17, d17, d28		@ A[3][3] ^= C[2]
	veor	d23, d23, d28		@ A[4][3] ^= C[2]
	vld1.64	{q4}, [r0,:64]		@ restore A[0..1][4]
	vmov	d28, d29

	vld1.64	{d18}, [r1,:64]		@ restore A[2][4]
	veor	q2, q2, q13		@ A[0..1][2] ^= D[2]
	veor	q7, q7, q13		@ A[2..3][2] ^= D[2]
	veor	d22, d22, d27		@ A[4][2] ^= D[2]

	veor	q4, q4, q14		@ A[0..1][4] ^= C[3]
	veor	q9, q9, q14		@ A[2..3][4] ^= C[3]
	veor	d24, d24, d29		@ A[4][4] ^= C[3]

	@ Rho + Pi
	vmov	d26, d2			@ C[1] = A[0][1]
	vshl.u64	d2, d3, #44
	vmov	d27, d4			@ C[2] = A[0][2]
	vshl.u64	d4, d14, #43
	vmov	d28, d6			@ C[3] = A[0][3]
	vshl.u64	d6, d17, #21
	vmov	d29, d8			@ C[4] = A[0][4]
	vshl.u64	d8, d24, #14
	vsri.u64	d2, d3, #64-44		@ A[0][1] = ROL64(A[1][1], rhotates[1][1])
	vsri.u64	d4, d14, #64-43		@ A[0][2] = ROL64(A[2][2], rhotates[2][2])
	vsri.u64	d6, d17, #64-21		@ A[0][3] = ROL64(A[3][3], rhotates[3][3])
	vsri.u64	d8, d24, #64-14		@ A[0][4] = ROL64(A[4][4], rhotates[4][4])

	vshl.u64	d3, d9, #20
	vshl.u64	d14, d16, #25
	vshl.u64	d17, d15, #15
	vshl.u64	d24, d21, #2
	vsri.u64	d3, d9, #64-20		@ A[1][1] = ROL64(A[1][4], rhotates[1][4])
	vsri.u64	d14, d16, #64-25	@ A[2][2] = ROL64(A[2][3], rhotates[2][3])
	vsri.u64	d17, d15, #64-15	@ A[3][3] = ROL64(A[3][2], rhotates[3][2])
	vsri.u64	d24, d21, #64-2		@ A[4][4] = ROL64(A[4][1], rhotates[4][1])

	vshl.u64	d9, d22, #61
	@ vshl.u64	d16, d19, #8
	vshl.u64	d15, d12, #10
	vshl.u64	d21, d7, #55
	vsri.u64	d9, d22, #64-61		@ A[1][4] = ROL64(A[4][2], rhotates[4][2])
	vext.8	d16, d19, d19, #8-1	@ A[2][3] = ROL64(A[3][4], rhotates[3][4])
	vsri.u64	d15, d12, #64-10	@ A[3][2] = ROL64(A[2][1], rhotates[2][1])
	vsri.u64	d21, d7, #64-55		@ A[4][1] = ROL64(A[1][3], rhotates[1][3])

	vshl.u64	d22, d18, #39
	@ vshl.u64	d19, d23, #56
	vshl.u64	d12, d5, #6
	vshl.u64	d7, d13, #45
	vsri.u64	d22, d18, #64-39	@ A[4][2] = ROL64(A[2][4], rhotates[2][4])
	vext.8	d19, d23, d23, #8-7	@ A[3][4] = ROL64(A[4][3], rhotates[4][3])
	vsri.u64	d12, d5, #64-6		@ A[2][1] = ROL64(A[1][2], rhotates[1][2])
	vsri.u64	d7, d13, #64-45		@ A[1][3] = ROL64(A[3][1], rhotates[3][1])

	vshl.u64	d18, d20, #18
	vshl.u64	d23, d11, #41
	vshl.u64	d5, d10, #3
	vshl.u64	d13, d1, #36
	vsri.u64	d18, d20, #64-18	@ A[2][4] = ROL64(A[4][0], rhotates[4][0])
	vsri.u64	d23, d11, #64-41	@ A[4][3] = ROL64(A[3][0], rhotates[3][0])
	vsri.u64	d5, d10, #64-3		@ A[1][2] = ROL64(A[2][0], rhotates[2][0])
	vsri.u64	d13, d1, #64-36		@ A[3][1] = ROL64(A[1][0], rhotates[1][0])

	vshl.u64	d1, d28, #28
	vshl.u64	d10, d26, #1
	vshl.u64	d11, d29, #27
	vshl.u64	d20, d27, #62
	vsri.u64	d1, d28, #64-28		@ A[1][0] = ROL64(C[3], rhotates[0][3])
	vsri.u64	d10, d26, #64-1		@ A[2][0] = ROL64(C[1], rhotates[0][1])
	vsri.u64	d11, d29, #64-27	@ A[3][0] = ROL64(C[4], rhotates[0][4])
	vsri.u64	d20, d27, #64-62	@ A[4][0] = ROL64(C[2], rhotates[0][2])

	@ Chi + Iota
	vbic	q13, q2, q1
	vbic	q14, q3, q2
	vbic	q15, q4, q3
	veor	q13, q13, q0		@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
	veor	q14, q14, q1		@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
	veor	q2, q2, q15		@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
	vst1.64	{q13}, [r0,:64]		@ offload A[0..1][0]
	vbic	q13, q0, q4
	vbic	q15, q1, q0
	vmov	q1, q14			@ A[0..1][1]
	veor	q3, q3, q13		@ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
	veor	q4, q4, q15		@ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])

	vbic	q13, q7, q6
	vmov	q0, q5			@ A[2..3][0]
	vbic	q14, q8, q7
	vmov	q15, q6			@ A[2..3][1]
	veor	q5, q5, q13		@ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
	vbic	q13, q9, q8
	veor	q6, q6, q14		@ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
	vbic	q14, q0, q9
	veor	q7, q7, q13		@ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
	vbic	q13, q15, q0
	veor	q8, q8, q14		@ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
	vmov	q14, q10		@ A[4][0..1]
	veor	q9, q9, q13		@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])

	vld1.64	d25, [r2,:64]!		@ Iota[i++]
	vbic	d26, d22, d21
	vbic	d27, d23, d22
	vld1.64	{q0}, [r0,:64]		@ restore A[0..1][0]
	veor	d20, d20, d26		@ A[4][0] ^= (~A[4][1] & A[4][2])
	vbic	d26, d24, d23
	veor	d21, d21, d27		@ A[4][1] ^= (~A[4][2] & A[4][3])
	vbic	d27, d28, d24
	veor	d22, d22, d26		@ A[4][2] ^= (~A[4][3] & A[4][4])
	vbic	d26, d29, d28
	veor	d23, d23, d27		@ A[4][3] ^= (~A[4][4] & A[4][0])
	veor	d0, d0, d25		@ A[0][0] ^= Iota[i]
	veor	d24, d24, d26		@ A[4][4] ^= (~A[4][0] & A[4][1])

	subs	r3, r3, #1
	bne	.Loop_neon

	.word	0xe12fff1e		@ ARM-mode encoding of bx lr
.size	KeccakF1600_neon,.-KeccakF1600_neon

@ ----------------------------------------------------------------------
@ SHA3_absorb_neon: XOR whole bsz-byte blocks of input into the state
@ and run KeccakF1600_neon after each block.
@ In:	r0 = pointer to A[5][5] (64-bit aligned, see the :64
@	     qualifiers), r1 = inp, r2 = len, r3 = bsz
@ Out:	r0 = number of input bytes left over
@ The state stays in d0-d24 for the whole call and is stored back once
@ at the end. d8-d15 are saved and restored around the body.
@ The compare ladder stops after bsz/8 lanes; each cmp serves the blo
@ that follows it and the beq one lane later (the NEON instructions in
@ between leave the flags alone).
@ ----------------------------------------------------------------------
.global	SHA3_absorb_neon
.type	SHA3_absorb_neon, %function
.align	5
SHA3_absorb_neon:
	stmdb	sp!, {r4-r6,lr}
	vstmdb	sp!, {d8-d15}

	mov	r4, r1			@ inp
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz

	vld1.32	{d0}, [r0,:64]!		@ A[0][0]
	vld1.32	{d2}, [r0,:64]!		@ A[0][1]
	vld1.32	{d4}, [r0,:64]!		@ A[0][2]
	vld1.32	{d6}, [r0,:64]!		@ A[0][3]
	vld1.32	{d8}, [r0,:64]!		@ A[0][4]

	vld1.32	{d1}, [r0,:64]!		@ A[1][0]
	vld1.32	{d3}, [r0,:64]!		@ A[1][1]
	vld1.32	{d5}, [r0,:64]!		@ A[1][2]
	vld1.32	{d7}, [r0,:64]!		@ A[1][3]
	vld1.32	{d9}, [r0,:64]!		@ A[1][4]

	vld1.32	{d10}, [r0,:64]!	@ A[2][0]
	vld1.32	{d12}, [r0,:64]!	@ A[2][1]
	vld1.32	{d14}, [r0,:64]!	@ A[2][2]
	vld1.32	{d16}, [r0,:64]!	@ A[2][3]
	vld1.32	{d18}, [r0,:64]!	@ A[2][4]

	vld1.32	{d11}, [r0,:64]!	@ A[3][0]
	vld1.32	{d13}, [r0,:64]!	@ A[3][1]
	vld1.32	{d15}, [r0,:64]!	@ A[3][2]
	vld1.32	{d17}, [r0,:64]!	@ A[3][3]
	vld1.32	{d19}, [r0,:64]!	@ A[3][4]

	vld1.32	{d20-d23}, [r0,:64]!	@ A[4][0..3]
	vld1.32	{d24}, [r0,:64]		@ A[4][4]
	sub	r0, r0, #24*8		@ rewind
	b	.Loop_absorb_neon

.align	4
.Loop_absorb_neon:
	subs	r12, r5, r6		@ len - bsz
	blo	.Labsorbed_neon
	mov	r5, r12

	vld1.8	{d31}, [r4]!		@ endian-neutral loads...
	cmp	r6, #8*2
	veor	d0, d0, d31		@ A[0][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d2, d2, d31		@ A[0][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*4
	veor	d4, d4, d31		@ A[0][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d6, d6, d31		@ A[0][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*6
	veor	d8, d8, d31		@ A[0][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d1, d1, d31		@ A[1][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*8
	veor	d3, d3, d31		@ A[1][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d5, d5, d31		@ A[1][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*10
	veor	d7, d7, d31		@ A[1][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d9, d9, d31		@ A[1][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*12
	veor	d10, d10, d31		@ A[2][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d12, d12, d31		@ A[2][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*14
	veor	d14, d14, d31		@ A[2][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d16, d16, d31		@ A[2][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*16
	veor	d18, d18, d31		@ A[2][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d11, d11, d31		@ A[3][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*18
	veor	d13, d13, d31		@ A[3][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d15, d15, d31		@ A[3][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*20
	veor	d17, d17, d31		@ A[3][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d19, d19, d31		@ A[3][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*22
	veor	d20, d20, d31		@ A[4][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d21, d21, d31		@ A[4][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*24
	veor	d22, d22, d31		@ A[4][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d23, d23, d31		@ A[4][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d24, d24, d31		@ A[4][4] ^= *inp++

.Lprocess_neon:
	bl	KeccakF1600_neon
	b	.Loop_absorb_neon

.align	4
.Labsorbed_neon:
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20-d23}, [r0,:64]!	@ A[4][0..4]
	vst1.32	{d24}, [r0,:64]

	mov	r0, r5			@ return value
	vldmia	sp!, {d8-d15}
	ldmia	sp!, {r4-r6,pc}
.size	SHA3_absorb_neon,.-SHA3_absorb_neon

@ ----------------------------------------------------------------------
@ SHA3_squeeze_neon: copy len bytes of the state to out, running
@ KeccakF1600_neon each time bsz bytes were produced and more output is
@ still wanted.
@ In:	r0 = pointer to A[5][5], r1 = out, r2 = len, r3 = bsz
@ Out:	nothing is returned in a register
@ Working registers: r12 = read position inside A, r14 = bytes left in
@ the current block, r6 = bsz for reloading r14.
@ len == 0 returns at once without storing anything. Without that
@ check the tail code would store one byte before its first exit test.
@ ----------------------------------------------------------------------
.global	SHA3_squeeze_neon
.type	SHA3_squeeze_neon, %function
.align	5
SHA3_squeeze_neon:
	stmdb	sp!, {r4-r6,lr}

	mov	r4, r1			@ out
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz
	mov	r12, r0			@ A_flat
	mov	r14, r3			@ bsz
	cmp	r5, #0			@ zero-length request:
	beq	.Lsqueeze_neon_done	@ nothing to store
	b	.Loop_squeeze_neon

.align	4
.Loop_squeeze_neon:
	cmp	r5, #8
	blo	.Lsqueeze_neon_tail
	vld1.32	{d0}, [r12]!
	vst1.8	{d0}, [r4]!		@ endian-neutral store

	subs	r5, r5, #8		@ len -= 8
	beq	.Lsqueeze_neon_done

	subs	r14, r14, #8		@ bsz -= 8
	bhi	.Loop_squeeze_neon

	vstmdb	sp!, {d8-d15}

	vld1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vld1.32	{d2}, [r0,:64]!
	vld1.32	{d4}, [r0,:64]!
	vld1.32	{d6}, [r0,:64]!
	vld1.32	{d8}, [r0,:64]!

	vld1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vld1.32	{d3}, [r0,:64]!
	vld1.32	{d5}, [r0,:64]!
	vld1.32	{d7}, [r0,:64]!
	vld1.32	{d9}, [r0,:64]!

	vld1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vld1.32	{d12}, [r0,:64]!
	vld1.32	{d14}, [r0,:64]!
	vld1.32	{d16}, [r0,:64]!
	vld1.32	{d18}, [r0,:64]!

	vld1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vld1.32	{d13}, [r0,:64]!
	vld1.32	{d15}, [r0,:64]!
	vld1.32	{d17}, [r0,:64]!
	vld1.32	{d19}, [r0,:64]!

	vld1.32	{d20-d23}, [r0,:64]!	@ A[4][0..4]
	vld1.32	{d24}, [r0,:64]
	sub	r0, r0, #24*8		@ rewind

	bl	KeccakF1600_neon

	mov	r12, r0			@ A_flat
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20-d23}, [r0,:64]!	@ A[4][0..4]
	mov	r14, r6			@ bsz
	vst1.32	{d24}, [r0,:64]
	mov	r0, r12			@ rewind

	vldmia	sp!, {d8-d15}
	b	.Loop_squeeze_neon

.align	4
.Lsqueeze_neon_tail:
	@ reached only with 1..7 bytes left
	ldmia	r12, {r2,r3}
	cmp	r5, #2
	strb	r2, [r4],#1		@ endian-neutral store
	mov	r2, r2, lsr#8
	blo	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	beq	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	cmp	r5, #4
	blo	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	beq	.Lsqueeze_neon_done

	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	cmp	r5, #6
	blo	.Lsqueeze_neon_done
	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	beq	.Lsqueeze_neon_done
	strb	r3, [r4], #1

.Lsqueeze_neon_done:
	ldmia	sp!, {r4-r6,pc}
.size	SHA3_squeeze_neon,.-SHA3_squeeze_neon
#endif
.asciz	"Keccak-1600 absorb and squeeze for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
.align	2