1#include "arm_arch.h" 2 3#if defined(__thumb2__) 4.syntax unified 5.thumb 6#else 7.code 32 8#endif 9 10.text 11 12.type iotas32, %object 13.align 5 14iotas32: 15.long 0x00000001, 0x00000000 16.long 0x00000000, 0x00000089 17.long 0x00000000, 0x8000008b 18.long 0x00000000, 0x80008080 19.long 0x00000001, 0x0000008b 20.long 0x00000001, 0x00008000 21.long 0x00000001, 0x80008088 22.long 0x00000001, 0x80000082 23.long 0x00000000, 0x0000000b 24.long 0x00000000, 0x0000000a 25.long 0x00000001, 0x00008082 26.long 0x00000000, 0x00008003 27.long 0x00000001, 0x0000808b 28.long 0x00000001, 0x8000000b 29.long 0x00000001, 0x8000008a 30.long 0x00000001, 0x80000081 31.long 0x00000000, 0x80000081 32.long 0x00000000, 0x80000008 33.long 0x00000000, 0x00000083 34.long 0x00000000, 0x80008003 35.long 0x00000001, 0x80008088 36.long 0x00000000, 0x80000088 37.long 0x00000001, 0x00008000 38.long 0x00000000, 0x80008082 39.size iotas32,.-iotas32 40 41.type KeccakF1600_int, %function 42.align 5 43KeccakF1600_int: 44 add r9,sp,#176 45 add r12,sp,#0 46 add r10,sp,#40 47 ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4] 48KeccakF1600_enter: 49 str lr,[sp,#440] 50 eor r11,r11,r11 51 str r11,[sp,#444] 52 b .Lround2x 53 54.align 4 55.Lround2x: 56 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 57 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 58#ifdef __thumb2__ 59 eor r0,r0,r10 60 eor r1,r1,r11 61 eor r2,r2,r12 62 ldrd r10,r11,[sp,#56] 63 eor r3,r3,r14 64 ldrd r12,r14,[sp,#64] 65 eor r4,r4,r10 66 eor r5,r5,r11 67 eor r6,r6,r12 68 ldrd r10,r11,[sp,#72] 69 eor r7,r7,r14 70 ldrd r12,r14,[sp,#80] 71 eor r8,r8,r10 72 eor r9,r9,r11 73 eor r0,r0,r12 74 ldrd r10,r11,[sp,#88] 75 eor r1,r1,r14 76 ldrd r12,r14,[sp,#96] 77 eor r2,r2,r10 78 eor r3,r3,r11 79 eor r4,r4,r12 80 ldrd r10,r11,[sp,#104] 81 eor r5,r5,r14 82 ldrd r12,r14,[sp,#112] 83 eor r6,r6,r10 84 eor r7,r7,r11 85 eor r8,r8,r12 86 ldrd r10,r11,[sp,#120] 87 eor r9,r9,r14 88 ldrd r12,r14,[sp,#128] 89 eor r0,r0,r10 90 eor r1,r1,r11 91 eor r2,r2,r12 92 ldrd r10,r11,[sp,#136] 93 eor r3,r3,r14 94 ldrd r12,r14,[sp,#144] 95 eor r4,r4,r10 96 eor r5,r5,r11 97 eor r6,r6,r12 98 ldrd r10,r11,[sp,#152] 99 eor r7,r7,r14 100 ldrd r12,r14,[sp,#160] 101 eor r8,r8,r10 102 eor r9,r9,r11 103 eor r0,r0,r12 104 ldrd r10,r11,[sp,#168] 105 eor r1,r1,r14 106 ldrd r12,r14,[sp,#16] 107 eor r2,r2,r10 108 eor r3,r3,r11 109 eor r4,r4,r12 110 ldrd r10,r11,[sp,#24] 111 eor r5,r5,r14 112 ldrd r12,r14,[sp,#32] 113#else 114 eor r0,r0,r10 115 add r10,sp,#56 116 eor r1,r1,r11 117 eor r2,r2,r12 118 eor r3,r3,r14 119 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 120 eor r4,r4,r10 121 add r10,sp,#72 122 eor r5,r5,r11 123 eor r6,r6,r12 124 eor r7,r7,r14 125 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 126 eor r8,r8,r10 127 add r10,sp,#88 128 eor r9,r9,r11 129 eor r0,r0,r12 130 eor r1,r1,r14 131 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 132 eor r2,r2,r10 133 add r10,sp,#104 134 eor r3,r3,r11 135 eor r4,r4,r12 136 eor r5,r5,r14 137 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 138 eor r6,r6,r10 139 add r10,sp,#120 140 eor r7,r7,r11 141 eor r8,r8,r12 142 eor r9,r9,r14 143 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 144 eor r0,r0,r10 145 add r10,sp,#136 146 eor r1,r1,r11 147 eor r2,r2,r12 148 eor r3,r3,r14 149 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 150 eor r4,r4,r10 151 add r10,sp,#152 152 eor r5,r5,r11 153 eor r6,r6,r12 154 eor r7,r7,r14 155 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 156 eor r8,r8,r10 157 ldr r10,[sp,#168] @ A[4][1] 158 eor r9,r9,r11 159 ldr r11,[sp,#168+4] 160 eor r0,r0,r12 161 ldr r12,[sp,#16] @ A[0][2] 162 eor r1,r1,r14 163 ldr r14,[sp,#16+4] 164 eor r2,r2,r10 165 add r10,sp,#24 166 eor r3,r3,r11 167 eor r4,r4,r12 168 eor r5,r5,r14 169 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 170#endif 171 eor r6,r6,r10 172 eor r7,r7,r11 173 eor r8,r8,r12 174 eor r9,r9,r14 175 176 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 177#ifndef __thumb2__ 178 str r10,[sp,#208] @ D[1] = E[0] 179#endif 180 eor r11,r1,r4 181#ifndef __thumb2__ 182 str r11,[sp,#208+4] 183#else 184 strd r10,r11,[sp,#208] @ D[1] = E[0] 185#endif 186 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 187 eor r14,r7,r0 188#ifndef __thumb2__ 189 str r12,[sp,#232] @ D[4] = E[1] 190#endif 191 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 192#ifndef __thumb2__ 193 str r14,[sp,#232+4] 194#else 195 strd r12,r14,[sp,#232] @ D[4] = E[1] 196#endif 197 eor r1,r9,r2 198#ifndef __thumb2__ 199 str r0,[sp,#200] @ D[0] = C[0] 200#endif 201 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 202#ifndef __thumb2__ 203 ldr r7,[sp,#144] 204#endif 205 eor r3,r3,r6 206#ifndef __thumb2__ 207 str r1,[sp,#200+4] 208#else 209 strd r0,r1,[sp,#200] @ D[0] = C[0] 210#endif 211#ifndef __thumb2__ 212 ldr r6,[sp,#144+4] 213#else 214 ldrd r7,r6,[sp,#144] 215#endif 216#ifndef __thumb2__ 217 str r2,[sp,#216] @ D[2] = C[1] 218#endif 219 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 220#ifndef __thumb2__ 221 str r3,[sp,#216+4] 222#else 223 strd r2,r3,[sp,#216] @ D[2] = C[1] 224#endif 225 eor r5,r5,r8 226 227#ifndef __thumb2__ 228 ldr r8,[sp,#192] 229#endif 230#ifndef __thumb2__ 231 ldr r9,[sp,#192+4] 232#else 233 ldrd r8,r9,[sp,#192] 234#endif 235#ifndef __thumb2__ 236 str r4,[sp,#224] @ D[3] = C[2] 237#endif 238 eor r7,r7,r4 239#ifndef __thumb2__ 240 str r5,[sp,#224+4] 241#else 242 strd r4,r5,[sp,#224] @ D[3] = C[2] 243#endif 244 eor r6,r6,r5 245#ifndef __thumb2__ 246 ldr r4,[sp,#0] 247#endif 248 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 249 @ mov r6,r6,ror#32-11 250#ifndef __thumb2__ 251 ldr r5,[sp,#0+4] 252#else 253 ldrd r4,r5,[sp,#0] 254#endif 255 eor r8,r8,r12 256 eor r9,r9,r14 257#ifndef __thumb2__ 258 ldr r12,[sp,#96] 259#endif 260 eor r0,r0,r4 261#ifndef __thumb2__ 262 ldr r14,[sp,#96+4] 263#else 264 ldrd r12,r14,[sp,#96] 265#endif 266 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 267 @ mov r9,r9,ror#32-7 268 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 269 eor r12,r12,r2 270#ifndef __thumb2__ 271 ldr r2,[sp,#48] 272#endif 273 eor r14,r14,r3 274#ifndef __thumb2__ 275 ldr r3,[sp,#48+4] 276#else 277 ldrd r2,r3,[sp,#48] 278#endif 279 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 280 ldr r12,[sp,#444] @ load counter 281 eor r2,r2,r10 282 adr r10,iotas32 283 mov r4,r14,ror#32-22 284 add r14,r10,r12 285 eor r3,r3,r11 286 ldmia r14,{r10,r11} @ iotas[i] 287 bic r12,r4,r2,ror#32-22 288 bic r14,r5,r3,ror#32-22 289 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 290 mov r3,r3,ror#32-22 291 eor r12,r12,r0 292 eor r14,r14,r1 293 eor r10,r10,r12 294 eor r11,r11,r14 295#ifndef __thumb2__ 296 str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 297#endif 298 bic r12,r6,r4,ror#11 299#ifndef __thumb2__ 300 str r11,[sp,#240+4] 301#else 302 strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 303#endif 304 bic r14,r7,r5,ror#10 305 bic r10,r8,r6,ror#32-(11-7) 306 bic r11,r9,r7,ror#32-(10-7) 307 eor r12,r2,r12,ror#32-11 308#ifndef __thumb2__ 309 str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 310#endif 311 eor r14,r3,r14,ror#32-10 312#ifndef __thumb2__ 313 str r14,[sp,#248+4] 314#else 315 strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 316#endif 317 eor r10,r4,r10,ror#32-7 318 eor r11,r5,r11,ror#32-7 319#ifndef __thumb2__ 320 str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 321#endif 322 bic r12,r0,r8,ror#32-7 323#ifndef __thumb2__ 324 str r11,[sp,#256+4] 325#else 326 strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 327#endif 328 bic r14,r1,r9,ror#32-7 329 eor r12,r12,r6,ror#32-11 330#ifndef __thumb2__ 331 str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 332#endif 333 eor r14,r14,r7,ror#32-10 334#ifndef __thumb2__ 335 str r14,[sp,#264+4] 336#else 337 strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 338#endif 339 bic r10,r2,r0 340 add r14,sp,#224 341#ifndef __thumb2__ 342 ldr r0,[sp,#24] @ A[0][3] 343#endif 344 bic r11,r3,r1 345#ifndef __thumb2__ 346 ldr r1,[sp,#24+4] 347#else 348 ldrd r0,r1,[sp,#24] @ A[0][3] 349#endif 350 eor r10,r10,r8,ror#32-7 351 eor r11,r11,r9,ror#32-7 352#ifndef __thumb2__ 353 str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 354#endif 355 add r9,sp,#200 356#ifndef __thumb2__ 357 str r11,[sp,#272+4] 358#else 359 strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 360#endif 361 362 ldmia r14,{r10,r11,r12,r14} @ D[3..4] 363 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 364 365#ifndef __thumb2__ 366 ldr r2,[sp,#72] @ A[1][4] 367#endif 368 eor r0,r0,r10 369#ifndef __thumb2__ 370 ldr r3,[sp,#72+4] 371#else 372 ldrd r2,r3,[sp,#72] @ A[1][4] 373#endif 374 eor r1,r1,r11 375 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 376#ifndef __thumb2__ 377 ldr r10,[sp,#128] @ A[3][1] 378#endif 379 @ mov r1,r1,ror#32-14 380#ifndef __thumb2__ 381 ldr r11,[sp,#128+4] 382#else 383 ldrd r10,r11,[sp,#128] @ A[3][1] 384#endif 385 386 eor r2,r2,r12 387#ifndef __thumb2__ 388 ldr r4,[sp,#80] @ A[2][0] 389#endif 390 eor r3,r3,r14 391#ifndef __thumb2__ 392 ldr r5,[sp,#80+4] 393#else 394 ldrd r4,r5,[sp,#80] @ A[2][0] 395#endif 396 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 397 @ mov r3,r3,ror#32-10 398 399 eor r6,r6,r4 400#ifndef __thumb2__ 401 ldr r12,[sp,#216] @ D[2] 402#endif 403 eor r7,r7,r5 404#ifndef __thumb2__ 405 ldr r14,[sp,#216+4] 406#else 407 ldrd r12,r14,[sp,#216] @ D[2] 408#endif 409 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 410 mov r4,r7,ror#32-2 411 412 eor r10,r10,r8 413#ifndef __thumb2__ 414 ldr r8,[sp,#176] @ A[4][2] 415#endif 416 eor r11,r11,r9 417#ifndef __thumb2__ 418 ldr r9,[sp,#176+4] 419#else 420 ldrd r8,r9,[sp,#176] @ A[4][2] 421#endif 422 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 423 mov r6,r11,ror#32-23 424 425 bic r10,r4,r2,ror#32-10 426 bic r11,r5,r3,ror#32-10 427 eor r12,r12,r8 428 eor r14,r14,r9 429 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 430 mov r8,r14,ror#32-31 431 eor r10,r10,r0,ror#32-14 432 eor r11,r11,r1,ror#32-14 433#ifndef __thumb2__ 434 str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 435#endif 436 bic r12,r6,r4 437#ifndef __thumb2__ 438 str r11,[sp,#280+4] 439#else 440 strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 441#endif 442 bic r14,r7,r5 443 eor r12,r12,r2,ror#32-10 444#ifndef __thumb2__ 445 str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 446#endif 447 eor r14,r14,r3,ror#32-10 448#ifndef __thumb2__ 449 str r14,[sp,#288+4] 450#else 451 strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 452#endif 453 bic r10,r8,r6 454 bic r11,r9,r7 455 bic r12,r0,r8,ror#14 456 bic r14,r1,r9,ror#14 457 eor r10,r10,r4 458 eor r11,r11,r5 459#ifndef __thumb2__ 460 str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 461#endif 462 bic r2,r2,r0,ror#32-(14-10) 463#ifndef __thumb2__ 464 str r11,[sp,#296+4] 465#else 466 strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 467#endif 468 eor r12,r6,r12,ror#32-14 469 bic r11,r3,r1,ror#32-(14-10) 470#ifndef __thumb2__ 471 str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 472#endif 473 eor r14,r7,r14,ror#32-14 474#ifndef __thumb2__ 475 str r14,[sp,#304+4] 476#else 477 strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 478#endif 479 add r12,sp,#208 480#ifndef __thumb2__ 481 ldr r1,[sp,#8] @ A[0][1] 482#endif 483 eor r10,r8,r2,ror#32-10 484#ifndef __thumb2__ 485 ldr r0,[sp,#8+4] 486#else 487 ldrd r1,r0,[sp,#8] @ A[0][1] 488#endif 489 eor r11,r9,r11,ror#32-10 490#ifndef __thumb2__ 491 str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 492#endif 493#ifndef __thumb2__ 494 str r11,[sp,#312+4] 495#else 496 strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 497#endif 498 499 add r9,sp,#224 500 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 501#ifndef __thumb2__ 502 ldr r2,[sp,#56] @ A[1][2] 503#endif 504#ifndef __thumb2__ 505 ldr r3,[sp,#56+4] 506#else 507 ldrd r2,r3,[sp,#56] @ A[1][2] 508#endif 509 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 510 511 eor r1,r1,r10 512#ifndef __thumb2__ 513 ldr r4,[sp,#104] @ A[2][3] 514#endif 515 eor r0,r0,r11 516#ifndef __thumb2__ 517 ldr r5,[sp,#104+4] 518#else 519 ldrd r4,r5,[sp,#104] @ A[2][3] 520#endif 521 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 522 523 eor r2,r2,r12 524#ifndef __thumb2__ 525 ldr r10,[sp,#152] @ A[3][4] 526#endif 527 eor r3,r3,r14 528#ifndef __thumb2__ 529 ldr r11,[sp,#152+4] 530#else 531 ldrd r10,r11,[sp,#152] @ A[3][4] 532#endif 533 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 534#ifndef __thumb2__ 535 ldr r12,[sp,#200] @ D[0] 536#endif 537 @ mov r3,r3,ror#32-3 538#ifndef __thumb2__ 539 ldr r14,[sp,#200+4] 540#else 541 ldrd r12,r14,[sp,#200] @ D[0] 542#endif 543 544 eor r4,r4,r6 545 eor r5,r5,r7 546 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 547 @ mov r4,r7,ror#32-13 @ [track reverse order below] 548 549 eor r10,r10,r8 550#ifndef __thumb2__ 551 ldr r8,[sp,#160] @ A[4][0] 552#endif 553 eor r11,r11,r9 554#ifndef __thumb2__ 555 ldr r9,[sp,#160+4] 556#else 557 ldrd r8,r9,[sp,#160] @ A[4][0] 558#endif 559 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 560 mov r7,r11,ror#32-4 561 562 eor r12,r12,r8 563 eor r14,r14,r9 564 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 565 mov r9,r14,ror#32-9 566 567 bic r10,r5,r2,ror#13-3 568 bic r11,r4,r3,ror#12-3 569 bic r12,r6,r5,ror#32-13 570 bic r14,r7,r4,ror#32-12 571 eor r10,r0,r10,ror#32-13 572 eor r11,r1,r11,ror#32-12 573#ifndef __thumb2__ 574 str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 575#endif 576 eor r12,r12,r2,ror#32-3 577#ifndef __thumb2__ 578 str r11,[sp,#320+4] 579#else 580 strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 581#endif 582 eor r14,r14,r3,ror#32-3 583#ifndef __thumb2__ 584 str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 585#endif 586 bic r10,r8,r6 587 bic r11,r9,r7 588#ifndef __thumb2__ 589 str r14,[sp,#328+4] 590#else 591 strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 592#endif 593 eor r10,r10,r5,ror#32-13 594 eor r11,r11,r4,ror#32-12 595#ifndef __thumb2__ 596 str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 597#endif 598 bic r12,r0,r8 599#ifndef __thumb2__ 600 str r11,[sp,#336+4] 601#else 602 strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 603#endif 604 bic r14,r1,r9 605 eor r12,r12,r6 606 eor r14,r14,r7 607#ifndef __thumb2__ 608 str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 609#endif 610 bic r10,r2,r0,ror#3 611#ifndef __thumb2__ 612 str r14,[sp,#344+4] 613#else 614 strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 615#endif 616 bic r11,r3,r1,ror#3 617#ifndef __thumb2__ 618 ldr r1,[sp,#32] @ A[0][4] [in reverse order] 619#endif 620 eor r10,r8,r10,ror#32-3 621#ifndef __thumb2__ 622 ldr r0,[sp,#32+4] 623#else 624 ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order] 625#endif 626 eor r11,r9,r11,ror#32-3 627#ifndef __thumb2__ 628 str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 629#endif 630 add r9,sp,#208 631#ifndef __thumb2__ 632 str r11,[sp,#352+4] 633#else 634 strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 635#endif 636 637#ifndef __thumb2__ 638 ldr r10,[sp,#232] @ D[4] 639#endif 640#ifndef __thumb2__ 641 ldr r11,[sp,#232+4] 642#else 643 ldrd r10,r11,[sp,#232] @ D[4] 644#endif 645#ifndef __thumb2__ 646 ldr r12,[sp,#200] @ D[0] 647#endif 648#ifndef __thumb2__ 649 ldr r14,[sp,#200+4] 650#else 651 ldrd r12,r14,[sp,#200] @ D[0] 652#endif 653 654 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 655 656 eor r1,r1,r10 657#ifndef __thumb2__ 658 ldr r2,[sp,#40] @ A[1][0] 659#endif 660 eor r0,r0,r11 661#ifndef __thumb2__ 662 ldr r3,[sp,#40+4] 663#else 664 ldrd r2,r3,[sp,#40] @ A[1][0] 665#endif 666 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 667#ifndef __thumb2__ 668 ldr r4,[sp,#88] @ A[2][1] 669#endif 670 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 671#ifndef __thumb2__ 672 ldr r5,[sp,#88+4] 673#else 674 ldrd r4,r5,[sp,#88] @ A[2][1] 675#endif 676 677 eor r2,r2,r12 678#ifndef __thumb2__ 679 ldr r10,[sp,#136] @ A[3][2] 680#endif 681 eor r3,r3,r14 682#ifndef __thumb2__ 683 ldr r11,[sp,#136+4] 684#else 685 ldrd r10,r11,[sp,#136] @ A[3][2] 686#endif 687 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 688#ifndef __thumb2__ 689 ldr r12,[sp,#224] @ D[3] 690#endif 691 @ mov r3,r3,ror#32-18 692#ifndef __thumb2__ 693 ldr r14,[sp,#224+4] 694#else 695 ldrd r12,r14,[sp,#224] @ D[3] 696#endif 697 698 eor r6,r6,r4 699 eor r7,r7,r5 700 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 701 mov r5,r7,ror#32-5 702 703 eor r10,r10,r8 704#ifndef __thumb2__ 705 ldr r8,[sp,#184] @ A[4][3] 706#endif 707 eor r11,r11,r9 708#ifndef __thumb2__ 709 ldr r9,[sp,#184+4] 710#else 711 ldrd r8,r9,[sp,#184] @ A[4][3] 712#endif 713 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 714 mov r6,r11,ror#32-8 715 716 eor r12,r12,r8 717 eor r14,r14,r9 718 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 719 mov r9,r14,ror#32-28 720 721 bic r10,r4,r2,ror#32-18 722 bic r11,r5,r3,ror#32-18 723 eor r10,r10,r0,ror#32-14 724 eor r11,r11,r1,ror#32-13 725#ifndef __thumb2__ 726 str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 727#endif 728 bic r12,r6,r4 729#ifndef __thumb2__ 730 str r11,[sp,#360+4] 731#else 732 strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 733#endif 734 bic r14,r7,r5 735 eor r12,r12,r2,ror#32-18 736#ifndef __thumb2__ 737 str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 738#endif 739 eor r14,r14,r3,ror#32-18 740#ifndef __thumb2__ 741 str r14,[sp,#368+4] 742#else 743 strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 744#endif 745 bic r10,r8,r6 746 bic r11,r9,r7 747 bic r12,r0,r8,ror#14 748 bic r14,r1,r9,ror#13 749 eor r10,r10,r4 750 eor r11,r11,r5 751#ifndef __thumb2__ 752 str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 753#endif 754 bic r2,r2,r0,ror#18-14 755#ifndef __thumb2__ 756 str r11,[sp,#376+4] 757#else 758 strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 759#endif 760 eor r12,r6,r12,ror#32-14 761 bic r11,r3,r1,ror#18-13 762 eor r14,r7,r14,ror#32-13 763#ifndef __thumb2__ 764 str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 765#endif 766#ifndef __thumb2__ 767 str r14,[sp,#384+4] 768#else 769 strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 770#endif 771 add r14,sp,#216 772#ifndef __thumb2__ 773 ldr r0,[sp,#16] @ A[0][2] 774#endif 775 eor r10,r8,r2,ror#32-18 776#ifndef __thumb2__ 777 ldr r1,[sp,#16+4] 778#else 779 ldrd r0,r1,[sp,#16] @ A[0][2] 780#endif 781 eor r11,r9,r11,ror#32-18 782#ifndef __thumb2__ 783 str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 784#endif 785#ifndef __thumb2__ 786 str r11,[sp,#392+4] 787#else 788 strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 789#endif 790 791 ldmia r14,{r10,r11,r12,r14} @ D[2..3] 792#ifndef __thumb2__ 793 ldr r2,[sp,#64] @ A[1][3] 794#endif 795#ifndef __thumb2__ 796 ldr r3,[sp,#64+4] 797#else 798 ldrd r2,r3,[sp,#64] @ A[1][3] 799#endif 800#ifndef __thumb2__ 801 ldr r6,[sp,#232] @ D[4] 802#endif 803#ifndef __thumb2__ 804 ldr r7,[sp,#232+4] 805#else 806 ldrd r6,r7,[sp,#232] @ D[4] 807#endif 808 809 eor r0,r0,r10 810#ifndef __thumb2__ 811 ldr r4,[sp,#112] @ A[2][4] 812#endif 813 eor r1,r1,r11 814#ifndef __thumb2__ 815 ldr r5,[sp,#112+4] 816#else 817 ldrd r4,r5,[sp,#112] @ A[2][4] 818#endif 819 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 820#ifndef __thumb2__ 821 ldr r8,[sp,#200] @ D[0] 822#endif 823 @ mov r1,r1,ror#32-31 824#ifndef __thumb2__ 825 ldr r9,[sp,#200+4] 826#else 827 ldrd r8,r9,[sp,#200] @ D[0] 828#endif 829 830 eor r12,r12,r2 831#ifndef __thumb2__ 832 ldr r10,[sp,#120] @ A[3][0] 833#endif 834 eor r14,r14,r3 835#ifndef __thumb2__ 836 ldr r11,[sp,#120+4] 837#else 838 ldrd r10,r11,[sp,#120] @ A[3][0] 839#endif 840 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 841#ifndef __thumb2__ 842 ldr r12,[sp,#208] @ D[1] 843#endif 844 mov r2,r14,ror#32-28 845#ifndef __thumb2__ 846 ldr r14,[sp,#208+4] 847#else 848 ldrd r12,r14,[sp,#208] @ D[1] 849#endif 850 851 eor r6,r6,r4 852 eor r7,r7,r5 853 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 854 mov r4,r7,ror#32-20 855 856 eor r10,r10,r8 857#ifndef __thumb2__ 858 ldr r8,[sp,#168] @ A[4][1] 859#endif 860 eor r11,r11,r9 861#ifndef __thumb2__ 862 ldr r9,[sp,#168+4] 863#else 864 ldrd r8,r9,[sp,#168] @ A[4][1] 865#endif 866 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 867 mov r6,r11,ror#32-21 868 869 eor r8,r8,r12 870 eor r9,r9,r14 871 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 872 @ mov r9,r3,ror#32-1 873 874 bic r10,r4,r2 875 bic r11,r5,r3 876 eor r10,r10,r0,ror#32-31 877#ifndef __thumb2__ 878 str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 879#endif 880 eor r11,r11,r1,ror#32-31 881#ifndef __thumb2__ 882 str r11,[sp,#400+4] 883#else 884 strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 885#endif 886 bic r12,r6,r4 887 bic r14,r7,r5 888 eor r12,r12,r2 889 eor r14,r14,r3 890#ifndef __thumb2__ 891 str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 892#endif 893 bic r10,r8,r6,ror#1 894#ifndef __thumb2__ 895 str r14,[sp,#408+4] 896#else 897 strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 898#endif 899 bic r11,r9,r7,ror#1 900 bic r12,r0,r8,ror#31-1 901 bic r14,r1,r9,ror#31-1 902 eor r4,r4,r10,ror#32-1 903#ifndef __thumb2__ 904 str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 905#endif 906 eor r5,r5,r11,ror#32-1 907#ifndef __thumb2__ 908 str r5,[sp,#416+4] 909#else 910 strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 911#endif 912 eor r6,r6,r12,ror#32-31 913 eor r7,r7,r14,ror#32-31 914#ifndef __thumb2__ 915 str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 916#endif 917 bic r10,r2,r0,ror#32-31 918#ifndef __thumb2__ 919 str r7,[sp,#424+4] 920#else 921 strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 922#endif 923 bic r11,r3,r1,ror#32-31 924 add r12,sp,#240 925 eor r8,r10,r8,ror#32-1 926 add r10,sp,#280 927 eor r9,r11,r9,ror#32-1 928#ifndef __thumb2__ 929 str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 930#endif 931#ifndef __thumb2__ 932 str r9,[sp,#432+4] 933#else 934 strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 935#endif 936 ldmia r12,{r0,r1,r2,r3} @ A[0][0..1] 937 ldmia r10,{r10,r11,r12,r14} @ A[1][0..1] 938#ifdef __thumb2__ 939 eor r0,r0,r10 940 eor r1,r1,r11 941 eor r2,r2,r12 942 ldrd r10,r11,[sp,#296] 943 eor r3,r3,r14 944 ldrd r12,r14,[sp,#304] 945 eor r4,r4,r10 946 eor r5,r5,r11 947 eor r6,r6,r12 948 ldrd r10,r11,[sp,#312] 949 eor r7,r7,r14 950 ldrd r12,r14,[sp,#320] 951 eor r8,r8,r10 952 eor r9,r9,r11 953 eor r0,r0,r12 954 ldrd r10,r11,[sp,#328] 955 eor r1,r1,r14 956 ldrd r12,r14,[sp,#336] 957 eor r2,r2,r10 958 eor r3,r3,r11 959 eor r4,r4,r12 960 ldrd r10,r11,[sp,#344] 961 eor r5,r5,r14 962 ldrd r12,r14,[sp,#352] 963 eor r6,r6,r10 964 eor r7,r7,r11 965 eor r8,r8,r12 966 ldrd r10,r11,[sp,#360] 967 eor r9,r9,r14 968 ldrd r12,r14,[sp,#368] 969 eor r0,r0,r10 970 eor r1,r1,r11 971 eor r2,r2,r12 972 ldrd r10,r11,[sp,#376] 973 eor r3,r3,r14 974 ldrd r12,r14,[sp,#384] 975 eor r4,r4,r10 976 eor r5,r5,r11 977 eor r6,r6,r12 978 ldrd r10,r11,[sp,#392] 979 eor r7,r7,r14 980 ldrd r12,r14,[sp,#400] 981 eor r8,r8,r10 982 eor r9,r9,r11 983 eor r0,r0,r12 984 ldrd r10,r11,[sp,#408] 985 eor r1,r1,r14 986 ldrd r12,r14,[sp,#256] 987 eor r2,r2,r10 988 eor r3,r3,r11 989 eor r4,r4,r12 990 ldrd r10,r11,[sp,#264] 991 eor r5,r5,r14 992 ldrd r12,r14,[sp,#272] 993#else 994 eor r0,r0,r10 995 add r10,sp,#296 996 eor r1,r1,r11 997 eor r2,r2,r12 998 eor r3,r3,r14 999 ldmia r10,{r10,r11,r12,r14} @ A[1][2..3] 1000 eor r4,r4,r10 1001 add r10,sp,#312 1002 eor r5,r5,r11 1003 eor r6,r6,r12 1004 eor r7,r7,r14 1005 ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0] 1006 eor r8,r8,r10 1007 add r10,sp,#328 1008 eor r9,r9,r11 1009 eor r0,r0,r12 1010 eor r1,r1,r14 1011 ldmia r10,{r10,r11,r12,r14} @ A[2][1..2] 1012 eor r2,r2,r10 1013 add r10,sp,#344 1014 eor r3,r3,r11 1015 eor r4,r4,r12 1016 eor r5,r5,r14 1017 ldmia r10,{r10,r11,r12,r14} @ A[2][3..4] 1018 eor r6,r6,r10 1019 add r10,sp,#360 1020 eor r7,r7,r11 1021 eor r8,r8,r12 1022 eor r9,r9,r14 1023 ldmia r10,{r10,r11,r12,r14} @ A[3][0..1] 1024 eor r0,r0,r10 1025 add r10,sp,#376 1026 eor r1,r1,r11 1027 eor r2,r2,r12 1028 eor r3,r3,r14 1029 ldmia r10,{r10,r11,r12,r14} @ A[3][2..3] 1030 eor r4,r4,r10 1031 add r10,sp,#392 1032 eor r5,r5,r11 1033 eor r6,r6,r12 1034 eor r7,r7,r14 1035 ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0] 1036 eor r8,r8,r10 1037 ldr r10,[sp,#408] @ A[4][1] 1038 eor r9,r9,r11 1039 ldr r11,[sp,#408+4] 1040 eor r0,r0,r12 1041 ldr r12,[sp,#256] @ A[0][2] 1042 eor r1,r1,r14 1043 ldr r14,[sp,#256+4] 1044 eor r2,r2,r10 1045 add r10,sp,#264 1046 eor r3,r3,r11 1047 eor r4,r4,r12 1048 eor r5,r5,r14 1049 ldmia r10,{r10,r11,r12,r14} @ A[0][3..4] 1050#endif 1051 eor r6,r6,r10 1052 eor r7,r7,r11 1053 eor r8,r8,r12 1054 eor r9,r9,r14 1055 1056 eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; 1057#ifndef __thumb2__ 1058 str r10,[sp,#208] @ D[1] = E[0] 1059#endif 1060 eor r11,r1,r4 1061#ifndef __thumb2__ 1062 str r11,[sp,#208+4] 1063#else 1064 strd r10,r11,[sp,#208] @ D[1] = E[0] 1065#endif 1066 eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; 1067 eor r14,r7,r0 1068#ifndef __thumb2__ 1069 str r12,[sp,#232] @ D[4] = E[1] 1070#endif 1071 eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; 1072#ifndef __thumb2__ 1073 str r14,[sp,#232+4] 1074#else 1075 strd r12,r14,[sp,#232] @ D[4] = E[1] 1076#endif 1077 eor r1,r9,r2 1078#ifndef __thumb2__ 1079 str r0,[sp,#200] @ D[0] = C[0] 1080#endif 1081 eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; 1082#ifndef __thumb2__ 1083 ldr r7,[sp,#384] 1084#endif 1085 eor r3,r3,r6 1086#ifndef __thumb2__ 1087 str r1,[sp,#200+4] 1088#else 1089 strd r0,r1,[sp,#200] @ D[0] = C[0] 1090#endif 1091#ifndef __thumb2__ 1092 ldr r6,[sp,#384+4] 1093#else 1094 ldrd r7,r6,[sp,#384] 1095#endif 1096#ifndef __thumb2__ 1097 str r2,[sp,#216] @ D[2] = C[1] 1098#endif 1099 eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; 1100#ifndef __thumb2__ 1101 str r3,[sp,#216+4] 1102#else 1103 strd r2,r3,[sp,#216] @ D[2] = C[1] 1104#endif 1105 eor r5,r5,r8 1106 1107#ifndef __thumb2__ 1108 ldr r8,[sp,#432] 1109#endif 1110#ifndef __thumb2__ 1111 ldr r9,[sp,#432+4] 1112#else 1113 ldrd r8,r9,[sp,#432] 1114#endif 1115#ifndef __thumb2__ 1116 str r4,[sp,#224] @ D[3] = C[2] 1117#endif 1118 eor r7,r7,r4 1119#ifndef __thumb2__ 1120 str r5,[sp,#224+4] 1121#else 1122 strd r4,r5,[sp,#224] @ D[3] = C[2] 1123#endif 1124 eor r6,r6,r5 1125#ifndef __thumb2__ 1126 ldr r4,[sp,#240] 1127#endif 1128 @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ 1129 @ mov r6,r6,ror#32-11 1130#ifndef __thumb2__ 1131 ldr r5,[sp,#240+4] 1132#else 1133 ldrd r4,r5,[sp,#240] 1134#endif 1135 eor r8,r8,r12 1136 eor r9,r9,r14 1137#ifndef __thumb2__ 1138 ldr r12,[sp,#336] 1139#endif 1140 eor r0,r0,r4 1141#ifndef __thumb2__ 1142 ldr r14,[sp,#336+4] 1143#else 1144 ldrd r12,r14,[sp,#336] 1145#endif 1146 @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ 1147 @ mov r9,r9,ror#32-7 1148 eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; 1149 eor r12,r12,r2 1150#ifndef __thumb2__ 1151 ldr r2,[sp,#288] 1152#endif 1153 eor r14,r14,r3 1154#ifndef __thumb2__ 1155 ldr r3,[sp,#288+4] 1156#else 1157 ldrd r2,r3,[sp,#288] 1158#endif 1159 mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); 1160 ldr r12,[sp,#444] @ load counter 1161 eor r2,r2,r10 1162 adr r10,iotas32 1163 mov r4,r14,ror#32-22 1164 add r14,r10,r12 1165 eor r3,r3,r11 1166#ifndef __thumb2__ 1167 ldr r10,[r14,#8] @ iotas[i].lo 1168#endif 1169 add r12,r12,#16 1170#ifndef __thumb2__ 1171 ldr r11,[r14,#12] @ iotas[i].hi 1172#else 1173 ldrd r10,r11,[r14,#8] @ iotas[i].lo 1174#endif 1175 cmp r12,#192 1176 str r12,[sp,#444] @ store counter 1177 bic r12,r4,r2,ror#32-22 1178 bic r14,r5,r3,ror#32-22 1179 mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); 1180 mov r3,r3,ror#32-22 1181 eor r12,r12,r0 1182 eor r14,r14,r1 1183 eor r10,r10,r12 1184 eor r11,r11,r14 1185#ifndef __thumb2__ 1186 str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1187#endif 1188 bic r12,r6,r4,ror#11 1189#ifndef __thumb2__ 1190 str r11,[sp,#0+4] 1191#else 1192 strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; 1193#endif 1194 bic r14,r7,r5,ror#10 1195 bic r10,r8,r6,ror#32-(11-7) 1196 bic r11,r9,r7,ror#32-(10-7) 1197 eor r12,r2,r12,ror#32-11 1198#ifndef __thumb2__ 1199 str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1200#endif 1201 eor r14,r3,r14,ror#32-10 1202#ifndef __thumb2__ 1203 str r14,[sp,#8+4] 1204#else 1205 strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]); 1206#endif 1207 eor r10,r4,r10,ror#32-7 1208 eor r11,r5,r11,ror#32-7 1209#ifndef __thumb2__ 1210 str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1211#endif 1212 bic r12,r0,r8,ror#32-7 1213#ifndef __thumb2__ 1214 str r11,[sp,#16+4] 1215#else 1216 strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]); 1217#endif 1218 bic r14,r1,r9,ror#32-7 1219 eor r12,r12,r6,ror#32-11 1220#ifndef __thumb2__ 1221 str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1222#endif 1223 eor r14,r14,r7,ror#32-10 1224#ifndef __thumb2__ 1225 str r14,[sp,#24+4] 1226#else 1227 strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]); 1228#endif 1229 bic r10,r2,r0 1230 add r14,sp,#224 1231#ifndef __thumb2__ 1232 ldr r0,[sp,#264] @ A[0][3] 1233#endif 1234 bic r11,r3,r1 1235#ifndef __thumb2__ 1236 ldr r1,[sp,#264+4] 1237#else 1238 ldrd r0,r1,[sp,#264] @ A[0][3] 1239#endif 1240 eor r10,r10,r8,ror#32-7 1241 eor r11,r11,r9,ror#32-7 1242#ifndef __thumb2__ 1243 str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1244#endif 1245 add r9,sp,#200 1246#ifndef __thumb2__ 1247 str r11,[sp,#32+4] 1248#else 1249 strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]); 1250#endif 1251 1252 ldmia r14,{r10,r11,r12,r14} @ D[3..4] 1253 ldmia r9,{r6,r7,r8,r9} @ D[0..1] 1254 1255#ifndef __thumb2__ 1256 ldr r2,[sp,#312] @ A[1][4] 1257#endif 1258 eor r0,r0,r10 1259#ifndef __thumb2__ 1260 ldr r3,[sp,#312+4] 1261#else 1262 ldrd r2,r3,[sp,#312] @ A[1][4] 1263#endif 1264 eor r1,r1,r11 1265 @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]); 1266#ifndef __thumb2__ 1267 ldr r10,[sp,#368] @ A[3][1] 1268#endif 1269 @ mov r1,r1,ror#32-14 1270#ifndef __thumb2__ 1271 ldr r11,[sp,#368+4] 1272#else 1273 ldrd r10,r11,[sp,#368] @ A[3][1] 1274#endif 1275 1276 eor r2,r2,r12 1277#ifndef __thumb2__ 1278 ldr r4,[sp,#320] @ A[2][0] 1279#endif 1280 eor r3,r3,r14 1281#ifndef __thumb2__ 1282 ldr r5,[sp,#320+4] 1283#else 1284 ldrd r4,r5,[sp,#320] @ A[2][0] 1285#endif 1286 @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]); 1287 @ mov r3,r3,ror#32-10 1288 1289 eor r6,r6,r4 1290#ifndef __thumb2__ 1291 ldr r12,[sp,#216] @ D[2] 1292#endif 1293 eor r7,r7,r5 1294#ifndef __thumb2__ 1295 ldr r14,[sp,#216+4] 1296#else 1297 ldrd r12,r14,[sp,#216] @ D[2] 1298#endif 1299 mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]); 1300 mov r4,r7,ror#32-2 1301 1302 eor r10,r10,r8 1303#ifndef __thumb2__ 1304 ldr r8,[sp,#416] @ A[4][2] 1305#endif 1306 eor r11,r11,r9 1307#ifndef __thumb2__ 1308 ldr r9,[sp,#416+4] 1309#else 1310 ldrd r8,r9,[sp,#416] @ A[4][2] 1311#endif 1312 mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]); 1313 mov r6,r11,ror#32-23 1314 1315 bic r10,r4,r2,ror#32-10 1316 bic r11,r5,r3,ror#32-10 1317 eor r12,r12,r8 1318 eor r14,r14,r9 1319 mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]); 1320 mov r8,r14,ror#32-31 1321 eor r10,r10,r0,ror#32-14 1322 eor r11,r11,r1,ror#32-14 1323#ifndef __thumb2__ 1324 str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1325#endif 1326 bic r12,r6,r4 1327#ifndef __thumb2__ 1328 str r11,[sp,#40+4] 1329#else 1330 strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2]) 1331#endif 1332 bic r14,r7,r5 1333 eor r12,r12,r2,ror#32-10 1334#ifndef __thumb2__ 1335 str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1336#endif 1337 eor r14,r14,r3,ror#32-10 1338#ifndef __thumb2__ 1339 str r14,[sp,#48+4] 1340#else 1341 strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]); 1342#endif 1343 bic r10,r8,r6 1344 bic r11,r9,r7 1345 bic r12,r0,r8,ror#14 1346 bic r14,r1,r9,ror#14 1347 eor r10,r10,r4 1348 eor r11,r11,r5 1349#ifndef __thumb2__ 1350 str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1351#endif 1352 bic r2,r2,r0,ror#32-(14-10) 1353#ifndef __thumb2__ 1354 str r11,[sp,#56+4] 1355#else 1356 strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]); 1357#endif 1358 eor r12,r6,r12,ror#32-14 1359 bic r11,r3,r1,ror#32-(14-10) 1360#ifndef __thumb2__ 1361 str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1362#endif 1363 eor r14,r7,r14,ror#32-14 1364#ifndef __thumb2__ 1365 str r14,[sp,#64+4] 1366#else 1367 strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]); 1368#endif 1369 add r12,sp,#208 1370#ifndef __thumb2__ 1371 ldr r1,[sp,#248] @ A[0][1] 1372#endif 1373 eor r10,r8,r2,ror#32-10 1374#ifndef __thumb2__ 1375 ldr r0,[sp,#248+4] 1376#else 1377 ldrd r1,r0,[sp,#248] @ A[0][1] 1378#endif 1379 eor r11,r9,r11,ror#32-10 1380#ifndef __thumb2__ 1381 str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1382#endif 1383#ifndef __thumb2__ 1384 str r11,[sp,#72+4] 1385#else 1386 strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]); 1387#endif 1388 1389 add r9,sp,#224 1390 ldmia r12,{r10,r11,r12,r14} @ D[1..2] 1391#ifndef __thumb2__ 1392 ldr r2,[sp,#296] @ A[1][2] 1393#endif 1394#ifndef __thumb2__ 1395 ldr r3,[sp,#296+4] 1396#else 1397 ldrd r2,r3,[sp,#296] @ A[1][2] 1398#endif 1399 ldmia r9,{r6,r7,r8,r9} @ D[3..4] 1400 1401 eor r1,r1,r10 1402#ifndef __thumb2__ 1403 ldr r4,[sp,#344] @ A[2][3] 1404#endif 1405 eor r0,r0,r11 1406#ifndef __thumb2__ 1407 ldr r5,[sp,#344+4] 1408#else 1409 ldrd r4,r5,[sp,#344] @ A[2][3] 1410#endif 1411 mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]); 1412 1413 eor r2,r2,r12 1414#ifndef __thumb2__ 1415 ldr r10,[sp,#392] @ A[3][4] 1416#endif 1417 eor r3,r3,r14 1418#ifndef __thumb2__ 1419 ldr r11,[sp,#392+4] 1420#else 1421 ldrd r10,r11,[sp,#392] @ A[3][4] 1422#endif 1423 @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]); 1424#ifndef __thumb2__ 1425 ldr r12,[sp,#200] @ D[0] 1426#endif 1427 @ mov r3,r3,ror#32-3 1428#ifndef __thumb2__ 1429 ldr r14,[sp,#200+4] 1430#else 1431 ldrd r12,r14,[sp,#200] @ D[0] 1432#endif 1433 1434 eor r4,r4,r6 1435 eor r5,r5,r7 1436 @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); 1437 @ mov r4,r7,ror#32-13 @ [track reverse order below] 1438 1439 eor r10,r10,r8 1440#ifndef __thumb2__ 1441 ldr r8,[sp,#400] @ A[4][0] 1442#endif 1443 eor r11,r11,r9 1444#ifndef __thumb2__ 1445 ldr r9,[sp,#400+4] 1446#else 1447 ldrd r8,r9,[sp,#400] @ A[4][0] 1448#endif 1449 mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); 1450 mov r7,r11,ror#32-4 1451 1452 eor r12,r12,r8 1453 eor r14,r14,r9 1454 mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); 1455 mov r9,r14,ror#32-9 1456 1457 bic r10,r5,r2,ror#13-3 1458 bic r11,r4,r3,ror#12-3 1459 bic r12,r6,r5,ror#32-13 1460 bic r14,r7,r4,ror#32-12 1461 eor r10,r0,r10,ror#32-13 1462 eor r11,r1,r11,ror#32-12 1463#ifndef __thumb2__ 1464 str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1465#endif 1466 eor r12,r12,r2,ror#32-3 1467#ifndef __thumb2__ 1468 str r11,[sp,#80+4] 1469#else 1470 strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2]) 1471#endif 1472 eor r14,r14,r3,ror#32-3 1473#ifndef __thumb2__ 1474 str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1475#endif 1476 bic r10,r8,r6 1477 bic r11,r9,r7 1478#ifndef __thumb2__ 1479 str r14,[sp,#88+4] 1480#else 1481 strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]); 1482#endif 1483 eor r10,r10,r5,ror#32-13 1484 eor r11,r11,r4,ror#32-12 1485#ifndef __thumb2__ 1486 str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1487#endif 1488 bic r12,r0,r8 1489#ifndef __thumb2__ 1490 str r11,[sp,#96+4] 1491#else 1492 strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]); 1493#endif 1494 bic r14,r1,r9 1495 eor r12,r12,r6 1496 eor r14,r14,r7 1497#ifndef __thumb2__ 1498 str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1499#endif 1500 bic r10,r2,r0,ror#3 1501#ifndef __thumb2__ 1502 str r14,[sp,#104+4] 1503#else 1504 strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]); 1505#endif 1506 bic r11,r3,r1,ror#3 1507#ifndef __thumb2__ 1508 ldr r1,[sp,#272] @ A[0][4] [in reverse order] 1509#endif 1510 eor r10,r8,r10,ror#32-3 1511#ifndef __thumb2__ 1512 ldr r0,[sp,#272+4] 1513#else 1514 ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order] 1515#endif 1516 eor r11,r9,r11,ror#32-3 1517#ifndef __thumb2__ 1518 str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1519#endif 1520 add r9,sp,#208 1521#ifndef __thumb2__ 1522 str r11,[sp,#112+4] 1523#else 1524 strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]); 1525#endif 1526 1527#ifndef __thumb2__ 1528 ldr r10,[sp,#232] @ D[4] 1529#endif 1530#ifndef __thumb2__ 1531 ldr r11,[sp,#232+4] 1532#else 1533 ldrd r10,r11,[sp,#232] @ D[4] 1534#endif 1535#ifndef __thumb2__ 1536 ldr r12,[sp,#200] @ D[0] 1537#endif 1538#ifndef __thumb2__ 1539 ldr r14,[sp,#200+4] 1540#else 1541 ldrd r12,r14,[sp,#200] @ D[0] 1542#endif 1543 1544 ldmia r9,{r6,r7,r8,r9} @ D[1..2] 1545 1546 eor r1,r1,r10 1547#ifndef __thumb2__ 1548 ldr r2,[sp,#280] @ A[1][0] 1549#endif 1550 eor r0,r0,r11 1551#ifndef __thumb2__ 1552 ldr r3,[sp,#280+4] 1553#else 1554 ldrd r2,r3,[sp,#280] @ A[1][0] 1555#endif 1556 @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]); 1557#ifndef __thumb2__ 1558 ldr r4,[sp,#328] @ A[2][1] 1559#endif 1560 @ mov r0,r11,ror#32-14 @ [was loaded in reverse order] 1561#ifndef __thumb2__ 1562 ldr r5,[sp,#328+4] 1563#else 1564 ldrd r4,r5,[sp,#328] @ A[2][1] 1565#endif 1566 1567 eor r2,r2,r12 1568#ifndef __thumb2__ 1569 ldr r10,[sp,#376] @ A[3][2] 1570#endif 1571 eor r3,r3,r14 1572#ifndef __thumb2__ 1573 ldr r11,[sp,#376+4] 1574#else 1575 ldrd r10,r11,[sp,#376] @ A[3][2] 1576#endif 1577 @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]); 1578#ifndef __thumb2__ 1579 ldr r12,[sp,#224] @ D[3] 1580#endif 1581 @ mov r3,r3,ror#32-18 1582#ifndef __thumb2__ 1583 ldr r14,[sp,#224+4] 1584#else 1585 ldrd r12,r14,[sp,#224] @ D[3] 1586#endif 1587 1588 eor r6,r6,r4 1589 eor r7,r7,r5 1590 mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]); 1591 mov r5,r7,ror#32-5 1592 1593 eor r10,r10,r8 1594#ifndef __thumb2__ 1595 ldr r8,[sp,#424] @ A[4][3] 1596#endif 1597 eor r11,r11,r9 1598#ifndef __thumb2__ 1599 ldr r9,[sp,#424+4] 1600#else 1601 ldrd r8,r9,[sp,#424] @ A[4][3] 1602#endif 1603 mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); 1604 mov r6,r11,ror#32-8 1605 1606 eor r12,r12,r8 1607 eor r14,r14,r9 1608 mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); 1609 mov r9,r14,ror#32-28 1610 1611 bic r10,r4,r2,ror#32-18 1612 bic r11,r5,r3,ror#32-18 1613 eor r10,r10,r0,ror#32-14 1614 eor r11,r11,r1,ror#32-13 1615#ifndef __thumb2__ 1616 str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1617#endif 1618 bic r12,r6,r4 1619#ifndef __thumb2__ 1620 str r11,[sp,#120+4] 1621#else 1622 strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2]) 1623#endif 1624 bic r14,r7,r5 1625 eor r12,r12,r2,ror#32-18 1626#ifndef __thumb2__ 1627 str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1628#endif 1629 eor r14,r14,r3,ror#32-18 1630#ifndef __thumb2__ 1631 str r14,[sp,#128+4] 1632#else 1633 strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]); 1634#endif 1635 bic r10,r8,r6 1636 bic r11,r9,r7 1637 bic r12,r0,r8,ror#14 1638 bic r14,r1,r9,ror#13 1639 eor r10,r10,r4 1640 eor r11,r11,r5 1641#ifndef __thumb2__ 1642 str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1643#endif 1644 bic r2,r2,r0,ror#18-14 1645#ifndef __thumb2__ 1646 str r11,[sp,#136+4] 1647#else 1648 strd r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]); 1649#endif 1650 eor r12,r6,r12,ror#32-14 1651 bic r11,r3,r1,ror#18-13 1652 eor r14,r7,r14,ror#32-13 1653#ifndef __thumb2__ 1654 str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1655#endif 1656#ifndef __thumb2__ 1657 str r14,[sp,#144+4] 1658#else 1659 strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]); 1660#endif 1661 add r14,sp,#216 1662#ifndef __thumb2__ 1663 ldr r0,[sp,#256] @ A[0][2] 1664#endif 1665 eor r10,r8,r2,ror#32-18 1666#ifndef __thumb2__ 1667 ldr r1,[sp,#256+4] 1668#else 1669 ldrd r0,r1,[sp,#256] @ A[0][2] 1670#endif 1671 eor r11,r9,r11,ror#32-18 1672#ifndef __thumb2__ 1673 str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1674#endif 1675#ifndef __thumb2__ 1676 str r11,[sp,#152+4] 1677#else 1678 strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]); 1679#endif 1680 1681 ldmia r14,{r10,r11,r12,r14} @ D[2..3] 1682#ifndef __thumb2__ 1683 ldr r2,[sp,#304] @ A[1][3] 1684#endif 1685#ifndef __thumb2__ 1686 ldr r3,[sp,#304+4] 1687#else 1688 ldrd r2,r3,[sp,#304] @ A[1][3] 1689#endif 1690#ifndef __thumb2__ 1691 ldr r6,[sp,#232] @ D[4] 1692#endif 1693#ifndef __thumb2__ 1694 ldr r7,[sp,#232+4] 1695#else 1696 ldrd r6,r7,[sp,#232] @ D[4] 1697#endif 1698 1699 eor r0,r0,r10 1700#ifndef __thumb2__ 1701 ldr r4,[sp,#352] @ A[2][4] 1702#endif 1703 eor r1,r1,r11 1704#ifndef __thumb2__ 1705 ldr r5,[sp,#352+4] 1706#else 1707 ldrd r4,r5,[sp,#352] @ A[2][4] 1708#endif 1709 @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]); 1710#ifndef __thumb2__ 1711 ldr r8,[sp,#200] @ D[0] 1712#endif 1713 @ mov r1,r1,ror#32-31 1714#ifndef __thumb2__ 1715 ldr r9,[sp,#200+4] 1716#else 1717 ldrd r8,r9,[sp,#200] @ D[0] 1718#endif 1719 1720 eor r12,r12,r2 1721#ifndef __thumb2__ 1722 ldr r10,[sp,#360] @ A[3][0] 1723#endif 1724 eor r14,r14,r3 1725#ifndef __thumb2__ 1726 ldr r11,[sp,#360+4] 1727#else 1728 ldrd r10,r11,[sp,#360] @ A[3][0] 1729#endif 1730 mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]); 1731#ifndef __thumb2__ 1732 ldr r12,[sp,#208] @ D[1] 1733#endif 1734 mov r2,r14,ror#32-28 1735#ifndef __thumb2__ 1736 ldr r14,[sp,#208+4] 1737#else 1738 ldrd r12,r14,[sp,#208] @ D[1] 1739#endif 1740 1741 eor r6,r6,r4 1742 eor r7,r7,r5 1743 mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]); 1744 mov r4,r7,ror#32-20 1745 1746 eor r10,r10,r8 1747#ifndef __thumb2__ 1748 ldr r8,[sp,#408] @ A[4][1] 1749#endif 1750 eor r11,r11,r9 1751#ifndef __thumb2__ 1752 ldr r9,[sp,#408+4] 1753#else 1754 ldrd r8,r9,[sp,#408] @ A[4][1] 1755#endif 1756 mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]); 1757 mov r6,r11,ror#32-21 1758 1759 eor r8,r8,r12 1760 eor r9,r9,r14 1761 @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); 1762 @ mov r9,r3,ror#32-1 1763 1764 bic r10,r4,r2 1765 bic r11,r5,r3 1766 eor r10,r10,r0,ror#32-31 1767#ifndef __thumb2__ 1768 str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1769#endif 1770 eor r11,r11,r1,ror#32-31 1771#ifndef __thumb2__ 1772 str r11,[sp,#160+4] 1773#else 1774 strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2]) 1775#endif 1776 bic r12,r6,r4 1777 bic r14,r7,r5 1778 eor r12,r12,r2 1779 eor r14,r14,r3 1780#ifndef __thumb2__ 1781 str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1782#endif 1783 bic r10,r8,r6,ror#1 1784#ifndef __thumb2__ 1785 str r14,[sp,#168+4] 1786#else 1787 strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]); 1788#endif 1789 bic r11,r9,r7,ror#1 1790 bic r12,r0,r8,ror#31-1 1791 bic r14,r1,r9,ror#31-1 1792 eor r4,r4,r10,ror#32-1 1793#ifndef __thumb2__ 1794 str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1795#endif 1796 eor r5,r5,r11,ror#32-1 1797#ifndef __thumb2__ 1798 str r5,[sp,#176+4] 1799#else 1800 strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]); 1801#endif 1802 eor r6,r6,r12,ror#32-31 1803 eor r7,r7,r14,ror#32-31 1804#ifndef __thumb2__ 1805 str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 1806#endif 1807 bic r10,r2,r0,ror#32-31 1808#ifndef __thumb2__ 1809 str r7,[sp,#184+4] 1810#else 1811 strd r6,r7,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]); 1812#endif 1813 bic r11,r3,r1,ror#32-31 1814 add r12,sp,#0 1815 eor r8,r10,r8,ror#32-1 1816 add r10,sp,#40 1817 eor r9,r11,r9,ror#32-1 1818#ifndef __thumb2__ 1819 str r8,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 1820#endif 1821#ifndef __thumb2__ 1822 str r9,[sp,#192+4] 1823#else 1824 strd r8,r9,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]); 1825#endif 1826 blo .Lround2x 1827 1828#if __ARM_ARCH__>=5 1829 ldr pc,[sp,#440] 1830#else 1831 ldr lr,[sp,#440] 1832 tst lr,#1 1833 moveq pc,lr @ be binary compatible with V4, yet 1834.word 0xe12fff1e @ interoperable with Thumb ISA:-) 1835#endif 1836.size KeccakF1600_int,.-KeccakF1600_int 1837 1838.type KeccakF1600, %function 1839.align 5 1840KeccakF1600: 1841 stmdb sp!,{r0,r4-r11,lr} 1842 sub sp,sp,#440+16 @ space for A[5][5],D[5],T[5][5],... 1843 1844 add r10,r0,#40 1845 add r11,sp,#40 1846 ldmia r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ copy A[5][5] to stack 1847 stmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1848 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1849 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1850 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1851 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1852 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1853 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1854 ldmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1855 add r12,sp,#0 1856 add r10,sp,#40 1857 stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1858 1859 bl KeccakF1600_enter 1860 1861 ldr r11, [sp,#440+16] @ restore pointer to A 1862 ldmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1863 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ return A[5][5] 1864 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1865 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1866 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1867 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1868 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1869 stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1870 ldmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1871 stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1872 1873 add sp,sp,#440+20 1874#if __ARM_ARCH__>=5 1875 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} 1876#else 1877 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr} 1878 tst lr,#1 1879 moveq pc,lr @ be binary compatible with V4, yet 1880.word 0xe12fff1e @ interoperable with Thumb ISA:-) 1881#endif 1882.size KeccakF1600,.-KeccakF1600 1883.globl SHA3_absorb 1884.type SHA3_absorb,%function 1885.align 5 1886SHA3_absorb: 1887 stmdb sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} 1888 sub sp,sp,#456+16 1889 1890 add r10,r0,#40 1891 @ mov r11,r1 1892 mov r12,r2 1893 mov r14,r3 1894 cmp r2,r3 1895 blo .Labsorb_abort 1896 1897 add r11,sp,#0 1898 ldmia r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ copy A[5][5] to stack 1899 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1900 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1901 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1902 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1903 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1904 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1905 stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1906 ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1907 stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 1908 1909 ldr r11,[sp,#476] @ restore r11 1910#ifdef __thumb2__ 1911 mov r9,#0x00ff00ff 1912 mov r8,#0x0f0f0f0f 1913 mov r7,#0x33333333 1914 mov r6,#0x55555555 1915#else 1916 mov r6,#0x11 @ compose constants 1917 mov r8,#0x0f 1918 mov r9,#0xff 1919 orr r6,r6,r6,lsl#8 1920 orr r8,r8,r8,lsl#8 1921 orr r6,r6,r6,lsl#16 @ 0x11111111 1922 orr r9,r9,r9,lsl#16 @ 0x00ff00ff 1923 orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f 1924 orr r7,r6,r6,lsl#1 @ 0x33333333 1925 orr r6,r6,r6,lsl#2 @ 0x55555555 1926#endif 1927 str r9,[sp,#468] 1928 str r8,[sp,#464] 1929 str r7,[sp,#460] 1930 str r6,[sp,#456] 1931 b .Loop_absorb 1932 1933.align 4 1934.Loop_absorb: 1935 subs r0,r12,r14 1936 blo .Labsorbed 1937 add r10,sp,#0 1938 str r0,[sp,#480] @ save len - bsz 1939 1940.align 4 1941.Loop_block: 1942 ldrb r0,[r11],#1 1943 ldrb r1,[r11],#1 1944 ldrb r2,[r11],#1 1945 ldrb r3,[r11],#1 1946 ldrb r4,[r11],#1 1947 orr r0,r0,r1,lsl#8 1948 ldrb r1,[r11],#1 1949 orr r0,r0,r2,lsl#16 1950 ldrb r2,[r11],#1 1951 orr r0,r0,r3,lsl#24 @ lo 1952 ldrb r3,[r11],#1 1953 orr r1,r4,r1,lsl#8 1954 orr r1,r1,r2,lsl#16 1955 orr r1,r1,r3,lsl#24 @ hi 1956 1957 and r2,r0,r6 @ &=0x55555555 1958 and r0,r0,r6,lsl#1 @ &=0xaaaaaaaa 1959 and r3,r1,r6 @ &=0x55555555 1960 and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa 1961 orr r2,r2,r2,lsr#1 1962 orr r0,r0,r0,lsl#1 1963 orr r3,r3,r3,lsr#1 1964 orr r1,r1,r1,lsl#1 1965 and r2,r2,r7 @ &=0x33333333 1966 and r0,r0,r7,lsl#2 @ &=0xcccccccc 1967 and r3,r3,r7 @ &=0x33333333 1968 and r1,r1,r7,lsl#2 @ &=0xcccccccc 1969 orr r2,r2,r2,lsr#2 1970 orr r0,r0,r0,lsl#2 1971 orr r3,r3,r3,lsr#2 1972 orr r1,r1,r1,lsl#2 1973 and r2,r2,r8 @ &=0x0f0f0f0f 1974 and r0,r0,r8,lsl#4 @ &=0xf0f0f0f0 1975 and r3,r3,r8 @ &=0x0f0f0f0f 1976 and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0 1977 ldmia r10,{r4,r5} @ A_flat[i] 1978 orr r2,r2,r2,lsr#4 1979 orr r0,r0,r0,lsl#4 1980 orr r3,r3,r3,lsr#4 1981 orr r1,r1,r1,lsl#4 1982 and r2,r2,r9 @ &=0x00ff00ff 1983 and r0,r0,r9,lsl#8 @ &=0xff00ff00 1984 and r3,r3,r9 @ &=0x00ff00ff 1985 and r1,r1,r9,lsl#8 @ &=0xff00ff00 1986 orr r2,r2,r2,lsr#8 1987 orr r0,r0,r0,lsl#8 1988 orr r3,r3,r3,lsr#8 1989 orr r1,r1,r1,lsl#8 1990 1991 mov r2,r2,lsl#16 1992 mov r1,r1,lsr#16 1993 eor r4,r4,r3,lsl#16 1994 eor r5,r5,r0,lsr#16 1995 eor r4,r4,r2,lsr#16 1996 eor r5,r5,r1,lsl#16 1997 stmia r10!,{r4,r5} @ A_flat[i++] ^= BitInterleave(inp[0..7]) 1998 1999 subs r14,r14,#8 2000 bhi .Loop_block 2001 2002 str r11,[sp,#476] 2003 2004 bl KeccakF1600_int 2005 2006 add r14,sp,#456 2007 ldmia r14,{r6,r7,r8,r9,r10,r11,r12,r14} @ restore constants and variables 2008 b .Loop_absorb 2009 2010.align 4 2011.Labsorbed: 2012 add r11,sp,#40 2013 ldmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 2014 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ return A[5][5] 2015 ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 2016 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 2017 ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 2018 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 2019 ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 2020 stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 2021 ldmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 2022 stmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} 2023 2024.Labsorb_abort: 2025 add sp,sp,#456+32 2026 mov r0,r12 @ return value 2027#if __ARM_ARCH__>=5 2028 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} 2029#else 2030 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} 2031 tst lr,#1 2032 moveq pc,lr @ be binary compatible with V4, yet 2033.word 0xe12fff1e @ interoperable with Thumb ISA:-) 2034#endif 2035.size SHA3_absorb,.-SHA3_absorb 2036.globl SHA3_squeeze 2037.type SHA3_squeeze,%function 2038.align 5 2039SHA3_squeeze: 2040 stmdb sp!,{r0,r3-r10,lr} 2041 2042 mov r10,r0 2043 mov r4,r1 2044 mov r5,r2 2045 mov r12,r3 2046 2047#ifdef __thumb2__ 2048 mov r9,#0x00ff00ff 2049 mov r8,#0x0f0f0f0f 2050 mov r7,#0x33333333 2051 mov r6,#0x55555555 2052#else 2053 mov r6,#0x11 @ compose constants 2054 mov r8,#0x0f 2055 mov r9,#0xff 2056 orr r6,r6,r6,lsl#8 2057 orr r8,r8,r8,lsl#8 2058 orr r6,r6,r6,lsl#16 @ 0x11111111 2059 orr r9,r9,r9,lsl#16 @ 0x00ff00ff 2060 orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f 2061 orr r7,r6,r6,lsl#1 @ 0x33333333 2062 orr r6,r6,r6,lsl#2 @ 0x55555555 2063#endif 2064 stmdb sp!,{r6,r7,r8,r9} 2065 2066 mov r14,r10 2067 b .Loop_squeeze 2068 2069.align 4 2070.Loop_squeeze: 2071 ldmia r10!,{r0,r1} @ A_flat[i++] 2072 2073 mov r2,r0,lsl#16 2074 mov r3,r1,lsl#16 @ r3 = r1 << 16 2075 mov r2,r2,lsr#16 @ r2 = r0 & 0x0000ffff 2076 mov r1,r1,lsr#16 2077 mov r0,r0,lsr#16 @ r0 = r0 >> 16 2078 mov r1,r1,lsl#16 @ r1 = r1 & 0xffff0000 2079 2080 orr r2,r2,r2,lsl#8 2081 orr r3,r3,r3,lsr#8 2082 orr r0,r0,r0,lsl#8 2083 orr r1,r1,r1,lsr#8 2084 and r2,r2,r9 @ &=0x00ff00ff 2085 and r3,r3,r9,lsl#8 @ &=0xff00ff00 2086 and r0,r0,r9 @ &=0x00ff00ff 2087 and r1,r1,r9,lsl#8 @ &=0xff00ff00 2088 orr r2,r2,r2,lsl#4 2089 orr r3,r3,r3,lsr#4 2090 orr r0,r0,r0,lsl#4 2091 orr r1,r1,r1,lsr#4 2092 and r2,r2,r8 @ &=0x0f0f0f0f 2093 and r3,r3,r8,lsl#4 @ &=0xf0f0f0f0 2094 and r0,r0,r8 @ &=0x0f0f0f0f 2095 and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0 2096 orr r2,r2,r2,lsl#2 2097 orr r3,r3,r3,lsr#2 2098 orr r0,r0,r0,lsl#2 2099 orr r1,r1,r1,lsr#2 2100 and r2,r2,r7 @ &=0x33333333 2101 and r3,r3,r7,lsl#2 @ &=0xcccccccc 2102 and r0,r0,r7 @ &=0x33333333 2103 and r1,r1,r7,lsl#2 @ &=0xcccccccc 2104 orr r2,r2,r2,lsl#1 2105 orr r3,r3,r3,lsr#1 2106 orr r0,r0,r0,lsl#1 2107 orr r1,r1,r1,lsr#1 2108 and r2,r2,r6 @ &=0x55555555 2109 and r3,r3,r6,lsl#1 @ &=0xaaaaaaaa 2110 and r0,r0,r6 @ &=0x55555555 2111 and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa 2112 2113 orr r2,r2,r3 2114 orr r0,r0,r1 2115 2116 cmp r5,#8 2117 blo .Lsqueeze_tail 2118 mov r1,r2,lsr#8 2119 strb r2,[r4],#1 2120 mov r3,r2,lsr#16 2121 strb r1,[r4],#1 2122 mov r2,r2,lsr#24 2123 strb r3,[r4],#1 2124 strb r2,[r4],#1 2125 2126 mov r1,r0,lsr#8 2127 strb r0,[r4],#1 2128 mov r3,r0,lsr#16 2129 strb r1,[r4],#1 2130 mov r0,r0,lsr#24 2131 strb r3,[r4],#1 2132 strb r0,[r4],#1 2133 subs r5,r5,#8 2134 beq .Lsqueeze_done 2135 2136 subs r12,r12,#8 @ bsz -= 8 2137 bhi .Loop_squeeze 2138 2139 mov r0,r14 @ original r10 2140 2141 bl KeccakF1600 2142 2143 ldmia sp,{r6,r7,r8,r9,r10,r12} @ restore constants and variables 2144 mov r14,r10 2145 b .Loop_squeeze 2146 2147.align 4 2148.Lsqueeze_tail: 2149 strb r2,[r4],#1 2150 mov r2,r2,lsr#8 2151 subs r5,r5,#1 2152 beq .Lsqueeze_done 2153 strb r2,[r4],#1 2154 mov r2,r2,lsr#8 2155 subs r5,r5,#1 2156 beq .Lsqueeze_done 2157 strb r2,[r4],#1 2158 mov r2,r2,lsr#8 2159 subs r5,r5,#1 2160 beq .Lsqueeze_done 2161 strb r2,[r4],#1 2162 subs r5,r5,#1 2163 beq .Lsqueeze_done 2164 2165 strb r0,[r4],#1 2166 mov r0,r0,lsr#8 2167 subs r5,r5,#1 2168 beq .Lsqueeze_done 2169 strb r0,[r4],#1 2170 mov r0,r0,lsr#8 2171 subs r5,r5,#1 2172 beq .Lsqueeze_done 2173 strb r0,[r4] 2174 b .Lsqueeze_done 2175 2176.align 4 2177.Lsqueeze_done: 2178 add sp,sp,#24 2179#if __ARM_ARCH__>=5 2180 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc} 2181#else 2182 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,lr} 2183 tst lr,#1 2184 moveq pc,lr @ be binary compatible with V4, yet 2185.word 0xe12fff1e @ interoperable with Thumb ISA:-) 2186#endif 2187.size SHA3_squeeze,.-SHA3_squeeze 2188#if __ARM_MAX_ARCH__>=7 2189.fpu neon 2190 2191.type iotas64, %object 2192.align 5 2193iotas64: 2194.quad 0x0000000000000001 2195.quad 0x0000000000008082 2196.quad 0x800000000000808a 2197.quad 0x8000000080008000 2198.quad 0x000000000000808b 2199.quad 0x0000000080000001 2200.quad 0x8000000080008081 2201.quad 0x8000000000008009 2202.quad 0x000000000000008a 2203.quad 0x0000000000000088 2204.quad 0x0000000080008009 2205.quad 0x000000008000000a 2206.quad 0x000000008000808b 2207.quad 0x800000000000008b 2208.quad 0x8000000000008089 2209.quad 0x8000000000008003 2210.quad 0x8000000000008002 2211.quad 0x8000000000000080 2212.quad 0x000000000000800a 2213.quad 0x800000008000000a 2214.quad 0x8000000080008081 2215.quad 0x8000000000008080 2216.quad 0x0000000080000001 2217.quad 0x8000000080008008 2218.size iotas64,.-iotas64 2219 2220.type KeccakF1600_neon, %function 2221.align 5 2222KeccakF1600_neon: 2223 add r1, r0, #16 2224 adr r2, iotas64 2225 mov r3, #24 @ loop counter 2226 b .Loop_neon 2227 2228.align 4 2229.Loop_neon: 2230 @ Theta 2231 vst1.64 {q4}, [r0,:64] @ offload A[0..1][4] 2232 veor q13, q0, q5 @ A[0..1][0]^A[2..3][0] 2233 vst1.64 {d18}, [r1,:64] @ offload A[2][4] 2234 veor q14, q1, q6 @ A[0..1][1]^A[2..3][1] 2235 veor q15, q2, q7 @ A[0..1][2]^A[2..3][2] 2236 veor d26, d26, d27 @ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0] 2237 veor d27, d28, d29 @ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1] 2238 veor q14, q3, q8 @ A[0..1][3]^A[2..3][3] 2239 veor q4, q4, q9 @ A[0..1][4]^A[2..3][4] 2240 veor d30, d30, d31 @ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2] 2241 veor d31, d28, d29 @ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3] 2242 veor d25, d8, d9 @ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4] 2243 veor q13, q13, q10 @ C[0..1]^=A[4][0..1] 2244 veor q14, q15, q11 @ C[2..3]^=A[4][2..3] 2245 veor d25, d25, d24 @ C[4]^=A[4][4] 2246 2247 vadd.u64 q4, q13, q13 @ C[0..1]<<1 2248 vadd.u64 q15, q14, q14 @ C[2..3]<<1 2249 vadd.u64 d18, d25, d25 @ C[4]<<1 2250 vsri.u64 q4, q13, #63 @ ROL64(C[0..1],1) 2251 vsri.u64 q15, q14, #63 @ ROL64(C[2..3],1) 2252 vsri.u64 d18, d25, #63 @ ROL64(C[4],1) 2253 veor d25, d25, d9 @ D[0] = C[4] ^= ROL64(C[1],1) 2254 veor q13, q13, q15 @ D[1..2] = C[0..1] ^ ROL64(C[2..3],1) 2255 veor d28, d28, d18 @ D[3] = C[2] ^= ROL64(C[4],1) 2256 veor d29, d29, d8 @ D[4] = C[3] ^= ROL64(C[0],1) 2257 2258 veor d0, d0, d25 @ A[0][0] ^= C[4] 2259 veor d1, d1, d25 @ A[1][0] ^= C[4] 2260 veor d10, d10, d25 @ A[2][0] ^= C[4] 2261 veor d11, d11, d25 @ A[3][0] ^= C[4] 2262 veor d20, d20, d25 @ A[4][0] ^= C[4] 2263 2264 veor d2, d2, d26 @ A[0][1] ^= D[1] 2265 veor d3, d3, d26 @ A[1][1] ^= D[1] 2266 veor d12, d12, d26 @ A[2][1] ^= D[1] 2267 veor d13, d13, d26 @ A[3][1] ^= D[1] 2268 veor d21, d21, d26 @ A[4][1] ^= D[1] 2269 vmov d26, d27 2270 2271 veor d6, d6, d28 @ A[0][3] ^= C[2] 2272 veor d7, d7, d28 @ A[1][3] ^= C[2] 2273 veor d16, d16, d28 @ A[2][3] ^= C[2] 2274 veor d17, d17, d28 @ A[3][3] ^= C[2] 2275 veor d23, d23, d28 @ A[4][3] ^= C[2] 2276 vld1.64 {q4}, [r0,:64] @ restore A[0..1][4] 2277 vmov d28, d29 2278 2279 vld1.64 {d18}, [r1,:64] @ restore A[2][4] 2280 veor q2, q2, q13 @ A[0..1][2] ^= D[2] 2281 veor q7, q7, q13 @ A[2..3][2] ^= D[2] 2282 veor d22, d22, d27 @ A[4][2] ^= D[2] 2283 2284 veor q4, q4, q14 @ A[0..1][4] ^= C[3] 2285 veor q9, q9, q14 @ A[2..3][4] ^= C[3] 2286 veor d24, d24, d29 @ A[4][4] ^= C[3] 2287 2288 @ Rho + Pi 2289 vmov d26, d2 @ C[1] = A[0][1] 2290 vshl.u64 d2, d3, #44 2291 vmov d27, d4 @ C[2] = A[0][2] 2292 vshl.u64 d4, d14, #43 2293 vmov d28, d6 @ C[3] = A[0][3] 2294 vshl.u64 d6, d17, #21 2295 vmov d29, d8 @ C[4] = A[0][4] 2296 vshl.u64 d8, d24, #14 2297 vsri.u64 d2, d3, #64-44 @ A[0][1] = ROL64(A[1][1], rhotates[1][1]) 2298 vsri.u64 d4, d14, #64-43 @ A[0][2] = ROL64(A[2][2], rhotates[2][2]) 2299 vsri.u64 d6, d17, #64-21 @ A[0][3] = ROL64(A[3][3], rhotates[3][3]) 2300 vsri.u64 d8, d24, #64-14 @ A[0][4] = ROL64(A[4][4], rhotates[4][4]) 2301 2302 vshl.u64 d3, d9, #20 2303 vshl.u64 d14, d16, #25 2304 vshl.u64 d17, d15, #15 2305 vshl.u64 d24, d21, #2 2306 vsri.u64 d3, d9, #64-20 @ A[1][1] = ROL64(A[1][4], rhotates[1][4]) 2307 vsri.u64 d14, d16, #64-25 @ A[2][2] = ROL64(A[2][3], rhotates[2][3]) 2308 vsri.u64 d17, d15, #64-15 @ A[3][3] = ROL64(A[3][2], rhotates[3][2]) 2309 vsri.u64 d24, d21, #64-2 @ A[4][4] = ROL64(A[4][1], rhotates[4][1]) 2310 2311 vshl.u64 d9, d22, #61 2312 @ vshl.u64 d16, d19, #8 2313 vshl.u64 d15, d12, #10 2314 vshl.u64 d21, d7, #55 2315 vsri.u64 d9, d22, #64-61 @ A[1][4] = ROL64(A[4][2], rhotates[4][2]) 2316 vext.8 d16, d19, d19, #8-1 @ A[2][3] = ROL64(A[3][4], rhotates[3][4]) 2317 vsri.u64 d15, d12, #64-10 @ A[3][2] = ROL64(A[2][1], rhotates[2][1]) 2318 vsri.u64 d21, d7, #64-55 @ A[4][1] = ROL64(A[1][3], rhotates[1][3]) 2319 2320 vshl.u64 d22, d18, #39 2321 @ vshl.u64 d19, d23, #56 2322 vshl.u64 d12, d5, #6 2323 vshl.u64 d7, d13, #45 2324 vsri.u64 d22, d18, #64-39 @ A[4][2] = ROL64(A[2][4], rhotates[2][4]) 2325 vext.8 d19, d23, d23, #8-7 @ A[3][4] = ROL64(A[4][3], rhotates[4][3]) 2326 vsri.u64 d12, d5, #64-6 @ A[2][1] = ROL64(A[1][2], rhotates[1][2]) 2327 vsri.u64 d7, d13, #64-45 @ A[1][3] = ROL64(A[3][1], rhotates[3][1]) 2328 2329 vshl.u64 d18, d20, #18 2330 vshl.u64 d23, d11, #41 2331 vshl.u64 d5, d10, #3 2332 vshl.u64 d13, d1, #36 2333 vsri.u64 d18, d20, #64-18 @ A[2][4] = ROL64(A[4][0], rhotates[4][0]) 2334 vsri.u64 d23, d11, #64-41 @ A[4][3] = ROL64(A[3][0], rhotates[3][0]) 2335 vsri.u64 d5, d10, #64-3 @ A[1][2] = ROL64(A[2][0], rhotates[2][0]) 2336 vsri.u64 d13, d1, #64-36 @ A[3][1] = ROL64(A[1][0], rhotates[1][0]) 2337 2338 vshl.u64 d1, d28, #28 2339 vshl.u64 d10, d26, #1 2340 vshl.u64 d11, d29, #27 2341 vshl.u64 d20, d27, #62 2342 vsri.u64 d1, d28, #64-28 @ A[1][0] = ROL64(C[3], rhotates[0][3]) 2343 vsri.u64 d10, d26, #64-1 @ A[2][0] = ROL64(C[1], rhotates[0][1]) 2344 vsri.u64 d11, d29, #64-27 @ A[3][0] = ROL64(C[4], rhotates[0][4]) 2345 vsri.u64 d20, d27, #64-62 @ A[4][0] = ROL64(C[2], rhotates[0][2]) 2346 2347 @ Chi + Iota 2348 vbic q13, q2, q1 2349 vbic q14, q3, q2 2350 vbic q15, q4, q3 2351 veor q13, q13, q0 @ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2]) 2352 veor q14, q14, q1 @ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3]) 2353 veor q2, q2, q15 @ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4]) 2354 vst1.64 {q13}, [r0,:64] @ offload A[0..1][0] 2355 vbic q13, q0, q4 2356 vbic q15, q1, q0 2357 vmov q1, q14 @ A[0..1][1] 2358 veor q3, q3, q13 @ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0]) 2359 veor q4, q4, q15 @ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1]) 2360 2361 vbic q13, q7, q6 2362 vmov q0, q5 @ A[2..3][0] 2363 vbic q14, q8, q7 2364 vmov q15, q6 @ A[2..3][1] 2365 veor q5, q5, q13 @ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2]) 2366 vbic q13, q9, q8 2367 veor q6, q6, q14 @ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3]) 2368 vbic q14, q0, q9 2369 veor q7, q7, q13 @ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4]) 2370 vbic q13, q15, q0 2371 veor q8, q8, q14 @ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0]) 2372 vmov q14, q10 @ A[4][0..1] 2373 veor q9, q9, q13 @ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1]) 2374 2375 vld1.64 d25, [r2,:64]! @ Iota[i++] 2376 vbic d26, d22, d21 2377 vbic d27, d23, d22 2378 vld1.64 {q0}, [r0,:64] @ restore A[0..1][0] 2379 veor d20, d20, d26 @ A[4][0] ^= (~A[4][1] & A[4][2]) 2380 vbic d26, d24, d23 2381 veor d21, d21, d27 @ A[4][1] ^= (~A[4][2] & A[4][3]) 2382 vbic d27, d28, d24 2383 veor d22, d22, d26 @ A[4][2] ^= (~A[4][3] & A[4][4]) 2384 vbic d26, d29, d28 2385 veor d23, d23, d27 @ A[4][3] ^= (~A[4][4] & A[4][0]) 2386 veor d0, d0, d25 @ A[0][0] ^= Iota[i] 2387 veor d24, d24, d26 @ A[4][4] ^= (~A[4][0] & A[4][1]) 2388 2389 subs r3, r3, #1 2390 bne .Loop_neon 2391 2392 bx lr 2393.size KeccakF1600_neon,.-KeccakF1600_neon 2394 2395.globl SHA3_absorb_neon 2396.type SHA3_absorb_neon, %function 2397.align 5 2398SHA3_absorb_neon: 2399 stmdb sp!, {r4,r5,r6,lr} 2400 vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} 2401 2402 mov r4, r1 @ inp 2403 mov r5, r2 @ len 2404 mov r6, r3 @ bsz 2405 2406 vld1.32 {d0}, [r0,:64]! @ A[0][0] 2407 vld1.32 {d2}, [r0,:64]! @ A[0][1] 2408 vld1.32 {d4}, [r0,:64]! @ A[0][2] 2409 vld1.32 {d6}, [r0,:64]! @ A[0][3] 2410 vld1.32 {d8}, [r0,:64]! @ A[0][4] 2411 2412 vld1.32 {d1}, [r0,:64]! @ A[1][0] 2413 vld1.32 {d3}, [r0,:64]! @ A[1][1] 2414 vld1.32 {d5}, [r0,:64]! @ A[1][2] 2415 vld1.32 {d7}, [r0,:64]! @ A[1][3] 2416 vld1.32 {d9}, [r0,:64]! @ A[1][4] 2417 2418 vld1.32 {d10}, [r0,:64]! @ A[2][0] 2419 vld1.32 {d12}, [r0,:64]! @ A[2][1] 2420 vld1.32 {d14}, [r0,:64]! @ A[2][2] 2421 vld1.32 {d16}, [r0,:64]! @ A[2][3] 2422 vld1.32 {d18}, [r0,:64]! @ A[2][4] 2423 2424 vld1.32 {d11}, [r0,:64]! @ A[3][0] 2425 vld1.32 {d13}, [r0,:64]! @ A[3][1] 2426 vld1.32 {d15}, [r0,:64]! @ A[3][2] 2427 vld1.32 {d17}, [r0,:64]! @ A[3][3] 2428 vld1.32 {d19}, [r0,:64]! @ A[3][4] 2429 2430 vld1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..3] 2431 vld1.32 {d24}, [r0,:64] @ A[4][4] 2432 sub r0, r0, #24*8 @ rewind 2433 b .Loop_absorb_neon 2434 2435.align 4 2436.Loop_absorb_neon: 2437 subs r12, r5, r6 @ len - bsz 2438 blo .Labsorbed_neon 2439 mov r5, r12 2440 2441 vld1.8 {d31}, [r4]! @ endian-neutral loads... 2442 cmp r6, #8*2 2443 veor d0, d0, d31 @ A[0][0] ^= *inp++ 2444 blo .Lprocess_neon 2445 vld1.8 {d31}, [r4]! 2446 veor d2, d2, d31 @ A[0][1] ^= *inp++ 2447 beq .Lprocess_neon 2448 vld1.8 {d31}, [r4]! 2449 cmp r6, #8*4 2450 veor d4, d4, d31 @ A[0][2] ^= *inp++ 2451 blo .Lprocess_neon 2452 vld1.8 {d31}, [r4]! 2453 veor d6, d6, d31 @ A[0][3] ^= *inp++ 2454 beq .Lprocess_neon 2455 vld1.8 {d31},[r4]! 2456 cmp r6, #8*6 2457 veor d8, d8, d31 @ A[0][4] ^= *inp++ 2458 blo .Lprocess_neon 2459 2460 vld1.8 {d31}, [r4]! 2461 veor d1, d1, d31 @ A[1][0] ^= *inp++ 2462 beq .Lprocess_neon 2463 vld1.8 {d31}, [r4]! 2464 cmp r6, #8*8 2465 veor d3, d3, d31 @ A[1][1] ^= *inp++ 2466 blo .Lprocess_neon 2467 vld1.8 {d31}, [r4]! 2468 veor d5, d5, d31 @ A[1][2] ^= *inp++ 2469 beq .Lprocess_neon 2470 vld1.8 {d31}, [r4]! 2471 cmp r6, #8*10 2472 veor d7, d7, d31 @ A[1][3] ^= *inp++ 2473 blo .Lprocess_neon 2474 vld1.8 {d31}, [r4]! 2475 veor d9, d9, d31 @ A[1][4] ^= *inp++ 2476 beq .Lprocess_neon 2477 2478 vld1.8 {d31}, [r4]! 2479 cmp r6, #8*12 2480 veor d10, d10, d31 @ A[2][0] ^= *inp++ 2481 blo .Lprocess_neon 2482 vld1.8 {d31}, [r4]! 2483 veor d12, d12, d31 @ A[2][1] ^= *inp++ 2484 beq .Lprocess_neon 2485 vld1.8 {d31}, [r4]! 2486 cmp r6, #8*14 2487 veor d14, d14, d31 @ A[2][2] ^= *inp++ 2488 blo .Lprocess_neon 2489 vld1.8 {d31}, [r4]! 2490 veor d16, d16, d31 @ A[2][3] ^= *inp++ 2491 beq .Lprocess_neon 2492 vld1.8 {d31}, [r4]! 2493 cmp r6, #8*16 2494 veor d18, d18, d31 @ A[2][4] ^= *inp++ 2495 blo .Lprocess_neon 2496 2497 vld1.8 {d31}, [r4]! 2498 veor d11, d11, d31 @ A[3][0] ^= *inp++ 2499 beq .Lprocess_neon 2500 vld1.8 {d31}, [r4]! 2501 cmp r6, #8*18 2502 veor d13, d13, d31 @ A[3][1] ^= *inp++ 2503 blo .Lprocess_neon 2504 vld1.8 {d31}, [r4]! 2505 veor d15, d15, d31 @ A[3][2] ^= *inp++ 2506 beq .Lprocess_neon 2507 vld1.8 {d31}, [r4]! 2508 cmp r6, #8*20 2509 veor d17, d17, d31 @ A[3][3] ^= *inp++ 2510 blo .Lprocess_neon 2511 vld1.8 {d31}, [r4]! 2512 veor d19, d19, d31 @ A[3][4] ^= *inp++ 2513 beq .Lprocess_neon 2514 2515 vld1.8 {d31}, [r4]! 2516 cmp r6, #8*22 2517 veor d20, d20, d31 @ A[4][0] ^= *inp++ 2518 blo .Lprocess_neon 2519 vld1.8 {d31}, [r4]! 2520 veor d21, d21, d31 @ A[4][1] ^= *inp++ 2521 beq .Lprocess_neon 2522 vld1.8 {d31}, [r4]! 2523 cmp r6, #8*24 2524 veor d22, d22, d31 @ A[4][2] ^= *inp++ 2525 blo .Lprocess_neon 2526 vld1.8 {d31}, [r4]! 2527 veor d23, d23, d31 @ A[4][3] ^= *inp++ 2528 beq .Lprocess_neon 2529 vld1.8 {d31}, [r4]! 2530 veor d24, d24, d31 @ A[4][4] ^= *inp++ 2531 2532.Lprocess_neon: 2533 bl KeccakF1600_neon 2534 b .Loop_absorb_neon 2535 2536.align 4 2537.Labsorbed_neon: 2538 vst1.32 {d0}, [r0,:64]! @ A[0][0..4] 2539 vst1.32 {d2}, [r0,:64]! 2540 vst1.32 {d4}, [r0,:64]! 2541 vst1.32 {d6}, [r0,:64]! 2542 vst1.32 {d8}, [r0,:64]! 2543 2544 vst1.32 {d1}, [r0,:64]! @ A[1][0..4] 2545 vst1.32 {d3}, [r0,:64]! 2546 vst1.32 {d5}, [r0,:64]! 2547 vst1.32 {d7}, [r0,:64]! 2548 vst1.32 {d9}, [r0,:64]! 2549 2550 vst1.32 {d10}, [r0,:64]! @ A[2][0..4] 2551 vst1.32 {d12}, [r0,:64]! 2552 vst1.32 {d14}, [r0,:64]! 2553 vst1.32 {d16}, [r0,:64]! 2554 vst1.32 {d18}, [r0,:64]! 2555 2556 vst1.32 {d11}, [r0,:64]! @ A[3][0..4] 2557 vst1.32 {d13}, [r0,:64]! 2558 vst1.32 {d15}, [r0,:64]! 2559 vst1.32 {d17}, [r0,:64]! 2560 vst1.32 {d19}, [r0,:64]! 2561 2562 vst1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4] 2563 vst1.32 {d24}, [r0,:64] 2564 2565 mov r0, r5 @ return value 2566 vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} 2567 ldmia sp!, {r4,r5,r6,pc} 2568.size SHA3_absorb_neon,.-SHA3_absorb_neon 2569 2570.globl SHA3_squeeze_neon 2571.type SHA3_squeeze_neon, %function 2572.align 5 2573SHA3_squeeze_neon: 2574 stmdb sp!, {r4,r5,r6,lr} 2575 2576 mov r4, r1 @ out 2577 mov r5, r2 @ len 2578 mov r6, r3 @ bsz 2579 mov r12, r0 @ A_flat 2580 mov r14, r3 @ bsz 2581 b .Loop_squeeze_neon 2582 2583.align 4 2584.Loop_squeeze_neon: 2585 cmp r5, #8 2586 blo .Lsqueeze_neon_tail 2587 vld1.32 {d0}, [r12]! 2588 vst1.8 {d0}, [r4]! @ endian-neutral store 2589 2590 subs r5, r5, #8 @ len -= 8 2591 beq .Lsqueeze_neon_done 2592 2593 subs r14, r14, #8 @ bsz -= 8 2594 bhi .Loop_squeeze_neon 2595 2596 vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} 2597 2598 vld1.32 {d0}, [r0,:64]! @ A[0][0..4] 2599 vld1.32 {d2}, [r0,:64]! 2600 vld1.32 {d4}, [r0,:64]! 2601 vld1.32 {d6}, [r0,:64]! 2602 vld1.32 {d8}, [r0,:64]! 2603 2604 vld1.32 {d1}, [r0,:64]! @ A[1][0..4] 2605 vld1.32 {d3}, [r0,:64]! 2606 vld1.32 {d5}, [r0,:64]! 2607 vld1.32 {d7}, [r0,:64]! 2608 vld1.32 {d9}, [r0,:64]! 2609 2610 vld1.32 {d10}, [r0,:64]! @ A[2][0..4] 2611 vld1.32 {d12}, [r0,:64]! 2612 vld1.32 {d14}, [r0,:64]! 2613 vld1.32 {d16}, [r0,:64]! 2614 vld1.32 {d18}, [r0,:64]! 2615 2616 vld1.32 {d11}, [r0,:64]! @ A[3][0..4] 2617 vld1.32 {d13}, [r0,:64]! 2618 vld1.32 {d15}, [r0,:64]! 2619 vld1.32 {d17}, [r0,:64]! 2620 vld1.32 {d19}, [r0,:64]! 2621 2622 vld1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4] 2623 vld1.32 {d24}, [r0,:64] 2624 sub r0, r0, #24*8 @ rewind 2625 2626 bl KeccakF1600_neon 2627 2628 mov r12, r0 @ A_flat 2629 vst1.32 {d0}, [r0,:64]! @ A[0][0..4] 2630 vst1.32 {d2}, [r0,:64]! 2631 vst1.32 {d4}, [r0,:64]! 2632 vst1.32 {d6}, [r0,:64]! 2633 vst1.32 {d8}, [r0,:64]! 2634 2635 vst1.32 {d1}, [r0,:64]! @ A[1][0..4] 2636 vst1.32 {d3}, [r0,:64]! 2637 vst1.32 {d5}, [r0,:64]! 2638 vst1.32 {d7}, [r0,:64]! 2639 vst1.32 {d9}, [r0,:64]! 2640 2641 vst1.32 {d10}, [r0,:64]! @ A[2][0..4] 2642 vst1.32 {d12}, [r0,:64]! 2643 vst1.32 {d14}, [r0,:64]! 2644 vst1.32 {d16}, [r0,:64]! 2645 vst1.32 {d18}, [r0,:64]! 2646 2647 vst1.32 {d11}, [r0,:64]! @ A[3][0..4] 2648 vst1.32 {d13}, [r0,:64]! 2649 vst1.32 {d15}, [r0,:64]! 2650 vst1.32 {d17}, [r0,:64]! 2651 vst1.32 {d19}, [r0,:64]! 2652 2653 vst1.32 {d20,d21,d22,d23}, [r0,:64]! @ A[4][0..4] 2654 mov r14, r6 @ bsz 2655 vst1.32 {d24}, [r0,:64] 2656 mov r0, r12 @ rewind 2657 2658 vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} 2659 b .Loop_squeeze_neon 2660 2661.align 4 2662.Lsqueeze_neon_tail: 2663 ldmia r12, {r2,r3} 2664 cmp r5, #2 2665 strb r2, [r4],#1 @ endian-neutral store 2666 mov r2, r2, lsr#8 2667 blo .Lsqueeze_neon_done 2668 strb r2, [r4], #1 2669 mov r2, r2, lsr#8 2670 beq .Lsqueeze_neon_done 2671 strb r2, [r4], #1 2672 mov r2, r2, lsr#8 2673 cmp r5, #4 2674 blo .Lsqueeze_neon_done 2675 strb r2, [r4], #1 2676 beq .Lsqueeze_neon_done 2677 2678 strb r3, [r4], #1 2679 mov r3, r3, lsr#8 2680 cmp r5, #6 2681 blo .Lsqueeze_neon_done 2682 strb r3, [r4], #1 2683 mov r3, r3, lsr#8 2684 beq .Lsqueeze_neon_done 2685 strb r3, [r4], #1 2686 2687.Lsqueeze_neon_done: 2688 ldmia sp!, {r4,r5,r6,pc} 2689.size SHA3_squeeze_neon,.-SHA3_squeeze_neon 2690#endif 2691.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 2692.align 2 2693.align 2 2694