1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#if !defined(__has_feature) 5#define __has_feature(x) 0 6#endif 7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 8#define OPENSSL_NO_ASM 9#endif 10 11#if !defined(OPENSSL_NO_ASM) 12#if defined(__aarch64__) 13#if defined(BORINGSSL_PREFIX) 14#include <boringssl_prefix_symbols_asm.h> 15#endif 16// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. 17// 18// Licensed under the OpenSSL license (the "License"). You may not use 19// this file except in compliance with the License. You can obtain a copy 20// in the file LICENSE in the source distribution or at 21// https://www.openssl.org/source/license.html 22 23// ==================================================================== 24// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 25// project. The module is, however, dual licensed under OpenSSL and 26// CRYPTOGAMS licenses depending on where you obtain it. For further 27// details see http://www.openssl.org/~appro/cryptogams/. 28// 29// Permission to use under GPLv2 terms is granted. 30// ==================================================================== 31// 32// SHA256/512 for ARMv8. 33// 34// Performance in cycles per processed byte and improvement coefficient 35// over code generated with "default" compiler: 36// 37// SHA256-hw SHA256(*) SHA512 38// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) 39// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) 40// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) 41// Denver 2.01 10.5 (+26%) 6.70 (+8%) 42// X-Gene 20.0 (+100%) 12.8 (+300%(***)) 43// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) 44// Kryo 1.92 17.4 (+30%) 11.2 (+8%) 45// 46// (*) Software SHA256 results are of lesser relevance, presented 47// mostly for informational purposes. 
// (**) The result is a trade-off: it's possible to improve it by
// 10% (or by 1 cycle per round), but at the cost of 20% loss
// on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
// indication of some compiler "pathology", most notably code
// generated with -mgeneral-regs-only is significantly faster
// and the gap is only 40-90%.

#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#endif

.text

// ---------------------------------------------------------------------
// sha512_block_data_order -- scalar SHA-512 block function.
//
// In:   x0 = pointer to the eight 64-bit state words; they are loaded at
//            entry and written back (old value + working variable) after
//            every block.
//       x1 = input pointer; each pass of Loop consumes 128 bytes.
//       x2 = number of 128-byte blocks; turned into an end-of-input
//            pointer (x1 + (x2 << 7)) before the loop.
//
// Unless built with __KERNEL__ defined, the function first tests
// OPENSSL_armcap_P against ARMV8_SHA512 and branches to Lv8_entry when
// the test is non-zero.
//
// The end-of-input comparison sits at the bottom of Loop, so one block is
// always processed before the block count is examined.
//
// Register use in the scalar path:
//   x20-x27        the eight working variables. Their a..h roles rotate by
//                  one register per round instead of the values being moved.
//   x3-x15,x0-x2   the sixteen message-schedule words X[0..15]
//                  (x3 = X[0] ... x15 = X[12], x0 = X[13], x1 = X[14],
//                  x2 = X[15]). Input words are byte-reversed with rev
//                  unless __AARCH64EB__ is defined.
//   [sp,#0..#24]   four spill slots for schedule words whose registers are
//                  borrowed as Sigma/sigma temporaries.
//   x30            running pointer into LK512 (the caller's x30 is saved in
//                  the frame and reloaded before ret).
//   x19,x28        alternate between "K[i] just loaded" and the a^b term
//                  that feeds Maj(a,b,c).
//   x16,x17        scratch for Sigma1(e), Ch(e,f,g) and Sigma0(a).
//   [x29,#96]      saved x0; [x29,#104] end-of-input pointer;
//   [x29,#112]     input pointer as it stands after the first 16 bytes of
//                  the current block have been loaded.
//
// The last step of every round, h+=Sigma0(a), is deferred into the
// following round; the commented-out add marks where it logically belongs.
// ---------------------------------------------------------------------

.globl sha512_block_data_order

.def sha512_block_data_order
   .type 32
.endef
.align 6
sha512_block_data_order:
	AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
	// Run-time dispatch on the capability word.
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
#else
	adrp	x16,OPENSSL_armcap_P
#endif
	ldr	w16,[x16,:lo12:OPENSSL_armcap_P]
	tst	w16,#ARMV8_SHA512
	b.ne	Lv8_entry
#endif
	AARCH64_SIGN_LINK_REGISTER
	// 128-byte frame: x29/x30 at #0, x19-x28 at #16..#95, and #96..#119
	// used below for x0, the end pointer and x1.
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8	// four 8-byte schedule spill slots

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adrp	x30,LK512
	add	x30,x30,:lo12:LK512
	stp	x0,x2,[x29,#96]	// x0 and x2 are reused for X[13] and X[15] below

	// One pass of Loop handles one 128-byte block. Rounds 0..14 take X[i]
	// straight from the input.
Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]	// x1 itself becomes X[14] later in the block
#ifndef __AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add x20,x20,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]	// spill X[3]: x6 is the temporary for this round
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x0,x0	// 13
#endif
	ldp	x1,x2,[x1]	// last two input words; overwrites the input pointer itself
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x2,x2	// 15
#endif
	// From round 15 onward each round also expands the schedule for the
	// next one. Here x3 (X[0]) becomes
	// X[0] + X[9] + sigma0(X[1]) + sigma1(X[14]), using x12, x4 and x1.
	ldr	x7,[sp,#0]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	// Sixteen rounds per pass. The pass repeats until the K word fetched at
	// the end of the sixteenth round is the zero terminator of LK512.
Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,Loop_16_xx	// x19 == 0 only once the LK512 terminator was fetched

	// Block finished: reload the saved pointers, fold the working variables
	// into the stored state and decide whether another block follows.
	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2	// reached the end-of-input pointer?
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	Loop

	// Epilogue: restore x19-x28, drop the spill area and the frame.
	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret


.section .rodata
.align 6

// Round-constant table: 80 quads followed by one zero quad. The scalar
// code above stops its round loop on that zero and rewinds its table
// pointer by 648 bytes, i.e. 81 quads.
LK512:
.quad	0x428a2f98d728ae22,0x7137449123ef65cd
.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
.quad	0x3956c25bf348b538,0x59f111f1b605d019
.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad	0xd807aa98a3030242,0x12835b0145706fbe
.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad	0x9bdc06a725c71235,0xc19bf174cf692694
.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad	0x983e5152ee66dfab,0xa831c66d2db43210
.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad	0x06ca6351e003826f,0x142929670a0e6e70
.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
.quad	0x81c2c92e47edaee6,0x92722c851482353b
.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
.quad	0xd192e819d6ef5218,0xd69906245565a910
.quad	0xf40e35855771202a,0x106aa07032bbd1b8
.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad	0x90befffa23631e28,0xa4506cebde82bde9
.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad	0xca273eceea26619c,0xd186b8c721c0c207
.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad	0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// terminator

// ASCII, NUL-terminated:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
.text
#ifndef __KERNEL__
.def sha512_block_armv8
   .type 32
.endef
.align 6
sha512_block_armv8:
Lv8_entry:
	stp	x29,x30,[sp,#-16]!
1105 add x29,sp,#0 1106 1107 ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input 1108 ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 1109 1110 ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context 1111 adrp x3,LK512 1112 add x3,x3,:lo12:LK512 1113 1114 rev64 v16.16b,v16.16b 1115 rev64 v17.16b,v17.16b 1116 rev64 v18.16b,v18.16b 1117 rev64 v19.16b,v19.16b 1118 rev64 v20.16b,v20.16b 1119 rev64 v21.16b,v21.16b 1120 rev64 v22.16b,v22.16b 1121 rev64 v23.16b,v23.16b 1122 b Loop_hw 1123 1124.align 4 1125Loop_hw: 1126 ld1 {v24.2d},[x3],#16 1127 subs x2,x2,#1 1128 sub x4,x1,#128 1129 orr v26.16b,v0.16b,v0.16b // offload 1130 orr v27.16b,v1.16b,v1.16b 1131 orr v28.16b,v2.16b,v2.16b 1132 orr v29.16b,v3.16b,v3.16b 1133 csel x1,x1,x4,ne // conditional rewind 1134 add v24.2d,v24.2d,v16.2d 1135 ld1 {v25.2d},[x3],#16 1136 ext v24.16b,v24.16b,v24.16b,#8 1137 ext v5.16b,v2.16b,v3.16b,#8 1138 ext v6.16b,v1.16b,v2.16b,#8 1139 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1140.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1141 ext v7.16b,v20.16b,v21.16b,#8 1142.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1143.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1144 add v4.2d,v1.2d,v3.2d // "D + T1" 1145.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1146 add v25.2d,v25.2d,v17.2d 1147 ld1 {v24.2d},[x3],#16 1148 ext v25.16b,v25.16b,v25.16b,#8 1149 ext v5.16b,v4.16b,v2.16b,#8 1150 ext v6.16b,v0.16b,v4.16b,#8 1151 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1152.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1153 ext v7.16b,v21.16b,v22.16b,#8 1154.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1155.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1156 add v1.2d,v0.2d,v2.2d // "D + T1" 1157.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1158 add v24.2d,v24.2d,v18.2d 1159 ld1 {v25.2d},[x3],#16 1160 ext v24.16b,v24.16b,v24.16b,#8 1161 ext v5.16b,v1.16b,v4.16b,#8 1162 ext v6.16b,v3.16b,v1.16b,#8 1163 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1164.long 0xcec08272 //sha512su0 
v18.16b,v19.16b 1165 ext v7.16b,v22.16b,v23.16b,#8 1166.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1167.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1168 add v0.2d,v3.2d,v4.2d // "D + T1" 1169.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1170 add v25.2d,v25.2d,v19.2d 1171 ld1 {v24.2d},[x3],#16 1172 ext v25.16b,v25.16b,v25.16b,#8 1173 ext v5.16b,v0.16b,v1.16b,#8 1174 ext v6.16b,v2.16b,v0.16b,#8 1175 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1176.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1177 ext v7.16b,v23.16b,v16.16b,#8 1178.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1179.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1180 add v3.2d,v2.2d,v1.2d // "D + T1" 1181.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1182 add v24.2d,v24.2d,v20.2d 1183 ld1 {v25.2d},[x3],#16 1184 ext v24.16b,v24.16b,v24.16b,#8 1185 ext v5.16b,v3.16b,v0.16b,#8 1186 ext v6.16b,v4.16b,v3.16b,#8 1187 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1188.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1189 ext v7.16b,v16.16b,v17.16b,#8 1190.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1191.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1192 add v2.2d,v4.2d,v0.2d // "D + T1" 1193.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1194 add v25.2d,v25.2d,v21.2d 1195 ld1 {v24.2d},[x3],#16 1196 ext v25.16b,v25.16b,v25.16b,#8 1197 ext v5.16b,v2.16b,v3.16b,#8 1198 ext v6.16b,v1.16b,v2.16b,#8 1199 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1200.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1201 ext v7.16b,v17.16b,v18.16b,#8 1202.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1203.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1204 add v4.2d,v1.2d,v3.2d // "D + T1" 1205.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1206 add v24.2d,v24.2d,v22.2d 1207 ld1 {v25.2d},[x3],#16 1208 ext v24.16b,v24.16b,v24.16b,#8 1209 ext v5.16b,v4.16b,v2.16b,#8 1210 ext v6.16b,v0.16b,v4.16b,#8 1211 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1212.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1213 ext 
v7.16b,v18.16b,v19.16b,#8 1214.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1215.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1216 add v1.2d,v0.2d,v2.2d // "D + T1" 1217.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1218 add v25.2d,v25.2d,v23.2d 1219 ld1 {v24.2d},[x3],#16 1220 ext v25.16b,v25.16b,v25.16b,#8 1221 ext v5.16b,v1.16b,v4.16b,#8 1222 ext v6.16b,v3.16b,v1.16b,#8 1223 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1224.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1225 ext v7.16b,v19.16b,v20.16b,#8 1226.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1227.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1228 add v0.2d,v3.2d,v4.2d // "D + T1" 1229.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1230 add v24.2d,v24.2d,v16.2d 1231 ld1 {v25.2d},[x3],#16 1232 ext v24.16b,v24.16b,v24.16b,#8 1233 ext v5.16b,v0.16b,v1.16b,#8 1234 ext v6.16b,v2.16b,v0.16b,#8 1235 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1236.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1237 ext v7.16b,v20.16b,v21.16b,#8 1238.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1239.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1240 add v3.2d,v2.2d,v1.2d // "D + T1" 1241.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1242 add v25.2d,v25.2d,v17.2d 1243 ld1 {v24.2d},[x3],#16 1244 ext v25.16b,v25.16b,v25.16b,#8 1245 ext v5.16b,v3.16b,v0.16b,#8 1246 ext v6.16b,v4.16b,v3.16b,#8 1247 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1248.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1249 ext v7.16b,v21.16b,v22.16b,#8 1250.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1251.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1252 add v2.2d,v4.2d,v0.2d // "D + T1" 1253.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1254 add v24.2d,v24.2d,v18.2d 1255 ld1 {v25.2d},[x3],#16 1256 ext v24.16b,v24.16b,v24.16b,#8 1257 ext v5.16b,v2.16b,v3.16b,#8 1258 ext v6.16b,v1.16b,v2.16b,#8 1259 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1260.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1261 ext v7.16b,v22.16b,v23.16b,#8 
1262.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1263.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1264 add v4.2d,v1.2d,v3.2d // "D + T1" 1265.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1266 add v25.2d,v25.2d,v19.2d 1267 ld1 {v24.2d},[x3],#16 1268 ext v25.16b,v25.16b,v25.16b,#8 1269 ext v5.16b,v4.16b,v2.16b,#8 1270 ext v6.16b,v0.16b,v4.16b,#8 1271 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1272.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1273 ext v7.16b,v23.16b,v16.16b,#8 1274.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1275.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1276 add v1.2d,v0.2d,v2.2d // "D + T1" 1277.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1278 add v24.2d,v24.2d,v20.2d 1279 ld1 {v25.2d},[x3],#16 1280 ext v24.16b,v24.16b,v24.16b,#8 1281 ext v5.16b,v1.16b,v4.16b,#8 1282 ext v6.16b,v3.16b,v1.16b,#8 1283 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1284.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1285 ext v7.16b,v16.16b,v17.16b,#8 1286.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1287.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1288 add v0.2d,v3.2d,v4.2d // "D + T1" 1289.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1290 add v25.2d,v25.2d,v21.2d 1291 ld1 {v24.2d},[x3],#16 1292 ext v25.16b,v25.16b,v25.16b,#8 1293 ext v5.16b,v0.16b,v1.16b,#8 1294 ext v6.16b,v2.16b,v0.16b,#8 1295 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1296.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1297 ext v7.16b,v17.16b,v18.16b,#8 1298.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1299.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1300 add v3.2d,v2.2d,v1.2d // "D + T1" 1301.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1302 add v24.2d,v24.2d,v22.2d 1303 ld1 {v25.2d},[x3],#16 1304 ext v24.16b,v24.16b,v24.16b,#8 1305 ext v5.16b,v3.16b,v0.16b,#8 1306 ext v6.16b,v4.16b,v3.16b,#8 1307 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1308.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1309 ext v7.16b,v18.16b,v19.16b,#8 1310.long 0xce6680a0 
//sha512h v0.16b,v5.16b,v6.16b 1311.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1312 add v2.2d,v4.2d,v0.2d // "D + T1" 1313.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1314 add v25.2d,v25.2d,v23.2d 1315 ld1 {v24.2d},[x3],#16 1316 ext v25.16b,v25.16b,v25.16b,#8 1317 ext v5.16b,v2.16b,v3.16b,#8 1318 ext v6.16b,v1.16b,v2.16b,#8 1319 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1320.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1321 ext v7.16b,v19.16b,v20.16b,#8 1322.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1323.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1324 add v4.2d,v1.2d,v3.2d // "D + T1" 1325.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1326 add v24.2d,v24.2d,v16.2d 1327 ld1 {v25.2d},[x3],#16 1328 ext v24.16b,v24.16b,v24.16b,#8 1329 ext v5.16b,v4.16b,v2.16b,#8 1330 ext v6.16b,v0.16b,v4.16b,#8 1331 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1332.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1333 ext v7.16b,v20.16b,v21.16b,#8 1334.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1335.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1336 add v1.2d,v0.2d,v2.2d // "D + T1" 1337.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1338 add v25.2d,v25.2d,v17.2d 1339 ld1 {v24.2d},[x3],#16 1340 ext v25.16b,v25.16b,v25.16b,#8 1341 ext v5.16b,v1.16b,v4.16b,#8 1342 ext v6.16b,v3.16b,v1.16b,#8 1343 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1344.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1345 ext v7.16b,v21.16b,v22.16b,#8 1346.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1347.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1348 add v0.2d,v3.2d,v4.2d // "D + T1" 1349.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1350 add v24.2d,v24.2d,v18.2d 1351 ld1 {v25.2d},[x3],#16 1352 ext v24.16b,v24.16b,v24.16b,#8 1353 ext v5.16b,v0.16b,v1.16b,#8 1354 ext v6.16b,v2.16b,v0.16b,#8 1355 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1356.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1357 ext v7.16b,v22.16b,v23.16b,#8 1358.long 0xce6680a1 //sha512h 
v1.16b,v5.16b,v6.16b 1359.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1360 add v3.2d,v2.2d,v1.2d // "D + T1" 1361.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1362 add v25.2d,v25.2d,v19.2d 1363 ld1 {v24.2d},[x3],#16 1364 ext v25.16b,v25.16b,v25.16b,#8 1365 ext v5.16b,v3.16b,v0.16b,#8 1366 ext v6.16b,v4.16b,v3.16b,#8 1367 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1368.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1369 ext v7.16b,v23.16b,v16.16b,#8 1370.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1371.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1372 add v2.2d,v4.2d,v0.2d // "D + T1" 1373.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1374 add v24.2d,v24.2d,v20.2d 1375 ld1 {v25.2d},[x3],#16 1376 ext v24.16b,v24.16b,v24.16b,#8 1377 ext v5.16b,v2.16b,v3.16b,#8 1378 ext v6.16b,v1.16b,v2.16b,#8 1379 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1380.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1381 ext v7.16b,v16.16b,v17.16b,#8 1382.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1383.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1384 add v4.2d,v1.2d,v3.2d // "D + T1" 1385.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1386 add v25.2d,v25.2d,v21.2d 1387 ld1 {v24.2d},[x3],#16 1388 ext v25.16b,v25.16b,v25.16b,#8 1389 ext v5.16b,v4.16b,v2.16b,#8 1390 ext v6.16b,v0.16b,v4.16b,#8 1391 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1392.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1393 ext v7.16b,v17.16b,v18.16b,#8 1394.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1395.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1396 add v1.2d,v0.2d,v2.2d // "D + T1" 1397.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1398 add v24.2d,v24.2d,v22.2d 1399 ld1 {v25.2d},[x3],#16 1400 ext v24.16b,v24.16b,v24.16b,#8 1401 ext v5.16b,v1.16b,v4.16b,#8 1402 ext v6.16b,v3.16b,v1.16b,#8 1403 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1404.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1405 ext v7.16b,v18.16b,v19.16b,#8 1406.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 
1407.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1408 add v0.2d,v3.2d,v4.2d // "D + T1" 1409.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1410 add v25.2d,v25.2d,v23.2d 1411 ld1 {v24.2d},[x3],#16 1412 ext v25.16b,v25.16b,v25.16b,#8 1413 ext v5.16b,v0.16b,v1.16b,#8 1414 ext v6.16b,v2.16b,v0.16b,#8 1415 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1416.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1417 ext v7.16b,v19.16b,v20.16b,#8 1418.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1419.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1420 add v3.2d,v2.2d,v1.2d // "D + T1" 1421.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1422 add v24.2d,v24.2d,v16.2d 1423 ld1 {v25.2d},[x3],#16 1424 ext v24.16b,v24.16b,v24.16b,#8 1425 ext v5.16b,v3.16b,v0.16b,#8 1426 ext v6.16b,v4.16b,v3.16b,#8 1427 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1428.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1429 ext v7.16b,v20.16b,v21.16b,#8 1430.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1431.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1432 add v2.2d,v4.2d,v0.2d // "D + T1" 1433.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1434 add v25.2d,v25.2d,v17.2d 1435 ld1 {v24.2d},[x3],#16 1436 ext v25.16b,v25.16b,v25.16b,#8 1437 ext v5.16b,v2.16b,v3.16b,#8 1438 ext v6.16b,v1.16b,v2.16b,#8 1439 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1440.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1441 ext v7.16b,v21.16b,v22.16b,#8 1442.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1443.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1444 add v4.2d,v1.2d,v3.2d // "D + T1" 1445.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1446 add v24.2d,v24.2d,v18.2d 1447 ld1 {v25.2d},[x3],#16 1448 ext v24.16b,v24.16b,v24.16b,#8 1449 ext v5.16b,v4.16b,v2.16b,#8 1450 ext v6.16b,v0.16b,v4.16b,#8 1451 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1452.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1453 ext v7.16b,v22.16b,v23.16b,#8 1454.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1455.long 0xce678a32 
//sha512su1 v18.16b,v17.16b,v7.16b 1456 add v1.2d,v0.2d,v2.2d // "D + T1" 1457.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1458 add v25.2d,v25.2d,v19.2d 1459 ld1 {v24.2d},[x3],#16 1460 ext v25.16b,v25.16b,v25.16b,#8 1461 ext v5.16b,v1.16b,v4.16b,#8 1462 ext v6.16b,v3.16b,v1.16b,#8 1463 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1464.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1465 ext v7.16b,v23.16b,v16.16b,#8 1466.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1467.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1468 add v0.2d,v3.2d,v4.2d // "D + T1" 1469.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1470 add v24.2d,v24.2d,v20.2d 1471 ld1 {v25.2d},[x3],#16 1472 ext v24.16b,v24.16b,v24.16b,#8 1473 ext v5.16b,v0.16b,v1.16b,#8 1474 ext v6.16b,v2.16b,v0.16b,#8 1475 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1476.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1477 ext v7.16b,v16.16b,v17.16b,#8 1478.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1479.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1480 add v3.2d,v2.2d,v1.2d // "D + T1" 1481.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1482 add v25.2d,v25.2d,v21.2d 1483 ld1 {v24.2d},[x3],#16 1484 ext v25.16b,v25.16b,v25.16b,#8 1485 ext v5.16b,v3.16b,v0.16b,#8 1486 ext v6.16b,v4.16b,v3.16b,#8 1487 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1488.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1489 ext v7.16b,v17.16b,v18.16b,#8 1490.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1491.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1492 add v2.2d,v4.2d,v0.2d // "D + T1" 1493.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1494 add v24.2d,v24.2d,v22.2d 1495 ld1 {v25.2d},[x3],#16 1496 ext v24.16b,v24.16b,v24.16b,#8 1497 ext v5.16b,v2.16b,v3.16b,#8 1498 ext v6.16b,v1.16b,v2.16b,#8 1499 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1500.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1501 ext v7.16b,v18.16b,v19.16b,#8 1502.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1503.long 0xce678ab6 //sha512su1 
v22.16b,v21.16b,v7.16b 1504 add v4.2d,v1.2d,v3.2d // "D + T1" 1505.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1506 add v25.2d,v25.2d,v23.2d 1507 ld1 {v24.2d},[x3],#16 1508 ext v25.16b,v25.16b,v25.16b,#8 1509 ext v5.16b,v4.16b,v2.16b,#8 1510 ext v6.16b,v0.16b,v4.16b,#8 1511 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1512.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1513 ext v7.16b,v19.16b,v20.16b,#8 1514.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1515.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1516 add v1.2d,v0.2d,v2.2d // "D + T1" 1517.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1518 ld1 {v25.2d},[x3],#16 1519 add v24.2d,v24.2d,v16.2d 1520 ld1 {v16.16b},[x1],#16 // load next input 1521 ext v24.16b,v24.16b,v24.16b,#8 1522 ext v5.16b,v1.16b,v4.16b,#8 1523 ext v6.16b,v3.16b,v1.16b,#8 1524 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1525.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1526 rev64 v16.16b,v16.16b 1527 add v0.2d,v3.2d,v4.2d // "D + T1" 1528.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1529 ld1 {v24.2d},[x3],#16 1530 add v25.2d,v25.2d,v17.2d 1531 ld1 {v17.16b},[x1],#16 // load next input 1532 ext v25.16b,v25.16b,v25.16b,#8 1533 ext v5.16b,v0.16b,v1.16b,#8 1534 ext v6.16b,v2.16b,v0.16b,#8 1535 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1536.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1537 rev64 v17.16b,v17.16b 1538 add v3.2d,v2.2d,v1.2d // "D + T1" 1539.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1540 ld1 {v25.2d},[x3],#16 1541 add v24.2d,v24.2d,v18.2d 1542 ld1 {v18.16b},[x1],#16 // load next input 1543 ext v24.16b,v24.16b,v24.16b,#8 1544 ext v5.16b,v3.16b,v0.16b,#8 1545 ext v6.16b,v4.16b,v3.16b,#8 1546 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1547.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1548 rev64 v18.16b,v18.16b 1549 add v2.2d,v4.2d,v0.2d // "D + T1" 1550.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1551 ld1 {v24.2d},[x3],#16 1552 add v25.2d,v25.2d,v19.2d 1553 ld1 {v19.16b},[x1],#16 // load next input 1554 
ext v25.16b,v25.16b,v25.16b,#8 1555 ext v5.16b,v2.16b,v3.16b,#8 1556 ext v6.16b,v1.16b,v2.16b,#8 1557 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1558.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1559 rev64 v19.16b,v19.16b 1560 add v4.2d,v1.2d,v3.2d // "D + T1" 1561.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1562 ld1 {v25.2d},[x3],#16 1563 add v24.2d,v24.2d,v20.2d 1564 ld1 {v20.16b},[x1],#16 // load next input 1565 ext v24.16b,v24.16b,v24.16b,#8 1566 ext v5.16b,v4.16b,v2.16b,#8 1567 ext v6.16b,v0.16b,v4.16b,#8 1568 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1569.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1570 rev64 v20.16b,v20.16b 1571 add v1.2d,v0.2d,v2.2d // "D + T1" 1572.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1573 ld1 {v24.2d},[x3],#16 1574 add v25.2d,v25.2d,v21.2d 1575 ld1 {v21.16b},[x1],#16 // load next input 1576 ext v25.16b,v25.16b,v25.16b,#8 1577 ext v5.16b,v1.16b,v4.16b,#8 1578 ext v6.16b,v3.16b,v1.16b,#8 1579 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1580.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1581 rev64 v21.16b,v21.16b 1582 add v0.2d,v3.2d,v4.2d // "D + T1" 1583.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1584 ld1 {v25.2d},[x3],#16 1585 add v24.2d,v24.2d,v22.2d 1586 ld1 {v22.16b},[x1],#16 // load next input 1587 ext v24.16b,v24.16b,v24.16b,#8 1588 ext v5.16b,v0.16b,v1.16b,#8 1589 ext v6.16b,v2.16b,v0.16b,#8 1590 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1591.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1592 rev64 v22.16b,v22.16b 1593 add v3.2d,v2.2d,v1.2d // "D + T1" 1594.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1595 sub x3,x3,#80*8 // rewind 1596 add v25.2d,v25.2d,v23.2d 1597 ld1 {v23.16b},[x1],#16 // load next input 1598 ext v25.16b,v25.16b,v25.16b,#8 1599 ext v5.16b,v3.16b,v0.16b,#8 1600 ext v6.16b,v4.16b,v3.16b,#8 1601 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1602.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1603 rev64 v23.16b,v23.16b 1604 add v2.2d,v4.2d,v0.2d // "D + T1" 1605.long 0xce618480 
//sha512h2 v0.16b,v4.16b,v1.16b 1606 add v0.2d,v0.2d,v26.2d // accumulate 1607 add v1.2d,v1.2d,v27.2d 1608 add v2.2d,v2.2d,v28.2d 1609 add v3.2d,v3.2d,v29.2d 1610 1611 cbnz x2,Loop_hw 1612 1613 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context 1614 1615 ldr x29,[sp],#16 1616 ret 1617 1618#endif 1619#endif 1620#endif // !OPENSSL_NO_ASM 1621