1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#if !defined(__has_feature) 5#define __has_feature(x) 0 6#endif 7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 8#define OPENSSL_NO_ASM 9#endif 10 11#if !defined(OPENSSL_NO_ASM) && defined(__AARCH64EL__) && defined(_WIN32) 12#if defined(BORINGSSL_PREFIX) 13#include <boringssl_prefix_symbols_asm.h> 14#endif 15// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. 16// 17// Licensed under the OpenSSL license (the "License"). You may not use 18// this file except in compliance with the License. You can obtain a copy 19// in the file LICENSE in the source distribution or at 20// https://www.openssl.org/source/license.html 21 22// ==================================================================== 23// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 24// project. The module is, however, dual licensed under OpenSSL and 25// CRYPTOGAMS licenses depending on where you obtain it. For further 26// details see http://www.openssl.org/~appro/cryptogams/. 27// 28// Permission to use under GPLv2 terms is granted. 29// ==================================================================== 30// 31// SHA256/512 for ARMv8. 32// 33// Performance in cycles per processed byte and improvement coefficient 34// over code generated with "default" compiler: 35// 36// SHA256-hw SHA256(*) SHA512 37// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) 38// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) 39// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) 40// Denver 2.01 10.5 (+26%) 6.70 (+8%) 41// X-Gene 20.0 (+100%) 12.8 (+300%(***)) 42// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) 43// Kryo 1.92 17.4 (+30%) 11.2 (+8%) 44// 45// (*) Software SHA256 results are of lesser relevance, presented 46// mostly for informational purposes. 
// (**)	The result is a trade-off: it's possible to improve it by
//	10% (or by 1 cycle per round), but at the cost of 20% loss
//	on Cortex-A53 (or by 4 cycles per round).
// (***)	Super-impressive coefficients over gcc-generated code are
//	indication of some compiler "pathology", most notably code
//	generated with -mgeneral-regs-only is significantly faster
//	and the gap is only 40-90%.

#ifndef	__KERNEL__
# include <openssl/arm_arch.h>
#endif

.text



//----------------------------------------------------------------------
// sha512_block_data_order -- scalar (general-register) SHA-512 block
// transform.
//
// Arguments, as used by the code below:
//   x0 = pointer to eight 64-bit state words (loaded at entry, updated
//        in place after every block)
//   x1 = input pointer
//   x2 = number of 128-byte blocks (end of input = x1 + (x2 << 7))
//
// Dispatch: unless __KERNEL__ is defined, OPENSSL_armcap_P is tested
// for ARMV8_SHA512 and, if set, control branches to Lv8_entry.
//
// Register roles:
//   x20-x27  working variables a..h (roles rotate by one every round)
//   x19,x28  alternately the current K[i] and the a^b / b^c value used
//            for Maj
//   x16,x17  scratch for Sigma1/Ch and Sigma0
//   x30      pointer into LK512 (post-incremented on every load)
//   x3-x15,x0,x1,x2  the sixteen message words X[0..15]; registers also
//            used as sigma scratch are spilled to the four slots at
//            [sp,#0..#24]
//
// Frame (x29 = sp after the 128-byte push):
//   [x29,#0]      x29,x30
//   [x29,#16..95] x19-x28
//   [x29,#96]     saved x0 (state pointer)
//   [x29,#104]    end-of-input pointer
//   [x29,#112]    input pointer as of the first load of each block
//   [sp,#0..31]   four 8-byte spill slots (sp = x29 - 32)
//
// NOTE(review): the block loop tests for end of input only after a
// block has been processed, so a block count of 0 is not handled here;
// presumably callers always pass at least one block -- confirm.
//----------------------------------------------------------------------
.globl	sha512_block_data_order

.def sha512_block_data_order
   .type 32
.endef
.align	6
sha512_block_data_order:
	AARCH64_VALID_CALL_TARGET
#ifndef	__KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
#else
	adrp	x16,OPENSSL_armcap_P
#endif
	ldr	w16,[x16,:lo12:OPENSSL_armcap_P]
	tst	w16,#ARMV8_SHA512
	b.ne	Lv8_entry
#endif
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adrp	x30,LK512
	add	x30,x30,:lo12:LK512
	stp	x0,x2,[x29,#96]

	// One iteration per 128-byte block: rounds 0-15 below consume the
	// input words directly, Loop_16_xx runs the remaining rounds.
Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]
#ifndef	__AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x0,x0	// 13
#endif
	ldp	x1,x2,[x1]
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x2,x2	// 15
#endif
	ldr	x7,[sp,#0]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	// Sixteen rounds per pass with the message schedule expanded in
	// place; the pass repeats until the K value pre-loaded for the
	// next round is the zero terminator at the end of LK512.
Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,Loop_16_xx

	// End of block: reload the saved pointers, rewind the K pointer
	// past the 80 constants plus terminator (81*8 = 648 bytes), and
	// fold the working variables back into the state.
	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	Loop

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret


.section	.rodata
.align	6

// Table of 80 64-bit round constants followed by a zero terminator.
// sha512_block_data_order walks it with a post-incremented pointer and
// leaves Loop_16_xx when it loads the terminator.
LK512:
.quad	0x428a2f98d728ae22,0x7137449123ef65cd
.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
.quad	0x3956c25bf348b538,0x59f111f1b605d019
.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad	0xd807aa98a3030242,0x12835b0145706fbe
.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad	0x9bdc06a725c71235,0xc19bf174cf692694
.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad	0x983e5152ee66dfab,0xa831c66d2db43210
.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad	0x06ca6351e003826f,0x142929670a0e6e70
.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
.quad	0x81c2c92e47edaee6,0x92722c851482353b
.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
.quad	0xd192e819d6ef5218,0xd69906245565a910
.quad	0xf40e35855771202a,0x106aa07032bbd1b8
.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad	0x90befffa23631e28,0xa4506cebde82bde9
.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad	0xca273eceea26619c,0xd186b8c721c0c207
.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad	0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// terminator

// ASCII: "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
.text
#ifndef	__KERNEL__
.def sha512_block_armv8
   .type 32
.endef
.align	6
sha512_block_armv8:
Lv8_entry:
	stp	x29,x30,[sp,#-16]!
1104 add x29,sp,#0 1105 1106 ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input 1107 ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 1108 1109 ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context 1110 adrp x3,LK512 1111 add x3,x3,:lo12:LK512 1112 1113 rev64 v16.16b,v16.16b 1114 rev64 v17.16b,v17.16b 1115 rev64 v18.16b,v18.16b 1116 rev64 v19.16b,v19.16b 1117 rev64 v20.16b,v20.16b 1118 rev64 v21.16b,v21.16b 1119 rev64 v22.16b,v22.16b 1120 rev64 v23.16b,v23.16b 1121 b Loop_hw 1122 1123.align 4 1124Loop_hw: 1125 ld1 {v24.2d},[x3],#16 1126 subs x2,x2,#1 1127 sub x4,x1,#128 1128 orr v26.16b,v0.16b,v0.16b // offload 1129 orr v27.16b,v1.16b,v1.16b 1130 orr v28.16b,v2.16b,v2.16b 1131 orr v29.16b,v3.16b,v3.16b 1132 csel x1,x1,x4,ne // conditional rewind 1133 add v24.2d,v24.2d,v16.2d 1134 ld1 {v25.2d},[x3],#16 1135 ext v24.16b,v24.16b,v24.16b,#8 1136 ext v5.16b,v2.16b,v3.16b,#8 1137 ext v6.16b,v1.16b,v2.16b,#8 1138 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1139.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1140 ext v7.16b,v20.16b,v21.16b,#8 1141.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1142.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1143 add v4.2d,v1.2d,v3.2d // "D + T1" 1144.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1145 add v25.2d,v25.2d,v17.2d 1146 ld1 {v24.2d},[x3],#16 1147 ext v25.16b,v25.16b,v25.16b,#8 1148 ext v5.16b,v4.16b,v2.16b,#8 1149 ext v6.16b,v0.16b,v4.16b,#8 1150 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1151.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1152 ext v7.16b,v21.16b,v22.16b,#8 1153.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1154.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1155 add v1.2d,v0.2d,v2.2d // "D + T1" 1156.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1157 add v24.2d,v24.2d,v18.2d 1158 ld1 {v25.2d},[x3],#16 1159 ext v24.16b,v24.16b,v24.16b,#8 1160 ext v5.16b,v1.16b,v4.16b,#8 1161 ext v6.16b,v3.16b,v1.16b,#8 1162 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1163.long 0xcec08272 //sha512su0 
v18.16b,v19.16b 1164 ext v7.16b,v22.16b,v23.16b,#8 1165.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1166.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1167 add v0.2d,v3.2d,v4.2d // "D + T1" 1168.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1169 add v25.2d,v25.2d,v19.2d 1170 ld1 {v24.2d},[x3],#16 1171 ext v25.16b,v25.16b,v25.16b,#8 1172 ext v5.16b,v0.16b,v1.16b,#8 1173 ext v6.16b,v2.16b,v0.16b,#8 1174 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1175.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1176 ext v7.16b,v23.16b,v16.16b,#8 1177.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1178.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1179 add v3.2d,v2.2d,v1.2d // "D + T1" 1180.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1181 add v24.2d,v24.2d,v20.2d 1182 ld1 {v25.2d},[x3],#16 1183 ext v24.16b,v24.16b,v24.16b,#8 1184 ext v5.16b,v3.16b,v0.16b,#8 1185 ext v6.16b,v4.16b,v3.16b,#8 1186 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1187.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1188 ext v7.16b,v16.16b,v17.16b,#8 1189.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1190.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1191 add v2.2d,v4.2d,v0.2d // "D + T1" 1192.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1193 add v25.2d,v25.2d,v21.2d 1194 ld1 {v24.2d},[x3],#16 1195 ext v25.16b,v25.16b,v25.16b,#8 1196 ext v5.16b,v2.16b,v3.16b,#8 1197 ext v6.16b,v1.16b,v2.16b,#8 1198 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1199.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1200 ext v7.16b,v17.16b,v18.16b,#8 1201.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1202.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1203 add v4.2d,v1.2d,v3.2d // "D + T1" 1204.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1205 add v24.2d,v24.2d,v22.2d 1206 ld1 {v25.2d},[x3],#16 1207 ext v24.16b,v24.16b,v24.16b,#8 1208 ext v5.16b,v4.16b,v2.16b,#8 1209 ext v6.16b,v0.16b,v4.16b,#8 1210 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1211.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1212 ext 
v7.16b,v18.16b,v19.16b,#8 1213.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1214.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1215 add v1.2d,v0.2d,v2.2d // "D + T1" 1216.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1217 add v25.2d,v25.2d,v23.2d 1218 ld1 {v24.2d},[x3],#16 1219 ext v25.16b,v25.16b,v25.16b,#8 1220 ext v5.16b,v1.16b,v4.16b,#8 1221 ext v6.16b,v3.16b,v1.16b,#8 1222 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1223.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1224 ext v7.16b,v19.16b,v20.16b,#8 1225.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1226.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1227 add v0.2d,v3.2d,v4.2d // "D + T1" 1228.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1229 add v24.2d,v24.2d,v16.2d 1230 ld1 {v25.2d},[x3],#16 1231 ext v24.16b,v24.16b,v24.16b,#8 1232 ext v5.16b,v0.16b,v1.16b,#8 1233 ext v6.16b,v2.16b,v0.16b,#8 1234 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1235.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1236 ext v7.16b,v20.16b,v21.16b,#8 1237.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1238.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1239 add v3.2d,v2.2d,v1.2d // "D + T1" 1240.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1241 add v25.2d,v25.2d,v17.2d 1242 ld1 {v24.2d},[x3],#16 1243 ext v25.16b,v25.16b,v25.16b,#8 1244 ext v5.16b,v3.16b,v0.16b,#8 1245 ext v6.16b,v4.16b,v3.16b,#8 1246 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1247.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1248 ext v7.16b,v21.16b,v22.16b,#8 1249.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1250.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1251 add v2.2d,v4.2d,v0.2d // "D + T1" 1252.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1253 add v24.2d,v24.2d,v18.2d 1254 ld1 {v25.2d},[x3],#16 1255 ext v24.16b,v24.16b,v24.16b,#8 1256 ext v5.16b,v2.16b,v3.16b,#8 1257 ext v6.16b,v1.16b,v2.16b,#8 1258 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1259.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1260 ext v7.16b,v22.16b,v23.16b,#8 
1261.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1262.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1263 add v4.2d,v1.2d,v3.2d // "D + T1" 1264.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1265 add v25.2d,v25.2d,v19.2d 1266 ld1 {v24.2d},[x3],#16 1267 ext v25.16b,v25.16b,v25.16b,#8 1268 ext v5.16b,v4.16b,v2.16b,#8 1269 ext v6.16b,v0.16b,v4.16b,#8 1270 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1271.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1272 ext v7.16b,v23.16b,v16.16b,#8 1273.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1274.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1275 add v1.2d,v0.2d,v2.2d // "D + T1" 1276.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1277 add v24.2d,v24.2d,v20.2d 1278 ld1 {v25.2d},[x3],#16 1279 ext v24.16b,v24.16b,v24.16b,#8 1280 ext v5.16b,v1.16b,v4.16b,#8 1281 ext v6.16b,v3.16b,v1.16b,#8 1282 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1283.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1284 ext v7.16b,v16.16b,v17.16b,#8 1285.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1286.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1287 add v0.2d,v3.2d,v4.2d // "D + T1" 1288.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1289 add v25.2d,v25.2d,v21.2d 1290 ld1 {v24.2d},[x3],#16 1291 ext v25.16b,v25.16b,v25.16b,#8 1292 ext v5.16b,v0.16b,v1.16b,#8 1293 ext v6.16b,v2.16b,v0.16b,#8 1294 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1295.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1296 ext v7.16b,v17.16b,v18.16b,#8 1297.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1298.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1299 add v3.2d,v2.2d,v1.2d // "D + T1" 1300.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1301 add v24.2d,v24.2d,v22.2d 1302 ld1 {v25.2d},[x3],#16 1303 ext v24.16b,v24.16b,v24.16b,#8 1304 ext v5.16b,v3.16b,v0.16b,#8 1305 ext v6.16b,v4.16b,v3.16b,#8 1306 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1307.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1308 ext v7.16b,v18.16b,v19.16b,#8 1309.long 0xce6680a0 
//sha512h v0.16b,v5.16b,v6.16b 1310.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1311 add v2.2d,v4.2d,v0.2d // "D + T1" 1312.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1313 add v25.2d,v25.2d,v23.2d 1314 ld1 {v24.2d},[x3],#16 1315 ext v25.16b,v25.16b,v25.16b,#8 1316 ext v5.16b,v2.16b,v3.16b,#8 1317 ext v6.16b,v1.16b,v2.16b,#8 1318 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1319.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1320 ext v7.16b,v19.16b,v20.16b,#8 1321.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1322.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1323 add v4.2d,v1.2d,v3.2d // "D + T1" 1324.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1325 add v24.2d,v24.2d,v16.2d 1326 ld1 {v25.2d},[x3],#16 1327 ext v24.16b,v24.16b,v24.16b,#8 1328 ext v5.16b,v4.16b,v2.16b,#8 1329 ext v6.16b,v0.16b,v4.16b,#8 1330 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1331.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1332 ext v7.16b,v20.16b,v21.16b,#8 1333.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1334.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1335 add v1.2d,v0.2d,v2.2d // "D + T1" 1336.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1337 add v25.2d,v25.2d,v17.2d 1338 ld1 {v24.2d},[x3],#16 1339 ext v25.16b,v25.16b,v25.16b,#8 1340 ext v5.16b,v1.16b,v4.16b,#8 1341 ext v6.16b,v3.16b,v1.16b,#8 1342 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1343.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1344 ext v7.16b,v21.16b,v22.16b,#8 1345.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1346.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1347 add v0.2d,v3.2d,v4.2d // "D + T1" 1348.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1349 add v24.2d,v24.2d,v18.2d 1350 ld1 {v25.2d},[x3],#16 1351 ext v24.16b,v24.16b,v24.16b,#8 1352 ext v5.16b,v0.16b,v1.16b,#8 1353 ext v6.16b,v2.16b,v0.16b,#8 1354 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1355.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1356 ext v7.16b,v22.16b,v23.16b,#8 1357.long 0xce6680a1 //sha512h 
v1.16b,v5.16b,v6.16b 1358.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1359 add v3.2d,v2.2d,v1.2d // "D + T1" 1360.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1361 add v25.2d,v25.2d,v19.2d 1362 ld1 {v24.2d},[x3],#16 1363 ext v25.16b,v25.16b,v25.16b,#8 1364 ext v5.16b,v3.16b,v0.16b,#8 1365 ext v6.16b,v4.16b,v3.16b,#8 1366 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1367.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1368 ext v7.16b,v23.16b,v16.16b,#8 1369.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1370.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1371 add v2.2d,v4.2d,v0.2d // "D + T1" 1372.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1373 add v24.2d,v24.2d,v20.2d 1374 ld1 {v25.2d},[x3],#16 1375 ext v24.16b,v24.16b,v24.16b,#8 1376 ext v5.16b,v2.16b,v3.16b,#8 1377 ext v6.16b,v1.16b,v2.16b,#8 1378 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1379.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1380 ext v7.16b,v16.16b,v17.16b,#8 1381.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1382.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1383 add v4.2d,v1.2d,v3.2d // "D + T1" 1384.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1385 add v25.2d,v25.2d,v21.2d 1386 ld1 {v24.2d},[x3],#16 1387 ext v25.16b,v25.16b,v25.16b,#8 1388 ext v5.16b,v4.16b,v2.16b,#8 1389 ext v6.16b,v0.16b,v4.16b,#8 1390 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1391.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1392 ext v7.16b,v17.16b,v18.16b,#8 1393.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1394.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1395 add v1.2d,v0.2d,v2.2d // "D + T1" 1396.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1397 add v24.2d,v24.2d,v22.2d 1398 ld1 {v25.2d},[x3],#16 1399 ext v24.16b,v24.16b,v24.16b,#8 1400 ext v5.16b,v1.16b,v4.16b,#8 1401 ext v6.16b,v3.16b,v1.16b,#8 1402 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1403.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1404 ext v7.16b,v18.16b,v19.16b,#8 1405.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 
1406.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1407 add v0.2d,v3.2d,v4.2d // "D + T1" 1408.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1409 add v25.2d,v25.2d,v23.2d 1410 ld1 {v24.2d},[x3],#16 1411 ext v25.16b,v25.16b,v25.16b,#8 1412 ext v5.16b,v0.16b,v1.16b,#8 1413 ext v6.16b,v2.16b,v0.16b,#8 1414 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1415.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1416 ext v7.16b,v19.16b,v20.16b,#8 1417.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1418.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1419 add v3.2d,v2.2d,v1.2d // "D + T1" 1420.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1421 add v24.2d,v24.2d,v16.2d 1422 ld1 {v25.2d},[x3],#16 1423 ext v24.16b,v24.16b,v24.16b,#8 1424 ext v5.16b,v3.16b,v0.16b,#8 1425 ext v6.16b,v4.16b,v3.16b,#8 1426 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1427.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1428 ext v7.16b,v20.16b,v21.16b,#8 1429.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1430.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1431 add v2.2d,v4.2d,v0.2d // "D + T1" 1432.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1433 add v25.2d,v25.2d,v17.2d 1434 ld1 {v24.2d},[x3],#16 1435 ext v25.16b,v25.16b,v25.16b,#8 1436 ext v5.16b,v2.16b,v3.16b,#8 1437 ext v6.16b,v1.16b,v2.16b,#8 1438 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1439.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1440 ext v7.16b,v21.16b,v22.16b,#8 1441.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1442.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1443 add v4.2d,v1.2d,v3.2d // "D + T1" 1444.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1445 add v24.2d,v24.2d,v18.2d 1446 ld1 {v25.2d},[x3],#16 1447 ext v24.16b,v24.16b,v24.16b,#8 1448 ext v5.16b,v4.16b,v2.16b,#8 1449 ext v6.16b,v0.16b,v4.16b,#8 1450 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1451.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1452 ext v7.16b,v22.16b,v23.16b,#8 1453.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1454.long 0xce678a32 
//sha512su1 v18.16b,v17.16b,v7.16b 1455 add v1.2d,v0.2d,v2.2d // "D + T1" 1456.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1457 add v25.2d,v25.2d,v19.2d 1458 ld1 {v24.2d},[x3],#16 1459 ext v25.16b,v25.16b,v25.16b,#8 1460 ext v5.16b,v1.16b,v4.16b,#8 1461 ext v6.16b,v3.16b,v1.16b,#8 1462 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1463.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1464 ext v7.16b,v23.16b,v16.16b,#8 1465.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1466.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1467 add v0.2d,v3.2d,v4.2d // "D + T1" 1468.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1469 add v24.2d,v24.2d,v20.2d 1470 ld1 {v25.2d},[x3],#16 1471 ext v24.16b,v24.16b,v24.16b,#8 1472 ext v5.16b,v0.16b,v1.16b,#8 1473 ext v6.16b,v2.16b,v0.16b,#8 1474 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1475.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1476 ext v7.16b,v16.16b,v17.16b,#8 1477.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1478.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1479 add v3.2d,v2.2d,v1.2d // "D + T1" 1480.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1481 add v25.2d,v25.2d,v21.2d 1482 ld1 {v24.2d},[x3],#16 1483 ext v25.16b,v25.16b,v25.16b,#8 1484 ext v5.16b,v3.16b,v0.16b,#8 1485 ext v6.16b,v4.16b,v3.16b,#8 1486 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1487.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1488 ext v7.16b,v17.16b,v18.16b,#8 1489.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1490.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1491 add v2.2d,v4.2d,v0.2d // "D + T1" 1492.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1493 add v24.2d,v24.2d,v22.2d 1494 ld1 {v25.2d},[x3],#16 1495 ext v24.16b,v24.16b,v24.16b,#8 1496 ext v5.16b,v2.16b,v3.16b,#8 1497 ext v6.16b,v1.16b,v2.16b,#8 1498 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1499.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1500 ext v7.16b,v18.16b,v19.16b,#8 1501.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1502.long 0xce678ab6 //sha512su1 
v22.16b,v21.16b,v7.16b 1503 add v4.2d,v1.2d,v3.2d // "D + T1" 1504.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1505 add v25.2d,v25.2d,v23.2d 1506 ld1 {v24.2d},[x3],#16 1507 ext v25.16b,v25.16b,v25.16b,#8 1508 ext v5.16b,v4.16b,v2.16b,#8 1509 ext v6.16b,v0.16b,v4.16b,#8 1510 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1511.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1512 ext v7.16b,v19.16b,v20.16b,#8 1513.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1514.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1515 add v1.2d,v0.2d,v2.2d // "D + T1" 1516.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1517 ld1 {v25.2d},[x3],#16 1518 add v24.2d,v24.2d,v16.2d 1519 ld1 {v16.16b},[x1],#16 // load next input 1520 ext v24.16b,v24.16b,v24.16b,#8 1521 ext v5.16b,v1.16b,v4.16b,#8 1522 ext v6.16b,v3.16b,v1.16b,#8 1523 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1524.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1525 rev64 v16.16b,v16.16b 1526 add v0.2d,v3.2d,v4.2d // "D + T1" 1527.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1528 ld1 {v24.2d},[x3],#16 1529 add v25.2d,v25.2d,v17.2d 1530 ld1 {v17.16b},[x1],#16 // load next input 1531 ext v25.16b,v25.16b,v25.16b,#8 1532 ext v5.16b,v0.16b,v1.16b,#8 1533 ext v6.16b,v2.16b,v0.16b,#8 1534 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1535.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1536 rev64 v17.16b,v17.16b 1537 add v3.2d,v2.2d,v1.2d // "D + T1" 1538.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1539 ld1 {v25.2d},[x3],#16 1540 add v24.2d,v24.2d,v18.2d 1541 ld1 {v18.16b},[x1],#16 // load next input 1542 ext v24.16b,v24.16b,v24.16b,#8 1543 ext v5.16b,v3.16b,v0.16b,#8 1544 ext v6.16b,v4.16b,v3.16b,#8 1545 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1546.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1547 rev64 v18.16b,v18.16b 1548 add v2.2d,v4.2d,v0.2d // "D + T1" 1549.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1550 ld1 {v24.2d},[x3],#16 1551 add v25.2d,v25.2d,v19.2d 1552 ld1 {v19.16b},[x1],#16 // load next input 1553 
ext v25.16b,v25.16b,v25.16b,#8 1554 ext v5.16b,v2.16b,v3.16b,#8 1555 ext v6.16b,v1.16b,v2.16b,#8 1556 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1557.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1558 rev64 v19.16b,v19.16b 1559 add v4.2d,v1.2d,v3.2d // "D + T1" 1560.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1561 ld1 {v25.2d},[x3],#16 1562 add v24.2d,v24.2d,v20.2d 1563 ld1 {v20.16b},[x1],#16 // load next input 1564 ext v24.16b,v24.16b,v24.16b,#8 1565 ext v5.16b,v4.16b,v2.16b,#8 1566 ext v6.16b,v0.16b,v4.16b,#8 1567 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1568.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1569 rev64 v20.16b,v20.16b 1570 add v1.2d,v0.2d,v2.2d // "D + T1" 1571.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1572 ld1 {v24.2d},[x3],#16 1573 add v25.2d,v25.2d,v21.2d 1574 ld1 {v21.16b},[x1],#16 // load next input 1575 ext v25.16b,v25.16b,v25.16b,#8 1576 ext v5.16b,v1.16b,v4.16b,#8 1577 ext v6.16b,v3.16b,v1.16b,#8 1578 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1579.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1580 rev64 v21.16b,v21.16b 1581 add v0.2d,v3.2d,v4.2d // "D + T1" 1582.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1583 ld1 {v25.2d},[x3],#16 1584 add v24.2d,v24.2d,v22.2d 1585 ld1 {v22.16b},[x1],#16 // load next input 1586 ext v24.16b,v24.16b,v24.16b,#8 1587 ext v5.16b,v0.16b,v1.16b,#8 1588 ext v6.16b,v2.16b,v0.16b,#8 1589 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1590.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1591 rev64 v22.16b,v22.16b 1592 add v3.2d,v2.2d,v1.2d // "D + T1" 1593.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1594 sub x3,x3,#80*8 // rewind 1595 add v25.2d,v25.2d,v23.2d 1596 ld1 {v23.16b},[x1],#16 // load next input 1597 ext v25.16b,v25.16b,v25.16b,#8 1598 ext v5.16b,v3.16b,v0.16b,#8 1599 ext v6.16b,v4.16b,v3.16b,#8 1600 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1601.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1602 rev64 v23.16b,v23.16b 1603 add v2.2d,v4.2d,v0.2d // "D + T1" 1604.long 0xce618480 
//sha512h2 v0.16b,v4.16b,v1.16b 1605 add v0.2d,v0.2d,v26.2d // accumulate 1606 add v1.2d,v1.2d,v27.2d 1607 add v2.2d,v2.2d,v28.2d 1608 add v3.2d,v3.2d,v29.2d 1609 1610 cbnz x2,Loop_hw 1611 1612 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context 1613 1614 ldr x29,[sp],#16 1615 ret 1616 1617#endif 1618#endif // !OPENSSL_NO_ASM && defined(__AARCH64EL__) && defined(_WIN32) 1619#if defined(__ELF__) 1620// See https://www.airs.com/blog/archives/518. 1621.section .note.GNU-stack,"",%progbits 1622#endif 1623