1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#if !defined(__has_feature) 5#define __has_feature(x) 0 6#endif 7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 8#define OPENSSL_NO_ASM 9#endif 10 11#if !defined(OPENSSL_NO_ASM) 12#if defined(BORINGSSL_PREFIX) 13#include <boringssl_prefix_symbols_asm.h> 14#endif 15// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. 16// 17// Licensed under the OpenSSL license (the "License"). You may not use 18// this file except in compliance with the License. You can obtain a copy 19// in the file LICENSE in the source distribution or at 20// https://www.openssl.org/source/license.html 21 22// ==================================================================== 23// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 24// project. The module is, however, dual licensed under OpenSSL and 25// CRYPTOGAMS licenses depending on where you obtain it. For further 26// details see http://www.openssl.org/~appro/cryptogams/. 27// 28// Permission to use under GPLv2 terms is granted. 29// ==================================================================== 30// 31// SHA256/512 for ARMv8. 32// 33// Performance in cycles per processed byte and improvement coefficient 34// over code generated with "default" compiler: 35// 36// SHA256-hw SHA256(*) SHA512 37// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) 38// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) 39// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) 40// Denver 2.01 10.5 (+26%) 6.70 (+8%) 41// X-Gene 20.0 (+100%) 12.8 (+300%(***)) 42// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) 43// Kryo 1.92 17.4 (+30%) 11.2 (+8%) 44// 45// (*) Software SHA256 results are of lesser relevance, presented 46// mostly for informational purposes. 47// (**) The result is a trade-off: it's possible to improve it by 48// 10% (or by 1 cycle per round), but at the cost of 20% loss 49// on Cortex-A53 (or by 4 cycles per round). 50// (***) Super-impressive coefficients over gcc-generated code are 51// indication of some compiler "pathology", most notably code 52// generated with -mgeneral-regs-only is significantly faster 53// and the gap is only 40-90%. 54 55#ifndef __KERNEL__ 56# include <openssl/arm_arch.h> 57#endif 58 59.text 60 61 62.private_extern _OPENSSL_armcap_P 63.globl _sha512_block_data_order 64.private_extern _sha512_block_data_order 65 66.align 6 67_sha512_block_data_order: 68 AARCH64_VALID_CALL_TARGET 69#ifndef __KERNEL__ 70#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 71 adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P 72#else 73 adrp x16,_OPENSSL_armcap_P@PAGE 74#endif 75 ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF] 76 tst w16,#ARMV8_SHA512 77 b.ne Lv8_entry 78#endif 79 AARCH64_SIGN_LINK_REGISTER 80 stp x29,x30,[sp,#-128]! 81 add x29,sp,#0 82 83 stp x19,x20,[sp,#16] 84 stp x21,x22,[sp,#32] 85 stp x23,x24,[sp,#48] 86 stp x25,x26,[sp,#64] 87 stp x27,x28,[sp,#80] 88 sub sp,sp,#4*8 89 90 ldp x20,x21,[x0] // load context 91 ldp x22,x23,[x0,#2*8] 92 ldp x24,x25,[x0,#4*8] 93 add x2,x1,x2,lsl#7 // end of input 94 ldp x26,x27,[x0,#6*8] 95 adrp x30,LK512@PAGE 96 add x30,x30,LK512@PAGEOFF 97 stp x0,x2,[x29,#96] 98 99Loop: 100 ldp x3,x4,[x1],#2*8 101 ldr x19,[x30],#8 // *K++ 102 eor x28,x21,x22 // magic seed 103 str x1,[x29,#112] 104#ifndef __AARCH64EB__ 105 rev x3,x3 // 0 106#endif 107 ror x16,x24,#14 108 add x27,x27,x19 // h+=K[i] 109 eor x6,x24,x24,ror#23 110 and x17,x25,x24 111 bic x19,x26,x24 112 add x27,x27,x3 // h+=X[i] 113 orr x17,x17,x19 // Ch(e,f,g) 114 eor x19,x20,x21 // a^b, b^c in next round 115 eor x16,x16,x6,ror#18 // Sigma1(e) 116 ror x6,x20,#28 117 add x27,x27,x17 // h+=Ch(e,f,g) 118 eor x17,x20,x20,ror#5 119 add x27,x27,x16 // h+=Sigma1(e) 120 and x28,x28,x19 // (b^c)&=(a^b) 121 add x23,x23,x27 // d+=h 122 eor x28,x28,x21 // Maj(a,b,c) 123 eor x17,x6,x17,ror#34 // Sigma0(a) 124 add x27,x27,x28 // h+=Maj(a,b,c) 125 ldr x28,[x30],#8 // *K++, x19 in next round 126 //add x27,x27,x17 // h+=Sigma0(a) 127#ifndef __AARCH64EB__ 128 rev x4,x4 // 1 129#endif 130 ldp x5,x6,[x1],#2*8 131 add x27,x27,x17 // h+=Sigma0(a) 132 ror x16,x23,#14 133 add x26,x26,x28 // h+=K[i] 134 eor x7,x23,x23,ror#23 135 and x17,x24,x23 136 bic x28,x25,x23 137 add x26,x26,x4 // h+=X[i] 138 orr x17,x17,x28 // Ch(e,f,g) 139 eor x28,x27,x20 // a^b, b^c in next round 140 eor x16,x16,x7,ror#18 // Sigma1(e) 141 ror x7,x27,#28 142 add x26,x26,x17 // h+=Ch(e,f,g) 143 eor x17,x27,x27,ror#5 144 add x26,x26,x16 // h+=Sigma1(e) 145 and x19,x19,x28 // (b^c)&=(a^b) 146 add x22,x22,x26 // d+=h 147 eor x19,x19,x20 // Maj(a,b,c) 148 eor x17,x7,x17,ror#34 // Sigma0(a) 149 add x26,x26,x19 // h+=Maj(a,b,c) 150 ldr x19,[x30],#8 // *K++, x28 in next round 151 //add x26,x26,x17 // h+=Sigma0(a) 152#ifndef __AARCH64EB__ 153 rev x5,x5 // 2 154#endif 155 add x26,x26,x17 // h+=Sigma0(a) 156 ror x16,x22,#14 157 add x25,x25,x19 // h+=K[i] 158 eor x8,x22,x22,ror#23 159 and x17,x23,x22 160 bic x19,x24,x22 161 add x25,x25,x5 // h+=X[i] 162 orr x17,x17,x19 // Ch(e,f,g) 163 eor x19,x26,x27 // a^b, b^c in next round 164 eor x16,x16,x8,ror#18 // Sigma1(e) 165 ror x8,x26,#28 166 add x25,x25,x17 // h+=Ch(e,f,g) 167 eor x17,x26,x26,ror#5 168 add x25,x25,x16 // h+=Sigma1(e) 169 and x28,x28,x19 // (b^c)&=(a^b) 170 add x21,x21,x25 // d+=h 171 eor x28,x28,x27 // Maj(a,b,c) 172 eor x17,x8,x17,ror#34 // Sigma0(a) 173 add x25,x25,x28 // h+=Maj(a,b,c) 174 ldr x28,[x30],#8 // *K++, x19 in next round 175 //add x25,x25,x17 // h+=Sigma0(a) 176#ifndef __AARCH64EB__ 177 rev x6,x6 // 3 178#endif 179 ldp x7,x8,[x1],#2*8 180 add x25,x25,x17 // h+=Sigma0(a) 181 ror x16,x21,#14 182 add x24,x24,x28 // h+=K[i] 183 eor x9,x21,x21,ror#23 184 and x17,x22,x21 185 bic x28,x23,x21 186 add x24,x24,x6 // h+=X[i] 187 orr x17,x17,x28 // Ch(e,f,g) 188 eor x28,x25,x26 // a^b, b^c in next round 189 eor x16,x16,x9,ror#18 // Sigma1(e) 190 ror x9,x25,#28 191 add x24,x24,x17 // h+=Ch(e,f,g) 192 eor x17,x25,x25,ror#5 193 add x24,x24,x16 // h+=Sigma1(e) 194 and x19,x19,x28 // (b^c)&=(a^b) 195 add x20,x20,x24 // d+=h 196 eor x19,x19,x26 // Maj(a,b,c) 197 eor x17,x9,x17,ror#34 // Sigma0(a) 198 add x24,x24,x19 // h+=Maj(a,b,c) 199 ldr x19,[x30],#8 // *K++, x28 in next round 200 //add x24,x24,x17 // h+=Sigma0(a) 201#ifndef __AARCH64EB__ 202 rev x7,x7 // 4 203#endif 204 add x24,x24,x17 // h+=Sigma0(a) 205 ror x16,x20,#14 206 add x23,x23,x19 // h+=K[i] 207 eor x10,x20,x20,ror#23 208 and x17,x21,x20 209 bic x19,x22,x20 210 add x23,x23,x7 // h+=X[i] 211 orr x17,x17,x19 // Ch(e,f,g) 212 eor x19,x24,x25 // a^b, b^c in next round 213 eor x16,x16,x10,ror#18 // Sigma1(e) 214 ror x10,x24,#28 215 add x23,x23,x17 // h+=Ch(e,f,g) 216 eor x17,x24,x24,ror#5 217 add x23,x23,x16 // h+=Sigma1(e) 218 and x28,x28,x19 // (b^c)&=(a^b) 219 add x27,x27,x23 // d+=h 220 eor x28,x28,x25 // Maj(a,b,c) 221 eor x17,x10,x17,ror#34 // Sigma0(a) 222 add x23,x23,x28 // h+=Maj(a,b,c) 223 ldr x28,[x30],#8 // *K++, x19 in next round 224 //add x23,x23,x17 // h+=Sigma0(a) 225#ifndef __AARCH64EB__ 226 rev x8,x8 // 5 227#endif 228 ldp x9,x10,[x1],#2*8 229 add x23,x23,x17 // h+=Sigma0(a) 230 ror x16,x27,#14 231 add x22,x22,x28 // h+=K[i] 232 eor x11,x27,x27,ror#23 233 and x17,x20,x27 234 bic x28,x21,x27 235 add x22,x22,x8 // h+=X[i] 236 orr x17,x17,x28 // Ch(e,f,g) 237 eor x28,x23,x24 // a^b, b^c in next round 238 eor x16,x16,x11,ror#18 // Sigma1(e) 239 ror x11,x23,#28 240 add x22,x22,x17 // h+=Ch(e,f,g) 241 eor x17,x23,x23,ror#5 242 add x22,x22,x16 // h+=Sigma1(e) 243 and x19,x19,x28 // (b^c)&=(a^b) 244 add x26,x26,x22 // d+=h 245 eor x19,x19,x24 // Maj(a,b,c) 246 eor x17,x11,x17,ror#34 // Sigma0(a) 247 add x22,x22,x19 // h+=Maj(a,b,c) 248 ldr x19,[x30],#8 // *K++, x28 in next round 249 //add x22,x22,x17 // h+=Sigma0(a) 250#ifndef __AARCH64EB__ 251 rev x9,x9 // 6 252#endif 253 add x22,x22,x17 // h+=Sigma0(a) 254 ror x16,x26,#14 255 add x21,x21,x19 // h+=K[i] 256 eor x12,x26,x26,ror#23 257 and x17,x27,x26 258 bic x19,x20,x26 259 add x21,x21,x9 // h+=X[i] 260 orr x17,x17,x19 // Ch(e,f,g) 261 eor x19,x22,x23 // a^b, b^c in next round 262 eor x16,x16,x12,ror#18 // Sigma1(e) 263 ror x12,x22,#28 264 add x21,x21,x17 // h+=Ch(e,f,g) 265 eor x17,x22,x22,ror#5 266 add x21,x21,x16 // h+=Sigma1(e) 267 and x28,x28,x19 // (b^c)&=(a^b) 268 add x25,x25,x21 // d+=h 269 eor x28,x28,x23 // Maj(a,b,c) 270 eor x17,x12,x17,ror#34 // Sigma0(a) 271 add x21,x21,x28 // h+=Maj(a,b,c) 272 ldr x28,[x30],#8 // *K++, x19 in next round 273 //add x21,x21,x17 // h+=Sigma0(a) 274#ifndef __AARCH64EB__ 275 rev x10,x10 // 7 276#endif 277 ldp x11,x12,[x1],#2*8 278 add x21,x21,x17 // h+=Sigma0(a) 279 ror x16,x25,#14 280 add x20,x20,x28 // h+=K[i] 281 eor x13,x25,x25,ror#23 282 and x17,x26,x25 283 bic x28,x27,x25 284 add x20,x20,x10 // h+=X[i] 285 orr x17,x17,x28 // Ch(e,f,g) 286 eor x28,x21,x22 // a^b, b^c in next round 287 eor x16,x16,x13,ror#18 // Sigma1(e) 288 ror x13,x21,#28 289 add x20,x20,x17 // h+=Ch(e,f,g) 290 eor x17,x21,x21,ror#5 291 add x20,x20,x16 // h+=Sigma1(e) 292 and x19,x19,x28 // (b^c)&=(a^b) 293 add x24,x24,x20 // d+=h 294 eor x19,x19,x22 // Maj(a,b,c) 295 eor x17,x13,x17,ror#34 // Sigma0(a) 296 add x20,x20,x19 // h+=Maj(a,b,c) 297 ldr x19,[x30],#8 // *K++, x28 in next round 298 //add x20,x20,x17 // h+=Sigma0(a) 299#ifndef __AARCH64EB__ 300 rev x11,x11 // 8 301#endif 302 add x20,x20,x17 // h+=Sigma0(a) 303 ror x16,x24,#14 304 add x27,x27,x19 // h+=K[i] 305 eor x14,x24,x24,ror#23 306 and x17,x25,x24 307 bic x19,x26,x24 308 add x27,x27,x11 // h+=X[i] 309 orr x17,x17,x19 // Ch(e,f,g) 310 eor x19,x20,x21 // a^b, b^c in next round 311 eor x16,x16,x14,ror#18 // Sigma1(e) 312 ror x14,x20,#28 313 add x27,x27,x17 // h+=Ch(e,f,g) 314 eor x17,x20,x20,ror#5 315 add x27,x27,x16 // h+=Sigma1(e) 316 and x28,x28,x19 // (b^c)&=(a^b) 317 add x23,x23,x27 // d+=h 318 eor x28,x28,x21 // Maj(a,b,c) 319 eor x17,x14,x17,ror#34 // Sigma0(a) 320 add x27,x27,x28 // h+=Maj(a,b,c) 321 ldr x28,[x30],#8 // *K++, x19 in next round 322 //add x27,x27,x17 // h+=Sigma0(a) 323#ifndef __AARCH64EB__ 324 rev x12,x12 // 9 325#endif 326 ldp x13,x14,[x1],#2*8 327 add x27,x27,x17 // h+=Sigma0(a) 328 ror x16,x23,#14 329 add x26,x26,x28 // h+=K[i] 330 eor x15,x23,x23,ror#23 331 and x17,x24,x23 332 bic x28,x25,x23 333 add x26,x26,x12 // h+=X[i] 334 orr x17,x17,x28 // Ch(e,f,g) 335 eor x28,x27,x20 // a^b, b^c in next round 336 eor x16,x16,x15,ror#18 // Sigma1(e) 337 ror x15,x27,#28 338 add x26,x26,x17 // h+=Ch(e,f,g) 339 eor x17,x27,x27,ror#5 340 add x26,x26,x16 // h+=Sigma1(e) 341 and x19,x19,x28 // (b^c)&=(a^b) 342 add x22,x22,x26 // d+=h 343 eor x19,x19,x20 // Maj(a,b,c) 344 eor x17,x15,x17,ror#34 // Sigma0(a) 345 add x26,x26,x19 // h+=Maj(a,b,c) 346 ldr x19,[x30],#8 // *K++, x28 in next round 347 //add x26,x26,x17 // h+=Sigma0(a) 348#ifndef __AARCH64EB__ 349 rev x13,x13 // 10 350#endif 351 add x26,x26,x17 // h+=Sigma0(a) 352 ror x16,x22,#14 353 add x25,x25,x19 // h+=K[i] 354 eor x0,x22,x22,ror#23 355 and x17,x23,x22 356 bic x19,x24,x22 357 add x25,x25,x13 // h+=X[i] 358 orr x17,x17,x19 // Ch(e,f,g) 359 eor x19,x26,x27 // a^b, b^c in next round 360 eor x16,x16,x0,ror#18 // Sigma1(e) 361 ror x0,x26,#28 362 add x25,x25,x17 // h+=Ch(e,f,g) 363 eor x17,x26,x26,ror#5 364 add x25,x25,x16 // h+=Sigma1(e) 365 and x28,x28,x19 // (b^c)&=(a^b) 366 add x21,x21,x25 // d+=h 367 eor x28,x28,x27 // Maj(a,b,c) 368 eor x17,x0,x17,ror#34 // Sigma0(a) 369 add x25,x25,x28 // h+=Maj(a,b,c) 370 ldr x28,[x30],#8 // *K++, x19 in next round 371 //add x25,x25,x17 // h+=Sigma0(a) 372#ifndef __AARCH64EB__ 373 rev x14,x14 // 11 374#endif 375 ldp x15,x0,[x1],#2*8 376 add x25,x25,x17 // h+=Sigma0(a) 377 str x6,[sp,#24] 378 ror x16,x21,#14 379 add x24,x24,x28 // h+=K[i] 380 eor x6,x21,x21,ror#23 381 and x17,x22,x21 382 bic x28,x23,x21 383 add x24,x24,x14 // h+=X[i] 384 orr x17,x17,x28 // Ch(e,f,g) 385 eor x28,x25,x26 // a^b, b^c in next round 386 eor x16,x16,x6,ror#18 // Sigma1(e) 387 ror x6,x25,#28 388 add x24,x24,x17 // h+=Ch(e,f,g) 389 eor x17,x25,x25,ror#5 390 add x24,x24,x16 // h+=Sigma1(e) 391 and x19,x19,x28 // (b^c)&=(a^b) 392 add x20,x20,x24 // d+=h 393 eor x19,x19,x26 // Maj(a,b,c) 394 eor x17,x6,x17,ror#34 // Sigma0(a) 395 add x24,x24,x19 // h+=Maj(a,b,c) 396 ldr x19,[x30],#8 // *K++, x28 in next round 397 //add x24,x24,x17 // h+=Sigma0(a) 398#ifndef __AARCH64EB__ 399 rev x15,x15 // 12 400#endif 401 add x24,x24,x17 // h+=Sigma0(a) 402 str x7,[sp,#0] 403 ror x16,x20,#14 404 add x23,x23,x19 // h+=K[i] 405 eor x7,x20,x20,ror#23 406 and x17,x21,x20 407 bic x19,x22,x20 408 add x23,x23,x15 // h+=X[i] 409 orr x17,x17,x19 // Ch(e,f,g) 410 eor x19,x24,x25 // a^b, b^c in next round 411 eor x16,x16,x7,ror#18 // Sigma1(e) 412 ror x7,x24,#28 413 add x23,x23,x17 // h+=Ch(e,f,g) 414 eor x17,x24,x24,ror#5 415 add x23,x23,x16 // h+=Sigma1(e) 416 and x28,x28,x19 // (b^c)&=(a^b) 417 add x27,x27,x23 // d+=h 418 eor x28,x28,x25 // Maj(a,b,c) 419 eor x17,x7,x17,ror#34 // Sigma0(a) 420 add x23,x23,x28 // h+=Maj(a,b,c) 421 ldr x28,[x30],#8 // *K++, x19 in next round 422 //add x23,x23,x17 // h+=Sigma0(a) 423#ifndef __AARCH64EB__ 424 rev x0,x0 // 13 425#endif 426 ldp x1,x2,[x1] 427 add x23,x23,x17 // h+=Sigma0(a) 428 str x8,[sp,#8] 429 ror x16,x27,#14 430 add x22,x22,x28 // h+=K[i] 431 eor x8,x27,x27,ror#23 432 and x17,x20,x27 433 bic x28,x21,x27 434 add x22,x22,x0 // h+=X[i] 435 orr x17,x17,x28 // Ch(e,f,g) 436 eor x28,x23,x24 // a^b, b^c in next round 437 eor x16,x16,x8,ror#18 // Sigma1(e) 438 ror x8,x23,#28 439 add x22,x22,x17 // h+=Ch(e,f,g) 440 eor x17,x23,x23,ror#5 441 add x22,x22,x16 // h+=Sigma1(e) 442 and x19,x19,x28 // (b^c)&=(a^b) 443 add x26,x26,x22 // d+=h 444 eor x19,x19,x24 // Maj(a,b,c) 445 eor x17,x8,x17,ror#34 // Sigma0(a) 446 add x22,x22,x19 // h+=Maj(a,b,c) 447 ldr x19,[x30],#8 // *K++, x28 in next round 448 //add x22,x22,x17 // h+=Sigma0(a) 449#ifndef __AARCH64EB__ 450 rev x1,x1 // 14 451#endif 452 ldr x6,[sp,#24] 453 add x22,x22,x17 // h+=Sigma0(a) 454 str x9,[sp,#16] 455 ror x16,x26,#14 456 add x21,x21,x19 // h+=K[i] 457 eor x9,x26,x26,ror#23 458 and x17,x27,x26 459 bic x19,x20,x26 460 add x21,x21,x1 // h+=X[i] 461 orr x17,x17,x19 // Ch(e,f,g) 462 eor x19,x22,x23 // a^b, b^c in next round 463 eor x16,x16,x9,ror#18 // Sigma1(e) 464 ror x9,x22,#28 465 add x21,x21,x17 // h+=Ch(e,f,g) 466 eor x17,x22,x22,ror#5 467 add x21,x21,x16 // h+=Sigma1(e) 468 and x28,x28,x19 // (b^c)&=(a^b) 469 add x25,x25,x21 // d+=h 470 eor x28,x28,x23 // Maj(a,b,c) 471 eor x17,x9,x17,ror#34 // Sigma0(a) 472 add x21,x21,x28 // h+=Maj(a,b,c) 473 ldr x28,[x30],#8 // *K++, x19 in next round 474 //add x21,x21,x17 // h+=Sigma0(a) 475#ifndef __AARCH64EB__ 476 rev x2,x2 // 15 477#endif 478 ldr x7,[sp,#0] 479 add x21,x21,x17 // h+=Sigma0(a) 480 str x10,[sp,#24] 481 ror x16,x25,#14 482 add x20,x20,x28 // h+=K[i] 483 ror x9,x4,#1 484 and x17,x26,x25 485 ror x8,x1,#19 486 bic x28,x27,x25 487 ror x10,x21,#28 488 add x20,x20,x2 // h+=X[i] 489 eor x16,x16,x25,ror#18 490 eor x9,x9,x4,ror#8 491 orr x17,x17,x28 // Ch(e,f,g) 492 eor x28,x21,x22 // a^b, b^c in next round 493 eor x16,x16,x25,ror#41 // Sigma1(e) 494 eor x10,x10,x21,ror#34 495 add x20,x20,x17 // h+=Ch(e,f,g) 496 and x19,x19,x28 // (b^c)&=(a^b) 497 eor x8,x8,x1,ror#61 498 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 499 add x20,x20,x16 // h+=Sigma1(e) 500 eor x19,x19,x22 // Maj(a,b,c) 501 eor x17,x10,x21,ror#39 // Sigma0(a) 502 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 503 add x3,x3,x12 504 add x24,x24,x20 // d+=h 505 add x20,x20,x19 // h+=Maj(a,b,c) 506 ldr x19,[x30],#8 // *K++, x28 in next round 507 add x3,x3,x9 508 add x20,x20,x17 // h+=Sigma0(a) 509 add x3,x3,x8 510Loop_16_xx: 511 ldr x8,[sp,#8] 512 str x11,[sp,#0] 513 ror x16,x24,#14 514 add x27,x27,x19 // h+=K[i] 515 ror x10,x5,#1 516 and x17,x25,x24 517 ror x9,x2,#19 518 bic x19,x26,x24 519 ror x11,x20,#28 520 add x27,x27,x3 // h+=X[i] 521 eor x16,x16,x24,ror#18 522 eor x10,x10,x5,ror#8 523 orr x17,x17,x19 // Ch(e,f,g) 524 eor x19,x20,x21 // a^b, b^c in next round 525 eor x16,x16,x24,ror#41 // Sigma1(e) 526 eor x11,x11,x20,ror#34 527 add x27,x27,x17 // h+=Ch(e,f,g) 528 and x28,x28,x19 // (b^c)&=(a^b) 529 eor x9,x9,x2,ror#61 530 eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) 531 add x27,x27,x16 // h+=Sigma1(e) 532 eor x28,x28,x21 // Maj(a,b,c) 533 eor x17,x11,x20,ror#39 // Sigma0(a) 534 eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) 535 add x4,x4,x13 536 add x23,x23,x27 // d+=h 537 add x27,x27,x28 // h+=Maj(a,b,c) 538 ldr x28,[x30],#8 // *K++, x19 in next round 539 add x4,x4,x10 540 add x27,x27,x17 // h+=Sigma0(a) 541 add x4,x4,x9 542 ldr x9,[sp,#16] 543 str x12,[sp,#8] 544 ror x16,x23,#14 545 add x26,x26,x28 // h+=K[i] 546 ror x11,x6,#1 547 and x17,x24,x23 548 ror x10,x3,#19 549 bic x28,x25,x23 550 ror x12,x27,#28 551 add x26,x26,x4 // h+=X[i] 552 eor x16,x16,x23,ror#18 553 eor x11,x11,x6,ror#8 554 orr x17,x17,x28 // Ch(e,f,g) 555 eor x28,x27,x20 // a^b, b^c in next round 556 eor x16,x16,x23,ror#41 // Sigma1(e) 557 eor x12,x12,x27,ror#34 558 add x26,x26,x17 // h+=Ch(e,f,g) 559 and x19,x19,x28 // (b^c)&=(a^b) 560 eor x10,x10,x3,ror#61 561 eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) 562 add x26,x26,x16 // h+=Sigma1(e) 563 eor x19,x19,x20 // Maj(a,b,c) 564 eor x17,x12,x27,ror#39 // Sigma0(a) 565 eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) 566 add x5,x5,x14 567 add x22,x22,x26 // d+=h 568 add x26,x26,x19 // h+=Maj(a,b,c) 569 ldr x19,[x30],#8 // *K++, x28 in next round 570 add x5,x5,x11 571 add x26,x26,x17 // h+=Sigma0(a) 572 add x5,x5,x10 573 ldr x10,[sp,#24] 574 str x13,[sp,#16] 575 ror x16,x22,#14 576 add x25,x25,x19 // h+=K[i] 577 ror x12,x7,#1 578 and x17,x23,x22 579 ror x11,x4,#19 580 bic x19,x24,x22 581 ror x13,x26,#28 582 add x25,x25,x5 // h+=X[i] 583 eor x16,x16,x22,ror#18 584 eor x12,x12,x7,ror#8 585 orr x17,x17,x19 // Ch(e,f,g) 586 eor x19,x26,x27 // a^b, b^c in next round 587 eor x16,x16,x22,ror#41 // Sigma1(e) 588 eor x13,x13,x26,ror#34 589 add x25,x25,x17 // h+=Ch(e,f,g) 590 and x28,x28,x19 // (b^c)&=(a^b) 591 eor x11,x11,x4,ror#61 592 eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) 593 add x25,x25,x16 // h+=Sigma1(e) 594 eor x28,x28,x27 // Maj(a,b,c) 595 eor x17,x13,x26,ror#39 // Sigma0(a) 596 eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) 597 add x6,x6,x15 598 add x21,x21,x25 // d+=h 599 add x25,x25,x28 // h+=Maj(a,b,c) 600 ldr x28,[x30],#8 // *K++, x19 in next round 601 add x6,x6,x12 602 add x25,x25,x17 // h+=Sigma0(a) 603 add x6,x6,x11 604 ldr x11,[sp,#0] 605 str x14,[sp,#24] 606 ror x16,x21,#14 607 add x24,x24,x28 // h+=K[i] 608 ror x13,x8,#1 609 and x17,x22,x21 610 ror x12,x5,#19 611 bic x28,x23,x21 612 ror x14,x25,#28 613 add x24,x24,x6 // h+=X[i] 614 eor x16,x16,x21,ror#18 615 eor x13,x13,x8,ror#8 616 orr x17,x17,x28 // Ch(e,f,g) 617 eor x28,x25,x26 // a^b, b^c in next round 618 eor x16,x16,x21,ror#41 // Sigma1(e) 619 eor x14,x14,x25,ror#34 620 add x24,x24,x17 // h+=Ch(e,f,g) 621 and x19,x19,x28 // (b^c)&=(a^b) 622 eor x12,x12,x5,ror#61 623 eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) 624 add x24,x24,x16 // h+=Sigma1(e) 625 eor x19,x19,x26 // Maj(a,b,c) 626 eor x17,x14,x25,ror#39 // Sigma0(a) 627 eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) 628 add x7,x7,x0 629 add x20,x20,x24 // d+=h 630 add x24,x24,x19 // h+=Maj(a,b,c) 631 ldr x19,[x30],#8 // *K++, x28 in next round 632 add x7,x7,x13 633 add x24,x24,x17 // h+=Sigma0(a) 634 add x7,x7,x12 635 ldr x12,[sp,#8] 636 str x15,[sp,#0] 637 ror x16,x20,#14 638 add x23,x23,x19 // h+=K[i] 639 ror x14,x9,#1 640 and x17,x21,x20 641 ror x13,x6,#19 642 bic x19,x22,x20 643 ror x15,x24,#28 644 add x23,x23,x7 // h+=X[i] 645 eor x16,x16,x20,ror#18 646 eor x14,x14,x9,ror#8 647 orr x17,x17,x19 // Ch(e,f,g) 648 eor x19,x24,x25 // a^b, b^c in next round 649 eor x16,x16,x20,ror#41 // Sigma1(e) 650 eor x15,x15,x24,ror#34 651 add x23,x23,x17 // h+=Ch(e,f,g) 652 and x28,x28,x19 // (b^c)&=(a^b) 653 eor x13,x13,x6,ror#61 654 eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) 655 add x23,x23,x16 // h+=Sigma1(e) 656 eor x28,x28,x25 // Maj(a,b,c) 657 eor x17,x15,x24,ror#39 // Sigma0(a) 658 eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) 659 add x8,x8,x1 660 add x27,x27,x23 // d+=h 661 add x23,x23,x28 // h+=Maj(a,b,c) 662 ldr x28,[x30],#8 // *K++, x19 in next round 663 add x8,x8,x14 664 add x23,x23,x17 // h+=Sigma0(a) 665 add x8,x8,x13 666 ldr x13,[sp,#16] 667 str x0,[sp,#8] 668 ror x16,x27,#14 669 add x22,x22,x28 // h+=K[i] 670 ror x15,x10,#1 671 and x17,x20,x27 672 ror x14,x7,#19 673 bic x28,x21,x27 674 ror x0,x23,#28 675 add x22,x22,x8 // h+=X[i] 676 eor x16,x16,x27,ror#18 677 eor x15,x15,x10,ror#8 678 orr x17,x17,x28 // Ch(e,f,g) 679 eor x28,x23,x24 // a^b, b^c in next round 680 eor x16,x16,x27,ror#41 // Sigma1(e) 681 eor x0,x0,x23,ror#34 682 add x22,x22,x17 // h+=Ch(e,f,g) 683 and x19,x19,x28 // (b^c)&=(a^b) 684 eor x14,x14,x7,ror#61 685 eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) 686 add x22,x22,x16 // h+=Sigma1(e) 687 eor x19,x19,x24 // Maj(a,b,c) 688 eor x17,x0,x23,ror#39 // Sigma0(a) 689 eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) 690 add x9,x9,x2 691 add x26,x26,x22 // d+=h 692 add x22,x22,x19 // h+=Maj(a,b,c) 693 ldr x19,[x30],#8 // *K++, x28 in next round 694 add x9,x9,x15 695 add x22,x22,x17 // h+=Sigma0(a) 696 add x9,x9,x14 697 ldr x14,[sp,#24] 698 str x1,[sp,#16] 699 ror x16,x26,#14 700 add x21,x21,x19 // h+=K[i] 701 ror x0,x11,#1 702 and x17,x27,x26 703 ror x15,x8,#19 704 bic x19,x20,x26 705 ror x1,x22,#28 706 add x21,x21,x9 // h+=X[i] 707 eor x16,x16,x26,ror#18 708 eor x0,x0,x11,ror#8 709 orr x17,x17,x19 // Ch(e,f,g) 710 eor x19,x22,x23 // a^b, b^c in next round 711 eor x16,x16,x26,ror#41 // Sigma1(e) 712 eor x1,x1,x22,ror#34 713 add x21,x21,x17 // h+=Ch(e,f,g) 714 and x28,x28,x19 // (b^c)&=(a^b) 715 eor x15,x15,x8,ror#61 716 eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) 717 add x21,x21,x16 // h+=Sigma1(e) 718 eor x28,x28,x23 // Maj(a,b,c) 719 eor x17,x1,x22,ror#39 // Sigma0(a) 720 eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) 721 add x10,x10,x3 722 add x25,x25,x21 // d+=h 723 add x21,x21,x28 // h+=Maj(a,b,c) 724 ldr x28,[x30],#8 // *K++, x19 in next round 725 add x10,x10,x0 726 add x21,x21,x17 // h+=Sigma0(a) 727 add x10,x10,x15 728 ldr x15,[sp,#0] 729 str x2,[sp,#24] 730 ror x16,x25,#14 731 add x20,x20,x28 // h+=K[i] 732 ror x1,x12,#1 733 and x17,x26,x25 734 ror x0,x9,#19 735 bic x28,x27,x25 736 ror x2,x21,#28 737 add x20,x20,x10 // h+=X[i] 738 eor x16,x16,x25,ror#18 739 eor x1,x1,x12,ror#8 740 orr x17,x17,x28 // Ch(e,f,g) 741 eor x28,x21,x22 // a^b, b^c in next round 742 eor x16,x16,x25,ror#41 // Sigma1(e) 743 eor x2,x2,x21,ror#34 744 add x20,x20,x17 // h+=Ch(e,f,g) 745 and x19,x19,x28 // (b^c)&=(a^b) 746 eor x0,x0,x9,ror#61 747 eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) 748 add x20,x20,x16 // h+=Sigma1(e) 749 eor x19,x19,x22 // Maj(a,b,c) 750 eor x17,x2,x21,ror#39 // Sigma0(a) 751 eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) 752 add x11,x11,x4 753 add x24,x24,x20 // d+=h 754 add x20,x20,x19 // h+=Maj(a,b,c) 755 ldr x19,[x30],#8 // *K++, x28 in next round 756 add x11,x11,x1 757 add x20,x20,x17 // h+=Sigma0(a) 758 add x11,x11,x0 759 ldr x0,[sp,#8] 760 str x3,[sp,#0] 761 ror x16,x24,#14 762 add x27,x27,x19 // h+=K[i] 763 ror x2,x13,#1 764 and x17,x25,x24 765 ror x1,x10,#19 766 bic x19,x26,x24 767 ror x3,x20,#28 768 add x27,x27,x11 // h+=X[i] 769 eor x16,x16,x24,ror#18 770 eor x2,x2,x13,ror#8 771 orr x17,x17,x19 // Ch(e,f,g) 772 eor x19,x20,x21 // a^b, b^c in next round 773 eor x16,x16,x24,ror#41 // Sigma1(e) 774 eor x3,x3,x20,ror#34 775 add x27,x27,x17 // h+=Ch(e,f,g) 776 and x28,x28,x19 // (b^c)&=(a^b) 777 eor x1,x1,x10,ror#61 778 eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) 779 add x27,x27,x16 // h+=Sigma1(e) 780 eor x28,x28,x21 // Maj(a,b,c) 781 eor x17,x3,x20,ror#39 // Sigma0(a) 782 eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) 783 add x12,x12,x5 784 add x23,x23,x27 // d+=h 785 add x27,x27,x28 // h+=Maj(a,b,c) 786 ldr x28,[x30],#8 // *K++, x19 in next round 787 add x12,x12,x2 788 add x27,x27,x17 // h+=Sigma0(a) 789 add x12,x12,x1 790 ldr x1,[sp,#16] 791 str x4,[sp,#8] 792 ror x16,x23,#14 793 add x26,x26,x28 // h+=K[i] 794 ror x3,x14,#1 795 and x17,x24,x23 796 ror x2,x11,#19 797 bic x28,x25,x23 798 ror x4,x27,#28 799 add x26,x26,x12 // h+=X[i] 800 eor x16,x16,x23,ror#18 801 eor x3,x3,x14,ror#8 802 orr x17,x17,x28 // Ch(e,f,g) 803 eor x28,x27,x20 // a^b, b^c in next round 804 eor x16,x16,x23,ror#41 // Sigma1(e) 805 eor x4,x4,x27,ror#34 806 add x26,x26,x17 // h+=Ch(e,f,g) 807 and x19,x19,x28 // (b^c)&=(a^b) 808 eor x2,x2,x11,ror#61 809 eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) 810 add x26,x26,x16 // h+=Sigma1(e) 811 eor x19,x19,x20 // Maj(a,b,c) 812 eor x17,x4,x27,ror#39 // Sigma0(a) 813 eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) 814 add x13,x13,x6 815 add x22,x22,x26 // d+=h 816 add x26,x26,x19 // h+=Maj(a,b,c) 817 ldr x19,[x30],#8 // *K++, x28 in next round 818 add x13,x13,x3 819 add x26,x26,x17 // h+=Sigma0(a) 820 add x13,x13,x2 821 ldr x2,[sp,#24] 822 str x5,[sp,#16] 823 ror x16,x22,#14 824 add x25,x25,x19 // h+=K[i] 825 ror x4,x15,#1 826 and x17,x23,x22 827 ror x3,x12,#19 828 bic x19,x24,x22 829 ror x5,x26,#28 830 add x25,x25,x13 // h+=X[i] 831 eor x16,x16,x22,ror#18 832 eor x4,x4,x15,ror#8 833 orr x17,x17,x19 // Ch(e,f,g) 834 eor x19,x26,x27 // a^b, b^c in next round 835 eor x16,x16,x22,ror#41 // Sigma1(e) 836 eor x5,x5,x26,ror#34 837 add x25,x25,x17 // h+=Ch(e,f,g) 838 and x28,x28,x19 // (b^c)&=(a^b) 839 eor x3,x3,x12,ror#61 840 eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) 841 add x25,x25,x16 // h+=Sigma1(e) 842 eor x28,x28,x27 // Maj(a,b,c) 843 eor x17,x5,x26,ror#39 // Sigma0(a) 844 eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) 845 add x14,x14,x7 846 add x21,x21,x25 // d+=h 847 add x25,x25,x28 // h+=Maj(a,b,c) 848 ldr x28,[x30],#8 // *K++, x19 in next round 849 add x14,x14,x4 850 add x25,x25,x17 // h+=Sigma0(a) 851 add x14,x14,x3 852 ldr x3,[sp,#0] 853 str x6,[sp,#24] 854 ror x16,x21,#14 855 add x24,x24,x28 // h+=K[i] 856 ror x5,x0,#1 857 and x17,x22,x21 858 ror x4,x13,#19 859 bic x28,x23,x21 860 ror x6,x25,#28 861 add x24,x24,x14 // h+=X[i] 862 eor x16,x16,x21,ror#18 863 eor x5,x5,x0,ror#8 864 orr x17,x17,x28 // Ch(e,f,g) 865 eor x28,x25,x26 // a^b, b^c in next round 866 eor x16,x16,x21,ror#41 // Sigma1(e) 867 eor x6,x6,x25,ror#34 868 add x24,x24,x17 // h+=Ch(e,f,g) 869 and x19,x19,x28 // (b^c)&=(a^b) 870 eor x4,x4,x13,ror#61 871 eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) 872 add x24,x24,x16 // h+=Sigma1(e) 873 eor x19,x19,x26 // Maj(a,b,c) 874 eor x17,x6,x25,ror#39 // Sigma0(a) 875 eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) 876 add x15,x15,x8 877 add x20,x20,x24 // d+=h 878 add x24,x24,x19 // h+=Maj(a,b,c) 879 ldr x19,[x30],#8 // *K++, x28 in next round 880 add x15,x15,x5 881 add x24,x24,x17 // h+=Sigma0(a) 882 add x15,x15,x4 883 ldr x4,[sp,#8] 884 str x7,[sp,#0] 885 ror x16,x20,#14 886 add x23,x23,x19 // h+=K[i] 887 ror x6,x1,#1 888 and x17,x21,x20 889 ror x5,x14,#19 890 bic x19,x22,x20 891 ror x7,x24,#28 892 add x23,x23,x15 // h+=X[i] 893 eor x16,x16,x20,ror#18 894 eor x6,x6,x1,ror#8 895 orr x17,x17,x19 // Ch(e,f,g) 896 eor x19,x24,x25 // a^b, b^c in next round 897 eor x16,x16,x20,ror#41 // Sigma1(e) 898 eor x7,x7,x24,ror#34 899 add x23,x23,x17 // h+=Ch(e,f,g) 900 and x28,x28,x19 // (b^c)&=(a^b) 901 eor x5,x5,x14,ror#61 902 eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) 903 add x23,x23,x16 // h+=Sigma1(e) 904 eor x28,x28,x25 // Maj(a,b,c) 905 eor x17,x7,x24,ror#39 // Sigma0(a) 906 eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) 907 add x0,x0,x9 908 add x27,x27,x23 // d+=h 909 add x23,x23,x28 // h+=Maj(a,b,c) 910 ldr x28,[x30],#8 // *K++, x19 in next round 911 add x0,x0,x6 912 add x23,x23,x17 // h+=Sigma0(a) 913 add x0,x0,x5 914 ldr x5,[sp,#16] 915 str x8,[sp,#8] 916 ror x16,x27,#14 917 add x22,x22,x28 // h+=K[i] 918 ror x7,x2,#1 919 and x17,x20,x27 920 ror x6,x15,#19 921 bic x28,x21,x27 922 ror x8,x23,#28 923 add x22,x22,x0 // h+=X[i] 924 eor x16,x16,x27,ror#18 925 eor x7,x7,x2,ror#8 926 orr x17,x17,x28 // Ch(e,f,g) 927 eor x28,x23,x24 // a^b, b^c in next round 928 eor x16,x16,x27,ror#41 // Sigma1(e) 929 eor x8,x8,x23,ror#34 930 add x22,x22,x17 // h+=Ch(e,f,g) 931 and x19,x19,x28 // (b^c)&=(a^b) 932 eor x6,x6,x15,ror#61 933 eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) 934 add x22,x22,x16 // h+=Sigma1(e) 935 eor x19,x19,x24 // Maj(a,b,c) 936 eor x17,x8,x23,ror#39 // Sigma0(a) 937 eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) 938 add x1,x1,x10 939 add x26,x26,x22 // d+=h 940 add x22,x22,x19 // h+=Maj(a,b,c) 941 ldr x19,[x30],#8 // *K++, x28 in next round 942 add x1,x1,x7 943 add x22,x22,x17 // h+=Sigma0(a) 944 add x1,x1,x6 945 ldr x6,[sp,#24] 946 str x9,[sp,#16] 947 ror x16,x26,#14 948 add x21,x21,x19 // h+=K[i] 949 ror x8,x3,#1 950 and x17,x27,x26 951 ror x7,x0,#19 952 bic x19,x20,x26 953 ror x9,x22,#28 954 add x21,x21,x1 // h+=X[i] 955 eor x16,x16,x26,ror#18 956 eor x8,x8,x3,ror#8 957 orr x17,x17,x19 // Ch(e,f,g) 958 eor x19,x22,x23 // a^b, b^c in next round 959 eor x16,x16,x26,ror#41 // Sigma1(e) 960 eor x9,x9,x22,ror#34 961 add x21,x21,x17 // h+=Ch(e,f,g) 962 and x28,x28,x19 // (b^c)&=(a^b) 963 eor x7,x7,x0,ror#61 964 eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) 965 add x21,x21,x16 // h+=Sigma1(e) 966 eor x28,x28,x23 // Maj(a,b,c) 967 eor x17,x9,x22,ror#39 // Sigma0(a) 968 eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) 969 add x2,x2,x11 970 add x25,x25,x21 // d+=h 971 add x21,x21,x28 // h+=Maj(a,b,c) 972 ldr x28,[x30],#8 // *K++, x19 in next round 973 add x2,x2,x8 974 add x21,x21,x17 // h+=Sigma0(a) 975 add x2,x2,x7 976 ldr x7,[sp,#0] 977 str x10,[sp,#24] 978 ror x16,x25,#14 979 add x20,x20,x28 // h+=K[i] 980 ror x9,x4,#1 981 and x17,x26,x25 982 ror x8,x1,#19 983 bic x28,x27,x25 984 ror x10,x21,#28 985 add x20,x20,x2 // h+=X[i] 986 eor x16,x16,x25,ror#18 987 eor x9,x9,x4,ror#8 988 orr x17,x17,x28 // Ch(e,f,g) 989 eor x28,x21,x22 // a^b, b^c in next round 990 eor x16,x16,x25,ror#41 // Sigma1(e) 991 eor x10,x10,x21,ror#34 992 add x20,x20,x17 // h+=Ch(e,f,g) 993 and x19,x19,x28 // (b^c)&=(a^b) 994 eor x8,x8,x1,ror#61 995 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 996 add x20,x20,x16 // h+=Sigma1(e) 997 eor x19,x19,x22 // Maj(a,b,c) 998 eor x17,x10,x21,ror#39 // Sigma0(a) 999 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 1000 add x3,x3,x12 1001 add x24,x24,x20 // d+=h 1002 add x20,x20,x19 // h+=Maj(a,b,c) 1003 ldr x19,[x30],#8 // *K++, x28 in next round 1004 add x3,x3,x9 1005 add x20,x20,x17 // h+=Sigma0(a) 1006 add x3,x3,x8 1007 cbnz x19,Loop_16_xx 1008 1009 ldp x0,x2,[x29,#96] 1010 ldr x1,[x29,#112] 1011 sub x30,x30,#648 // rewind 1012 1013 ldp x3,x4,[x0] 1014 ldp x5,x6,[x0,#2*8] 1015 add x1,x1,#14*8 // advance input pointer 1016 ldp x7,x8,[x0,#4*8] 1017 add x20,x20,x3 1018 ldp x9,x10,[x0,#6*8] 1019 add x21,x21,x4 1020 add x22,x22,x5 1021 add x23,x23,x6 1022 stp x20,x21,[x0] 1023 add x24,x24,x7 1024 add x25,x25,x8 1025 stp x22,x23,[x0,#2*8] 1026 add x26,x26,x9 1027 add x27,x27,x10 1028 cmp x1,x2 1029 stp x24,x25,[x0,#4*8] 1030 stp x26,x27,[x0,#6*8] 1031 b.ne Loop 1032 1033 ldp x19,x20,[x29,#16] 1034 add sp,sp,#4*8 1035 ldp x21,x22,[x29,#32] 1036 ldp x23,x24,[x29,#48] 1037 ldp x25,x26,[x29,#64] 1038 ldp x27,x28,[x29,#80] 1039 ldp x29,x30,[sp],#128 1040 AARCH64_VALIDATE_LINK_REGISTER 1041 ret 1042 1043 1044.section __TEXT,__const 1045.align 6 1046 1047LK512: 1048.quad 0x428a2f98d728ae22,0x7137449123ef65cd 1049.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc 1050.quad 0x3956c25bf348b538,0x59f111f1b605d019 1051.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 1052.quad 0xd807aa98a3030242,0x12835b0145706fbe 1053.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 1054.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 1055.quad 0x9bdc06a725c71235,0xc19bf174cf692694 1056.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 1057.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 1058.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 1059.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 1060.quad 0x983e5152ee66dfab,0xa831c66d2db43210 1061.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 1062.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 1063.quad 0x06ca6351e003826f,0x142929670a0e6e70 1064.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 1065.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df 1066.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 1067.quad 0x81c2c92e47edaee6,0x92722c851482353b 1068.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 1069.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 1070.quad 0xd192e819d6ef5218,0xd69906245565a910 1071.quad 0xf40e35855771202a,0x106aa07032bbd1b8 1072.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 1073.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 1074.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb 1075.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 1076.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 1077.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec 1078.quad 0x90befffa23631e28,0xa4506cebde82bde9 1079.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b 1080.quad 0xca273eceea26619c,0xd186b8c721c0c207 1081.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 1082.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 1083.quad 0x113f9804bef90dae,0x1b710b35131c471b 1084.quad 0x28db77f523047d84,0x32caab7b40c72493 1085.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c 1086.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a 1087.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 1088.quad 0 // terminator 1089 1090.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 1091.align 2 1092.align 2 1093.text 1094#ifndef __KERNEL__ 1095 1096.align 6 1097sha512_block_armv8: 1098Lv8_entry: 1099 stp x29,x30,[sp,#-16]! 1100 add x29,sp,#0 1101 1102 ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input 1103 ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 1104 1105 ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context 1106 adrp x3,LK512@PAGE 1107 add x3,x3,LK512@PAGEOFF 1108 1109 rev64 v16.16b,v16.16b 1110 rev64 v17.16b,v17.16b 1111 rev64 v18.16b,v18.16b 1112 rev64 v19.16b,v19.16b 1113 rev64 v20.16b,v20.16b 1114 rev64 v21.16b,v21.16b 1115 rev64 v22.16b,v22.16b 1116 rev64 v23.16b,v23.16b 1117 b Loop_hw 1118 1119.align 4 1120Loop_hw: 1121 ld1 {v24.2d},[x3],#16 1122 subs x2,x2,#1 1123 sub x4,x1,#128 1124 orr v26.16b,v0.16b,v0.16b // offload 1125 orr v27.16b,v1.16b,v1.16b 1126 orr v28.16b,v2.16b,v2.16b 1127 orr v29.16b,v3.16b,v3.16b 1128 csel x1,x1,x4,ne // conditional rewind 1129 add v24.2d,v24.2d,v16.2d 1130 ld1 {v25.2d},[x3],#16 1131 ext v24.16b,v24.16b,v24.16b,#8 1132 ext v5.16b,v2.16b,v3.16b,#8 1133 ext v6.16b,v1.16b,v2.16b,#8 1134 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1135.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1136 ext v7.16b,v20.16b,v21.16b,#8 1137.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1138.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1139 add v4.2d,v1.2d,v3.2d // "D + T1" 1140.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1141 add v25.2d,v25.2d,v17.2d 1142 ld1 {v24.2d},[x3],#16 1143 ext v25.16b,v25.16b,v25.16b,#8 1144 ext v5.16b,v4.16b,v2.16b,#8 1145 ext v6.16b,v0.16b,v4.16b,#8 1146 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1147.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1148 ext v7.16b,v21.16b,v22.16b,#8 1149.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1150.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1151 add v1.2d,v0.2d,v2.2d // "D + T1" 1152.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1153 add v24.2d,v24.2d,v18.2d 1154 ld1 {v25.2d},[x3],#16 1155 ext v24.16b,v24.16b,v24.16b,#8 1156 ext v5.16b,v1.16b,v4.16b,#8 1157 ext v6.16b,v3.16b,v1.16b,#8 1158 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1159.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1160 ext v7.16b,v22.16b,v23.16b,#8 1161.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1162.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1163 add v0.2d,v3.2d,v4.2d // "D + T1" 1164.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1165 add v25.2d,v25.2d,v19.2d 1166 ld1 {v24.2d},[x3],#16 1167 ext v25.16b,v25.16b,v25.16b,#8 1168 ext v5.16b,v0.16b,v1.16b,#8 1169 ext v6.16b,v2.16b,v0.16b,#8 1170 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1171.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1172 ext v7.16b,v23.16b,v16.16b,#8 1173.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1174.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1175 add v3.2d,v2.2d,v1.2d // "D + T1" 1176.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1177 add v24.2d,v24.2d,v20.2d 1178 ld1 {v25.2d},[x3],#16 1179 ext v24.16b,v24.16b,v24.16b,#8 1180 ext v5.16b,v3.16b,v0.16b,#8 1181 ext v6.16b,v4.16b,v3.16b,#8 1182 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1183.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1184 ext v7.16b,v16.16b,v17.16b,#8 1185.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1186.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1187 add v2.2d,v4.2d,v0.2d // "D + T1" 1188.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1189 add v25.2d,v25.2d,v21.2d 1190 ld1 {v24.2d},[x3],#16 1191 ext v25.16b,v25.16b,v25.16b,#8 1192 ext v5.16b,v2.16b,v3.16b,#8 1193 ext v6.16b,v1.16b,v2.16b,#8 1194 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1195.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1196 ext v7.16b,v17.16b,v18.16b,#8 1197.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1198.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1199 add v4.2d,v1.2d,v3.2d // "D + T1" 1200.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1201 add v24.2d,v24.2d,v22.2d 1202 ld1 {v25.2d},[x3],#16 1203 ext v24.16b,v24.16b,v24.16b,#8 1204 ext v5.16b,v4.16b,v2.16b,#8 1205 ext v6.16b,v0.16b,v4.16b,#8 1206 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1207.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1208 ext v7.16b,v18.16b,v19.16b,#8 1209.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1210.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1211 add v1.2d,v0.2d,v2.2d // "D + T1" 1212.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1213 add v25.2d,v25.2d,v23.2d 1214 ld1 {v24.2d},[x3],#16 1215 ext v25.16b,v25.16b,v25.16b,#8 1216 ext v5.16b,v1.16b,v4.16b,#8 1217 ext v6.16b,v3.16b,v1.16b,#8 1218 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1219.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1220 ext v7.16b,v19.16b,v20.16b,#8 1221.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1222.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1223 add v0.2d,v3.2d,v4.2d // "D + T1" 1224.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1225 add v24.2d,v24.2d,v16.2d 1226 ld1 {v25.2d},[x3],#16 1227 ext v24.16b,v24.16b,v24.16b,#8 1228 ext v5.16b,v0.16b,v1.16b,#8 1229 ext v6.16b,v2.16b,v0.16b,#8 1230 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1231.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1232 ext v7.16b,v20.16b,v21.16b,#8 1233.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1234.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1235 add v3.2d,v2.2d,v1.2d // "D + T1" 1236.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1237 add v25.2d,v25.2d,v17.2d 1238 ld1 {v24.2d},[x3],#16 1239 ext v25.16b,v25.16b,v25.16b,#8 1240 ext v5.16b,v3.16b,v0.16b,#8 1241 ext v6.16b,v4.16b,v3.16b,#8 1242 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1243.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1244 ext v7.16b,v21.16b,v22.16b,#8 1245.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1246.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1247 add v2.2d,v4.2d,v0.2d // "D + T1" 1248.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1249 add v24.2d,v24.2d,v18.2d 1250 ld1 {v25.2d},[x3],#16 1251 ext v24.16b,v24.16b,v24.16b,#8 1252 ext v5.16b,v2.16b,v3.16b,#8 1253 ext v6.16b,v1.16b,v2.16b,#8 1254 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1255.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1256 ext v7.16b,v22.16b,v23.16b,#8 1257.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1258.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1259 add v4.2d,v1.2d,v3.2d // "D + T1" 1260.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1261 add v25.2d,v25.2d,v19.2d 1262 ld1 {v24.2d},[x3],#16 1263 ext v25.16b,v25.16b,v25.16b,#8 1264 ext v5.16b,v4.16b,v2.16b,#8 1265 ext v6.16b,v0.16b,v4.16b,#8 1266 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1267.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1268 ext v7.16b,v23.16b,v16.16b,#8 1269.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1270.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1271 add v1.2d,v0.2d,v2.2d // "D + T1" 1272.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1273 add v24.2d,v24.2d,v20.2d 1274 ld1 {v25.2d},[x3],#16 1275 ext v24.16b,v24.16b,v24.16b,#8 1276 ext v5.16b,v1.16b,v4.16b,#8 1277 ext v6.16b,v3.16b,v1.16b,#8 1278 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1279.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1280 ext v7.16b,v16.16b,v17.16b,#8 1281.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1282.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1283 add v0.2d,v3.2d,v4.2d // "D + T1" 1284.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1285 add v25.2d,v25.2d,v21.2d 1286 ld1 {v24.2d},[x3],#16 1287 ext v25.16b,v25.16b,v25.16b,#8 1288 ext v5.16b,v0.16b,v1.16b,#8 1289 ext v6.16b,v2.16b,v0.16b,#8 1290 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1291.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1292 ext v7.16b,v17.16b,v18.16b,#8 1293.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1294.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1295 add v3.2d,v2.2d,v1.2d // "D + T1" 1296.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1297 add v24.2d,v24.2d,v22.2d 1298 ld1 {v25.2d},[x3],#16 1299 ext v24.16b,v24.16b,v24.16b,#8 1300 ext v5.16b,v3.16b,v0.16b,#8 1301 ext v6.16b,v4.16b,v3.16b,#8 1302 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1303.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1304 ext v7.16b,v18.16b,v19.16b,#8 1305.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1306.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1307 add v2.2d,v4.2d,v0.2d // "D + T1" 1308.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1309 add v25.2d,v25.2d,v23.2d 1310 ld1 {v24.2d},[x3],#16 1311 ext v25.16b,v25.16b,v25.16b,#8 1312 ext v5.16b,v2.16b,v3.16b,#8 1313 ext v6.16b,v1.16b,v2.16b,#8 1314 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1315.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1316 ext v7.16b,v19.16b,v20.16b,#8 1317.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1318.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1319 add v4.2d,v1.2d,v3.2d // "D + T1" 1320.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1321 add v24.2d,v24.2d,v16.2d 1322 ld1 {v25.2d},[x3],#16 1323 ext v24.16b,v24.16b,v24.16b,#8 1324 ext v5.16b,v4.16b,v2.16b,#8 1325 ext v6.16b,v0.16b,v4.16b,#8 1326 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1327.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1328 ext v7.16b,v20.16b,v21.16b,#8 1329.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1330.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1331 add v1.2d,v0.2d,v2.2d // "D + T1" 1332.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1333 add v25.2d,v25.2d,v17.2d 1334 ld1 {v24.2d},[x3],#16 1335 ext v25.16b,v25.16b,v25.16b,#8 1336 ext v5.16b,v1.16b,v4.16b,#8 1337 ext v6.16b,v3.16b,v1.16b,#8 1338 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1339.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1340 ext v7.16b,v21.16b,v22.16b,#8 1341.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1342.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1343 add v0.2d,v3.2d,v4.2d // "D + T1" 1344.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1345 add v24.2d,v24.2d,v18.2d 1346 ld1 {v25.2d},[x3],#16 1347 ext v24.16b,v24.16b,v24.16b,#8 1348 ext v5.16b,v0.16b,v1.16b,#8 1349 ext v6.16b,v2.16b,v0.16b,#8 1350 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1351.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1352 ext v7.16b,v22.16b,v23.16b,#8 1353.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1354.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1355 add v3.2d,v2.2d,v1.2d // "D + T1" 1356.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1357 add v25.2d,v25.2d,v19.2d 1358 ld1 {v24.2d},[x3],#16 1359 ext v25.16b,v25.16b,v25.16b,#8 1360 ext v5.16b,v3.16b,v0.16b,#8 1361 ext v6.16b,v4.16b,v3.16b,#8 1362 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1363.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1364 ext v7.16b,v23.16b,v16.16b,#8 1365.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1366.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1367 add v2.2d,v4.2d,v0.2d // "D + T1" 1368.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1369 add v24.2d,v24.2d,v20.2d 1370 ld1 {v25.2d},[x3],#16 1371 ext v24.16b,v24.16b,v24.16b,#8 1372 ext v5.16b,v2.16b,v3.16b,#8 1373 ext v6.16b,v1.16b,v2.16b,#8 1374 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1375.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1376 ext v7.16b,v16.16b,v17.16b,#8 1377.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1378.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1379 add v4.2d,v1.2d,v3.2d // "D + T1" 1380.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1381 add v25.2d,v25.2d,v21.2d 1382 ld1 {v24.2d},[x3],#16 1383 ext v25.16b,v25.16b,v25.16b,#8 1384 ext v5.16b,v4.16b,v2.16b,#8 1385 ext v6.16b,v0.16b,v4.16b,#8 1386 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1387.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1388 ext v7.16b,v17.16b,v18.16b,#8 1389.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1390.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1391 add v1.2d,v0.2d,v2.2d // "D + T1" 1392.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1393 add v24.2d,v24.2d,v22.2d 1394 ld1 {v25.2d},[x3],#16 1395 ext v24.16b,v24.16b,v24.16b,#8 1396 ext v5.16b,v1.16b,v4.16b,#8 1397 ext v6.16b,v3.16b,v1.16b,#8 1398 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1399.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1400 ext v7.16b,v18.16b,v19.16b,#8 1401.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1402.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1403 add v0.2d,v3.2d,v4.2d // "D + T1" 1404.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1405 add v25.2d,v25.2d,v23.2d 1406 ld1 {v24.2d},[x3],#16 1407 ext v25.16b,v25.16b,v25.16b,#8 1408 ext v5.16b,v0.16b,v1.16b,#8 1409 ext v6.16b,v2.16b,v0.16b,#8 1410 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1411.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1412 ext v7.16b,v19.16b,v20.16b,#8 1413.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1414.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1415 add v3.2d,v2.2d,v1.2d // "D + T1" 1416.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1417 add v24.2d,v24.2d,v16.2d 1418 ld1 {v25.2d},[x3],#16 1419 ext v24.16b,v24.16b,v24.16b,#8 1420 ext v5.16b,v3.16b,v0.16b,#8 1421 ext v6.16b,v4.16b,v3.16b,#8 1422 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1423.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1424 ext v7.16b,v20.16b,v21.16b,#8 1425.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1426.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1427 add v2.2d,v4.2d,v0.2d // "D + T1" 1428.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1429 add v25.2d,v25.2d,v17.2d 1430 ld1 {v24.2d},[x3],#16 1431 ext v25.16b,v25.16b,v25.16b,#8 1432 ext v5.16b,v2.16b,v3.16b,#8 1433 ext v6.16b,v1.16b,v2.16b,#8 1434 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1435.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1436 ext v7.16b,v21.16b,v22.16b,#8 1437.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1438.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1439 add v4.2d,v1.2d,v3.2d // "D + T1" 1440.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1441 add v24.2d,v24.2d,v18.2d 1442 ld1 {v25.2d},[x3],#16 1443 ext v24.16b,v24.16b,v24.16b,#8 1444 ext v5.16b,v4.16b,v2.16b,#8 1445 ext v6.16b,v0.16b,v4.16b,#8 1446 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1447.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1448 ext v7.16b,v22.16b,v23.16b,#8 1449.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1450.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1451 add v1.2d,v0.2d,v2.2d // "D + T1" 1452.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1453 add v25.2d,v25.2d,v19.2d 1454 ld1 {v24.2d},[x3],#16 1455 ext v25.16b,v25.16b,v25.16b,#8 1456 ext v5.16b,v1.16b,v4.16b,#8 1457 ext v6.16b,v3.16b,v1.16b,#8 1458 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1459.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1460 ext v7.16b,v23.16b,v16.16b,#8 1461.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1462.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1463 add v0.2d,v3.2d,v4.2d // "D + T1" 1464.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1465 add v24.2d,v24.2d,v20.2d 1466 ld1 {v25.2d},[x3],#16 1467 ext v24.16b,v24.16b,v24.16b,#8 1468 ext v5.16b,v0.16b,v1.16b,#8 1469 ext v6.16b,v2.16b,v0.16b,#8 1470 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1471.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1472 ext v7.16b,v16.16b,v17.16b,#8 1473.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1474.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1475 add v3.2d,v2.2d,v1.2d // "D + T1" 1476.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1477 add v25.2d,v25.2d,v21.2d 1478 ld1 {v24.2d},[x3],#16 1479 ext v25.16b,v25.16b,v25.16b,#8 1480 ext v5.16b,v3.16b,v0.16b,#8 1481 ext v6.16b,v4.16b,v3.16b,#8 1482 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1483.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1484 ext v7.16b,v17.16b,v18.16b,#8 1485.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1486.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1487 add v2.2d,v4.2d,v0.2d // "D + T1" 1488.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1489 add v24.2d,v24.2d,v22.2d 1490 ld1 {v25.2d},[x3],#16 1491 ext v24.16b,v24.16b,v24.16b,#8 1492 ext v5.16b,v2.16b,v3.16b,#8 1493 ext v6.16b,v1.16b,v2.16b,#8 1494 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1495.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1496 ext v7.16b,v18.16b,v19.16b,#8 1497.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1498.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1499 add v4.2d,v1.2d,v3.2d // "D + T1" 1500.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1501 add v25.2d,v25.2d,v23.2d 1502 ld1 {v24.2d},[x3],#16 1503 ext v25.16b,v25.16b,v25.16b,#8 1504 ext v5.16b,v4.16b,v2.16b,#8 1505 ext v6.16b,v0.16b,v4.16b,#8 1506 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1507.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1508 ext v7.16b,v19.16b,v20.16b,#8 1509.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1510.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1511 add v1.2d,v0.2d,v2.2d // "D + T1" 1512.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1513 ld1 {v25.2d},[x3],#16 1514 add v24.2d,v24.2d,v16.2d 1515 ld1 {v16.16b},[x1],#16 // load next input 1516 ext v24.16b,v24.16b,v24.16b,#8 1517 ext v5.16b,v1.16b,v4.16b,#8 1518 ext v6.16b,v3.16b,v1.16b,#8 1519 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1520.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1521 rev64 v16.16b,v16.16b 1522 add v0.2d,v3.2d,v4.2d // "D + T1" 1523.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1524 ld1 {v24.2d},[x3],#16 1525 add v25.2d,v25.2d,v17.2d 1526 ld1 {v17.16b},[x1],#16 // load next input 1527 ext v25.16b,v25.16b,v25.16b,#8 1528 ext v5.16b,v0.16b,v1.16b,#8 1529 ext v6.16b,v2.16b,v0.16b,#8 1530 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1531.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1532 rev64 v17.16b,v17.16b 1533 add v3.2d,v2.2d,v1.2d // "D + T1" 1534.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1535 ld1 {v25.2d},[x3],#16 1536 add v24.2d,v24.2d,v18.2d 1537 ld1 {v18.16b},[x1],#16 // load next input 1538 ext v24.16b,v24.16b,v24.16b,#8 1539 ext v5.16b,v3.16b,v0.16b,#8 1540 ext v6.16b,v4.16b,v3.16b,#8 1541 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1542.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1543 rev64 v18.16b,v18.16b 1544 add v2.2d,v4.2d,v0.2d // "D + T1" 1545.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1546 ld1 {v24.2d},[x3],#16 1547 add v25.2d,v25.2d,v19.2d 1548 ld1 {v19.16b},[x1],#16 // load next input 1549 ext v25.16b,v25.16b,v25.16b,#8 1550 ext v5.16b,v2.16b,v3.16b,#8 1551 ext v6.16b,v1.16b,v2.16b,#8 1552 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1553.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1554 rev64 v19.16b,v19.16b 1555 add v4.2d,v1.2d,v3.2d // "D + T1" 1556.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1557 ld1 {v25.2d},[x3],#16 1558 add v24.2d,v24.2d,v20.2d 1559 ld1 {v20.16b},[x1],#16 // load next input 1560 ext v24.16b,v24.16b,v24.16b,#8 1561 ext v5.16b,v4.16b,v2.16b,#8 1562 ext v6.16b,v0.16b,v4.16b,#8 1563 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1564.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1565 rev64 v20.16b,v20.16b 1566 add v1.2d,v0.2d,v2.2d // "D + T1" 1567.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1568 ld1 {v24.2d},[x3],#16 1569 add v25.2d,v25.2d,v21.2d 1570 ld1 {v21.16b},[x1],#16 // load next input 1571 ext v25.16b,v25.16b,v25.16b,#8 1572 ext v5.16b,v1.16b,v4.16b,#8 1573 ext v6.16b,v3.16b,v1.16b,#8 1574 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1575.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1576 rev64 v21.16b,v21.16b 1577 add v0.2d,v3.2d,v4.2d // "D + T1" 1578.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1579 ld1 {v25.2d},[x3],#16 1580 add v24.2d,v24.2d,v22.2d 1581 ld1 {v22.16b},[x1],#16 // load next input 1582 ext v24.16b,v24.16b,v24.16b,#8 1583 ext v5.16b,v0.16b,v1.16b,#8 1584 ext v6.16b,v2.16b,v0.16b,#8 1585 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1586.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1587 rev64 v22.16b,v22.16b 1588 add v3.2d,v2.2d,v1.2d // "D + T1" 1589.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1590 sub x3,x3,#80*8 // rewind 1591 add v25.2d,v25.2d,v23.2d 1592 ld1 {v23.16b},[x1],#16 // load next input 1593 ext v25.16b,v25.16b,v25.16b,#8 1594 ext v5.16b,v3.16b,v0.16b,#8 1595 ext v6.16b,v4.16b,v3.16b,#8 1596 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1597.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1598 rev64 v23.16b,v23.16b 1599 add v2.2d,v4.2d,v0.2d // "D + T1" 1600.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1601 add v0.2d,v0.2d,v26.2d // accumulate 1602 add v1.2d,v1.2d,v27.2d 1603 add v2.2d,v2.2d,v28.2d 1604 add v3.2d,v3.2d,v29.2d 1605 1606 cbnz x2,Loop_hw 1607 1608 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context 1609 1610 ldr x29,[sp],#16 1611 ret 1612 1613#endif 1614#endif // !OPENSSL_NO_ASM 1615