1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#if !defined(__has_feature) 5#define __has_feature(x) 0 6#endif 7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 8#define OPENSSL_NO_ASM 9#endif 10 11#if !defined(OPENSSL_NO_ASM) 12#if defined(__aarch64__) 13#if defined(BORINGSSL_PREFIX) 14#include <boringssl_prefix_symbols_asm.h> 15#endif 16// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. 17// 18// Licensed under the OpenSSL license (the "License"). You may not use 19// this file except in compliance with the License. You can obtain a copy 20// in the file LICENSE in the source distribution or at 21// https://www.openssl.org/source/license.html 22 23// ==================================================================== 24// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 25// project. The module is, however, dual licensed under OpenSSL and 26// CRYPTOGAMS licenses depending on where you obtain it. For further 27// details see http://www.openssl.org/~appro/cryptogams/. 28// 29// Permission to use under GPLv2 terms is granted. 30// ==================================================================== 31// 32// SHA256/512 for ARMv8. 33// 34// Performance in cycles per processed byte and improvement coefficient 35// over code generated with "default" compiler: 36// 37// SHA256-hw SHA256(*) SHA512 38// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) 39// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) 40// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) 41// Denver 2.01 10.5 (+26%) 6.70 (+8%) 42// X-Gene 20.0 (+100%) 12.8 (+300%(***)) 43// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) 44// Kryo 1.92 17.4 (+30%) 11.2 (+8%) 45// 46// (*) Software SHA256 results are of lesser relevance, presented 47// mostly for informational purposes. 
48// (**) The result is a trade-off: it's possible to improve it by 49// 10% (or by 1 cycle per round), but at the cost of 20% loss 50// on Cortex-A53 (or by 4 cycles per round). 51// (***) Super-impressive coefficients over gcc-generated code are 52// indication of some compiler "pathology", most notably code 53// generated with -mgeneral-regs-only is significantly faster 54// and the gap is only 40-90%. 55 56#ifndef __KERNEL__ 57# include <openssl/arm_arch.h> 58#endif 59 60.text 61 62 63.hidden OPENSSL_armcap_P 64.globl sha512_block_data_order 65.hidden sha512_block_data_order 66.type sha512_block_data_order,%function 67.align 6 68sha512_block_data_order: 69 AARCH64_VALID_CALL_TARGET 70#ifndef __KERNEL__ 71#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 72 adrp x16,:pg_hi21_nc:OPENSSL_armcap_P 73#else 74 adrp x16,OPENSSL_armcap_P 75#endif 76 ldr w16,[x16,:lo12:OPENSSL_armcap_P] 77 tst w16,#ARMV8_SHA512 78 b.ne .Lv8_entry 79#endif 80 AARCH64_SIGN_LINK_REGISTER 81 stp x29,x30,[sp,#-128]! 
82 add x29,sp,#0 83 84 stp x19,x20,[sp,#16] 85 stp x21,x22,[sp,#32] 86 stp x23,x24,[sp,#48] 87 stp x25,x26,[sp,#64] 88 stp x27,x28,[sp,#80] 89 sub sp,sp,#4*8 90 91 ldp x20,x21,[x0] // load context 92 ldp x22,x23,[x0,#2*8] 93 ldp x24,x25,[x0,#4*8] 94 add x2,x1,x2,lsl#7 // end of input 95 ldp x26,x27,[x0,#6*8] 96 adrp x30,.LK512 97 add x30,x30,:lo12:.LK512 98 stp x0,x2,[x29,#96] 99 100.Loop: 101 ldp x3,x4,[x1],#2*8 102 ldr x19,[x30],#8 // *K++ 103 eor x28,x21,x22 // magic seed 104 str x1,[x29,#112] 105#ifndef __AARCH64EB__ 106 rev x3,x3 // 0 107#endif 108 ror x16,x24,#14 109 add x27,x27,x19 // h+=K[i] 110 eor x6,x24,x24,ror#23 111 and x17,x25,x24 112 bic x19,x26,x24 113 add x27,x27,x3 // h+=X[i] 114 orr x17,x17,x19 // Ch(e,f,g) 115 eor x19,x20,x21 // a^b, b^c in next round 116 eor x16,x16,x6,ror#18 // Sigma1(e) 117 ror x6,x20,#28 118 add x27,x27,x17 // h+=Ch(e,f,g) 119 eor x17,x20,x20,ror#5 120 add x27,x27,x16 // h+=Sigma1(e) 121 and x28,x28,x19 // (b^c)&=(a^b) 122 add x23,x23,x27 // d+=h 123 eor x28,x28,x21 // Maj(a,b,c) 124 eor x17,x6,x17,ror#34 // Sigma0(a) 125 add x27,x27,x28 // h+=Maj(a,b,c) 126 ldr x28,[x30],#8 // *K++, x19 in next round 127 //add x27,x27,x17 // h+=Sigma0(a) 128#ifndef __AARCH64EB__ 129 rev x4,x4 // 1 130#endif 131 ldp x5,x6,[x1],#2*8 132 add x27,x27,x17 // h+=Sigma0(a) 133 ror x16,x23,#14 134 add x26,x26,x28 // h+=K[i] 135 eor x7,x23,x23,ror#23 136 and x17,x24,x23 137 bic x28,x25,x23 138 add x26,x26,x4 // h+=X[i] 139 orr x17,x17,x28 // Ch(e,f,g) 140 eor x28,x27,x20 // a^b, b^c in next round 141 eor x16,x16,x7,ror#18 // Sigma1(e) 142 ror x7,x27,#28 143 add x26,x26,x17 // h+=Ch(e,f,g) 144 eor x17,x27,x27,ror#5 145 add x26,x26,x16 // h+=Sigma1(e) 146 and x19,x19,x28 // (b^c)&=(a^b) 147 add x22,x22,x26 // d+=h 148 eor x19,x19,x20 // Maj(a,b,c) 149 eor x17,x7,x17,ror#34 // Sigma0(a) 150 add x26,x26,x19 // h+=Maj(a,b,c) 151 ldr x19,[x30],#8 // *K++, x28 in next round 152 //add x26,x26,x17 // h+=Sigma0(a) 153#ifndef __AARCH64EB__ 154 rev x5,x5 // 2 
155#endif 156 add x26,x26,x17 // h+=Sigma0(a) 157 ror x16,x22,#14 158 add x25,x25,x19 // h+=K[i] 159 eor x8,x22,x22,ror#23 160 and x17,x23,x22 161 bic x19,x24,x22 162 add x25,x25,x5 // h+=X[i] 163 orr x17,x17,x19 // Ch(e,f,g) 164 eor x19,x26,x27 // a^b, b^c in next round 165 eor x16,x16,x8,ror#18 // Sigma1(e) 166 ror x8,x26,#28 167 add x25,x25,x17 // h+=Ch(e,f,g) 168 eor x17,x26,x26,ror#5 169 add x25,x25,x16 // h+=Sigma1(e) 170 and x28,x28,x19 // (b^c)&=(a^b) 171 add x21,x21,x25 // d+=h 172 eor x28,x28,x27 // Maj(a,b,c) 173 eor x17,x8,x17,ror#34 // Sigma0(a) 174 add x25,x25,x28 // h+=Maj(a,b,c) 175 ldr x28,[x30],#8 // *K++, x19 in next round 176 //add x25,x25,x17 // h+=Sigma0(a) 177#ifndef __AARCH64EB__ 178 rev x6,x6 // 3 179#endif 180 ldp x7,x8,[x1],#2*8 181 add x25,x25,x17 // h+=Sigma0(a) 182 ror x16,x21,#14 183 add x24,x24,x28 // h+=K[i] 184 eor x9,x21,x21,ror#23 185 and x17,x22,x21 186 bic x28,x23,x21 187 add x24,x24,x6 // h+=X[i] 188 orr x17,x17,x28 // Ch(e,f,g) 189 eor x28,x25,x26 // a^b, b^c in next round 190 eor x16,x16,x9,ror#18 // Sigma1(e) 191 ror x9,x25,#28 192 add x24,x24,x17 // h+=Ch(e,f,g) 193 eor x17,x25,x25,ror#5 194 add x24,x24,x16 // h+=Sigma1(e) 195 and x19,x19,x28 // (b^c)&=(a^b) 196 add x20,x20,x24 // d+=h 197 eor x19,x19,x26 // Maj(a,b,c) 198 eor x17,x9,x17,ror#34 // Sigma0(a) 199 add x24,x24,x19 // h+=Maj(a,b,c) 200 ldr x19,[x30],#8 // *K++, x28 in next round 201 //add x24,x24,x17 // h+=Sigma0(a) 202#ifndef __AARCH64EB__ 203 rev x7,x7 // 4 204#endif 205 add x24,x24,x17 // h+=Sigma0(a) 206 ror x16,x20,#14 207 add x23,x23,x19 // h+=K[i] 208 eor x10,x20,x20,ror#23 209 and x17,x21,x20 210 bic x19,x22,x20 211 add x23,x23,x7 // h+=X[i] 212 orr x17,x17,x19 // Ch(e,f,g) 213 eor x19,x24,x25 // a^b, b^c in next round 214 eor x16,x16,x10,ror#18 // Sigma1(e) 215 ror x10,x24,#28 216 add x23,x23,x17 // h+=Ch(e,f,g) 217 eor x17,x24,x24,ror#5 218 add x23,x23,x16 // h+=Sigma1(e) 219 and x28,x28,x19 // (b^c)&=(a^b) 220 add x27,x27,x23 // d+=h 221 eor 
x28,x28,x25 // Maj(a,b,c) 222 eor x17,x10,x17,ror#34 // Sigma0(a) 223 add x23,x23,x28 // h+=Maj(a,b,c) 224 ldr x28,[x30],#8 // *K++, x19 in next round 225 //add x23,x23,x17 // h+=Sigma0(a) 226#ifndef __AARCH64EB__ 227 rev x8,x8 // 5 228#endif 229 ldp x9,x10,[x1],#2*8 230 add x23,x23,x17 // h+=Sigma0(a) 231 ror x16,x27,#14 232 add x22,x22,x28 // h+=K[i] 233 eor x11,x27,x27,ror#23 234 and x17,x20,x27 235 bic x28,x21,x27 236 add x22,x22,x8 // h+=X[i] 237 orr x17,x17,x28 // Ch(e,f,g) 238 eor x28,x23,x24 // a^b, b^c in next round 239 eor x16,x16,x11,ror#18 // Sigma1(e) 240 ror x11,x23,#28 241 add x22,x22,x17 // h+=Ch(e,f,g) 242 eor x17,x23,x23,ror#5 243 add x22,x22,x16 // h+=Sigma1(e) 244 and x19,x19,x28 // (b^c)&=(a^b) 245 add x26,x26,x22 // d+=h 246 eor x19,x19,x24 // Maj(a,b,c) 247 eor x17,x11,x17,ror#34 // Sigma0(a) 248 add x22,x22,x19 // h+=Maj(a,b,c) 249 ldr x19,[x30],#8 // *K++, x28 in next round 250 //add x22,x22,x17 // h+=Sigma0(a) 251#ifndef __AARCH64EB__ 252 rev x9,x9 // 6 253#endif 254 add x22,x22,x17 // h+=Sigma0(a) 255 ror x16,x26,#14 256 add x21,x21,x19 // h+=K[i] 257 eor x12,x26,x26,ror#23 258 and x17,x27,x26 259 bic x19,x20,x26 260 add x21,x21,x9 // h+=X[i] 261 orr x17,x17,x19 // Ch(e,f,g) 262 eor x19,x22,x23 // a^b, b^c in next round 263 eor x16,x16,x12,ror#18 // Sigma1(e) 264 ror x12,x22,#28 265 add x21,x21,x17 // h+=Ch(e,f,g) 266 eor x17,x22,x22,ror#5 267 add x21,x21,x16 // h+=Sigma1(e) 268 and x28,x28,x19 // (b^c)&=(a^b) 269 add x25,x25,x21 // d+=h 270 eor x28,x28,x23 // Maj(a,b,c) 271 eor x17,x12,x17,ror#34 // Sigma0(a) 272 add x21,x21,x28 // h+=Maj(a,b,c) 273 ldr x28,[x30],#8 // *K++, x19 in next round 274 //add x21,x21,x17 // h+=Sigma0(a) 275#ifndef __AARCH64EB__ 276 rev x10,x10 // 7 277#endif 278 ldp x11,x12,[x1],#2*8 279 add x21,x21,x17 // h+=Sigma0(a) 280 ror x16,x25,#14 281 add x20,x20,x28 // h+=K[i] 282 eor x13,x25,x25,ror#23 283 and x17,x26,x25 284 bic x28,x27,x25 285 add x20,x20,x10 // h+=X[i] 286 orr x17,x17,x28 // Ch(e,f,g) 287 eor 
x28,x21,x22 // a^b, b^c in next round 288 eor x16,x16,x13,ror#18 // Sigma1(e) 289 ror x13,x21,#28 290 add x20,x20,x17 // h+=Ch(e,f,g) 291 eor x17,x21,x21,ror#5 292 add x20,x20,x16 // h+=Sigma1(e) 293 and x19,x19,x28 // (b^c)&=(a^b) 294 add x24,x24,x20 // d+=h 295 eor x19,x19,x22 // Maj(a,b,c) 296 eor x17,x13,x17,ror#34 // Sigma0(a) 297 add x20,x20,x19 // h+=Maj(a,b,c) 298 ldr x19,[x30],#8 // *K++, x28 in next round 299 //add x20,x20,x17 // h+=Sigma0(a) 300#ifndef __AARCH64EB__ 301 rev x11,x11 // 8 302#endif 303 add x20,x20,x17 // h+=Sigma0(a) 304 ror x16,x24,#14 305 add x27,x27,x19 // h+=K[i] 306 eor x14,x24,x24,ror#23 307 and x17,x25,x24 308 bic x19,x26,x24 309 add x27,x27,x11 // h+=X[i] 310 orr x17,x17,x19 // Ch(e,f,g) 311 eor x19,x20,x21 // a^b, b^c in next round 312 eor x16,x16,x14,ror#18 // Sigma1(e) 313 ror x14,x20,#28 314 add x27,x27,x17 // h+=Ch(e,f,g) 315 eor x17,x20,x20,ror#5 316 add x27,x27,x16 // h+=Sigma1(e) 317 and x28,x28,x19 // (b^c)&=(a^b) 318 add x23,x23,x27 // d+=h 319 eor x28,x28,x21 // Maj(a,b,c) 320 eor x17,x14,x17,ror#34 // Sigma0(a) 321 add x27,x27,x28 // h+=Maj(a,b,c) 322 ldr x28,[x30],#8 // *K++, x19 in next round 323 //add x27,x27,x17 // h+=Sigma0(a) 324#ifndef __AARCH64EB__ 325 rev x12,x12 // 9 326#endif 327 ldp x13,x14,[x1],#2*8 328 add x27,x27,x17 // h+=Sigma0(a) 329 ror x16,x23,#14 330 add x26,x26,x28 // h+=K[i] 331 eor x15,x23,x23,ror#23 332 and x17,x24,x23 333 bic x28,x25,x23 334 add x26,x26,x12 // h+=X[i] 335 orr x17,x17,x28 // Ch(e,f,g) 336 eor x28,x27,x20 // a^b, b^c in next round 337 eor x16,x16,x15,ror#18 // Sigma1(e) 338 ror x15,x27,#28 339 add x26,x26,x17 // h+=Ch(e,f,g) 340 eor x17,x27,x27,ror#5 341 add x26,x26,x16 // h+=Sigma1(e) 342 and x19,x19,x28 // (b^c)&=(a^b) 343 add x22,x22,x26 // d+=h 344 eor x19,x19,x20 // Maj(a,b,c) 345 eor x17,x15,x17,ror#34 // Sigma0(a) 346 add x26,x26,x19 // h+=Maj(a,b,c) 347 ldr x19,[x30],#8 // *K++, x28 in next round 348 //add x26,x26,x17 // h+=Sigma0(a) 349#ifndef __AARCH64EB__ 350 rev 
x13,x13 // 10 351#endif 352 add x26,x26,x17 // h+=Sigma0(a) 353 ror x16,x22,#14 354 add x25,x25,x19 // h+=K[i] 355 eor x0,x22,x22,ror#23 356 and x17,x23,x22 357 bic x19,x24,x22 358 add x25,x25,x13 // h+=X[i] 359 orr x17,x17,x19 // Ch(e,f,g) 360 eor x19,x26,x27 // a^b, b^c in next round 361 eor x16,x16,x0,ror#18 // Sigma1(e) 362 ror x0,x26,#28 363 add x25,x25,x17 // h+=Ch(e,f,g) 364 eor x17,x26,x26,ror#5 365 add x25,x25,x16 // h+=Sigma1(e) 366 and x28,x28,x19 // (b^c)&=(a^b) 367 add x21,x21,x25 // d+=h 368 eor x28,x28,x27 // Maj(a,b,c) 369 eor x17,x0,x17,ror#34 // Sigma0(a) 370 add x25,x25,x28 // h+=Maj(a,b,c) 371 ldr x28,[x30],#8 // *K++, x19 in next round 372 //add x25,x25,x17 // h+=Sigma0(a) 373#ifndef __AARCH64EB__ 374 rev x14,x14 // 11 375#endif 376 ldp x15,x0,[x1],#2*8 377 add x25,x25,x17 // h+=Sigma0(a) 378 str x6,[sp,#24] 379 ror x16,x21,#14 380 add x24,x24,x28 // h+=K[i] 381 eor x6,x21,x21,ror#23 382 and x17,x22,x21 383 bic x28,x23,x21 384 add x24,x24,x14 // h+=X[i] 385 orr x17,x17,x28 // Ch(e,f,g) 386 eor x28,x25,x26 // a^b, b^c in next round 387 eor x16,x16,x6,ror#18 // Sigma1(e) 388 ror x6,x25,#28 389 add x24,x24,x17 // h+=Ch(e,f,g) 390 eor x17,x25,x25,ror#5 391 add x24,x24,x16 // h+=Sigma1(e) 392 and x19,x19,x28 // (b^c)&=(a^b) 393 add x20,x20,x24 // d+=h 394 eor x19,x19,x26 // Maj(a,b,c) 395 eor x17,x6,x17,ror#34 // Sigma0(a) 396 add x24,x24,x19 // h+=Maj(a,b,c) 397 ldr x19,[x30],#8 // *K++, x28 in next round 398 //add x24,x24,x17 // h+=Sigma0(a) 399#ifndef __AARCH64EB__ 400 rev x15,x15 // 12 401#endif 402 add x24,x24,x17 // h+=Sigma0(a) 403 str x7,[sp,#0] 404 ror x16,x20,#14 405 add x23,x23,x19 // h+=K[i] 406 eor x7,x20,x20,ror#23 407 and x17,x21,x20 408 bic x19,x22,x20 409 add x23,x23,x15 // h+=X[i] 410 orr x17,x17,x19 // Ch(e,f,g) 411 eor x19,x24,x25 // a^b, b^c in next round 412 eor x16,x16,x7,ror#18 // Sigma1(e) 413 ror x7,x24,#28 414 add x23,x23,x17 // h+=Ch(e,f,g) 415 eor x17,x24,x24,ror#5 416 add x23,x23,x16 // h+=Sigma1(e) 417 and x28,x28,x19 
// (b^c)&=(a^b) 418 add x27,x27,x23 // d+=h 419 eor x28,x28,x25 // Maj(a,b,c) 420 eor x17,x7,x17,ror#34 // Sigma0(a) 421 add x23,x23,x28 // h+=Maj(a,b,c) 422 ldr x28,[x30],#8 // *K++, x19 in next round 423 //add x23,x23,x17 // h+=Sigma0(a) 424#ifndef __AARCH64EB__ 425 rev x0,x0 // 13 426#endif 427 ldp x1,x2,[x1] 428 add x23,x23,x17 // h+=Sigma0(a) 429 str x8,[sp,#8] 430 ror x16,x27,#14 431 add x22,x22,x28 // h+=K[i] 432 eor x8,x27,x27,ror#23 433 and x17,x20,x27 434 bic x28,x21,x27 435 add x22,x22,x0 // h+=X[i] 436 orr x17,x17,x28 // Ch(e,f,g) 437 eor x28,x23,x24 // a^b, b^c in next round 438 eor x16,x16,x8,ror#18 // Sigma1(e) 439 ror x8,x23,#28 440 add x22,x22,x17 // h+=Ch(e,f,g) 441 eor x17,x23,x23,ror#5 442 add x22,x22,x16 // h+=Sigma1(e) 443 and x19,x19,x28 // (b^c)&=(a^b) 444 add x26,x26,x22 // d+=h 445 eor x19,x19,x24 // Maj(a,b,c) 446 eor x17,x8,x17,ror#34 // Sigma0(a) 447 add x22,x22,x19 // h+=Maj(a,b,c) 448 ldr x19,[x30],#8 // *K++, x28 in next round 449 //add x22,x22,x17 // h+=Sigma0(a) 450#ifndef __AARCH64EB__ 451 rev x1,x1 // 14 452#endif 453 ldr x6,[sp,#24] 454 add x22,x22,x17 // h+=Sigma0(a) 455 str x9,[sp,#16] 456 ror x16,x26,#14 457 add x21,x21,x19 // h+=K[i] 458 eor x9,x26,x26,ror#23 459 and x17,x27,x26 460 bic x19,x20,x26 461 add x21,x21,x1 // h+=X[i] 462 orr x17,x17,x19 // Ch(e,f,g) 463 eor x19,x22,x23 // a^b, b^c in next round 464 eor x16,x16,x9,ror#18 // Sigma1(e) 465 ror x9,x22,#28 466 add x21,x21,x17 // h+=Ch(e,f,g) 467 eor x17,x22,x22,ror#5 468 add x21,x21,x16 // h+=Sigma1(e) 469 and x28,x28,x19 // (b^c)&=(a^b) 470 add x25,x25,x21 // d+=h 471 eor x28,x28,x23 // Maj(a,b,c) 472 eor x17,x9,x17,ror#34 // Sigma0(a) 473 add x21,x21,x28 // h+=Maj(a,b,c) 474 ldr x28,[x30],#8 // *K++, x19 in next round 475 //add x21,x21,x17 // h+=Sigma0(a) 476#ifndef __AARCH64EB__ 477 rev x2,x2 // 15 478#endif 479 ldr x7,[sp,#0] 480 add x21,x21,x17 // h+=Sigma0(a) 481 str x10,[sp,#24] 482 ror x16,x25,#14 483 add x20,x20,x28 // h+=K[i] 484 ror x9,x4,#1 485 and 
x17,x26,x25 486 ror x8,x1,#19 487 bic x28,x27,x25 488 ror x10,x21,#28 489 add x20,x20,x2 // h+=X[i] 490 eor x16,x16,x25,ror#18 491 eor x9,x9,x4,ror#8 492 orr x17,x17,x28 // Ch(e,f,g) 493 eor x28,x21,x22 // a^b, b^c in next round 494 eor x16,x16,x25,ror#41 // Sigma1(e) 495 eor x10,x10,x21,ror#34 496 add x20,x20,x17 // h+=Ch(e,f,g) 497 and x19,x19,x28 // (b^c)&=(a^b) 498 eor x8,x8,x1,ror#61 499 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 500 add x20,x20,x16 // h+=Sigma1(e) 501 eor x19,x19,x22 // Maj(a,b,c) 502 eor x17,x10,x21,ror#39 // Sigma0(a) 503 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 504 add x3,x3,x12 505 add x24,x24,x20 // d+=h 506 add x20,x20,x19 // h+=Maj(a,b,c) 507 ldr x19,[x30],#8 // *K++, x28 in next round 508 add x3,x3,x9 509 add x20,x20,x17 // h+=Sigma0(a) 510 add x3,x3,x8 511.Loop_16_xx: 512 ldr x8,[sp,#8] 513 str x11,[sp,#0] 514 ror x16,x24,#14 515 add x27,x27,x19 // h+=K[i] 516 ror x10,x5,#1 517 and x17,x25,x24 518 ror x9,x2,#19 519 bic x19,x26,x24 520 ror x11,x20,#28 521 add x27,x27,x3 // h+=X[i] 522 eor x16,x16,x24,ror#18 523 eor x10,x10,x5,ror#8 524 orr x17,x17,x19 // Ch(e,f,g) 525 eor x19,x20,x21 // a^b, b^c in next round 526 eor x16,x16,x24,ror#41 // Sigma1(e) 527 eor x11,x11,x20,ror#34 528 add x27,x27,x17 // h+=Ch(e,f,g) 529 and x28,x28,x19 // (b^c)&=(a^b) 530 eor x9,x9,x2,ror#61 531 eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) 532 add x27,x27,x16 // h+=Sigma1(e) 533 eor x28,x28,x21 // Maj(a,b,c) 534 eor x17,x11,x20,ror#39 // Sigma0(a) 535 eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) 536 add x4,x4,x13 537 add x23,x23,x27 // d+=h 538 add x27,x27,x28 // h+=Maj(a,b,c) 539 ldr x28,[x30],#8 // *K++, x19 in next round 540 add x4,x4,x10 541 add x27,x27,x17 // h+=Sigma0(a) 542 add x4,x4,x9 543 ldr x9,[sp,#16] 544 str x12,[sp,#8] 545 ror x16,x23,#14 546 add x26,x26,x28 // h+=K[i] 547 ror x11,x6,#1 548 and x17,x24,x23 549 ror x10,x3,#19 550 bic x28,x25,x23 551 ror x12,x27,#28 552 add x26,x26,x4 // h+=X[i] 553 eor x16,x16,x23,ror#18 554 eor x11,x11,x6,ror#8 555 orr x17,x17,x28 
// Ch(e,f,g) 556 eor x28,x27,x20 // a^b, b^c in next round 557 eor x16,x16,x23,ror#41 // Sigma1(e) 558 eor x12,x12,x27,ror#34 559 add x26,x26,x17 // h+=Ch(e,f,g) 560 and x19,x19,x28 // (b^c)&=(a^b) 561 eor x10,x10,x3,ror#61 562 eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) 563 add x26,x26,x16 // h+=Sigma1(e) 564 eor x19,x19,x20 // Maj(a,b,c) 565 eor x17,x12,x27,ror#39 // Sigma0(a) 566 eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) 567 add x5,x5,x14 568 add x22,x22,x26 // d+=h 569 add x26,x26,x19 // h+=Maj(a,b,c) 570 ldr x19,[x30],#8 // *K++, x28 in next round 571 add x5,x5,x11 572 add x26,x26,x17 // h+=Sigma0(a) 573 add x5,x5,x10 574 ldr x10,[sp,#24] 575 str x13,[sp,#16] 576 ror x16,x22,#14 577 add x25,x25,x19 // h+=K[i] 578 ror x12,x7,#1 579 and x17,x23,x22 580 ror x11,x4,#19 581 bic x19,x24,x22 582 ror x13,x26,#28 583 add x25,x25,x5 // h+=X[i] 584 eor x16,x16,x22,ror#18 585 eor x12,x12,x7,ror#8 586 orr x17,x17,x19 // Ch(e,f,g) 587 eor x19,x26,x27 // a^b, b^c in next round 588 eor x16,x16,x22,ror#41 // Sigma1(e) 589 eor x13,x13,x26,ror#34 590 add x25,x25,x17 // h+=Ch(e,f,g) 591 and x28,x28,x19 // (b^c)&=(a^b) 592 eor x11,x11,x4,ror#61 593 eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) 594 add x25,x25,x16 // h+=Sigma1(e) 595 eor x28,x28,x27 // Maj(a,b,c) 596 eor x17,x13,x26,ror#39 // Sigma0(a) 597 eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) 598 add x6,x6,x15 599 add x21,x21,x25 // d+=h 600 add x25,x25,x28 // h+=Maj(a,b,c) 601 ldr x28,[x30],#8 // *K++, x19 in next round 602 add x6,x6,x12 603 add x25,x25,x17 // h+=Sigma0(a) 604 add x6,x6,x11 605 ldr x11,[sp,#0] 606 str x14,[sp,#24] 607 ror x16,x21,#14 608 add x24,x24,x28 // h+=K[i] 609 ror x13,x8,#1 610 and x17,x22,x21 611 ror x12,x5,#19 612 bic x28,x23,x21 613 ror x14,x25,#28 614 add x24,x24,x6 // h+=X[i] 615 eor x16,x16,x21,ror#18 616 eor x13,x13,x8,ror#8 617 orr x17,x17,x28 // Ch(e,f,g) 618 eor x28,x25,x26 // a^b, b^c in next round 619 eor x16,x16,x21,ror#41 // Sigma1(e) 620 eor x14,x14,x25,ror#34 621 add x24,x24,x17 // h+=Ch(e,f,g) 622 
and x19,x19,x28 // (b^c)&=(a^b) 623 eor x12,x12,x5,ror#61 624 eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) 625 add x24,x24,x16 // h+=Sigma1(e) 626 eor x19,x19,x26 // Maj(a,b,c) 627 eor x17,x14,x25,ror#39 // Sigma0(a) 628 eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) 629 add x7,x7,x0 630 add x20,x20,x24 // d+=h 631 add x24,x24,x19 // h+=Maj(a,b,c) 632 ldr x19,[x30],#8 // *K++, x28 in next round 633 add x7,x7,x13 634 add x24,x24,x17 // h+=Sigma0(a) 635 add x7,x7,x12 636 ldr x12,[sp,#8] 637 str x15,[sp,#0] 638 ror x16,x20,#14 639 add x23,x23,x19 // h+=K[i] 640 ror x14,x9,#1 641 and x17,x21,x20 642 ror x13,x6,#19 643 bic x19,x22,x20 644 ror x15,x24,#28 645 add x23,x23,x7 // h+=X[i] 646 eor x16,x16,x20,ror#18 647 eor x14,x14,x9,ror#8 648 orr x17,x17,x19 // Ch(e,f,g) 649 eor x19,x24,x25 // a^b, b^c in next round 650 eor x16,x16,x20,ror#41 // Sigma1(e) 651 eor x15,x15,x24,ror#34 652 add x23,x23,x17 // h+=Ch(e,f,g) 653 and x28,x28,x19 // (b^c)&=(a^b) 654 eor x13,x13,x6,ror#61 655 eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) 656 add x23,x23,x16 // h+=Sigma1(e) 657 eor x28,x28,x25 // Maj(a,b,c) 658 eor x17,x15,x24,ror#39 // Sigma0(a) 659 eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) 660 add x8,x8,x1 661 add x27,x27,x23 // d+=h 662 add x23,x23,x28 // h+=Maj(a,b,c) 663 ldr x28,[x30],#8 // *K++, x19 in next round 664 add x8,x8,x14 665 add x23,x23,x17 // h+=Sigma0(a) 666 add x8,x8,x13 667 ldr x13,[sp,#16] 668 str x0,[sp,#8] 669 ror x16,x27,#14 670 add x22,x22,x28 // h+=K[i] 671 ror x15,x10,#1 672 and x17,x20,x27 673 ror x14,x7,#19 674 bic x28,x21,x27 675 ror x0,x23,#28 676 add x22,x22,x8 // h+=X[i] 677 eor x16,x16,x27,ror#18 678 eor x15,x15,x10,ror#8 679 orr x17,x17,x28 // Ch(e,f,g) 680 eor x28,x23,x24 // a^b, b^c in next round 681 eor x16,x16,x27,ror#41 // Sigma1(e) 682 eor x0,x0,x23,ror#34 683 add x22,x22,x17 // h+=Ch(e,f,g) 684 and x19,x19,x28 // (b^c)&=(a^b) 685 eor x14,x14,x7,ror#61 686 eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) 687 add x22,x22,x16 // h+=Sigma1(e) 688 eor x19,x19,x24 // Maj(a,b,c) 
689 eor x17,x0,x23,ror#39 // Sigma0(a) 690 eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) 691 add x9,x9,x2 692 add x26,x26,x22 // d+=h 693 add x22,x22,x19 // h+=Maj(a,b,c) 694 ldr x19,[x30],#8 // *K++, x28 in next round 695 add x9,x9,x15 696 add x22,x22,x17 // h+=Sigma0(a) 697 add x9,x9,x14 698 ldr x14,[sp,#24] 699 str x1,[sp,#16] 700 ror x16,x26,#14 701 add x21,x21,x19 // h+=K[i] 702 ror x0,x11,#1 703 and x17,x27,x26 704 ror x15,x8,#19 705 bic x19,x20,x26 706 ror x1,x22,#28 707 add x21,x21,x9 // h+=X[i] 708 eor x16,x16,x26,ror#18 709 eor x0,x0,x11,ror#8 710 orr x17,x17,x19 // Ch(e,f,g) 711 eor x19,x22,x23 // a^b, b^c in next round 712 eor x16,x16,x26,ror#41 // Sigma1(e) 713 eor x1,x1,x22,ror#34 714 add x21,x21,x17 // h+=Ch(e,f,g) 715 and x28,x28,x19 // (b^c)&=(a^b) 716 eor x15,x15,x8,ror#61 717 eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) 718 add x21,x21,x16 // h+=Sigma1(e) 719 eor x28,x28,x23 // Maj(a,b,c) 720 eor x17,x1,x22,ror#39 // Sigma0(a) 721 eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) 722 add x10,x10,x3 723 add x25,x25,x21 // d+=h 724 add x21,x21,x28 // h+=Maj(a,b,c) 725 ldr x28,[x30],#8 // *K++, x19 in next round 726 add x10,x10,x0 727 add x21,x21,x17 // h+=Sigma0(a) 728 add x10,x10,x15 729 ldr x15,[sp,#0] 730 str x2,[sp,#24] 731 ror x16,x25,#14 732 add x20,x20,x28 // h+=K[i] 733 ror x1,x12,#1 734 and x17,x26,x25 735 ror x0,x9,#19 736 bic x28,x27,x25 737 ror x2,x21,#28 738 add x20,x20,x10 // h+=X[i] 739 eor x16,x16,x25,ror#18 740 eor x1,x1,x12,ror#8 741 orr x17,x17,x28 // Ch(e,f,g) 742 eor x28,x21,x22 // a^b, b^c in next round 743 eor x16,x16,x25,ror#41 // Sigma1(e) 744 eor x2,x2,x21,ror#34 745 add x20,x20,x17 // h+=Ch(e,f,g) 746 and x19,x19,x28 // (b^c)&=(a^b) 747 eor x0,x0,x9,ror#61 748 eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) 749 add x20,x20,x16 // h+=Sigma1(e) 750 eor x19,x19,x22 // Maj(a,b,c) 751 eor x17,x2,x21,ror#39 // Sigma0(a) 752 eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) 753 add x11,x11,x4 754 add x24,x24,x20 // d+=h 755 add x20,x20,x19 // h+=Maj(a,b,c) 756 ldr 
x19,[x30],#8 // *K++, x28 in next round 757 add x11,x11,x1 758 add x20,x20,x17 // h+=Sigma0(a) 759 add x11,x11,x0 760 ldr x0,[sp,#8] 761 str x3,[sp,#0] 762 ror x16,x24,#14 763 add x27,x27,x19 // h+=K[i] 764 ror x2,x13,#1 765 and x17,x25,x24 766 ror x1,x10,#19 767 bic x19,x26,x24 768 ror x3,x20,#28 769 add x27,x27,x11 // h+=X[i] 770 eor x16,x16,x24,ror#18 771 eor x2,x2,x13,ror#8 772 orr x17,x17,x19 // Ch(e,f,g) 773 eor x19,x20,x21 // a^b, b^c in next round 774 eor x16,x16,x24,ror#41 // Sigma1(e) 775 eor x3,x3,x20,ror#34 776 add x27,x27,x17 // h+=Ch(e,f,g) 777 and x28,x28,x19 // (b^c)&=(a^b) 778 eor x1,x1,x10,ror#61 779 eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) 780 add x27,x27,x16 // h+=Sigma1(e) 781 eor x28,x28,x21 // Maj(a,b,c) 782 eor x17,x3,x20,ror#39 // Sigma0(a) 783 eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) 784 add x12,x12,x5 785 add x23,x23,x27 // d+=h 786 add x27,x27,x28 // h+=Maj(a,b,c) 787 ldr x28,[x30],#8 // *K++, x19 in next round 788 add x12,x12,x2 789 add x27,x27,x17 // h+=Sigma0(a) 790 add x12,x12,x1 791 ldr x1,[sp,#16] 792 str x4,[sp,#8] 793 ror x16,x23,#14 794 add x26,x26,x28 // h+=K[i] 795 ror x3,x14,#1 796 and x17,x24,x23 797 ror x2,x11,#19 798 bic x28,x25,x23 799 ror x4,x27,#28 800 add x26,x26,x12 // h+=X[i] 801 eor x16,x16,x23,ror#18 802 eor x3,x3,x14,ror#8 803 orr x17,x17,x28 // Ch(e,f,g) 804 eor x28,x27,x20 // a^b, b^c in next round 805 eor x16,x16,x23,ror#41 // Sigma1(e) 806 eor x4,x4,x27,ror#34 807 add x26,x26,x17 // h+=Ch(e,f,g) 808 and x19,x19,x28 // (b^c)&=(a^b) 809 eor x2,x2,x11,ror#61 810 eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) 811 add x26,x26,x16 // h+=Sigma1(e) 812 eor x19,x19,x20 // Maj(a,b,c) 813 eor x17,x4,x27,ror#39 // Sigma0(a) 814 eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) 815 add x13,x13,x6 816 add x22,x22,x26 // d+=h 817 add x26,x26,x19 // h+=Maj(a,b,c) 818 ldr x19,[x30],#8 // *K++, x28 in next round 819 add x13,x13,x3 820 add x26,x26,x17 // h+=Sigma0(a) 821 add x13,x13,x2 822 ldr x2,[sp,#24] 823 str x5,[sp,#16] 824 ror x16,x22,#14 825 add 
x25,x25,x19 // h+=K[i] 826 ror x4,x15,#1 827 and x17,x23,x22 828 ror x3,x12,#19 829 bic x19,x24,x22 830 ror x5,x26,#28 831 add x25,x25,x13 // h+=X[i] 832 eor x16,x16,x22,ror#18 833 eor x4,x4,x15,ror#8 834 orr x17,x17,x19 // Ch(e,f,g) 835 eor x19,x26,x27 // a^b, b^c in next round 836 eor x16,x16,x22,ror#41 // Sigma1(e) 837 eor x5,x5,x26,ror#34 838 add x25,x25,x17 // h+=Ch(e,f,g) 839 and x28,x28,x19 // (b^c)&=(a^b) 840 eor x3,x3,x12,ror#61 841 eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) 842 add x25,x25,x16 // h+=Sigma1(e) 843 eor x28,x28,x27 // Maj(a,b,c) 844 eor x17,x5,x26,ror#39 // Sigma0(a) 845 eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) 846 add x14,x14,x7 847 add x21,x21,x25 // d+=h 848 add x25,x25,x28 // h+=Maj(a,b,c) 849 ldr x28,[x30],#8 // *K++, x19 in next round 850 add x14,x14,x4 851 add x25,x25,x17 // h+=Sigma0(a) 852 add x14,x14,x3 853 ldr x3,[sp,#0] 854 str x6,[sp,#24] 855 ror x16,x21,#14 856 add x24,x24,x28 // h+=K[i] 857 ror x5,x0,#1 858 and x17,x22,x21 859 ror x4,x13,#19 860 bic x28,x23,x21 861 ror x6,x25,#28 862 add x24,x24,x14 // h+=X[i] 863 eor x16,x16,x21,ror#18 864 eor x5,x5,x0,ror#8 865 orr x17,x17,x28 // Ch(e,f,g) 866 eor x28,x25,x26 // a^b, b^c in next round 867 eor x16,x16,x21,ror#41 // Sigma1(e) 868 eor x6,x6,x25,ror#34 869 add x24,x24,x17 // h+=Ch(e,f,g) 870 and x19,x19,x28 // (b^c)&=(a^b) 871 eor x4,x4,x13,ror#61 872 eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) 873 add x24,x24,x16 // h+=Sigma1(e) 874 eor x19,x19,x26 // Maj(a,b,c) 875 eor x17,x6,x25,ror#39 // Sigma0(a) 876 eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) 877 add x15,x15,x8 878 add x20,x20,x24 // d+=h 879 add x24,x24,x19 // h+=Maj(a,b,c) 880 ldr x19,[x30],#8 // *K++, x28 in next round 881 add x15,x15,x5 882 add x24,x24,x17 // h+=Sigma0(a) 883 add x15,x15,x4 884 ldr x4,[sp,#8] 885 str x7,[sp,#0] 886 ror x16,x20,#14 887 add x23,x23,x19 // h+=K[i] 888 ror x6,x1,#1 889 and x17,x21,x20 890 ror x5,x14,#19 891 bic x19,x22,x20 892 ror x7,x24,#28 893 add x23,x23,x15 // h+=X[i] 894 eor x16,x16,x20,ror#18 895 eor 
x6,x6,x1,ror#8 896 orr x17,x17,x19 // Ch(e,f,g) 897 eor x19,x24,x25 // a^b, b^c in next round 898 eor x16,x16,x20,ror#41 // Sigma1(e) 899 eor x7,x7,x24,ror#34 900 add x23,x23,x17 // h+=Ch(e,f,g) 901 and x28,x28,x19 // (b^c)&=(a^b) 902 eor x5,x5,x14,ror#61 903 eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) 904 add x23,x23,x16 // h+=Sigma1(e) 905 eor x28,x28,x25 // Maj(a,b,c) 906 eor x17,x7,x24,ror#39 // Sigma0(a) 907 eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) 908 add x0,x0,x9 909 add x27,x27,x23 // d+=h 910 add x23,x23,x28 // h+=Maj(a,b,c) 911 ldr x28,[x30],#8 // *K++, x19 in next round 912 add x0,x0,x6 913 add x23,x23,x17 // h+=Sigma0(a) 914 add x0,x0,x5 915 ldr x5,[sp,#16] 916 str x8,[sp,#8] 917 ror x16,x27,#14 918 add x22,x22,x28 // h+=K[i] 919 ror x7,x2,#1 920 and x17,x20,x27 921 ror x6,x15,#19 922 bic x28,x21,x27 923 ror x8,x23,#28 924 add x22,x22,x0 // h+=X[i] 925 eor x16,x16,x27,ror#18 926 eor x7,x7,x2,ror#8 927 orr x17,x17,x28 // Ch(e,f,g) 928 eor x28,x23,x24 // a^b, b^c in next round 929 eor x16,x16,x27,ror#41 // Sigma1(e) 930 eor x8,x8,x23,ror#34 931 add x22,x22,x17 // h+=Ch(e,f,g) 932 and x19,x19,x28 // (b^c)&=(a^b) 933 eor x6,x6,x15,ror#61 934 eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) 935 add x22,x22,x16 // h+=Sigma1(e) 936 eor x19,x19,x24 // Maj(a,b,c) 937 eor x17,x8,x23,ror#39 // Sigma0(a) 938 eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) 939 add x1,x1,x10 940 add x26,x26,x22 // d+=h 941 add x22,x22,x19 // h+=Maj(a,b,c) 942 ldr x19,[x30],#8 // *K++, x28 in next round 943 add x1,x1,x7 944 add x22,x22,x17 // h+=Sigma0(a) 945 add x1,x1,x6 946 ldr x6,[sp,#24] 947 str x9,[sp,#16] 948 ror x16,x26,#14 949 add x21,x21,x19 // h+=K[i] 950 ror x8,x3,#1 951 and x17,x27,x26 952 ror x7,x0,#19 953 bic x19,x20,x26 954 ror x9,x22,#28 955 add x21,x21,x1 // h+=X[i] 956 eor x16,x16,x26,ror#18 957 eor x8,x8,x3,ror#8 958 orr x17,x17,x19 // Ch(e,f,g) 959 eor x19,x22,x23 // a^b, b^c in next round 960 eor x16,x16,x26,ror#41 // Sigma1(e) 961 eor x9,x9,x22,ror#34 962 add x21,x21,x17 // h+=Ch(e,f,g) 963 
and x28,x28,x19 // (b^c)&=(a^b) 964 eor x7,x7,x0,ror#61 965 eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) 966 add x21,x21,x16 // h+=Sigma1(e) 967 eor x28,x28,x23 // Maj(a,b,c) 968 eor x17,x9,x22,ror#39 // Sigma0(a) 969 eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) 970 add x2,x2,x11 971 add x25,x25,x21 // d+=h 972 add x21,x21,x28 // h+=Maj(a,b,c) 973 ldr x28,[x30],#8 // *K++, x19 in next round 974 add x2,x2,x8 975 add x21,x21,x17 // h+=Sigma0(a) 976 add x2,x2,x7 977 ldr x7,[sp,#0] 978 str x10,[sp,#24] 979 ror x16,x25,#14 980 add x20,x20,x28 // h+=K[i] 981 ror x9,x4,#1 982 and x17,x26,x25 983 ror x8,x1,#19 984 bic x28,x27,x25 985 ror x10,x21,#28 986 add x20,x20,x2 // h+=X[i] 987 eor x16,x16,x25,ror#18 988 eor x9,x9,x4,ror#8 989 orr x17,x17,x28 // Ch(e,f,g) 990 eor x28,x21,x22 // a^b, b^c in next round 991 eor x16,x16,x25,ror#41 // Sigma1(e) 992 eor x10,x10,x21,ror#34 993 add x20,x20,x17 // h+=Ch(e,f,g) 994 and x19,x19,x28 // (b^c)&=(a^b) 995 eor x8,x8,x1,ror#61 996 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 997 add x20,x20,x16 // h+=Sigma1(e) 998 eor x19,x19,x22 // Maj(a,b,c) 999 eor x17,x10,x21,ror#39 // Sigma0(a) 1000 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 1001 add x3,x3,x12 1002 add x24,x24,x20 // d+=h 1003 add x20,x20,x19 // h+=Maj(a,b,c) 1004 ldr x19,[x30],#8 // *K++, x28 in next round 1005 add x3,x3,x9 1006 add x20,x20,x17 // h+=Sigma0(a) 1007 add x3,x3,x8 1008 cbnz x19,.Loop_16_xx 1009 1010 ldp x0,x2,[x29,#96] 1011 ldr x1,[x29,#112] 1012 sub x30,x30,#648 // rewind 1013 1014 ldp x3,x4,[x0] 1015 ldp x5,x6,[x0,#2*8] 1016 add x1,x1,#14*8 // advance input pointer 1017 ldp x7,x8,[x0,#4*8] 1018 add x20,x20,x3 1019 ldp x9,x10,[x0,#6*8] 1020 add x21,x21,x4 1021 add x22,x22,x5 1022 add x23,x23,x6 1023 stp x20,x21,[x0] 1024 add x24,x24,x7 1025 add x25,x25,x8 1026 stp x22,x23,[x0,#2*8] 1027 add x26,x26,x9 1028 add x27,x27,x10 1029 cmp x1,x2 1030 stp x24,x25,[x0,#4*8] 1031 stp x26,x27,[x0,#6*8] 1032 b.ne .Loop 1033 1034 ldp x19,x20,[x29,#16] 1035 add sp,sp,#4*8 1036 ldp x21,x22,[x29,#32] 1037 ldp 
x23,x24,[x29,#48] 1038 ldp x25,x26,[x29,#64] 1039 ldp x27,x28,[x29,#80] 1040 ldp x29,x30,[sp],#128 1041 AARCH64_VALIDATE_LINK_REGISTER 1042 ret 1043.size sha512_block_data_order,.-sha512_block_data_order 1044 1045.section .rodata 1046.align 6 1047.type .LK512,%object 1048.LK512: 1049.quad 0x428a2f98d728ae22,0x7137449123ef65cd 1050.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc 1051.quad 0x3956c25bf348b538,0x59f111f1b605d019 1052.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 1053.quad 0xd807aa98a3030242,0x12835b0145706fbe 1054.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 1055.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 1056.quad 0x9bdc06a725c71235,0xc19bf174cf692694 1057.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 1058.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 1059.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 1060.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 1061.quad 0x983e5152ee66dfab,0xa831c66d2db43210 1062.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 1063.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 1064.quad 0x06ca6351e003826f,0x142929670a0e6e70 1065.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 1066.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df 1067.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 1068.quad 0x81c2c92e47edaee6,0x92722c851482353b 1069.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 1070.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 1071.quad 0xd192e819d6ef5218,0xd69906245565a910 1072.quad 0xf40e35855771202a,0x106aa07032bbd1b8 1073.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 1074.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 1075.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb 1076.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 1077.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 1078.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec 1079.quad 0x90befffa23631e28,0xa4506cebde82bde9 1080.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b 1081.quad 0xca273eceea26619c,0xd186b8c721c0c207 1082.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 1083.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 1084.quad 
0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// terminator
.size	.LK512,.-.LK512
// ASCII, NUL-terminated:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.text
#ifndef	__KERNEL__
//----------------------------------------------------------------------
// sha512_block_armv8 / .Lv8_entry
//
// Variant built on the SHA-512 instructions (emitted as raw .inst words
// with the decoded mnemonic alongside).  sha512_block_data_order
// branches to .Lv8_entry when the ARMV8_SHA512 bit is set in
// OPENSSL_armcap_P, so the register arguments are the ones that
// function received:
//
//   x0  pointer to the 64-byte hash context (loaded and stored as
//       v0-v3)
//   x1  input pointer; 128 bytes are consumed per loop iteration
//   x2  number of 128-byte blocks
//       NOTE(review): there is no guard for x2 == 0; the first block
//       is loaded before the counter is examined - callers presumably
//       never pass zero, confirm against the C caller.
//
// Register use inside the loop:
//   x3       cursor into .LK512, rewound by 80*8 bytes per iteration
//   x4       x1 - 128, candidate for the conditional rewind
//   v0-v4    working state (rotates through five registers)
//   v5-v7    operands built with ext
//   v16-v23  current input block, byte-reversed within each 64-bit lane
//   v24,v25  alternating pairs of table constants
//   v26-v29  copy of v0-v3 taken at the top of the iteration
//
// Only x29 is reloaded on exit: x30 is stored by the entry stp but is
// never written in this function.
//
// Shape of one step (labels in quotes are the generator's own):
//   add  vK,vK,vW          constant pair + message pair
//   ld1  other vK          fetch the following constant pair
//   ext  vK,vK,vK,#8       swap the two 64-bit halves
//   ext  v5,.. / ext v6,.. operands taken from adjacent state registers
//   add  vS,vS,vK          "T1 + H + K512[i]"
//   sha512h vS,v5,v6
//   add  vD,vX,vS          "D + T1"
//   sha512h2 vS,..
// Steps 1-32 additionally run sha512su0/sha512su1 on the message
// registers; steps 33-40 instead reload v16-v23 from [x1].
//----------------------------------------------------------------------
.type	sha512_block_armv8,%function
.align	6
sha512_block_armv8:
.Lv8_entry:
	stp	x29,x30,[sp,#-16]!
	mov	x29,sp

	// first input block (128 bytes) and the current hash state
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

	ld1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]
	adrp	x3,.LK512
	add	x3,x3,:lo12:.LK512

	// byte-reverse every 64-bit lane of the input
	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw

.align	4
.Loop_hw:
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1		// sets flags for the csel below
	sub	x4,x1,#128		// start of the block being processed
	mov	v26.16b,v0.16b		// keep the incoming state for the final add
	mov	v27.16b,v1.16b
	mov	v28.16b,v2.16b
	mov	v29.16b,v3.16b
	// On the last block (x2 == 0) step x1 back, so the "next input"
	// loads in steps 33-40 re-read this block instead of running
	// past the end of the input.
	csel	x1,x1,x4,ne

	// -- step 1: message v16, constants v24 --
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d
	.inst	0xcec08230		// sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a3		// sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678af0		// sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
	.inst	0xce608423		// sha512h2 v3.16b,v1.16b,v0.16b

	// -- step 2: message v17, constants v25 --
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d
	.inst	0xcec08251		// sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a2		// sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a11		// sha512su1 v17.16b,v16.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
	.inst	0xce638402		// sha512h2 v2.16b,v0.16b,v3.16b

	// -- step 3: message v18, constants v24 --
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d
	.inst	0xcec08272		// sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a4		// sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a32		// sha512su1 v18.16b,v17.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
	.inst	0xce628464		// sha512h2 v4.16b,v3.16b,v2.16b

	// -- step 4: message v19, constants v25 --
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d
	.inst	0xcec08293		// sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a1		// sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a53		// sha512su1 v19.16b,v18.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
	.inst	0xce648441		// sha512h2 v1.16b,v2.16b,v4.16b

	// -- step 5: message v20, constants v24 --
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d
	.inst	0xcec082b4		// sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a0		// sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a74		// sha512su1 v20.16b,v19.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
	.inst	0xce618480		// sha512h2 v0.16b,v4.16b,v1.16b

	// -- step 6: message v21, constants v25 --
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d
	.inst	0xcec082d5		// sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a3		// sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a95		// sha512su1 v21.16b,v20.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
	.inst	0xce608423		// sha512h2 v3.16b,v1.16b,v0.16b

	// -- step 7: message v22, constants v24 --
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d
	.inst	0xcec082f6		// sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a2		// sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678ab6		// sha512su1 v22.16b,v21.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
	.inst	0xce638402		// sha512h2 v2.16b,v0.16b,v3.16b

	// -- step 8: message v23, constants v25 --
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d
	.inst	0xcec08217		// sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a4		// sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678ad7		// sha512su1 v23.16b,v22.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
	.inst	0xce628464		// sha512h2 v4.16b,v3.16b,v2.16b

	// -- step 9: message v16, constants v24 --
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d
	.inst	0xcec08230		// sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a1		// sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678af0		// sha512su1 v16.16b,v23.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
	.inst	0xce648441		// sha512h2 v1.16b,v2.16b,v4.16b

	// -- step 10: message v17, constants v25 --
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d
	.inst	0xcec08251		// sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a0		// sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a11		// sha512su1 v17.16b,v16.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
	.inst	0xce618480		// sha512h2 v0.16b,v4.16b,v1.16b

	// -- step 11: message v18, constants v24 --
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d
	.inst	0xcec08272		// sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a3		// sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a32		// sha512su1 v18.16b,v17.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
	.inst	0xce608423		// sha512h2 v3.16b,v1.16b,v0.16b

	// -- step 12: message v19, constants v25 --
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d
	.inst	0xcec08293		// sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a2		// sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a53		// sha512su1 v19.16b,v18.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
	.inst	0xce638402		// sha512h2 v2.16b,v0.16b,v3.16b

	// -- step 13: message v20, constants v24 --
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d
	.inst	0xcec082b4		// sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a4		// sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a74		// sha512su1 v20.16b,v19.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
	.inst	0xce628464		// sha512h2 v4.16b,v3.16b,v2.16b

	// -- step 14: message v21, constants v25 --
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d
	.inst	0xcec082d5		// sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a1		// sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a95		// sha512su1 v21.16b,v20.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
	.inst	0xce648441		// sha512h2 v1.16b,v2.16b,v4.16b

	// -- step 15: message v22, constants v24 --
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d
	.inst	0xcec082f6		// sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a0		// sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678ab6		// sha512su1 v22.16b,v21.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
	.inst	0xce618480		// sha512h2 v0.16b,v4.16b,v1.16b

	// -- step 16: message v23, constants v25 --
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d
	.inst	0xcec08217		// sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a3		// sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678ad7		// sha512su1 v23.16b,v22.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
	.inst	0xce608423		// sha512h2 v3.16b,v1.16b,v0.16b

	// -- step 17: message v16, constants v24 --
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d
	.inst	0xcec08230		// sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a2		// sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678af0		// sha512su1 v16.16b,v23.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
	.inst	0xce638402		// sha512h2 v2.16b,v0.16b,v3.16b

	// -- step 18: message v17, constants v25 --
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d
	.inst	0xcec08251		// sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a4		// sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a11		// sha512su1 v17.16b,v16.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
	.inst	0xce628464		// sha512h2 v4.16b,v3.16b,v2.16b

	// -- step 19: message v18, constants v24 --
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d
	.inst	0xcec08272		// sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a1		// sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a32		// sha512su1 v18.16b,v17.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
	.inst	0xce648441		// sha512h2 v1.16b,v2.16b,v4.16b

	// -- step 20: message v19, constants v25 --
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d
	.inst	0xcec08293		// sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a0		// sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a53		// sha512su1 v19.16b,v18.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
	.inst	0xce618480		// sha512h2 v0.16b,v4.16b,v1.16b

	// -- step 21: message v20, constants v24 --
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d
	.inst	0xcec082b4		// sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a3		// sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a74		// sha512su1 v20.16b,v19.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
	.inst	0xce608423		// sha512h2 v3.16b,v1.16b,v0.16b

	// -- step 22: message v21, constants v25 --
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d
	.inst	0xcec082d5		// sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a2		// sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a95		// sha512su1 v21.16b,v20.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
	.inst	0xce638402		// sha512h2 v2.16b,v0.16b,v3.16b

	// -- step 23: message v22, constants v24 --
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d
	.inst	0xcec082f6		// sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a4		// sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678ab6		// sha512su1 v22.16b,v21.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
	.inst	0xce628464		// sha512h2 v4.16b,v3.16b,v2.16b

	// -- step 24: message v23, constants v25 --
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d
	.inst	0xcec08217		// sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a1		// sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678ad7		// sha512su1 v23.16b,v22.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
	.inst	0xce648441		// sha512h2 v1.16b,v2.16b,v4.16b

	// -- step 25: message v16, constants v24 --
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d
	.inst	0xcec08230		// sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a0		// sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678af0		// sha512su1 v16.16b,v23.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
	.inst	0xce618480		// sha512h2 v0.16b,v4.16b,v1.16b

	// -- step 26: message v17, constants v25 --
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d
	.inst	0xcec08251		// sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a3		// sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a11		// sha512su1 v17.16b,v16.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
	.inst	0xce608423		// sha512h2 v3.16b,v1.16b,v0.16b

	// -- step 27: message v18, constants v24 --
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d
	.inst	0xcec08272		// sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a2		// sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a32		// sha512su1 v18.16b,v17.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
	.inst	0xce638402		// sha512h2 v2.16b,v0.16b,v3.16b

	// -- step 28: message v19, constants v25 --
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d
	.inst	0xcec08293		// sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a4		// sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a53		// sha512su1 v19.16b,v18.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
	.inst	0xce628464		// sha512h2 v4.16b,v3.16b,v2.16b

	// -- step 29: message v20, constants v24 --
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d
	.inst	0xcec082b4		// sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a1		// sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a74		// sha512su1 v20.16b,v19.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
	.inst	0xce648441		// sha512h2 v1.16b,v2.16b,v4.16b

	// -- step 30: message v21, constants v25 --
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d
	.inst	0xcec082d5		// sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a0		// sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a95		// sha512su1 v21.16b,v20.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
	.inst	0xce618480		// sha512h2 v0.16b,v4.16b,v1.16b

	// -- step 31: message v22, constants v24 --
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d
	.inst	0xcec082f6		// sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a3		// sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678ab6		// sha512su1 v22.16b,v21.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
	.inst	0xce608423		// sha512h2 v3.16b,v1.16b,v0.16b

	// -- step 32: message v23, constants v25 --
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d
	.inst	0xcec08217		// sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a2		// sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678ad7		// sha512su1 v23.16b,v22.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
	.inst	0xce638402		// sha512h2 v2.16b,v0.16b,v3.16b

	// Steps 33-40: no sha512su0/su1 any more; each message register
	// is refilled from [x1] (and byte-reversed) once it has been
	// added to its constants.

	// -- step 33: message v16, constants v24 --
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v16.16b},[x1],#16	// input for the following iteration
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d
	.inst	0xce6680a4		// sha512h v4.16b,v5.16b,v6.16b
	rev64	v16.16b,v16.16b
	add	v0.2d,v3.2d,v4.2d
	.inst	0xce628464		// sha512h2 v4.16b,v3.16b,v2.16b

	// -- step 34: message v17, constants v25 --
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v17.2d
	ld1	{v17.16b},[x1],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d
	.inst	0xce6680a1		// sha512h v1.16b,v5.16b,v6.16b
	rev64	v17.16b,v17.16b
	add	v3.2d,v2.2d,v1.2d
	.inst	0xce648441		// sha512h2 v1.16b,v2.16b,v4.16b

	// -- step 35: message v18, constants v24 --
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v18.2d
	ld1	{v18.16b},[x1],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d
	.inst	0xce6680a0		// sha512h v0.16b,v5.16b,v6.16b
	rev64	v18.16b,v18.16b
	add	v2.2d,v4.2d,v0.2d
	.inst	0xce618480		// sha512h2 v0.16b,v4.16b,v1.16b

	// -- step 36: message v19, constants v25 --
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v19.2d
	ld1	{v19.16b},[x1],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d
	.inst	0xce6680a3		// sha512h v3.16b,v5.16b,v6.16b
	rev64	v19.16b,v19.16b
	add	v4.2d,v1.2d,v3.2d
	.inst	0xce608423		// sha512h2 v3.16b,v1.16b,v0.16b

	// -- step 37: message v20, constants v24 --
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v20.2d
	ld1	{v20.16b},[x1],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d
	.inst	0xce6680a2		// sha512h v2.16b,v5.16b,v6.16b
	rev64	v20.16b,v20.16b
	add	v1.2d,v0.2d,v2.2d
	.inst	0xce638402		// sha512h2 v2.16b,v0.16b,v3.16b

	// -- step 38: message v21, constants v25 --
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v21.2d
	ld1	{v21.16b},[x1],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d
	.inst	0xce6680a4		// sha512h v4.16b,v5.16b,v6.16b
	rev64	v21.16b,v21.16b
	add	v0.2d,v3.2d,v4.2d
	.inst	0xce628464		// sha512h2 v4.16b,v3.16b,v2.16b

	// -- step 39: message v22, constants v24 --
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v22.2d
	ld1	{v22.16b},[x1],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d
	.inst	0xce6680a1		// sha512h v1.16b,v5.16b,v6.16b
	rev64	v22.16b,v22.16b
	add	v3.2d,v2.2d,v1.2d
	.inst	0xce648441		// sha512h2 v1.16b,v2.16b,v4.16b

	// -- step 40: message v23, constants v25 --
	sub	x3,x3,#80*8		// 40 loads x 16 bytes: back to .LK512
	add	v25.2d,v25.2d,v23.2d
	ld1	{v23.16b},[x1],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d
	.inst	0xce6680a0		// sha512h v0.16b,v5.16b,v6.16b
	rev64	v23.16b,v23.16b
	add	v2.2d,v4.2d,v0.2d
	.inst	0xce618480		// sha512h2 v0.16b,v4.16b,v1.16b

	// fold the state saved at the top of the iteration back in
	add	v0.2d,v0.2d,v26.2d
	add	v1.2d,v1.2d,v27.2d
	add	v2.2d,v2.2d,v28.2d
	add	v3.2d,v3.2d,v29.2d

	cbnz	x2,.Loop_hw		// more blocks to process

	st1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]	// write the updated context

	ldr	x29,[sp],#16		// pops the whole 16-byte frame; x30 untouched
	ret
.size	sha512_block_armv8,.-sha512_block_armv8
#endif	// !__KERNEL__
#endif	// __aarch64__
#endif  // !OPENSSL_NO_ASM
.section	.note.GNU-stack,"",%progbits