// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch armv8-a+crypto
.section .rodata
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b

.text

.globl aes_hw_set_encrypt_key

.def aes_hw_set_encrypt_key
    .type 32
.endef
.align 5
aes_hw_set_encrypt_key:
Lenc_key:
    // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
    AARCH64_VALID_CALL_TARGET
    stp x29,x30,[sp,#-16]!
    add x29,sp,#0
    mov x3,#-1
    cmp x0,#0
    b.eq Lenc_key_abort
    cmp x2,#0
    b.eq Lenc_key_abort
    mov x3,#-2
    cmp w1,#128
    b.lt Lenc_key_abort
    cmp w1,#256
    b.gt Lenc_key_abort
    tst w1,#0x3f
    b.ne Lenc_key_abort

    adrp x3,Lrcon
    add x3,x3,:lo12:Lrcon
    cmp w1,#192

    eor v0.16b,v0.16b,v0.16b
    ld1 {v3.16b},[x0],#16
    mov w1,#8 // reuse w1
    ld1 {v1.4s,v2.4s},[x3],#32

    b.lt Loop128
    b.eq L192
    b L256

.align 4
Loop128:
    tbl v6.16b,{v3.16b},v2.16b
    ext v5.16b,v0.16b,v3.16b,#12
    st1 {v3.4s},[x2],#16
    aese v6.16b,v0.16b
    subs w1,w1,#1

    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v6.16b,v6.16b,v1.16b
    eor v3.16b,v3.16b,v5.16b
    shl v1.16b,v1.16b,#1
    eor v3.16b,v3.16b,v6.16b
    b.ne Loop128

    ld1 {v1.4s},[x3]

    tbl v6.16b,{v3.16b},v2.16b
    ext v5.16b,v0.16b,v3.16b,#12
    st1 {v3.4s},[x2],#16
    aese v6.16b,v0.16b

    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v6.16b,v6.16b,v1.16b
    eor v3.16b,v3.16b,v5.16b
    shl v1.16b,v1.16b,#1
    eor v3.16b,v3.16b,v6.16b

    tbl v6.16b,{v3.16b},v2.16b
    ext v5.16b,v0.16b,v3.16b,#12
    st1 {v3.4s},[x2],#16
    aese v6.16b,v0.16b

    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v6.16b,v6.16b,v1.16b
    eor v3.16b,v3.16b,v5.16b
    eor v3.16b,v3.16b,v6.16b
    st1 {v3.4s},[x2]
    add x2,x2,#0x50

    mov w12,#10
    b Ldone

.align 4
L192:
    ld1 {v4.8b},[x0],#8
    movi v6.16b,#8 // borrow v6.16b
    st1 {v3.4s},[x2],#16
    sub v2.16b,v2.16b,v6.16b // adjust the mask

Loop192:
    tbl v6.16b,{v4.16b},v2.16b
    ext v5.16b,v0.16b,v3.16b,#12
    st1 {v4.8b},[x2],#8
    aese v6.16b,v0.16b
    subs w1,w1,#1

    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v3.16b,v3.16b,v5.16b

    dup v5.4s,v3.s[3]
    eor v5.16b,v5.16b,v4.16b
    eor v6.16b,v6.16b,v1.16b
    ext v4.16b,v0.16b,v4.16b,#12
    shl v1.16b,v1.16b,#1
    eor v4.16b,v4.16b,v5.16b
    eor v3.16b,v3.16b,v6.16b
    eor v4.16b,v4.16b,v6.16b
    st1 {v3.4s},[x2],#16
    b.ne Loop192

    mov w12,#12
    add x2,x2,#0x20
    b Ldone

.align 4
L256:
    ld1 {v4.16b},[x0]
    mov w1,#7
    mov w12,#14
    st1 {v3.4s},[x2],#16

Loop256:
    tbl v6.16b,{v4.16b},v2.16b
    ext v5.16b,v0.16b,v3.16b,#12
    st1 {v4.4s},[x2],#16
    aese v6.16b,v0.16b
    subs w1,w1,#1

    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v3.16b,v3.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v6.16b,v6.16b,v1.16b
    eor v3.16b,v3.16b,v5.16b
    shl v1.16b,v1.16b,#1
    eor v3.16b,v3.16b,v6.16b
    st1 {v3.4s},[x2],#16
    b.eq Ldone

    dup v6.4s,v3.s[3] // just splat
    ext v5.16b,v0.16b,v4.16b,#12
    aese v6.16b,v0.16b

    eor v4.16b,v4.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v4.16b,v4.16b,v5.16b
    ext v5.16b,v0.16b,v5.16b,#12
    eor v4.16b,v4.16b,v5.16b

    eor v4.16b,v4.16b,v6.16b
    b Loop256

Ldone:
    str w12,[x2]
    mov x3,#0

Lenc_key_abort:
    mov x0,x3 // return value
    ldr x29,[sp],#16
    ret


.globl aes_hw_set_decrypt_key

.def aes_hw_set_decrypt_key
    .type 32
.endef
.align 5
aes_hw_set_decrypt_key:
    AARCH64_SIGN_LINK_REGISTER
    stp x29,x30,[sp,#-16]!
    add x29,sp,#0
    bl Lenc_key

    cmp x0,#0
    b.ne Ldec_key_abort

    sub x2,x2,#240 // restore original x2
    mov x4,#-16
    add x0,x2,x12,lsl#4 // end of key schedule

    ld1 {v0.4s},[x2]
    ld1 {v1.4s},[x0]
    st1 {v0.4s},[x0],x4
    st1 {v1.4s},[x2],#16

Loop_imc:
    ld1 {v0.4s},[x2]
    ld1 {v1.4s},[x0]
    aesimc v0.16b,v0.16b
    aesimc v1.16b,v1.16b
    st1 {v0.4s},[x0],x4
    st1 {v1.4s},[x2],#16
    cmp x0,x2
    b.hi Loop_imc

    ld1 {v0.4s},[x2]
    aesimc v0.16b,v0.16b
    st1 {v0.4s},[x0]

    eor x0,x0,x0 // return value
Ldec_key_abort:
    ldp x29,x30,[sp],#16
    AARCH64_VALIDATE_LINK_REGISTER
    ret

.globl aes_hw_encrypt

.def aes_hw_encrypt
    .type 32
.endef
.align 5
aes_hw_encrypt:
    AARCH64_VALID_CALL_TARGET
    ldr w3,[x2,#240]
    ld1 {v0.4s},[x2],#16
    ld1 {v2.16b},[x0]
    sub w3,w3,#2
    ld1 {v1.4s},[x2],#16

Loop_enc:
    aese v2.16b,v0.16b
    aesmc v2.16b,v2.16b
    ld1 {v0.4s},[x2],#16
    subs w3,w3,#2
    aese v2.16b,v1.16b
    aesmc v2.16b,v2.16b
    ld1 {v1.4s},[x2],#16
    b.gt Loop_enc

    aese v2.16b,v0.16b
    aesmc v2.16b,v2.16b
    ld1 {v0.4s},[x2]
    aese v2.16b,v1.16b
    eor v2.16b,v2.16b,v0.16b

    st1 {v2.16b},[x1]
    ret

.globl aes_hw_decrypt

.def aes_hw_decrypt
    .type 32
.endef
.align 5
aes_hw_decrypt:
    AARCH64_VALID_CALL_TARGET
    ldr w3,[x2,#240]
    ld1 {v0.4s},[x2],#16
    ld1 {v2.16b},[x0]
    sub w3,w3,#2
    ld1 {v1.4s},[x2],#16

Loop_dec:
    aesd v2.16b,v0.16b
    aesimc v2.16b,v2.16b
    ld1 {v0.4s},[x2],#16
    subs w3,w3,#2
    aesd v2.16b,v1.16b
    aesimc v2.16b,v2.16b
    ld1 {v1.4s},[x2],#16
    b.gt Loop_dec

    aesd v2.16b,v0.16b
    aesimc v2.16b,v2.16b
    ld1 {v0.4s},[x2]
    aesd v2.16b,v1.16b
    eor v2.16b,v2.16b,v0.16b

    st1 {v2.16b},[x1]
    ret

.globl aes_hw_cbc_encrypt

.def aes_hw_cbc_encrypt
    .type 32
.endef
.align 5
aes_hw_cbc_encrypt:
    // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
    AARCH64_VALID_CALL_TARGET
    stp x29,x30,[sp,#-16]!
    add x29,sp,#0
    subs x2,x2,#16
    mov x8,#16
    b.lo Lcbc_abort
    csel x8,xzr,x8,eq

    cmp w5,#0 // en- or decrypting?
    ldr w5,[x3,#240]
    and x2,x2,#-16
    ld1 {v6.16b},[x4]
    ld1 {v0.16b},[x0],x8

    ld1 {v16.4s,v17.4s},[x3] // load key schedule...
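    // Descriptive note on the key-schedule indexing below: w5 holds the round
    // count just read from [x3,#240], and v16/v17 hold the first two round
    // keys.  The arithmetic that follows forms x7 = x3 + (rounds-6)*16, the
    // address of the last seven round keys, which stay resident in v18-v23
    // and v7 for the whole function; the remaining middle rounds are walked
    // two at a time through x7 (reset to x3+32) with the w6 counter in the
    // loops further down.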
    sub w5,w5,#6
    add x7,x3,x5,lsl#4 // pointer to last 7 round keys
    sub w5,w5,#2
    ld1 {v18.4s,v19.4s},[x7],#32
    ld1 {v20.4s,v21.4s},[x7],#32
    ld1 {v22.4s,v23.4s},[x7],#32
    ld1 {v7.4s},[x7]

    add x7,x3,#32
    mov w6,w5
    b.eq Lcbc_dec

    cmp w5,#2
    eor v0.16b,v0.16b,v6.16b
    eor v5.16b,v16.16b,v7.16b
    b.eq Lcbc_enc128

    ld1 {v2.4s,v3.4s},[x7]
    add x7,x3,#16
    add x6,x3,#16*4
    add x12,x3,#16*5
    aese v0.16b,v16.16b
    aesmc v0.16b,v0.16b
    add x14,x3,#16*6
    add x3,x3,#16*7
    b Lenter_cbc_enc

.align 4
Loop_cbc_enc:
    aese v0.16b,v16.16b
    aesmc v0.16b,v0.16b
    st1 {v6.16b},[x1],#16
Lenter_cbc_enc:
    aese v0.16b,v17.16b
    aesmc v0.16b,v0.16b
    aese v0.16b,v2.16b
    aesmc v0.16b,v0.16b
    ld1 {v16.4s},[x6]
    cmp w5,#4
    aese v0.16b,v3.16b
    aesmc v0.16b,v0.16b
    ld1 {v17.4s},[x12]
    b.eq Lcbc_enc192

    aese v0.16b,v16.16b
    aesmc v0.16b,v0.16b
    ld1 {v16.4s},[x14]
    aese v0.16b,v17.16b
    aesmc v0.16b,v0.16b
    ld1 {v17.4s},[x3]
    nop

Lcbc_enc192:
    aese v0.16b,v16.16b
    aesmc v0.16b,v0.16b
    subs x2,x2,#16
    aese v0.16b,v17.16b
    aesmc v0.16b,v0.16b
    csel x8,xzr,x8,eq
    aese v0.16b,v18.16b
    aesmc v0.16b,v0.16b
    aese v0.16b,v19.16b
    aesmc v0.16b,v0.16b
    ld1 {v16.16b},[x0],x8
    aese v0.16b,v20.16b
    aesmc v0.16b,v0.16b
    eor v16.16b,v16.16b,v5.16b
    aese v0.16b,v21.16b
    aesmc v0.16b,v0.16b
    ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
    aese v0.16b,v22.16b
    aesmc v0.16b,v0.16b
    aese v0.16b,v23.16b
    eor v6.16b,v0.16b,v7.16b
    b.hs Loop_cbc_enc

    st1 {v6.16b},[x1],#16
    b Lcbc_done

.align 5
Lcbc_enc128:
    ld1 {v2.4s,v3.4s},[x7]
    aese v0.16b,v16.16b
    aesmc v0.16b,v0.16b
    b Lenter_cbc_enc128
Loop_cbc_enc128:
    aese v0.16b,v16.16b
    aesmc v0.16b,v0.16b
    st1 {v6.16b},[x1],#16
Lenter_cbc_enc128:
    aese v0.16b,v17.16b
    aesmc v0.16b,v0.16b
    subs x2,x2,#16
    aese v0.16b,v2.16b
    aesmc v0.16b,v0.16b
    csel x8,xzr,x8,eq
    aese v0.16b,v3.16b
    aesmc v0.16b,v0.16b
    aese v0.16b,v18.16b
    aesmc v0.16b,v0.16b
    aese v0.16b,v19.16b
    aesmc v0.16b,v0.16b
    ld1 {v16.16b},[x0],x8
    aese v0.16b,v20.16b
    aesmc v0.16b,v0.16b
    aese v0.16b,v21.16b
    aesmc v0.16b,v0.16b
    aese v0.16b,v22.16b
    aesmc v0.16b,v0.16b
    eor v16.16b,v16.16b,v5.16b
    aese v0.16b,v23.16b
    eor v6.16b,v0.16b,v7.16b
    b.hs Loop_cbc_enc128

    st1 {v6.16b},[x1],#16
    b Lcbc_done
.align 5
Lcbc_dec:
    ld1 {v18.16b},[x0],#16
    subs x2,x2,#32 // bias
    add w6,w5,#2
    orr v3.16b,v0.16b,v0.16b
    orr v1.16b,v0.16b,v0.16b
    orr v19.16b,v18.16b,v18.16b
    b.lo Lcbc_dec_tail

    orr v1.16b,v18.16b,v18.16b
    ld1 {v18.16b},[x0],#16
    orr v2.16b,v0.16b,v0.16b
    orr v3.16b,v1.16b,v1.16b
    orr v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
    aesd v0.16b,v16.16b
    aesimc v0.16b,v0.16b
    aesd v1.16b,v16.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v16.16b
    aesimc v18.16b,v18.16b
    ld1 {v16.4s},[x7],#16
    subs w6,w6,#2
    aesd v0.16b,v17.16b
    aesimc v0.16b,v0.16b
    aesd v1.16b,v17.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v17.16b
    aesimc v18.16b,v18.16b
    ld1 {v17.4s},[x7],#16
    b.gt Loop3x_cbc_dec

    aesd v0.16b,v16.16b
    aesimc v0.16b,v0.16b
    aesd v1.16b,v16.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v16.16b
    aesimc v18.16b,v18.16b
    eor v4.16b,v6.16b,v7.16b
    subs x2,x2,#0x30
    eor v5.16b,v2.16b,v7.16b
    csel x6,x2,x6,lo // x6, w6, is zero at this point
    aesd v0.16b,v17.16b
    aesimc v0.16b,v0.16b
    aesd v1.16b,v17.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v17.16b
    aesimc v18.16b,v18.16b
    eor v17.16b,v3.16b,v7.16b
    add x0,x0,x6 // x0 is adjusted in such a way that
    // at exit from the loop v1.16b-v18.16b
    // are loaded with the last "words"
    orr v6.16b,v19.16b,v19.16b
    mov x7,x3
    aesd v0.16b,v20.16b
    aesimc v0.16b,v0.16b
    aesd v1.16b,v20.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v20.16b
    aesimc v18.16b,v18.16b
    ld1 {v2.16b},[x0],#16
    aesd v0.16b,v21.16b
    aesimc v0.16b,v0.16b
    aesd v1.16b,v21.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v21.16b
    aesimc v18.16b,v18.16b
    ld1 {v3.16b},[x0],#16
    aesd v0.16b,v22.16b
    aesimc v0.16b,v0.16b
    aesd v1.16b,v22.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v22.16b
    aesimc v18.16b,v18.16b
    ld1 {v19.16b},[x0],#16
    aesd v0.16b,v23.16b
    aesd v1.16b,v23.16b
    aesd v18.16b,v23.16b
    ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
    add w6,w5,#2
    eor v4.16b,v4.16b,v0.16b
    eor v5.16b,v5.16b,v1.16b
    eor v18.16b,v18.16b,v17.16b
    ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
    st1 {v4.16b},[x1],#16
    orr v0.16b,v2.16b,v2.16b
    st1 {v5.16b},[x1],#16
    orr v1.16b,v3.16b,v3.16b
    st1 {v18.16b},[x1],#16
    orr v18.16b,v19.16b,v19.16b
    b.hs Loop3x_cbc_dec

    cmn x2,#0x30
    b.eq Lcbc_done
    nop

Lcbc_dec_tail:
    aesd v1.16b,v16.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v16.16b
    aesimc v18.16b,v18.16b
    ld1 {v16.4s},[x7],#16
    subs w6,w6,#2
    aesd v1.16b,v17.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v17.16b
    aesimc v18.16b,v18.16b
    ld1 {v17.4s},[x7],#16
    b.gt Lcbc_dec_tail

    aesd v1.16b,v16.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v16.16b
    aesimc v18.16b,v18.16b
    aesd v1.16b,v17.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v17.16b
    aesimc v18.16b,v18.16b
    aesd v1.16b,v20.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v20.16b
    aesimc v18.16b,v18.16b
    cmn x2,#0x20
    aesd v1.16b,v21.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v21.16b
    aesimc v18.16b,v18.16b
    eor v5.16b,v6.16b,v7.16b
    aesd v1.16b,v22.16b
    aesimc v1.16b,v1.16b
    aesd v18.16b,v22.16b
    aesimc v18.16b,v18.16b
    eor v17.16b,v3.16b,v7.16b
    aesd v1.16b,v23.16b
    aesd v18.16b,v23.16b
    b.eq Lcbc_dec_one
    eor v5.16b,v5.16b,v1.16b
    eor v17.16b,v17.16b,v18.16b
    orr v6.16b,v19.16b,v19.16b
    st1 {v5.16b},[x1],#16
    st1 {v17.16b},[x1],#16
    b Lcbc_done

Lcbc_dec_one:
    eor v5.16b,v5.16b,v18.16b
    orr v6.16b,v19.16b,v19.16b
    st1 {v5.16b},[x1],#16

Lcbc_done:
    st1 {v6.16b},[x4]
Lcbc_abort:
    ldr x29,[sp],#16
    ret

.globl aes_hw_ctr32_encrypt_blocks

.def aes_hw_ctr32_encrypt_blocks
    .type 32
.endef
.align 5
aes_hw_ctr32_encrypt_blocks:
    // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
    AARCH64_VALID_CALL_TARGET
    stp x29,x30,[sp,#-16]!
    add x29,sp,#0
    ldr w5,[x3,#240]

    ldr w8, [x4, #12]
    ld1 {v0.4s},[x4]

    ld1 {v16.4s,v17.4s},[x3] // load key schedule...
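    // Descriptive note: w5 holds the round count from [x3,#240] and w8 the
    // 32-bit counter taken from bytes 12-15 of the counter block at [x4].
    // Below, x7 = x3 + (rounds-4)*16 addresses the last five round keys
    // (kept resident in v20-v23 and v7), and x12 is the 16-byte input stride
    // that csel clears when x2 (the block count) is below 2, so the tail code
    // does not advance past a single input block.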
    sub w5,w5,#4
    mov x12,#16
    cmp x2,#2
    add x7,x3,x5,lsl#4 // pointer to last 5 round keys
    sub w5,w5,#2
    ld1 {v20.4s,v21.4s},[x7],#32
    ld1 {v22.4s,v23.4s},[x7],#32
    ld1 {v7.4s},[x7]
    add x7,x3,#32
    mov w6,w5
    csel x12,xzr,x12,lo

    // ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
    // affected by silicon errata #1742098 [0] and #1655431 [1],
    // respectively, where the second instruction of an aese/aesmc
    // instruction pair may execute twice if an interrupt is taken right
    // after the first instruction consumes an input register of which a
    // single 32-bit lane has been updated the last time it was modified.
    //
    // This function uses a counter in one 32-bit lane. The mov lines
    // could write to v1.16b and v18.16b directly, but that trips these bugs.
    // We write to v6.16b and copy to the final register as a workaround.
    //
    // [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
    // [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
    rev w8, w8
#endif
    add w10, w8, #1
    orr v6.16b,v0.16b,v0.16b
    rev w10, w10
    mov v6.s[3],w10
    add w8, w8, #2
    orr v1.16b,v6.16b,v6.16b
    b.ls Lctr32_tail
    rev w12, w8
    mov v6.s[3],w12
    sub x2,x2,#3 // bias
    orr v18.16b,v6.16b,v6.16b
    b Loop3x_ctr32

.align 4
Loop3x_ctr32:
    aese v0.16b,v16.16b
    aesmc v0.16b,v0.16b
    aese v1.16b,v16.16b
    aesmc v1.16b,v1.16b
    aese v18.16b,v16.16b
    aesmc v18.16b,v18.16b
    ld1 {v16.4s},[x7],#16
    subs w6,w6,#2
    aese v0.16b,v17.16b
    aesmc v0.16b,v0.16b
    aese v1.16b,v17.16b
    aesmc v1.16b,v1.16b
    aese v18.16b,v17.16b
    aesmc v18.16b,v18.16b
    ld1 {v17.4s},[x7],#16
    b.gt Loop3x_ctr32

    aese v0.16b,v16.16b
    aesmc v4.16b,v0.16b
    aese v1.16b,v16.16b
    aesmc v5.16b,v1.16b
    ld1 {v2.16b},[x0],#16
    add w9,w8,#1
    aese v18.16b,v16.16b
    aesmc v18.16b,v18.16b
    ld1 {v3.16b},[x0],#16
    rev w9,w9
    aese v4.16b,v17.16b
    aesmc v4.16b,v4.16b
    aese v5.16b,v17.16b
    aesmc v5.16b,v5.16b
    ld1 {v19.16b},[x0],#16
    mov x7,x3
    aese v18.16b,v17.16b
    aesmc v17.16b,v18.16b
    aese v4.16b,v20.16b
    aesmc v4.16b,v4.16b
    aese v5.16b,v20.16b
    aesmc v5.16b,v5.16b
    eor v2.16b,v2.16b,v7.16b
    add w10,w8,#2
    aese v17.16b,v20.16b
    aesmc v17.16b,v17.16b
    eor v3.16b,v3.16b,v7.16b
    add w8,w8,#3
    aese v4.16b,v21.16b
    aesmc v4.16b,v4.16b
    aese v5.16b,v21.16b
    aesmc v5.16b,v5.16b
    // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
    // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
    // 32-bit mode. See the comment above.
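    // The mov v6.s[3],wN / orr vX.16b,v6.16b,v6.16b pairs below refresh the
    // three counter blocks through the staging register v6, implementing the
    // workaround described above.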
    eor v19.16b,v19.16b,v7.16b
    mov v6.s[3], w9
    aese v17.16b,v21.16b
    aesmc v17.16b,v17.16b
    orr v0.16b,v6.16b,v6.16b
    rev w10,w10
    aese v4.16b,v22.16b
    aesmc v4.16b,v4.16b
    mov v6.s[3], w10
    rev w12,w8
    aese v5.16b,v22.16b
    aesmc v5.16b,v5.16b
    orr v1.16b,v6.16b,v6.16b
    mov v6.s[3], w12
    aese v17.16b,v22.16b
    aesmc v17.16b,v17.16b
    orr v18.16b,v6.16b,v6.16b
    subs x2,x2,#3
    aese v4.16b,v23.16b
    aese v5.16b,v23.16b
    aese v17.16b,v23.16b

    eor v2.16b,v2.16b,v4.16b
    ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
    st1 {v2.16b},[x1],#16
    eor v3.16b,v3.16b,v5.16b
    mov w6,w5
    st1 {v3.16b},[x1],#16
    eor v19.16b,v19.16b,v17.16b
    ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
    st1 {v19.16b},[x1],#16
    b.hs Loop3x_ctr32

    adds x2,x2,#3
    b.eq Lctr32_done
    cmp x2,#1
    mov x12,#16
    csel x12,xzr,x12,eq

Lctr32_tail:
    aese v0.16b,v16.16b
    aesmc v0.16b,v0.16b
    aese v1.16b,v16.16b
    aesmc v1.16b,v1.16b
    ld1 {v16.4s},[x7],#16
    subs w6,w6,#2
    aese v0.16b,v17.16b
    aesmc v0.16b,v0.16b
    aese v1.16b,v17.16b
    aesmc v1.16b,v1.16b
    ld1 {v17.4s},[x7],#16
    b.gt Lctr32_tail

    aese v0.16b,v16.16b
    aesmc v0.16b,v0.16b
    aese v1.16b,v16.16b
    aesmc v1.16b,v1.16b
    aese v0.16b,v17.16b
    aesmc v0.16b,v0.16b
    aese v1.16b,v17.16b
    aesmc v1.16b,v1.16b
    ld1 {v2.16b},[x0],x12
    aese v0.16b,v20.16b
    aesmc v0.16b,v0.16b
    aese v1.16b,v20.16b
    aesmc v1.16b,v1.16b
    ld1 {v3.16b},[x0]
    aese v0.16b,v21.16b
    aesmc v0.16b,v0.16b
    aese v1.16b,v21.16b
    aesmc v1.16b,v1.16b
    eor v2.16b,v2.16b,v7.16b
    aese v0.16b,v22.16b
    aesmc v0.16b,v0.16b
    aese v1.16b,v22.16b
    aesmc v1.16b,v1.16b
    eor v3.16b,v3.16b,v7.16b
    aese v0.16b,v23.16b
    aese v1.16b,v23.16b

    cmp x2,#1
    eor v2.16b,v2.16b,v0.16b
    eor v3.16b,v3.16b,v1.16b
    st1 {v2.16b},[x1],#16
    b.eq Lctr32_done
    st1 {v3.16b},[x1]

Lctr32_done:
    ldr x29,[sp],#16
    ret

#endif
#endif
#endif  // !OPENSSL_NO_ASM
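// For reference, the exported aes_hw_* symbols above are assumed to match C
// prototypes along the following lines. This is a sketch inferred from the
// register usage in this file; the AES_KEY type and the parameter names are
// taken from the BoringSSL headers and are not restated by this file itself.
//
//   int  aes_hw_set_encrypt_key(const uint8_t *user_key, int bits, AES_KEY *key);
//   int  aes_hw_set_decrypt_key(const uint8_t *user_key, int bits, AES_KEY *key);
//   void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
//   void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
//   void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
//                           const AES_KEY *key, uint8_t *ivec, int enc);
//   void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
//                                    size_t blocks, const AES_KEY *key,
//                                    const uint8_t ivec[16]);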