// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// ARMv8 Crypto Extension (AESE/AESD/AESMC/AESIMC) implementation of the AES
// key schedule, single-block encrypt/decrypt, CBC and 32-bit-counter CTR.
// Target: AArch64, Mach-O style directives and symbol naming (leading
// underscore on globals, Lxxx local labels, @PAGE/@PAGEOFF relocations).
//
// Key schedule layout as used by every routine below: consecutive 16-byte
// round keys starting at offset 0, with the round count (10, 12 or 14) stored
// as a 32-bit word at offset 240.
//
// Only x0-x14 and v0-v7 / v16-v23 are written; none of the routines touch
// x19-x28 or v8-v15.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

.section	__TEXT,__const
.align	5
// Constants for the key expansion, loaded through x3:
//   row 0: 0x01 splat, the running round constant (doubled with shl each step)
//   row 1: tbl index mask that rotates and splats the last key word
//   row 2: 0x1b splat, loaded by the AES-128 path once the first eight
//          doublings of row 0 are used up
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

//-----------------------------------------------------------------------
// _aes_hw_set_encrypt_key
// NOTE(review): presumably declared in C as
//   int aes_hw_set_encrypt_key(const uint8_t *key, int bits, AES_KEY *out)
// -- confirm against the C header.
//
// In:    x0 = user key bytes (16, 24 or 32 bytes are read)
//        w1 = key size in bits
//        x2 = destination key schedule
// Out:   x0 =  0 on success
//             -1 if x0 or x2 is NULL
//             -2 if w1 is < 128, > 256 or not a multiple of 64
//        On success the round count is stored at [schedule+240].
// Also:  on success x2 = schedule+240 and w12 = round count; the decrypt-key
//        routine enters at Lenc_key and relies on both.
// Clobb: w1, x3, w12, v0-v6, flags
// Stack: 16 bytes (x29/x30 pair). x30 is never modified here (no calls), so
//        the epilogue only reloads x29 before ret.
//-----------------------------------------------------------------------
.globl	_aes_hw_set_encrypt_key
.private_extern	_aes_hw_set_encrypt_key

.align	5
_aes_hw_set_encrypt_key:
Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// provisional return: NULL argument
	cmp	x0,#0
	b.eq	Lenc_key_abort
	cmp	x2,#0
	b.eq	Lenc_key_abort
	mov	x3,#-2			// provisional return: bad key size
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f
	b.ne	Lenc_key_abort

	adrp	x3,Lrcon@PAGE
	add	x3,x3,Lrcon@PAGEOFF
	cmp	w1,#192			// flags stay live until the dispatch below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0: zero round key for aese, zero fill for ext
	ld1	{v3.16b},[x0],#16
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = rotate-n-splat mask

	b.lt	Loop128
	b.eq	L192
	b	L256

// AES-128: eight iterations with rcon doubling from 0x01, then two unrolled
// steps with rcon reloaded from the 0x1b row.
.align	4
Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// x3 now points at the 0x1b row

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, no post-increment
	add	x2,x2,#0x50		// 160 + 0x50 = 240: x2 -> rounds field

	mov	w12,#10
	b	Ldone

// AES-192: v3 holds the first 16 key bytes, v4 the remaining 8. Each of the
// eight iterations emits 24 bytes of schedule.
.align	4
L192:
	ld1	{v4.8b},[x0],#8
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// 16 + 8*24 + 0x20 = 240: x2 -> rounds field
	b	Ldone

// AES-256: v3/v4 hold the two key halves; seven iterations, the last one
// leaving through the b.eq in the middle of the loop with x2 = schedule+240.
.align	4
L256:
	ld1	{v4.16b},[x0]
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// rounds -> [schedule+240]
	mov	x3,#0

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret


//-----------------------------------------------------------------------
// _aes_hw_set_decrypt_key
// Same register interface and return values as _aes_hw_set_encrypt_key.
// Builds the encryption schedule via Lenc_key, then reverses the order of
// the round keys in place, applying aesimc to every key except the first
// and the last.
//
// In:    x0 = user key, w1 = bits, x2 = destination key schedule
// Out:   x0 = 0 on success, otherwise the error code from Lenc_key
// Clobb: everything Lenc_key clobbers, plus x4, v0, v1
// Stack: 16 bytes; x30 is saved and restored because of the bl.
//-----------------------------------------------------------------------
.globl	_aes_hw_set_decrypt_key
.private_extern	_aes_hw_set_decrypt_key

.align	5
_aes_hw_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	Lenc_key

	cmp	x0,#0
	b.ne	Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16
	add	x0,x2,x12,lsl#4	// end of key schedule

	// Swap the outermost pair without aesimc.
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

	// Walk inwards from both ends: x2 ascends, x0 descends.
Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	Loop_imc

	// Middle round key: transformed in place.
	ld1	{v0.4s},[x2]
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	ret

//-----------------------------------------------------------------------
// _aes_hw_encrypt -- encrypt one 16-byte block.
// In:    x0 = input block, x1 = output block, x2 = key schedule
// Out:   16 bytes stored at [x1]; x0 is not set as a return value
// Clobb: x2, w3, v0-v2, flags
// Stack: none (leaf, no frame)
// The loop consumes two round keys per iteration; w3 starts at rounds-2.
//-----------------------------------------------------------------------
.globl	_aes_hw_encrypt
.private_extern	_aes_hw_encrypt

.align	5
_aes_hw_encrypt:
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round: no aesmc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

//-----------------------------------------------------------------------
// _aes_hw_decrypt -- decrypt one 16-byte block.
// Same interface and structure as _aes_hw_encrypt, using aesd/aesimc.
// In:    x0 = input block, x1 = output block, x2 = key schedule
// Clobb: x2, w3, v0-v2, flags
// Stack: none (leaf, no frame)
//-----------------------------------------------------------------------
.globl	_aes_hw_decrypt
.private_extern	_aes_hw_decrypt

.align	5
_aes_hw_decrypt:
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b		// final round: no aesimc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

//-----------------------------------------------------------------------
// _aes_hw_cbc_encrypt -- CBC-mode bulk encrypt or decrypt.
// In:    x0 = input, x1 = output, x2 = length in bytes,
//        x3 = key schedule, x4 = 16-byte IV (read, then updated),
//        w5 = 0 to decrypt, non-zero to encrypt
// Out:   output written through x1; [x4] receives the chaining value for a
//        following call. No return value is set.
// Notes: a length below 16 returns immediately and leaves [x4] untouched;
//        otherwise the length is rounded down to a multiple of 16.
// Regs:  v6 = IV / previous ciphertext block, v7 = last round key,
//        v16/v17 = rolling round keys, v18-v23 = the six round keys before
//        the last one. w5 becomes rounds-8 (2, 4 or 6) and selects the path.
//        x8 = input stride, forced to 0 when the final block has been
//        fetched so the look-ahead load does not advance past the input.
// Clobb: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23, flags
// Stack: 16 bytes; x30 is never modified, only x29 is reloaded.
//-----------------------------------------------------------------------
.globl	_aes_hw_cbc_encrypt
.private_extern	_aes_hw_cbc_encrypt

.align	5
_aes_hw_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	Lcbc_abort
	csel	x8,xzr,x8,eq

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32
	mov	w6,w5
	b.eq	Lcbc_dec		// flags still from cmp w5,#0 above

	cmp	w5,#2
	eor	v0.16b,v0.16b,v6.16b	// first block ^= IV
	eor	v5.16b,v16.16b,v7.16b	// v5 = rndkey[0] ^ last round key
	b.eq	Lcbc_enc128

	// AES-192/256 encrypt: keep pointers to round keys 1 and 4..7 so the
	// loop can reload them into v16/v17.
	ld1	{v2.4s,v3.4s},[x7]
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	Lenter_cbc_enc

.align	4
Loop_cbc_enc:
	aese	v0.16b,v16.16b		// v16 = next input ^ v5, set up last iteration
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	Lcbc_enc192

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	Lcbc_done

	// AES-128 encrypt: all round keys stay resident in registers.
.align	5
Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	Lenter_cbc_enc128
Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	Lcbc_done

	// Decrypt: three blocks per iteration in v0/v1/v18, with the matching
	// ciphertext copies kept in v2/v3/v19 for the chaining XOR.
.align	5
Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	Lcbc_dec_tail

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	Loop3x_cbc_dec

	cmn	x2,#0x30		// x2 == -0x30: nothing left over
	b.eq	Lcbc_done
	nop

	// One or two blocks remain, held in v1/v18.
Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// x2 == -0x20: exactly one block left
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	Lcbc_done

Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

Lcbc_done:
	st1	{v6.16b},[x4]		// hand the chaining value back to the caller
Lcbc_abort:
	ldr	x29,[sp],#16
	ret

//-----------------------------------------------------------------------
// _aes_hw_ctr32_encrypt_blocks -- CTR mode with a 32-bit counter.
// In:    x0 = input, x1 = output, x2 = number of 16-byte blocks,
//        x3 = key schedule, x4 = 16-byte counter block
// Out:   output written through x1. No return value is set, and the
//        counter block at [x4] is only read, never written back.
// Notes: the counter is the last 32-bit word of the block. It is
//        byte-reversed into w8 (unless __ARMEB__), incremented there, and
//        reversed again before being inserted into lane 3 of each block.
//        Only those 32 bits are incremented.
//        A block count of 0 returns immediately (see the cbz below).
// Regs:  v6 = pristine counter block, v7 = last round key,
//        v16/v17 = rolling round keys, v20-v23 = the four round keys
//        before the last one, w5 = rounds-6.
// Clobb: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags
// Stack: 16 bytes; x30 is never modified, only x29 is reloaded.
//-----------------------------------------------------------------------
.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern	_aes_hw_ctr32_encrypt_blocks

.align	5
_aes_hw_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// Zero blocks: nothing to do. Without this guard the count falls into
	// Lctr32_tail, which always loads one input block and, because the
	// later cmp x2,#1 is not-equal for 0, stores two output blocks.
	// NOTE(review): the file header says this is generated; mirror this
	// guard in the generating script as well.
	cbz	x2,Lctr32_done
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: tail re-reads it (stride 0)
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v1.s[3],w10
	b.ls	Lctr32_tail		// one or two blocks
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12
	b	Loop3x_ctr32

	// Three counter blocks per iteration in v0/v1/v18.
.align	4
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	// Finish the current three blocks in v4/v5/v17 while v0/v1/v18 are
	// re-seeded from v6 with the next three counter values.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias: x2 = blocks left (0..2)
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block left: stride 0 for the tail loads

	// One or two blocks remain; both lanes are always computed.
Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done		// only one block was requested
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16
	ret

#endif
#endif  // !OPENSSL_NO_ASM