#include <openssl/arm_arch.h>

// AES built on the ARMv8 Cryptography Extension instructions
// (AESE/AESD/AESMC/AESIMC), AArch64 GNU-style syntax.
//
// Exported symbols carry a leading underscore and are marked .private_extern;
// local labels use a bare "L" prefix.
//
// Key schedule layout as used by every routine in this file:
//   bytes 0 .. 16*(rounds+1)-1 : round keys, 16 bytes each
//   byte offset 240            : 32-bit round count (10, 12 or 14)
//
// Only x0-x14 and v0-v7/v16-v23 are written, so no callee-saved register needs
// to be preserved. Routines that do not call anything leave x30 untouched and
// therefore only reload x29 in their epilogue.

#if __ARM_MAX_ARCH__>=7
.text
#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)

#endif
// Constants for key expansion, read PC-relative via "adr":
//   row 0: initial round constant 0x01 in every lane
//   row 1: tbl index mask that rotates the last key word and splats it
//   row 2: round constant 0x1b, loaded once row 0 has been doubled 8 times
.align	5
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

//-----------------------------------------------------------------------
// _aes_hw_set_encrypt_key
// In:    x0 = user key bytes (must be non-NULL)
//        w1 = key size in bits: 128, 192 or 256
//        x2 = key schedule to fill (must be non-NULL)
// Out:   x0 = 0 on success, -1 if x0 or x2 is NULL, -2 if w1 is not an
//             accepted key size
//        on success x2 = schedule base + 240 and w12 = round count; the
//        decrypt-key routine below relies on both after "bl Lenc_key"
// Clobb: x1, x3, w12, v0-v6, flags
//-----------------------------------------------------------------------
.globl	_aes_hw_set_encrypt_key
.private_extern	_aes_hw_set_encrypt_key

.align	5
_aes_hw_set_encrypt_key:
Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// error code for NULL pointers
	cmp	x0,#0
	b.eq	Lenc_key_abort
	cmp	x2,#0
	b.eq	Lenc_key_abort
	mov	x3,#-2			// error code for bad key size
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	Lenc_key_abort

	adr	x3,Lrcon
	cmp	w1,#192			// flags select the path below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0 for the whole routine
	ld1	{v3.16b},[x0],#16	// first 16 key bytes
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = rotate-n-splat mask

	b.lt	Loop128
	b.eq	L192
	b	L256

// 128-bit key: 8 looped rounds with rcon doubling, then 2 unrolled rounds
// using rcon 0x1b and its double.
.align	4
Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// rotate last word, splat to all lanes
	ext	v5.16b,v0.16b,v3.16b,#12	// v5 = v3 shifted up one word
	st1	{v3.4s},[x2],#16	// emit current round key
	aese	v6.16b,v0.16b		// zero key: S-box substitution only
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b	// three shift+xor steps build the
	ext	v5.16b,v0.16b,v5.16b,#12	// running xor of the key words
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// add round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next round constant
	eor	v3.16b,v3.16b,v6.16b
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// rcon = 0x1b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, x2 stays at base+160
	add	x2,x2,#0x50		// x2 = base + 240

	mov	w12,#10
	b	Ldone

// 192-bit key: each of the 8 iterations emits 24 bytes of schedule.
.align	4
L192:
	ld1	{v4.8b},[x0],#8		// remaining 8 key bytes
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// x2 = base + 240
	b	Ldone

// 256-bit key: 7 iterations, each emitting two round keys. The loop exits
// from the middle after the 15th key, leaving x2 at base + 240.
.align	4
L256:
	ld1	{v4.16b},[x0]		// second 16 key bytes
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b		// substitution without rotate or rcon

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// round count at schedule offset 240
	mov	x3,#0

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// x30 was never modified here
	ret


//-----------------------------------------------------------------------
// _aes_hw_set_decrypt_key
// In/Out: same as _aes_hw_set_encrypt_key.
// Builds the encryption schedule, then reverses the order of the round
// keys in place and applies AESIMC to every key except the first and the
// last of the reversed schedule.
// Clobb: x1-x4, w12, v0-v6, flags
//-----------------------------------------------------------------------
.globl	_aes_hw_set_decrypt_key
.private_extern	_aes_hw_set_decrypt_key

.align	5
_aes_hw_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	Lenc_key

	cmp	x0,#0
	b.ne	Ldec_key_abort		// propagate the error code in x0

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16			// backward step for the upper pointer
	add	x0,x2,x12,lsl#4	// end of key schedule

	ld1	{v0.4s},[x2]		// swap first and last round key as-is
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

Loop_imc:
	ld1	{v0.4s},[x2]		// swap an inner pair, transforming both
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	Loop_imc		// until the two pointers meet

	ld1	{v0.4s},[x2]		// middle key: transform in place
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	ret

//-----------------------------------------------------------------------
// _aes_hw_encrypt: encrypt one 16-byte block.
// In:    x0 = input block, x1 = output block, x2 = key schedule
// Clobb: x2, w3, v0-v2, flags
//-----------------------------------------------------------------------
.globl	_aes_hw_encrypt
.private_extern	_aes_hw_encrypt

.align	5
_aes_hw_encrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

Loop_enc:				// two rounds per iteration
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round has no MixColumns
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

//-----------------------------------------------------------------------
// _aes_hw_decrypt: decrypt one 16-byte block.
// In:    x0 = input block, x1 = output block, x2 = key schedule
// Clobb: x2, w3, v0-v2, flags
//-----------------------------------------------------------------------
.globl	_aes_hw_decrypt
.private_extern	_aes_hw_decrypt

.align	5
_aes_hw_decrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

Loop_dec:				// two rounds per iteration
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b		// final round has no InvMixColumns
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

//-----------------------------------------------------------------------
// _aes_hw_cbc_encrypt
// In:    x0 = input, x1 = output
//        x2 = length in bytes; fewer than 16 returns without doing
//             anything, otherwise it is rounded down to a multiple of 16
//        x3 = key schedule
//        x4 = 16-byte IV, read on entry and overwritten with the chaining
//             value for a following call
//        w5 = nonzero to encrypt, zero to decrypt
// Clobb: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23, flags
//
// x8 is the post-increment applied when loading the next input block: 16,
// or 0 once the block being fetched is the last one, so loads never move
// past the end of the input.
//-----------------------------------------------------------------------
.globl	_aes_hw_cbc_encrypt
.private_extern	_aes_hw_cbc_encrypt

.align	5
_aes_hw_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	Lcbc_abort
	csel	x8,xzr,x8,eq

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]		// w5 = round count from here on
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]		// v6 = IV
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2		// w5 = rounds-8: 2, 4 or 6
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key

	add	x7,x3,#32
	mov	w6,w5
	b.eq	Lcbc_dec		// flags still from "cmp w5,#0"

	cmp	w5,#2
	eor	v0.16b,v0.16b,v6.16b	// first block xor IV
	eor	v5.16b,v16.16b,v7.16b	// rndkey[0] xor last round key
	b.eq	Lcbc_enc128

	ld1	{v2.4s,v3.4s},[x7]	// round keys 2 and 3
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	Lenter_cbc_enc

.align	4
Loop_cbc_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store previous ciphertext block
Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	Lcbc_enc192

	aese	v0.16b,v16.16b		// two extra rounds for 256-bit keys
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// next plaintext block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b	// fold chaining xor into next round 0
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	Lcbc_done

// 128-bit keys: all round keys stay resident in registers.
.align	5
Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	Lenter_cbc_enc128
Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	Lcbc_done

// Decryption: three blocks (v0, v1, v18) per iteration, with a one- or
// two-block tail. v2, v3 and v19 keep copies of the ciphertext inputs for
// the chaining xor; v6 holds the running IV.
.align	5
Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	Lcbc_dec_tail

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// IV xor last round key
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b	// next IV = last ciphertext block
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	Loop3x_cbc_dec

	cmn	x2,#0x30		// nothing left over?
	b.eq	Lcbc_done
	nop

Lcbc_dec_tail:				// one or two remaining blocks
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// eq: exactly one block remains
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	Lcbc_done

Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

Lcbc_done:
	st1	{v6.16b},[x4]		// write chaining value back to the IV
Lcbc_abort:
	ldr	x29,[sp],#16		// x30 was never modified here
	ret

//-----------------------------------------------------------------------
// _aes_hw_ctr32_encrypt_blocks
// In:    x0 = input, x1 = output
//        x2 = number of 16-byte blocks
//        x3 = key schedule
//        x4 = 16-byte counter block; only its last 32-bit word is
//             incremented (as a big-endian value), and the block is not
//             written back
// Clobb: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags
//
// A block count of zero returns immediately. Without that guard the tail
// path below would read one input block and store two output blocks.
//-----------------------------------------------------------------------
.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern	_aes_hw_ctr32_encrypt_blocks

.align	5
_aes_hw_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	cbz	x2,Lctr32_done		// nothing to do for zero blocks
	ldr	w5,[x3,#240]		// round count

	ldr	w8, [x4, #12]		// counter word
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: do not advance input
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b	// v6 = counter block template
	rev	w10, w10
	mov	v1.s[3],w10
	b.ls	Lctr32_tail		// one or two blocks in total
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12
	b	Loop3x_ctr32

// Three counter blocks (v0, v1, v18) are encrypted per iteration.
.align	4
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b		// state moves to v4/v5/v17 so that
	aese	v1.16b,v16.16b		// v0/v1/v18 can take the next counters
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// pre-xor input with last round key
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32

	adds	x2,x2,#3		// undo bias: x2 = blocks left (0..2)
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// single block: do not advance input

Lctr32_tail:				// x2 is 1 or 2 here
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]		// same block again when x12 is 0
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done		// only one block was requested
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16		// x30 was never modified here
	ret

#endif