// AES single-block, CBC and CTR32 primitives for AArch64 built on the ARMv8
// AES instructions (aese/aesmc/aesd/aesimc).
//
// Conventions visible in this file:
//   - Exported symbols carry a leading underscore and are .private_extern;
//     local labels start with "L".
//   - A key schedule is an array of 16-byte round keys followed by the
//     32-bit round count stored at byte offset 240.
//   - Only x0-x14 and v0-v7/v16-v23 are written; the 16-byte stack frames
//     hold x29/x30 and nothing else.
//   - Argument meanings below are inferred from how each register is used
//     here; confirm the exact C prototypes against the declaring header.
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

// Key-expansion constants, three 16-byte rows:
//   row 0: initial round constant 0x01 in every 32-bit lane
//   row 1: tbl index mask that rotates the last key word and splats it
//   row 2: round constant 0x1b, loaded once row 0 has been doubled 8 times
.align  5
Lrcon:
.long   0x01,0x01,0x01,0x01
.long   0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d     // rotate-n-splat
.long   0x1b,0x1b,0x1b,0x1b

.globl  _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key

// _aes_hw_set_encrypt_key: expand a user key into an encryption schedule.
//   In:    x0 = user key bytes, w1 = key size in bits, x2 = key schedule out
//   Out:   x0 =  0 on success
//               -1 if x0 or x2 is NULL
//               -2 if bits < 128, bits > 256 or bits is not a multiple of 64
//   Also:  on success x2 = schedule + 240 and w12 = round count (10/12/14);
//          _aes_hw_set_decrypt_key enters at Lenc_key and relies on both.
//   Clobb: x1, x3, x12, v0-v6, flags; x0 is advanced past the key bytes read.
.align  5
_aes_hw_set_encrypt_key:
Lenc_key:
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        mov     x3,#-1                  // x3 carries the return value
        cmp     x0,#0
        b.eq    Lenc_key_abort
        cmp     x2,#0
        b.eq    Lenc_key_abort
        mov     x3,#-2
        cmp     w1,#128
        b.lt    Lenc_key_abort
        cmp     w1,#256
        b.gt    Lenc_key_abort
        tst     w1,#0x3f
        b.ne    Lenc_key_abort

        adr     x3,Lrcon
        cmp     w1,#192                 // flags are consumed by the three
                                        // branches below; nothing in between
                                        // writes them

        eor     v0.16b,v0.16b,v0.16b    // v0 = 0, used as zero round key and
                                        // as shift-in for ext
        ld1     {v3.16b},[x0],#16       // first 128 bits of the user key
        mov     w1,#8                   // reuse w1
        ld1     {v1.4s,v2.4s},[x3],#32  // v1 = rcon, v2 = rotate-n-splat mask;
                                        // x3 now points at the 0x1b row

        b.lt    Loop128
        b.eq    L192
        b       L256

// 128-bit key: eight iterations with rcon 0x01..0x80, then two unrolled
// rounds using 0x1b and 0x36. Each iteration stores the current round key
// and derives the next one.
.align  4
Loop128:
        tbl     v6.16b,{v3.16b},v2.16b  // rotate last word, splat to all lanes
        ext     v5.16b,v0.16b,v3.16b,#12        // v5 = v3 shifted up one word
        st1     {v3.4s},[x2],#16
        aese    v6.16b,v0.16b           // S-box substitution via aese with a
                                        // zero round key
        subs    w1,w1,#1

        eor     v3.16b,v3.16b,v5.16b    // three shift+xor steps build the
        ext     v5.16b,v0.16b,v5.16b,#12        // running xor of the key words
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b    // fold in the round constant
        eor     v3.16b,v3.16b,v5.16b
        shl     v1.16b,v1.16b,#1        // next round constant
        eor     v3.16b,v3.16b,v6.16b
        b.ne    Loop128

        ld1     {v1.4s},[x3]            // round constant 0x1b

        tbl     v6.16b,{v3.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v3.4s},[x2],#16
        aese    v6.16b,v0.16b

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b
        eor     v3.16b,v3.16b,v5.16b
        shl     v1.16b,v1.16b,#1        // 0x1b -> 0x36
        eor     v3.16b,v3.16b,v6.16b

        tbl     v6.16b,{v3.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v3.4s},[x2],#16
        aese    v6.16b,v0.16b

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b
        eor     v3.16b,v3.16b,v5.16b
        eor     v3.16b,v3.16b,v6.16b
        st1     {v3.4s},[x2]            // last round key, at offset 160
        add     x2,x2,#0x50             // 160 + 0x50 = 240, the rounds field

        mov     w12,#10
        b       Ldone

// 192-bit key: v3 holds key words 0-3, the low half of v4 holds words 4-5.
// Each of the eight iterations emits 24 bytes of schedule.
.align  4
L192:
        ld1     {v4.8b},[x0],#8
        movi    v6.16b,#8                       // borrow v6.16b
        st1     {v3.4s},[x2],#16
        sub     v2.16b,v2.16b,v6.16b    // adjust the mask

Loop192:
        tbl     v6.16b,{v4.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v4.8b},[x2],#8
        aese    v6.16b,v0.16b
        subs    w1,w1,#1

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b

        dup     v5.4s,v3.s[3]
        eor     v5.16b,v5.16b,v4.16b
        eor     v6.16b,v6.16b,v1.16b
        ext     v4.16b,v0.16b,v4.16b,#12
        shl     v1.16b,v1.16b,#1
        eor     v4.16b,v4.16b,v5.16b
        eor     v3.16b,v3.16b,v6.16b
        eor     v4.16b,v4.16b,v6.16b
        st1     {v3.4s},[x2],#16
        b.ne    Loop192

        mov     w12,#12
        add     x2,x2,#0x20             // 16 + 8*24 + 0x20 = 240
        b       Ldone

// 256-bit key: v3/v4 hold the two key halves. Seven iterations; the last
// one leaves through b.eq Ldone with x2 already at offset 240.
.align  4
L256:
        ld1     {v4.16b},[x0]
        mov     w1,#7
        mov     w12,#14
        st1     {v3.4s},[x2],#16

Loop256:
        tbl     v6.16b,{v4.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v4.4s},[x2],#16
        aese    v6.16b,v0.16b
        subs    w1,w1,#1                // flags survive until b.eq below

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b
        eor     v3.16b,v3.16b,v5.16b
        shl     v1.16b,v1.16b,#1
        eor     v3.16b,v3.16b,v6.16b
        st1     {v3.4s},[x2],#16
        b.eq    Ldone

        dup     v6.4s,v3.s[3]           // just splat
        ext     v5.16b,v0.16b,v4.16b,#12
        aese    v6.16b,v0.16b

        eor     v4.16b,v4.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v4.16b,v4.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v4.16b,v4.16b,v5.16b

        eor     v4.16b,v4.16b,v6.16b
        b       Loop256

Ldone:
        str     w12,[x2]                // round count at schedule + 240
        mov     x3,#0

Lenc_key_abort:
        mov     x0,x3                   // return value
        ldr     x29,[sp],#16            // x30 is never written on this path,
                                        // so only x29 is reloaded
        ret


.globl  _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key

// _aes_hw_set_decrypt_key: build a decryption schedule.
//   In:    x0 = user key bytes, w1 = key size in bits, x2 = key schedule out
//   Out:   x0 = 0 on success, otherwise the error code from Lenc_key
//   Runs the encryption expansion, then reverses the order of the round keys
//   in place and applies aesimc to every key except the first and the last.
//   Clobb: everything Lenc_key clobbers, plus x4 and flags.
.align  5
_aes_hw_set_decrypt_key:
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        bl      Lenc_key

        cmp     x0,#0
        b.ne    Ldec_key_abort

        sub     x2,x2,#240              // restore original x2
        mov     x4,#-16                 // post-index step for the high pointer
        add     x0,x2,x12,lsl#4 // end of key schedule

        ld1     {v0.4s},[x2]            // swap first and last round keys
        ld1     {v1.4s},[x0]            // unchanged
        st1     {v0.4s},[x0],x4
        st1     {v1.4s},[x2],#16

Loop_imc:
        ld1     {v0.4s},[x2]            // swap the next outer pair, applying
        ld1     {v1.4s},[x0]            // aesimc to both
        aesimc  v0.16b,v0.16b
        aesimc  v1.16b,v1.16b
        st1     {v0.4s},[x0],x4
        st1     {v1.4s},[x2],#16
        cmp     x0,x2
        b.hi    Loop_imc                // until the two pointers meet

        ld1     {v0.4s},[x2]            // middle round key, transformed in
        aesimc  v0.16b,v0.16b           // place
        st1     {v0.4s},[x0]

        eor     x0,x0,x0                // return value
Ldec_key_abort:
        ldp     x29,x30,[sp],#16
        ret

.globl  _aes_hw_encrypt
.private_extern _aes_hw_encrypt

// _aes_hw_encrypt: encrypt one 16-byte block.
//   In:    x0 = input block, x1 = output block, x2 = key schedule
//   Two rounds per loop iteration with the round keys alternating between
//   v0 and v1; the final round omits aesmc and xors in the last round key.
//   Clobb: x2, w3, v0-v2, flags. Leaf routine, no stack use.
.align  5
_aes_hw_encrypt:
        ldr     w3,[x2,#240]            // round count
        ld1     {v0.4s},[x2],#16
        ld1     {v2.16b},[x0]
        sub     w3,w3,#2
        ld1     {v1.4s},[x2],#16

Loop_enc:
        aese    v2.16b,v0.16b
        aesmc   v2.16b,v2.16b
        ld1     {v0.4s},[x2],#16
        subs    w3,w3,#2
        aese    v2.16b,v1.16b
        aesmc   v2.16b,v2.16b
        ld1     {v1.4s},[x2],#16
        b.gt    Loop_enc

        aese    v2.16b,v0.16b
        aesmc   v2.16b,v2.16b
        ld1     {v0.4s},[x2]            // last round key
        aese    v2.16b,v1.16b
        eor     v2.16b,v2.16b,v0.16b

        st1     {v2.16b},[x1]
        ret

.globl  _aes_hw_decrypt
.private_extern _aes_hw_decrypt

// _aes_hw_decrypt: decrypt one 16-byte block.
//   In:    x0 = input block, x1 = output block, x2 = key schedule
//   Same structure as _aes_hw_encrypt with aesd/aesimc.
//   Clobb: x2, w3, v0-v2, flags. Leaf routine, no stack use.
.align  5
_aes_hw_decrypt:
        ldr     w3,[x2,#240]            // round count
        ld1     {v0.4s},[x2],#16
        ld1     {v2.16b},[x0]
        sub     w3,w3,#2
        ld1     {v1.4s},[x2],#16

Loop_dec:
        aesd    v2.16b,v0.16b
        aesimc  v2.16b,v2.16b
        ld1     {v0.4s},[x2],#16
        subs    w3,w3,#2
        aesd    v2.16b,v1.16b
        aesimc  v2.16b,v2.16b
        ld1     {v1.4s},[x2],#16
        b.gt    Loop_dec

        aesd    v2.16b,v0.16b
        aesimc  v2.16b,v2.16b
        ld1     {v0.4s},[x2]            // last round key
        aesd    v2.16b,v1.16b
        eor     v2.16b,v2.16b,v0.16b

        st1     {v2.16b},[x1]
        ret

.globl  _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt

// _aes_hw_cbc_encrypt: CBC encrypt or decrypt.
//   In:    x0 = input, x1 = output, x2 = length in bytes, x3 = key schedule,
//          x4 = 16-byte IV (read, then overwritten with the chaining value),
//          w5 = nonzero to encrypt, zero to decrypt
//   A length below 16 returns immediately without touching output or IV;
//   otherwise the length is rounded down to a multiple of 16.
//   Register roles:
//          v16/v17      rolling round keys, v7 = last round key
//          v18-v23      the six round keys before the last (encrypt);
//                       v18/v19 are reused for data when decrypting
//          x8           input post-index step: 16, or 0 once the final block
//                       has been fetched so the look-ahead load stays in bounds
//   Clobb: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23, flags.
.align  5
_aes_hw_cbc_encrypt:
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        subs    x2,x2,#16
        mov     x8,#16
        b.lo    Lcbc_abort
        csel    x8,xzr,x8,eq

        cmp     w5,#0                   // en- or decrypting?
                                        // (flags are consumed by b.eq Lcbc_dec;
                                        // nothing in between writes them)
        ldr     w5,[x3,#240]
        and     x2,x2,#-16
        ld1     {v6.16b},[x4]
        ld1     {v0.16b},[x0],x8

        ld1     {v16.4s,v17.4s},[x3]            // load key schedule...
        sub     w5,w5,#6
        add     x7,x3,x5,lsl#4  // pointer to last 7 round keys
        sub     w5,w5,#2
        ld1     {v18.4s,v19.4s},[x7],#32
        ld1     {v20.4s,v21.4s},[x7],#32
        ld1     {v22.4s,v23.4s},[x7],#32
        ld1     {v7.4s},[x7]

        add     x7,x3,#32
        mov     w6,w5
        b.eq    Lcbc_dec

        cmp     w5,#2                   // w5 = rounds - 8: 2 means 10 rounds
        eor     v0.16b,v0.16b,v6.16b    // first plaintext block ^ IV
        eor     v5.16b,v16.16b,v7.16b   // rndkey[0] ^ rndkey[last]: xoring it
                                        // into the next plaintext lets one
                                        // aese apply the last-key xor, the CBC
                                        // chaining xor and round 0 together
        b.eq    Lcbc_enc128

        ld1     {v2.4s,v3.4s},[x7]      // rndkey[2], rndkey[3]
        add     x7,x3,#16               // x7 -> rndkey[1]
        add     x6,x3,#16*4             // x6 -> rndkey[4]
        add     x12,x3,#16*5            // x12 -> rndkey[5]
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        add     x14,x3,#16*6            // x14 -> rndkey[6]
        add     x3,x3,#16*7             // x3 -> rndkey[7]
        b       Lenter_cbc_enc

.align  4
Loop_cbc_enc:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        st1     {v6.16b},[x1],#16       // previous ciphertext block
Lenter_cbc_enc:
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v2.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.4s},[x6]
        cmp     w5,#4                   // rounds - 8 == 4 means 12 rounds
        aese    v0.16b,v3.16b
        aesmc   v0.16b,v0.16b
        ld1     {v17.4s},[x12]
        b.eq    Lcbc_enc192

        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.4s},[x14]
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        ld1     {v17.4s},[x3]
        nop

Lcbc_enc192:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        subs    x2,x2,#16
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        csel    x8,xzr,x8,eq            // stop advancing x0 on the last block
        aese    v0.16b,v18.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v19.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.16b},[x0],x8       // look-ahead: next plaintext block
        aese    v0.16b,v20.16b
        aesmc   v0.16b,v0.16b
        eor     v16.16b,v16.16b,v5.16b
        aese    v0.16b,v21.16b
        aesmc   v0.16b,v0.16b
        ld1     {v17.4s},[x7]           // re-pre-load rndkey[1]
        aese    v0.16b,v22.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v23.16b
        eor     v6.16b,v0.16b,v7.16b    // v6 = ciphertext block
        b.hs    Loop_cbc_enc

        st1     {v6.16b},[x1],#16
        b       Lcbc_done

// Encrypt path for 10 rounds: all round keys stay resident in
// v16, v17, v2, v3, v18-v23 and v7.
.align  5
Lcbc_enc128:
        ld1     {v2.4s,v3.4s},[x7]
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        b       Lenter_cbc_enc128
Loop_cbc_enc128:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        st1     {v6.16b},[x1],#16
Lenter_cbc_enc128:
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        subs    x2,x2,#16
        aese    v0.16b,v2.16b
        aesmc   v0.16b,v0.16b
        csel    x8,xzr,x8,eq
        aese    v0.16b,v3.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v18.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v19.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.16b},[x0],x8
        aese    v0.16b,v20.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v21.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v22.16b
        aesmc   v0.16b,v0.16b
        eor     v16.16b,v16.16b,v5.16b
        aese    v0.16b,v23.16b
        eor     v6.16b,v0.16b,v7.16b
        b.hs    Loop_cbc_enc128

        st1     {v6.16b},[x1],#16
        b       Lcbc_done

// Decrypt path: three blocks per pass in v0, v1 and v18, with untouched
// ciphertext copies kept in v2, v3 and v19 for chaining. One or two leftover
// blocks are handled at Lcbc_dec_tail.
.align  5
Lcbc_dec:
        ld1     {v18.16b},[x0],#16
        subs    x2,x2,#32               // bias
        add     w6,w5,#2
        orr     v3.16b,v0.16b,v0.16b
        orr     v1.16b,v0.16b,v0.16b
        orr     v19.16b,v18.16b,v18.16b
        b.lo    Lcbc_dec_tail

        orr     v1.16b,v18.16b,v18.16b
        ld1     {v18.16b},[x0],#16
        orr     v2.16b,v0.16b,v0.16b
        orr     v3.16b,v1.16b,v1.16b
        orr     v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
        aesd    v0.16b,v16.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aesd    v0.16b,v17.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        ld1     {v17.4s},[x7],#16
        b.gt    Loop3x_cbc_dec

        aesd    v0.16b,v16.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        eor     v4.16b,v6.16b,v7.16b    // chaining value ^ last round key
        subs    x2,x2,#0x30
        eor     v5.16b,v2.16b,v7.16b
        csel    x6,x2,x6,lo                     // x6, w6, is zero at this point
        aesd    v0.16b,v17.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        eor     v17.16b,v3.16b,v7.16b
        add     x0,x0,x6                // x0 is adjusted in such way that
                                        // at exit from the loop v1.16b-v18.16b
                                        // are loaded with last "words"
        orr     v6.16b,v19.16b,v19.16b  // next chaining value
        mov     x7,x3
        aesd    v0.16b,v20.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v20.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v20.16b
        aesimc  v18.16b,v18.16b
        ld1     {v2.16b},[x0],#16
        aesd    v0.16b,v21.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v21.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v21.16b
        aesimc  v18.16b,v18.16b
        ld1     {v3.16b},[x0],#16
        aesd    v0.16b,v22.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v22.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v22.16b
        aesimc  v18.16b,v18.16b
        ld1     {v19.16b},[x0],#16
        aesd    v0.16b,v23.16b
        aesd    v1.16b,v23.16b
        aesd    v18.16b,v23.16b
        ld1     {v16.4s},[x7],#16       // re-pre-load rndkey[0]
        add     w6,w5,#2
        eor     v4.16b,v4.16b,v0.16b
        eor     v5.16b,v5.16b,v1.16b
        eor     v18.16b,v18.16b,v17.16b
        ld1     {v17.4s},[x7],#16       // re-pre-load rndkey[1]
        st1     {v4.16b},[x1],#16
        orr     v0.16b,v2.16b,v2.16b
        st1     {v5.16b},[x1],#16
        orr     v1.16b,v3.16b,v3.16b
        st1     {v18.16b},[x1],#16
        orr     v18.16b,v19.16b,v19.16b
        b.hs    Loop3x_cbc_dec          // flags still from subs x2 above

        cmn     x2,#0x30                // x2 == -0x30: nothing left over
        b.eq    Lcbc_done
        nop

Lcbc_dec_tail:
        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        ld1     {v17.4s},[x7],#16
        b.gt    Lcbc_dec_tail

        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        aesd    v1.16b,v20.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v20.16b
        aesimc  v18.16b,v18.16b
        cmn     x2,#0x20                // x2 == -0x20: exactly one block left
        aesd    v1.16b,v21.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v21.16b
        aesimc  v18.16b,v18.16b
        eor     v5.16b,v6.16b,v7.16b
        aesd    v1.16b,v22.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v22.16b
        aesimc  v18.16b,v18.16b
        eor     v17.16b,v3.16b,v7.16b
        aesd    v1.16b,v23.16b
        aesd    v18.16b,v23.16b
        b.eq    Lcbc_dec_one
        eor     v5.16b,v5.16b,v1.16b
        eor     v17.16b,v17.16b,v18.16b
        orr     v6.16b,v19.16b,v19.16b
        st1     {v5.16b},[x1],#16
        st1     {v17.16b},[x1],#16
        b       Lcbc_done

Lcbc_dec_one:
        eor     v5.16b,v5.16b,v18.16b
        orr     v6.16b,v19.16b,v19.16b
        st1     {v5.16b},[x1],#16

Lcbc_done:
        st1     {v6.16b},[x4]           // write the chaining value back
Lcbc_abort:
        ldr     x29,[sp],#16            // no calls are made here, so x30 is
                                        // intact and only x29 is reloaded
        ret

.globl  _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks

// _aes_hw_ctr32_encrypt_blocks: CTR mode with a 32-bit counter.
//   In:    x0 = input, x1 = output, x2 = number of 16-byte blocks,
//          x3 = key schedule, x4 = 16-byte counter block
//   The counter is the last 32-bit word of the block at x4, kept big-endian
//   in memory; it is incremented in w8 and wraps modulo 2^32. The block at
//   x4 is not written back.
//   A block count of zero returns without reading or writing any data.
//   Register roles:
//          v6           copy of the initial counter block, used as template
//          v0/v1/v18    counter blocks being encrypted (three per pass)
//          v16/v17      rolling round keys, v20-v23 and v7 the last five
//          x12          post-index step for the tail input load (16, or 0
//                       when only one block is left); w12 doubles as a
//                       scratch register inside the 3x loop
//   Clobb: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags.
.align  5
_aes_hw_ctr32_encrypt_blocks:
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        cbz     x2,Lctr32_done          // zero blocks: without this guard the
                                        // tail path below would still load one
                                        // block and store two
        ldr     w5,[x3,#240]

        ldr     w8, [x4, #12]
        ld1     {v0.4s},[x4]

        ld1     {v16.4s,v17.4s},[x3]            // load key schedule...
        sub     w5,w5,#4
        mov     x12,#16
        cmp     x2,#2                   // flags feed csel and b.ls below
        add     x7,x3,x5,lsl#4  // pointer to last 5 round keys
        sub     w5,w5,#2
        ld1     {v20.4s,v21.4s},[x7],#32
        ld1     {v22.4s,v23.4s},[x7],#32
        ld1     {v7.4s},[x7]
        add     x7,x3,#32
        mov     w6,w5
        csel    x12,xzr,x12,lo
#ifndef __ARMEB__
        rev     w8, w8
#endif
        orr     v1.16b,v0.16b,v0.16b
        add     w10, w8, #1
        orr     v18.16b,v0.16b,v0.16b
        add     w8, w8, #2
        orr     v6.16b,v0.16b,v0.16b
        rev     w10, w10
        mov     v1.s[3],w10
        b.ls    Lctr32_tail             // one or two blocks only
        rev     w12, w8
        sub     x2,x2,#3                // bias
        mov     v18.s[3],w12
        b       Loop3x_ctr32

.align  4
Loop3x_ctr32:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v1.16b,v1.16b
        aese    v18.16b,v16.16b
        aesmc   v18.16b,v18.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v17.16b
        aesmc   v1.16b,v1.16b
        aese    v18.16b,v17.16b
        aesmc   v18.16b,v18.16b
        ld1     {v17.4s},[x7],#16
        b.gt    Loop3x_ctr32

        // Final rounds continue in v4, v5 and v17 while v0, v1 and v18 are
        // rebuilt from v6 with the next three counter values.
        aese    v0.16b,v16.16b
        aesmc   v4.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v5.16b,v1.16b
        ld1     {v2.16b},[x0],#16
        orr     v0.16b,v6.16b,v6.16b
        aese    v18.16b,v16.16b
        aesmc   v18.16b,v18.16b
        ld1     {v3.16b},[x0],#16
        orr     v1.16b,v6.16b,v6.16b
        aese    v4.16b,v17.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v17.16b
        aesmc   v5.16b,v5.16b
        ld1     {v19.16b},[x0],#16
        mov     x7,x3
        aese    v18.16b,v17.16b
        aesmc   v17.16b,v18.16b
        orr     v18.16b,v6.16b,v6.16b
        add     w9,w8,#1
        aese    v4.16b,v20.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v20.16b
        aesmc   v5.16b,v5.16b
        eor     v2.16b,v2.16b,v7.16b    // input ^ last round key
        add     w10,w8,#2
        aese    v17.16b,v20.16b
        aesmc   v17.16b,v17.16b
        eor     v3.16b,v3.16b,v7.16b
        add     w8,w8,#3
        aese    v4.16b,v21.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v21.16b
        aesmc   v5.16b,v5.16b
        eor     v19.16b,v19.16b,v7.16b
        rev     w9,w9
        aese    v17.16b,v21.16b
        aesmc   v17.16b,v17.16b
        mov     v0.s[3], w9
        rev     w10,w10
        aese    v4.16b,v22.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v22.16b
        aesmc   v5.16b,v5.16b
        mov     v1.s[3], w10
        rev     w12,w8
        aese    v17.16b,v22.16b
        aesmc   v17.16b,v17.16b
        mov     v18.s[3], w12
        subs    x2,x2,#3
        aese    v4.16b,v23.16b
        aese    v5.16b,v23.16b
        aese    v17.16b,v23.16b

        eor     v2.16b,v2.16b,v4.16b
        ld1     {v16.4s},[x7],#16       // re-pre-load rndkey[0]
        st1     {v2.16b},[x1],#16
        eor     v3.16b,v3.16b,v5.16b
        mov     w6,w5
        st1     {v3.16b},[x1],#16
        eor     v19.16b,v19.16b,v17.16b
        ld1     {v17.4s},[x7],#16       // re-pre-load rndkey[1]
        st1     {v19.16b},[x1],#16
        b.hs    Loop3x_ctr32            // flags still from subs x2 above

        adds    x2,x2,#3                // undo the bias: blocks left (0-2)
        b.eq    Lctr32_done
        cmp     x2,#1
        mov     x12,#16                 // w12 was used as scratch above
        csel    x12,xzr,x12,eq

// One or two remaining blocks. Two counter blocks are always encrypted; with
// a single block left x12 is 0, so the second load re-reads the same input
// and its result is not stored.
Lctr32_tail:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v1.16b,v1.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v17.16b
        aesmc   v1.16b,v1.16b
        ld1     {v17.4s},[x7],#16
        b.gt    Lctr32_tail

        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v1.16b,v1.16b
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v17.16b
        aesmc   v1.16b,v1.16b
        ld1     {v2.16b},[x0],x12
        aese    v0.16b,v20.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v20.16b
        aesmc   v1.16b,v1.16b
        ld1     {v3.16b},[x0]
        aese    v0.16b,v21.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v21.16b
        aesmc   v1.16b,v1.16b
        eor     v2.16b,v2.16b,v7.16b
        aese    v0.16b,v22.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v22.16b
        aesmc   v1.16b,v1.16b
        eor     v3.16b,v3.16b,v7.16b
        aese    v0.16b,v23.16b
        aese    v1.16b,v23.16b

        cmp     x2,#1
        eor     v2.16b,v2.16b,v0.16b
        eor     v3.16b,v3.16b,v1.16b
        st1     {v2.16b},[x1],#16
        b.eq    Lctr32_done
        st1     {v3.16b},[x1]

Lctr32_done:
        ldr     x29,[sp],#16            // no calls are made here, so x30 is
                                        // intact and only x29 is reloaded
        ret

#endif