#if defined(__aarch64__)
#include <openssl/arm_arch.h>

// AES primitives for AArch64 built on the ARMv8 Cryptography Extension
// instructions (AESE/AESD/AESMC/AESIMC).  GNU assembler syntax, passed
// through the C preprocessor.
//
// Key schedule layout shared by every routine below: round keys are stored
// as consecutive 16-byte blocks starting at offset 0, and the 32-bit round
// count (10, 12 or 14) is stored at byte offset 240.
//
// Register usage: the code writes only x0-x14 and v0-v7/v16-v23, none of
// which are callee-saved under AAPCS64, so only x29/x30 are ever spilled.
//
// NOTE(review): __ARM_MAX_ARCH__ is presumably provided by
// <openssl/arm_arch.h> - confirm.  The C prototypes quoted in the function
// headers are not visible in this file and must be checked against the
// declaring header.

#if __ARM_MAX_ARCH__>=7
.text
#if !defined(__clang__)
.arch	armv8-a+crypto
#endif

// Constants for key expansion, addressed PC-relative with adr.
//   row 0: initial round constant 0x01 in every 32-bit lane
//   row 1: tbl index vector (rotate the last word by one byte and splat it)
//   row 2: round constant 0x1b, loaded once row 0 has been shifted out
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

//----------------------------------------------------------------------
// aes_v8_set_encrypt_key
//   In:   x0 = user key bytes (16, 24 or 32 bytes are read)
//         w1 = key size in bits; only 128, 192 and 256 are accepted
//         x2 = key schedule to fill in
//   Out:  x0 = 0 on success
//              -1 if x0 or x2 is NULL
//              -2 if w1 is below 128, above 256 or not a multiple of 64
//   On success x2 is left pointing at the round-count word (schedule+240)
//   and w12 holds the round count; aes_v8_set_decrypt_key enters through
//   .Lenc_key and depends on both.
//   Clobbers: x1, x3, x12, v0-v6, flags.
//   NOTE(review): presumably
//     int aes_v8_set_encrypt_key(const uint8_t *key, int bits, AES_KEY *ks)
//   - confirm.
//----------------------------------------------------------------------
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// error code for NULL pointers
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// error code for bad key size
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f
	b.ne	.Lenc_key_abort

	adr	x3,.Lrcon
	cmp	w1,#192			// flags consumed by the dispatch below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, used as zero key and as ext filler
	ld1	{v3.16b},[x0],#16	// first 16 key bytes
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = rotate-n-splat mask

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

// 128-bit keys: eight rounds with rcon doubling from 0x01, then two more
// rounds with rcon starting at 0x1b (0x01 shifted left eight times is 0).
.align	4
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b		// zero round key: byte substitution only
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next round constant
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// rcon = 0x1b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, at schedule+160
	add	x2,x2,#0x50		// x2 = schedule+240

	mov	w12,#10
	b	.Ldone

// 192-bit keys: each iteration emits 24 bytes of schedule.
.align	4
.L192:
	ld1	{v4.8b},[x0],#8		// key bytes 16..23
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// x2 = schedule+240
	b	.Ldone

// 256-bit keys: seven iterations, each emitting two round keys; x2 ends up
// at schedule+240 without further adjustment.
.align	4
.L256:
	ld1	{v4.16b},[x0]		// key bytes 16..31
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// flags still from subs above

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at schedule+240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// x30 was never modified, only x29 is reloaded
	ret
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

//----------------------------------------------------------------------
// aes_v8_set_decrypt_key
//   In/Out: same registers and return codes as aes_v8_set_encrypt_key.
//   Builds the encryption schedule through .Lenc_key, then converts it in
//   place: the order of the round keys is reversed and aesimc is applied to
//   every key except the first and the last.
//   Clobbers: x1-x4, x12, v0-v6, flags.
//----------------------------------------------------------------------
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort		// pass the error code through

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16
	add	x0,x2,x12,lsl#4	// end of key schedule

	ld1	{v0.4s},[x2]		// swap first and last key unchanged
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

.Loop_imc:
	ld1	{v0.4s},[x2]		// swap the next pair, moving inwards
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc

	ld1	{v0.4s},[x2]		// middle key: x0 == x2 here
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16	// bl overwrote x30, so reload both
	ret
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key

//----------------------------------------------------------------------
// aes_v8_encrypt - encrypt one 16-byte block
//   In:   x0 = input block, x1 = output block, x2 = key schedule
//   Leaf routine, no stack use.  Clobbers: x2, w3, v0-v2, flags.
//   NOTE(review): presumably
//     void aes_v8_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *ks)
//   - confirm.
//----------------------------------------------------------------------
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_enc:				// two rounds per iteration
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round has no aesmc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_encrypt,.-aes_v8_encrypt

//----------------------------------------------------------------------
// aes_v8_decrypt - decrypt one 16-byte block
//   In:   x0 = input block, x1 = output block, x2 = key schedule
//   Same structure as aes_v8_encrypt with aesd/aesimc.
//   Leaf routine, no stack use.  Clobbers: x2, w3, v0-v2, flags.
//   NOTE(review): the schedule is presumably one produced by
//   aes_v8_set_decrypt_key - confirm against callers.
//----------------------------------------------------------------------
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_dec:				// two rounds per iteration
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b		// final round has no aesimc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_decrypt,.-aes_v8_decrypt

//----------------------------------------------------------------------
// aes_v8_cbc_encrypt - CBC encrypt or decrypt
//   In:   x0 = input, x1 = output
//         x2 = length in bytes; fewer than 16 returns immediately without
//              touching output or IV, otherwise rounded down to a multiple
//              of 16
//         x3 = key schedule
//         x4 = 16-byte IV, read on entry and overwritten on exit with the
//              last ciphertext block processed
//         w5 = direction: non-zero encrypts, zero decrypts
//   Clobbers: x0-x8, x12, x14, v0-v7, v16-v23, flags.
//   NOTE(review): presumably
//     void aes_v8_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
//                             const AES_KEY *ks, uint8_t *ivec, int enc)
//   - confirm.
//----------------------------------------------------------------------
.globl	aes_v8_cbc_encrypt
.type	aes_v8_cbc_encrypt,%function
.align	5
aes_v8_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16			// x8 = input step
	b.lo	.Lcbc_abort
	csel	x8,xzr,x8,eq		// single block: do not advance the input

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]		// IV
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key

	add	x7,x3,#32
	mov	w6,w5
	b.eq	.Lcbc_dec		// flags still from cmp w5,#0

	cmp	w5,#2			// w5 = rounds-8, so 2 means 10 rounds
	eor	v0.16b,v0.16b,v6.16b
	eor	v5.16b,v16.16b,v7.16b	// rndkey[0] ^ rndkey[last]
	b.eq	.Lcbc_enc128

	ld1	{v2.4s,v3.4s},[x7]
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	.Lenter_cbc_enc

// Encryption for 12 and 14 rounds.  v0 is carried between blocks without
// the final key xor; v16 holds next plaintext ^ rndkey[0] ^ rndkey[last],
// so the first aese of the next block folds in both the chaining xor and
// rndkey[0].
.align	4
.Loop_cbc_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4			// 4 means 12 rounds
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	.Lcbc_enc192

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

.Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// last block: stop advancing the input
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	.Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

// Encryption for 10 rounds: the whole schedule stays in registers.
.align	5
.Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// last block: stop advancing the input
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

// Decryption: three blocks per iteration in v0/v1/v18.  v2/v3/v19 keep the
// ciphertext of the blocks in flight for the chaining xor, v6 is the
// running IV.
.align	5
.Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	.Lcbc_dec_tail

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

.Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// IV ^ last round key
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b	// next IV = last ciphertext of this batch
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	.Loop3x_cbc_dec		// flags still from subs x2,x2,#0x30

	cmn	x2,#0x30
	b.eq	.Lcbc_done
	nop

// One or two blocks left, held in v1/v18.
.Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// equal means exactly one block is left
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]		// write the updated IV back
.Lcbc_abort:
	ldr	x29,[sp],#16		// x30 was never modified, only x29 is reloaded
	ret
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt

//----------------------------------------------------------------------
// aes_v8_ctr32_encrypt_blocks - CTR mode with a 32-bit counter
//   In:   x0 = input, x1 = output
//         x2 = number of 16-byte blocks; zero returns immediately
//         x3 = key schedule
//         x4 = 16-byte counter block; its last 32-bit word is the counter,
//              stored big-endian (byte-swapped with rev unless __ARMEB__)
//   The counter block at x4 is only read, never written back.
//   Clobbers: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags.
//   NOTE(review): presumably
//     void aes_v8_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
//                                      size_t blocks, const AES_KEY *ks,
//                                      const uint8_t ivec[16])
//   - confirm.
//----------------------------------------------------------------------
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// Without this guard a zero block count reaches .Lctr32_tail, which
	// reads one input block and stores two output blocks.
	cbz	x2,.Lctr32_done
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]		// counter word
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// one block: do not advance the input
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b	// v6 = template for later counter blocks
	rev	w10, w10
	mov	v1.s[3],w10
	b.ls	.Lctr32_tail		// flags still from cmp x2,#2
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12
	b	.Loop3x_ctr32

// Three counter blocks per iteration in v0/v1/v18.
.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32		// flags still from subs x2,x2,#3

	adds	x2,x2,#3		// remove the bias: x2 = blocks left (0..2)
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block: do not advance the input

// One or two blocks left, counter blocks in v0/v1.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// only one block was requested
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16		// x30 was never modified, only x29 is reloaded
	ret
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif
#endif