// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// NOTE(review): because this file is generated, any lasting change (including
// comment changes) presumably has to be made in the Perl generator as well.
//
// Contents: AES routines for 32-bit ARM on Apple targets, built on the ARMv8
// AES instructions (emitted as raw .byte sequences) and NEON:
//   _aes_hw_set_encrypt_key, _aes_hw_set_decrypt_key,
//   _aes_hw_encrypt, _aes_hw_decrypt,
//   _aes_hw_cbc_encrypt, _aes_hw_ctr32_encrypt_blocks
// Every routine finds the round count at byte offset 240 of the key schedule.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__APPLE__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text


.code	32
#undef	__thumb2__
.align	5
@ Key expansion constants. The first two rows are loaded into q1 and q2 by
@ the key setup code; the third row is loaded into q1 by the 128-bit path
@ once its eight-iteration loop has finished.
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

@ _aes_hw_set_encrypt_key: expand a user key into an encryption key schedule.
@
@ In:   r0 = pointer to the user key bytes
@       r1 = key size in bits; must be 128, 192 or 256
@       r2 = pointer to the key schedule to fill in
@ Out:  r0 = 0 on success,
@            -1 if r0 or r2 is zero,
@            -2 if r1 is below 128, above 256 or not a multiple of 64
@ On success the round count (10, 12 or 14) is stored 240 bytes past the
@ entry value of r2. r2 is left pointing at that word and r12 holds the
@ round count; _aes_hw_set_decrypt_key enters at Lenc_key and relies on both.
@ Writes: r1, r3, r12, q0-q3, q8-q10 and the flags.
.globl	_aes_hw_set_encrypt_key
.private_extern	_aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func	_aes_hw_set_encrypt_key
#endif
.align	5
_aes_hw_set_encrypt_key:
Lenc_key:
	mov	r3,#-1			@ provisional result: null pointer
	cmp	r0,#0
	beq	Lenc_key_abort
	cmp	r2,#0
	beq	Lenc_key_abort
	mov	r3,#-2			@ provisional result: bad key size
	cmp	r1,#128
	blt	Lenc_key_abort
	cmp	r1,#256
	bgt	Lenc_key_abort
	tst	r1,#0x3f
	bne	Lenc_key_abort

	adr	r3,Lrcon
	cmp	r1,#192			@ flags select the path at the branches below

	veor	q0,q0,q0		@ q0 = 0, used as the all-zero operand throughout
	vld1.8	{q3},[r0]!		@ first 16 key bytes
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = first Lrcon row, q2 = rotate-n-splat mask

	blt	Loop128
	beq	L192
	b	L256

.align	4
@ 128-bit keys: eight looped rounds, then two unrolled rounds that use the
@ third Lrcon row.
Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	Loop128

	vld1.32	{q1},[r3]		@ r3 now points at the third Lrcon row

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]
	add	r2,r2,#0x50		@ advance r2 to offset 240 (the round count slot)

	mov	r12,#10
	b	Ldone

.align	4
@ 192-bit keys: eight iterations, each storing 24 bytes of schedule.
L192:
	vld1.8	{d16},[r0]!		@ remaining 8 key bytes
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ advance r2 to offset 240 (the round count slot)
	b	Ldone

.align	4
@ 256-bit keys: seven iterations; the loop is left through the beq below
@ right after the seventh store of q3, with r2 already at offset 240.
L256:
	vld1.8	{q8},[r0]		@ second 16 key bytes
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	Ldone			@ flags still come from the subs above

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	Loop256

Ldone:
	str	r12,[r2]		@ store the round count at offset 240
	mov	r3,#0

Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr


@ _aes_hw_set_decrypt_key: build a decryption key schedule.
@
@ Takes the same arguments as _aes_hw_set_encrypt_key (r0, r1, r2) and
@ returns the same codes in r0. It first runs the encryption key setup via
@ Lenc_key, then reverses the order of the round keys in place and applies
@ aesimc to every round key except the first and the last.
@ Saves and restores r4; also writes q0 and q1 on top of what Lenc_key writes.
.globl	_aes_hw_set_decrypt_key
.private_extern	_aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func	_aes_hw_set_decrypt_key
#endif
.align	5
_aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	Lenc_key

	cmp	r0,#0
	bne	Ldec_key_abort		@ propagate the error code unchanged

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ stride for walking r0 backwards
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the first and last round keys without transforming them.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ Walk inwards from both ends, swapping and transforming pairs.
Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	Loop_imc

	@ The two pointers have met: transform the middle round key in place.
	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
Ldec_key_abort:
	ldmia	sp!,{r4,pc}

@ _aes_hw_encrypt: encrypt one 16-byte block.
@
@ In:   r0 = pointer to the 16-byte input block
@       r1 = pointer to the 16-byte output block
@       r2 = pointer to the key schedule (round count at offset 240)
@ Writes: r2, r3, q0-q2 and the flags. r0 is not set to a result.
.globl	_aes_hw_encrypt
.private_extern	_aes_hw_encrypt
#ifdef __thumb2__
.thumb_func	_aes_hw_encrypt
#endif
.align	5
_aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ Two rounds per iteration, alternating between q0 and q1 as round key.
Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_enc

	@ Final two rounds; the last one has no aesmc and ends with an xor of
	@ the last round key.
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr

@ _aes_hw_decrypt: decrypt one 16-byte block.
@
@ Same register interface and structure as _aes_hw_encrypt, using
@ aesd/aesimc in place of aese/aesmc.
@ In:   r0 = pointer to the 16-byte input block
@       r1 = pointer to the 16-byte output block
@       r2 = pointer to the key schedule (round count at offset 240)
@ Writes: r2, r3, q0-q2 and the flags. r0 is not set to a result.
.globl	_aes_hw_decrypt
.private_extern	_aes_hw_decrypt
#ifdef __thumb2__
.thumb_func	_aes_hw_decrypt
#endif
.align	5
_aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr

@ _aes_hw_cbc_encrypt: CBC-mode encryption or decryption of a buffer.
@
@ In:   r0      = input pointer
@       r1      = output pointer
@       r2      = length in bytes; fewer than 16 returns without writing
@                 anything, and the length is otherwise rounded down to a
@                 multiple of 16
@       r3      = pointer to the key schedule (round count at offset 240)
@       [sp]    = pointer to the 16-byte IV, read on entry and overwritten
@                 at Lcbc_done
@       [sp,#4] = direction flag: zero selects decryption, non-zero encryption
@ Saves and restores r4-r8, lr and d8-d15. r0 is not set to a result.
@ NOTE(review): the key schedule presumably has to match the direction flag
@ (built by the set_encrypt_key or set_decrypt_key routine) -- confirm with
@ the callers.
.globl	_aes_hw_cbc_encrypt
.private_extern	_aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func	_aes_hw_cbc_encrypt
#endif
.align	5
_aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16			@ r8 = input post-increment
	blo	Lcbc_abort
	moveq	r8,#0			@ exactly one block: do not step past it

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]		@ q6 = IV
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]		@ q7 = last round key

	add	r7,r3,#32
	mov	r6,r5
	beq	Lcbc_dec		@ flags still come from the cmp r5,#0 above

	cmp	r5,#2			@ r5 is rounds-8 here: 2 means a 128-bit key
	veor	q0,q0,q6
	veor	q5,q8,q7
	beq	Lcbc_enc128

	@ 192- and 256-bit encryption. r7, r6, r12, r14 and r3 are turned into
	@ fixed pointers to round keys 1, 4, 5, 6 and 7.
	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	Lenter_cbc_enc

.align	4
Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ r5 is rounds-8 here: 4 means a 192-bit key
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ last block: stop advancing the input pointer
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ next input block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5		@ q5 = round key 0 xor last round key
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block (and next chaining value)
	bhs	Loop_cbc_enc		@ flags still come from the subs above

	vst1.8	{q6},[r1]!
	b	Lcbc_done

.align	5
@ 128-bit encryption: all round keys stay resident in registers.
Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	Lenter_cbc_enc128
Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ last block: stop advancing the input pointer
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ next input block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	Lcbc_done
.align	5
@ Decryption: three blocks per iteration in q0, q1 and q10, with a tail
@ that handles the final one or two blocks.
Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11
	mov	r7,r3			@ rewind the round key pointer
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	Loop3x_cbc_dec		@ flags still come from the subs r2 above

	cmn	r2,#0x30
	beq	Lcbc_done
	nop

Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ sets the flags for the beq Lcbc_dec_one below
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	Lcbc_done

Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV
Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}

@ _aes_hw_ctr32_encrypt_blocks: CTR-mode processing with a 32-bit counter.
@
@ In:   r0   = input pointer
@       r1   = output pointer
@       r2   = number of 16-byte blocks
@       r3   = pointer to the key schedule (round count at offset 240)
@       [sp] = pointer to the 16-byte counter block; the 32-bit word at
@              byte offset 12 is the counter, byte-reversed on builds
@              without __ARMEB__ before it is incremented
@ The counter block is only read; this routine does not write it back.
@ Saves and restores r4-r10, lr and d8-d15. r0 is not set to a result.
@ NOTE(review): behaviour for a block count of zero is not obvious from the
@ code -- confirm that callers never pass zero.
.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern	_aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func	_aes_hw_ctr32_encrypt_blocks
#endif
.align	5
_aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]		@ r8 = counter word
	vld1.32	{q0},[r4]		@ q0 = whole counter block

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16			@ r12 = input stride used by the tail code
	cmp	r2,#2
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]		@ q7 = last round key
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ fewer than two blocks: tail re-reads the same block

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The code
	@ could write to q1 and q10 directly, but that trips this bug.
	@ We write to q6 and copy to the final register as a workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d13[1],r10
	add	r8, r8, #2
	vorr	q1,q6,q6		@ q1 = counter block + 1
	bls	Lctr32_tail		@ flags still come from the cmp r2,#2 above
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6		@ q10 = counter block + 2
	b	Loop3x_ctr32

.align	4
@ Main loop: three counter blocks (q0, q1, q10) per iteration.
Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Loop3x_ctr32

	@ From here the cipher state moves to q4, q5 and q9 so that q0, q1
	@ and q10 can be refilled with the next three counter values.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3			@ rewind the round key pointer
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	@ Note the logic to update q0, q1, and q10 is written to work
	@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	@ 32-bit mode. See the comment above.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	Loop3x_ctr32		@ flags still come from the subs r2 above

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0, 1 or 2)
	beq	Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: tail re-reads the same block

@ Tail: one or two remaining blocks, processed in q0 and q1.
Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	Lctr32_done		@ only one block was left: skip the second store
	vst1.8	{q3},[r1]

Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}

#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__APPLE__)