// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// Syntax: GNU as, ARM (A32) mode, run through the C preprocessor.
// Comments use "//" outside the assembly proper and "@" inside it.
// The AES instructions are emitted as raw .byte sequences; the intended
// mnemonic is given in the trailing comment on each such line.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM) && defined(__ARMEL__) && defined(__ELF__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__

@ Key-expansion constants, loaded as three 16-byte rows:
@   row 0: 0x01 splat, the starting round constant (doubled each round
@          with vshl.u8 in the expansion loops)
@   row 1: byte-permutation mask for vtbl.8 (rotate-n-splat)
@   row 2: 0x1b splat, the round constant reloaded by the 128-bit path
@          after its eight looped rounds
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

@-----------------------------------------------------------------------
@ aes_hw_set_encrypt_key
@ In:    r0 = pointer to the user key bytes (must be non-NULL)
@        r1 = key length in bits (128, 192 or 256)
@        r2 = pointer to the key schedule to fill (must be non-NULL)
@ Out:   r0 =  0 on success
@             -1 if r0 or r2 is NULL
@             -2 if r1 is outside 128..256 or not a multiple of 64
@ Notes: On success the round count (10, 12 or 14) is stored at byte
@        offset 240 of the schedule, and r2 is left pointing at that
@        offset with r12 holding the round count; aes_hw_set_decrypt_key
@        relies on both when it enters through .Lenc_key.
@ Clobb: r1, r3, r12, q0-q3, q8-q10, flags
@-----------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1			@ provisional result: NULL argument
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2			@ provisional result: bad bit length
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags select the 128/192/256 path below

	veor	q0,q0,q0		@ q0 = 0, used as zero round key and shift-in
	vld1.8	{q3},[r0]!
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = rcon, q2 = rotate-n-splat mask

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ switch to the 0x1b round constant

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]
	add	r2,r2,#0x50		@ advance r2 to schedule offset 240

	mov	r12,#10
	b	.Ldone

.align	4
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ advance r2 to schedule offset 240
	b	.Ldone

.align	4
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ flags still from the subs above

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule offset 240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

@-----------------------------------------------------------------------
@ aes_hw_set_decrypt_key
@ In:    r0, r1, r2 as for aes_hw_set_encrypt_key
@ Out:   r0 = 0 on success, otherwise the error code returned by the
@        encryption key setup (-1 or -2)
@ Notes: Builds the encryption schedule through .Lenc_key, then swaps
@        the round keys end-for-end in place. The first and last keys
@        are swapped as they are; every key in between is passed
@        through aesimc.
@ Clobb: everything .Lenc_key clobbers; r4 and lr are saved/restored
@-----------------------------------------------------------------------
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ post-index step for the descending pointer
	add	r0,r2,r12,lsl#4	@ end of key schedule

	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc		@ until the two pointers meet in the middle

	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key

@-----------------------------------------------------------------------
@ aes_hw_encrypt
@ In:    r0 = pointer to one 16-byte input block
@        r1 = pointer to the 16-byte output block
@        r2 = pointer to the key schedule (round count at offset 240)
@ Out:   the encrypted block is stored at r1
@ Clobb: r2, r3, q0-q2, flags
@-----------------------------------------------------------------------
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0		@ xor in the final round key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt

@-----------------------------------------------------------------------
@ aes_hw_decrypt
@ In:    r0 = pointer to one 16-byte input block
@        r1 = pointer to the 16-byte output block
@        r2 = pointer to the key schedule (round count at offset 240)
@ Out:   the decrypted block is stored at r1
@ Clobb: r2, r3, q0-q2, flags
@-----------------------------------------------------------------------
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0		@ xor in the final round key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt

@-----------------------------------------------------------------------
@ aes_hw_cbc_encrypt
@ In:    r0      = input pointer
@        r1      = output pointer
@        r2      = length in bytes; fewer than 16 returns immediately
@                  without touching output or IV, and a trailing partial
@                  block is dropped (length is masked with -16)
@        r3      = pointer to the key schedule (round count at offset 240)
@        [sp]    = pointer to the 16-byte IV (read on entry, rewritten
@                  with the next chaining value on exit)
@        [sp,4]  = direction flag: zero decrypts, non-zero encrypts
@ Regs:  q6 = chaining value, q7 = last round key,
@        r8 = input step (16, or 0 once the final block has been read)
@ Saves: r4-r8, lr and d8-d15 are pushed on entry and restored on exit
@-----------------------------------------------------------------------
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	moveq	r8,#0

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from the cmp r5,#0 above

	cmp	r5,#2
	veor	q0,q0,q6
	veor	q5,q8,q7
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec

	cmn	r2,#0x30
	beq	.Lcbc_done
	nop

.Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one		@ flags still from the cmn r2,#0x20 above
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt

@-----------------------------------------------------------------------
@ aes_hw_ctr32_encrypt_blocks
@ In:    r0    = input pointer
@        r1    = output pointer
@        r2    = number of 16-byte blocks
@        r3    = pointer to the key schedule (round count at offset 240)
@        [sp]  = pointer to the 16-byte counter block; the 32-bit word
@                at byte offset 12 is the counter and is byte-reversed
@                before being incremented on little-endian builds
@ Notes: The counter block is only read here, never stored back.
@ Regs:  r8 = counter in host byte order, q6 = staging register for
@        counter updates, q7 = last round key
@ Saves: r4-r10, lr and d8-d15 are pushed on entry and restored on exit
@-----------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The vmov.32
	@ instructions could write to q1 and q10 directly, but that trips
	@ these bugs. We write to q6 and copy to the final register as a
	@ workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d13[1],r10
	add	r8, r8, #2
	vorr	q1,q6,q6
	bls	.Lctr32_tail		@ flags still from the cmp r2,#2 above
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	@ Note the logic to update q0, q1, and q10 is written to work
	@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	@ 32-bit mode. See the errata comment at the top of this function.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks still to do
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: do not step the input pointer

.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ only one block was left: skip the second store
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif  // !OPENSSL_NO_ASM && defined(__ARMEL__) && defined(__ELF__)
#if defined(__ELF__)
// See https://www.airs.com/blog/archives/518.
.section .note.GNU-stack,"",%progbits
#endif