// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// 32-bit ARM (A32) AES routines built on the ARMv8 AES instructions and NEON.
// The AES instructions are emitted as raw .byte encodings, with the intended
// mnemonic given in the trailing comment on each such line, because the file
// declares ".arch armv7-a".

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__
.align	5
@ Constants for the key expansion, kept in .text so that adr can reach them:
@ the initial round constant, the byte-shuffle mask used with vtbl, and the
@ round constant 0x1b that the 128-bit path loads after its eight-pass loop.
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

@ aes_hw_set_encrypt_key: expand a 128-, 192- or 256-bit AES key into a
@ round-key schedule.
@ In:   r0 = user key bytes, r1 = key length in bits, r2 = output schedule
@ Out:  r0 = 0 on success, -1 if r0 or r2 is NULL, -2 if r1 is not one of
@       128, 192 or 256
@ On success the round count (10, 12 or 14) is stored 240 bytes past the
@ start of the schedule. At .Ldone r2 points at that word and r12 holds the
@ round count; aes_hw_set_decrypt_key relies on both.
@ Clobbers r1, r3, r12, q0-q3, q8-q10 and the flags; r0 is used as a cursor
@ before it receives the return value.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags select the path at the branches below

	veor	q0,q0,q0
	vld1.8	{q3},[r0]!
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
	@ 128-bit key: eight passes with the round constant shifted left by one
	@ each time, then two unrolled passes starting from the 0x1b constant.
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]
	add	r2,r2,#0x50		@ r2 = start of schedule + 240

	mov	r12,#10
	b	.Ldone

.align	4
	@ 192-bit key: eight passes, each storing 24 bytes of schedule.
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = start of schedule + 240
	b	.Ldone

.align	4
	@ 256-bit key: seven passes; the loop is left through the beq in the
	@ middle of the body once the last round key has been stored.
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count, 240 bytes into the schedule
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

@ aes_hw_set_decrypt_key: build the schedule used by the decryption routines.
@ Takes the same arguments and returns the same values as
@ aes_hw_set_encrypt_key. It runs the encryption key expansion through
@ .Lenc_key, then reverses the order of the round keys in place and applies
@ aesimc to every round key except the first and the last.
@ r4 and lr are saved on the stack and restored on return.
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the first and last round keys untouched.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ Walk inwards from both ends, swapping and transforming a pair per pass.
.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ The middle round key is transformed in place.
	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key

@ aes_hw_encrypt: encrypt a single 16-byte block.
@ In:   r0 = input block, r1 = output block, r2 = key schedule (the round
@       count is read from offset 240)
@ Clobbers r2, r3, q0-q2 and the flags.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ Two rounds per pass, with the next two round keys loaded in between.
.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0		@ final round key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt

@ aes_hw_decrypt: decrypt a single 16-byte block.
@ In:   r0 = input block, r1 = output block, r2 = key schedule (the round
@       count is read from offset 240)
@ Clobbers r2, r3, q0-q2 and the flags.
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ Two rounds per pass, with the next two round keys loaded in between.
.Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0		@ final round key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt

@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption.
@ In:   r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule,
@       [sp] = pointer to the 16-byte IV, [sp,#4] = nonzero to encrypt,
@       zero to decrypt
@ Nothing is processed when the length is below 16; otherwise the length is
@ rounded down to a whole number of 16-byte blocks. On exit from the
@ processing paths the IV buffer is overwritten with the contents of q6,
@ which holds the last ciphertext block.
@ r4-r8, lr and d8-d15 are saved on the stack and restored on return.
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	moveq	r8,#0			@ single block: do not advance the input pointer

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec

	cmp	r5,#2			@ r5 = rounds-8, so 2 means a 128-bit key
	veor	q0,q0,q6
	veor	q5,q8,q7
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ r5 = rounds-8, so 4 means a 192-bit key
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
	@ Decryption handles three blocks per pass while at least three remain,
	@ then finishes one or two blocks in .Lcbc_dec_tail.
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec

	cmn	r2,#0x30		@ r2 == -0x30: nothing left for the tail
	beq	.Lcbc_done
	nop

.Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ flags consumed by the beq below
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV buffer
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt

@ aes_hw_ctr32_encrypt_blocks: CTR-mode keystream XOR with a 32-bit counter.
@ In:   r0 = input, r1 = output, r2 = number of 16-byte blocks,
@       r3 = key schedule, [sp] = pointer to the 16-byte counter block
@ Only the last 32-bit word of the counter block is incremented; it is
@ byte-reversed on little-endian builds before the arithmetic. The counter
@ block in memory is not written back.
@ Blocks are processed three at a time, with a tail for one or two blocks.
@ NOTE(review): a block count of 0 is not special-cased; it appears to take
@ the tail path, which reads 16 bytes of input and stores two blocks of
@ output. Callers presumably never pass 0 - confirm against the call sites.
@ r4-r10, lr and d8-d15 are saved on the stack and restored on return.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags consumed by movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The vmov.32
	@ instructions could write to q1 and q10 directly, but that trips
	@ these bugs. We write to q6 and copy to the final register as a
	@ workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d13[1],r10
	add	r8, r8, #2
	vorr	q1,q6,q6
	bls	.Lctr32_tail
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	@ Note the logic to update q0, q1, and q10 is written to work
	@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	@ 32-bit mode. See the errata comment near the top of this function.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks still to do
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: do not advance past it

.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ only one block was left
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif  // !OPENSSL_NO_ASM
.section	.note.GNU-stack,"",%progbits