// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
#include <ring-core/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

// AES round-constant table used by the key schedule below:
//   word 0x01 splatted, a TBL permutation mask ("rotate-n-splat") for
//   RotWord, and the 0x1b reduction constant.
.section __TEXT,__const
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b

.text

.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key

// int aes_hw_set_encrypt_key(const uint8_t *user_key,  // x0
//                            unsigned bits,            // w1: 128 or 256
//                            AES_KEY *key);            // x2
//
// Expands an AES key into the encryption round-key schedule at x2 and
// stores the round count (10 for 128-bit, 14 for 256-bit) at byte offset
// 240 of the schedule (the offset read back by the encrypt paths below).
// Returns (in x0): 0 on success, -1 if user_key or key is NULL, -2 if
// bits is out of range or not a multiple of 64. 192-bit key support was
// removed, so only the 128- and 256-bit paths remain.
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp x29,x30,[sp,#-16]!
	add x29,sp,#0
	mov x3,#-1 // x3 = pending return code
	cmp x0,#0
	b.eq Lenc_key_abort
	cmp x2,#0
	b.eq Lenc_key_abort
	mov x3,#-2
	cmp w1,#128
	b.lt Lenc_key_abort
	cmp w1,#256
	b.gt Lenc_key_abort
	tst w1,#0x3f // bits must be a multiple of 64
	b.ne Lenc_key_abort

	adrp x3,Lrcon@PAGE
	add x3,x3,Lrcon@PAGEOFF
	cmp w1,#192

	eor v0.16b,v0.16b,v0.16b // v0 = zero, used by EXT and as AESE null key
	ld1 {v3.16b},[x0],#16 // first 16 key bytes
	mov w1,#8 // reuse w1 as the round-generation counter
	ld1 {v1.4s,v2.4s},[x3],#32 // v1 = rcon, v2 = rotate-n-splat mask

	b.lt Loop128
	// 192-bit key support was removed.
	b L256

.align 4
Loop128:
	// One 128-bit key-schedule round: v6 = SubWord(RotWord(w[i-1])) via
	// TBL+AESE (AESE with a zero key leaves only SubBytes/ShiftRows),
	// then the three EXT/EOR pairs implement the running XOR of the
	// previous round key's words; rcon in v1 is doubled each round.
	tbl v6.16b,{v3.16b},v2.16b
	ext v5.16b,v0.16b,v3.16b,#12
	st1 {v3.4s},[x2],#16
	aese v6.16b,v0.16b
	subs w1,w1,#1

	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v6.16b,v6.16b,v1.16b
	eor v3.16b,v3.16b,v5.16b
	shl v1.16b,v1.16b,#1
	eor v3.16b,v3.16b,v6.16b
	b.ne Loop128

	ld1 {v1.4s},[x3] // switch to the 0x1b rcon for the last two rounds

	tbl v6.16b,{v3.16b},v2.16b
	ext v5.16b,v0.16b,v3.16b,#12
	st1 {v3.4s},[x2],#16
	aese v6.16b,v0.16b

	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v6.16b,v6.16b,v1.16b
	eor v3.16b,v3.16b,v5.16b
	shl v1.16b,v1.16b,#1
	eor v3.16b,v3.16b,v6.16b

	tbl v6.16b,{v3.16b},v2.16b
	ext v5.16b,v0.16b,v3.16b,#12
	st1 {v3.4s},[x2],#16
	aese v6.16b,v0.16b

	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v6.16b,v6.16b,v1.16b
	eor v3.16b,v3.16b,v5.16b
	eor v3.16b,v3.16b,v6.16b
	st1 {v3.4s},[x2]
	add x2,x2,#0x50 // skip to the rounds slot (offset 240 from the base)

	mov w12,#10 // 10 rounds for AES-128
	b Ldone

// 192-bit key support was removed.

.align 4
L256:
	ld1 {v4.16b},[x0] // second 16 key bytes
	mov w1,#7
	mov w12,#14 // 14 rounds for AES-256
	st1 {v3.4s},[x2],#16

Loop256:
	// 256-bit schedule: alternate between the RotWord step (below) on
	// v4 and, every other half-round, a plain SubWord splat (the DUP
	// path after b.eq) on v3.s[3].
	tbl v6.16b,{v4.16b},v2.16b
	ext v5.16b,v0.16b,v3.16b,#12
	st1 {v4.4s},[x2],#16
	aese v6.16b,v0.16b
	subs w1,w1,#1

	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v6.16b,v6.16b,v1.16b
	eor v3.16b,v3.16b,v5.16b
	shl v1.16b,v1.16b,#1
	eor v3.16b,v3.16b,v6.16b
	st1 {v3.4s},[x2],#16
	b.eq Ldone

	dup v6.4s,v3.s[3] // just splat
	ext v5.16b,v0.16b,v4.16b,#12
	aese v6.16b,v0.16b

	eor v4.16b,v4.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v4.16b,v4.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v4.16b,v4.16b,v5.16b

	eor v4.16b,v4.16b,v6.16b
	b Loop256

Ldone:
	str w12,[x2] // store the round count
	mov x3,#0

Lenc_key_abort:
	mov x0,x3 // return value
	ldr x29,[sp],#16
	ret

.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt

// void aes_hw_encrypt(const uint8_t *in,   // x0
//                     uint8_t *out,        // x1
//                     const AES_KEY *key); // x2
//
// Encrypts a single 16-byte block. Reads the round count from
// [key + 240]; the loop consumes two round keys per iteration, with the
// final AESE + key XOR (no AESMC) handled after the loop.
.align 5
_aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr w3,[x2,#240] // w3 = number of rounds
	ld1 {v0.4s},[x2],#16
	ld1 {v2.16b},[x0]
	sub w3,w3,#2 // two rounds are peeled off after the loop
	ld1 {v1.4s},[x2],#16

Loop_enc:
	aese v2.16b,v0.16b
	aesmc v2.16b,v2.16b
	ld1 {v0.4s},[x2],#16
	subs w3,w3,#2
	aese v2.16b,v1.16b
	aesmc v2.16b,v2.16b
	ld1 {v1.4s},[x2],#16
	b.gt Loop_enc

	aese v2.16b,v0.16b
	aesmc v2.16b,v2.16b
	ld1 {v0.4s},[x2] // final round key
	aese v2.16b,v1.16b
	eor v2.16b,v2.16b,v0.16b // last round has no MixColumns

	st1 {v2.16b},[x1]
	ret

.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks

// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in,   // x0
//                                  uint8_t *out,        // x1
//                                  size_t len,          // x2, in 16-byte blocks
//                                  const AES_KEY *key,  // x3
//                                  const uint8_t ivec[16]); // x4
//
// CTR-mode encryption with a 32-bit big-endian counter in the last word
// of ivec. The main loop processes three blocks at a time (v0/v1/v18);
// one or two leftover blocks fall through to Lctr32_tail.
.align 5
_aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp x29,x30,[sp,#-16]!
	add x29,sp,#0
	ldr w5,[x3,#240] // w5 = number of rounds

	ldr w8, [x4, #12] // w8 = 32-bit counter (big-endian in memory)
	ld1 {v0.4s},[x4]

	ld1 {v16.4s,v17.4s},[x3] // load key schedule...
	sub w5,w5,#4
	mov x12,#16
	cmp x2,#2
	add x7,x3,x5,lsl#4 // pointer to last 5 round keys
	sub w5,w5,#2 // w5 = rounds-6, the Loop3x_ctr32 iteration budget
	ld1 {v20.4s,v21.4s},[x7],#32
	ld1 {v22.4s,v23.4s},[x7],#32
	ld1 {v7.4s},[x7]
	add x7,x3,#32 // x7 -> rk[2]; rk[0]/rk[1] already in v16/v17
	mov w6,w5
	csel x12,xzr,x12,lo // x12 = 16 if >=2 blocks remain, else 0 (tail load increment)

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The vmov lines
	// could write to v1.16b and v18.16b directly, but that trips these bugs.
	// We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev w8, w8
#endif
	add w10, w8, #1
	orr v6.16b,v0.16b,v0.16b
	rev w10, w10
	mov v6.s[3],w10
	add w8, w8, #2
	orr v1.16b,v6.16b,v6.16b // v1 = counter block +1
	b.ls Lctr32_tail // <=2 blocks total: skip the 3x loop
	rev w12, w8
	mov v6.s[3],w12
	sub x2,x2,#3 // bias
	orr v18.16b,v6.16b,v6.16b // v18 = counter block +2
	b Loop3x_ctr32

.align 4
Loop3x_ctr32:
	// Three counter blocks through two AES rounds per iteration.
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v16.16b
	aesmc v1.16b,v1.16b
	aese v18.16b,v16.16b
	aesmc v18.16b,v18.16b
	ld1 {v16.4s},[x7],#16
	subs w6,w6,#2
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v17.16b
	aesmc v1.16b,v1.16b
	aese v18.16b,v17.16b
	aesmc v18.16b,v18.16b
	ld1 {v17.4s},[x7],#16
	b.gt Loop3x_ctr32

	// Last 5 rounds (v16 then v20..v23/v7), interleaved with loading the
	// three input blocks, XORing in the last round key, and preparing the
	// next three counter values via v6 (see erratum note above).
	aese v0.16b,v16.16b
	aesmc v4.16b,v0.16b
	aese v1.16b,v16.16b
	aesmc v5.16b,v1.16b
	ld1 {v2.16b},[x0],#16
	add w9,w8,#1
	aese v18.16b,v16.16b
	aesmc v18.16b,v18.16b
	ld1 {v3.16b},[x0],#16
	rev w9,w9
	aese v4.16b,v17.16b
	aesmc v4.16b,v4.16b
	aese v5.16b,v17.16b
	aesmc v5.16b,v5.16b
	ld1 {v19.16b},[x0],#16
	mov x7,x3 // rewind key pointer for the next iteration
	aese v18.16b,v17.16b
	aesmc v17.16b,v18.16b
	aese v4.16b,v20.16b
	aesmc v4.16b,v4.16b
	aese v5.16b,v20.16b
	aesmc v5.16b,v5.16b
	eor v2.16b,v2.16b,v7.16b // pre-XOR input with the final round key
	add w10,w8,#2
	aese v17.16b,v20.16b
	aesmc v17.16b,v17.16b
	eor v3.16b,v3.16b,v7.16b
	add w8,w8,#3
	aese v4.16b,v21.16b
	aesmc v4.16b,v4.16b
	aese v5.16b,v21.16b
	aesmc v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
	// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode. See the comment above.
	eor v19.16b,v19.16b,v7.16b
	mov v6.s[3], w9
	aese v17.16b,v21.16b
	aesmc v17.16b,v17.16b
	orr v0.16b,v6.16b,v6.16b
	rev w10,w10
	aese v4.16b,v22.16b
	aesmc v4.16b,v4.16b
	mov v6.s[3], w10
	rev w12,w8
	aese v5.16b,v22.16b
	aesmc v5.16b,v5.16b
	orr v1.16b,v6.16b,v6.16b
	mov v6.s[3], w12
	aese v17.16b,v22.16b
	aesmc v17.16b,v17.16b
	orr v18.16b,v6.16b,v6.16b
	subs x2,x2,#3
	aese v4.16b,v23.16b
	aese v5.16b,v23.16b
	aese v17.16b,v23.16b

	eor v2.16b,v2.16b,v4.16b
	ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
	st1 {v2.16b},[x1],#16
	eor v3.16b,v3.16b,v5.16b
	mov w6,w5
	st1 {v3.16b},[x1],#16
	eor v19.16b,v19.16b,v17.16b
	ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
	st1 {v19.16b},[x1],#16
	b.hs Loop3x_ctr32

	adds x2,x2,#3 // undo the bias; x2 = leftover blocks (0..2)
	b.eq Lctr32_done
	cmp x2,#1
	mov x12,#16
	csel x12,xzr,x12,eq // one block left: don't advance the input pointer

Lctr32_tail:
	// Encrypt the last one or two counter blocks (v0, and v1 when two
	// blocks remain); v1's result is simply not stored otherwise.
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v16.16b
	aesmc v1.16b,v1.16b
	ld1 {v16.4s},[x7],#16
	subs w6,w6,#2
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v17.16b
	aesmc v1.16b,v1.16b
	ld1 {v17.4s},[x7],#16
	b.gt Lctr32_tail

	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v16.16b
	aesmc v1.16b,v1.16b
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v17.16b
	aesmc v1.16b,v1.16b
	ld1 {v2.16b},[x0],x12 // x12 is 0 when only one block remains
	aese v0.16b,v20.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v20.16b
	aesmc v1.16b,v1.16b
	ld1 {v3.16b},[x0]
	aese v0.16b,v21.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v21.16b
	aesmc v1.16b,v1.16b
	eor v2.16b,v2.16b,v7.16b
	aese v0.16b,v22.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v22.16b
	aesmc v1.16b,v1.16b
	eor v3.16b,v3.16b,v7.16b
	aese v0.16b,v23.16b
	aese v1.16b,v23.16b

	cmp x2,#1
	eor v2.16b,v2.16b,v0.16b
	eor v3.16b,v3.16b,v1.16b
	st1 {v2.16b},[x1],#16
	b.eq Lctr32_done
	st1 {v3.16b},[x1]

Lctr32_done:
	ldr x29,[sp],#16
	ret

#endif
#endif // !OPENSSL_NO_ASM