/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#ifdef HITLS_CRYPTO_AES

#include "crypt_arm.h"
#include "crypt_aes_macro_armv8.s"
.file   "crypt_aes_armv8.S"
.text
.arch   armv8-a+crypto

/*
 * Target: AArch64, GNU as syntax, AAPCS64 calling convention.
 * Register aliases used by the single-block encrypt/decrypt entry points.
 */
KEY     .req    x0
IN      .req    x1
OUT     .req    x2

ROUNDS  .req    w6

RDK0    .req    v17
RDK1    .req    v18

/* Round constants consumed one 32-bit word at a time by the key expansion loops. */
.section .rodata
.align  5
.g_cron:
.long   0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
.align  5

/*
 * In Return-oriented programming (ROP) and Jump-oriented programming (JOP), we explored features
 * that Arm introduced to the Arm architecture to mitigate against JOP-style and ROP-style attacks.
 * ...
 * Whether the combined or NOP-compatible instructions are set depends on the architecture
 * version that the code is built for. When building for Armv8.3-A, or later, the compiler will use
 * the combined operations. When building for Armv8.2-A, or earlier, it will use the NOP compatible
 * instructions.
 *
 * The paciasp and autiasp instructions are used for function pointer authentication.
 * The pointer authentication feature is added in armv8.3 and is supported only by AArch64.
 * The addition of pointer authentication features is described in Section A2.6.1 of
 * DDI0487H_a_a-profile_architecture_reference_manual.pdf.
 */

/*
 * int32_t CRYPT_AES_Encrypt(const CRYPT_AES_Key *ctx,
 *                           const uint8_t *in,
 *                           uint8_t *out,
 *                           uint32_t len);
 *
 * Encrypts the single 16-byte block at in and writes the result to out.
 * In:    x0 = ctx, x1 = in, x2 = out. The len argument is not read by the code below.
 * Out:   x0 = 0.
 * Notes: BLK0 and AES_ENC_1_BLK are defined outside this file (presumably in
 *        crypt_aes_macro_armv8.s); the macro is handed KEY, the block register, the two
 *        round-key scratch registers and ROUNDS. The round-key scratch registers are
 *        zeroed before returning.
 */
.text
.globl  CRYPT_AES_Encrypt
.type   CRYPT_AES_Encrypt, %function
.align  5
CRYPT_AES_Encrypt:
.ecb_aesenc_start:
AARCH64_PACIASP
    stp     x29, x30, [sp, #-16]!
    add     x29, sp, #0

    ld1     {BLK0.16b}, [IN]                    // Load the input block.
    AES_ENC_1_BLK KEY BLK0.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    st1     {BLK0.16b}, [OUT]                   // Store the processed block.

    eor     x0, x0, x0                          // Return value 0.
    eor     RDK0.16b, RDK0.16b, RDK0.16b        // Scrub round-key scratch registers.
    eor     RDK1.16b, RDK1.16b, RDK1.16b
    ldp     x29, x30, [sp], #16
AARCH64_AUTIASP
    ret
.size   CRYPT_AES_Encrypt, .-CRYPT_AES_Encrypt

/*
 * int32_t CRYPT_AES_Decrypt(const CRYPT_AES_Key *ctx,
 *                           const uint8_t *in,
 *                           uint8_t *out,
 *                           uint32_t len);
 *
 * Decrypts the single 16-byte block at in and writes the result to out.
 * In:    x0 = ctx, x1 = in, x2 = out. The len argument is not read by the code below.
 * Out:   x0 = 0.
 * Notes: BLK0 and AES_DEC_1_BLK are defined outside this file (presumably in
 *        crypt_aes_macro_armv8.s). The round-key scratch registers are zeroed before
 *        returning.
 */
.globl  CRYPT_AES_Decrypt
.type   CRYPT_AES_Decrypt, %function
.align  5
CRYPT_AES_Decrypt:
.ecb_aesdec_start:
AARCH64_PACIASP
    stp     x29, x30, [sp, #-16]!
    add     x29, sp, #0

    ld1     {BLK0.16b}, [IN]                    // Load the input block.
    AES_DEC_1_BLK KEY BLK0.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    st1     {BLK0.16b}, [OUT]                   // Store the processed block.

    eor     x0, x0, x0                          // Return value 0.
    eor     RDK0.16b, RDK0.16b, RDK0.16b        // Scrub round-key scratch registers.
    eor     RDK1.16b, RDK1.16b, RDK1.16b
    ldp     x29, x30, [sp], #16
AARCH64_AUTIASP
    ret
.size   CRYPT_AES_Decrypt, .-CRYPT_AES_Decrypt

/*
 * void SetEncryptKey128(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Generating extended keys.
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 *
 * Writes 11 round keys (176 bytes) starting at ctx and the round count 10 at ctx + 240.
 * x0 is zero on return.
 * Register roles: x0 = write cursor, x24 = read cursor (one round key behind x0),
 *                 x23 = round-constant cursor, w25 = loop counter, w26 = round count,
 *                 v0 = all-zero, v1 = key being built, v2 = previous round key.
 * Clobbers v0-v3 and flags; x21-x26 are saved and restored.
 */
.globl  SetEncryptKey128
.type   SetEncryptKey128, %function
.align  5
SetEncryptKey128:
.Lenc_key_128:
AARCH64_PACIASP
    stp     x29, x30, [sp, #-64]!
    add     x29, sp, #0
    stp     x25, x26, [sp, #16]
    stp     x23, x24, [sp, #32]
    stp     x21, x22, [sp, #48]                 // Callee-saved registers pushed.

    adrp    x23, .g_cron
    add     x23, x23, :lo12:.g_cron             // x23 = address of the round-constant table.
    mov     x24, x0                             // x24 = read cursor into the key schedule.
    ld1     {v1.16b}, [x1]                      // Load the 16-byte user key.
    mov     w26, #10                            // Round count stored into the structure below.
    st1     {v1.4s}, [x0], #16                  // Round key 0 is the user key itself.
    eor     v0.16b, v0.16b, v0.16b              // v0 = 0.
    mov     w25, #10                            // Ten expansion iterations.
.Lenc_key_128_loop:
    ldr     w21, [x23], #4                      // Next round constant.
    dup     v1.4s, v1.s[3]                      // Broadcast the last word of the previous key.
    ld1     {v2.4s}, [x24], #16                 // v2 = previous round key (4 words).
    ext     v1.16b, v1.16b, v1.16b, #1          // Rotate by one byte (all lanes are equal).
    dup     v3.4s, w21                          // Broadcast the round constant.
    aese    v1.16b, v0.16b                      // XOR with zero, then the AES byte substitution;
                                                // the row shift is a no-op on equal columns.
    subs    w25, w25, #1                        // Flags are consumed by b.ne at the loop end.
    eor     v1.16b, v1.16b, v3.16b              // Add the round constant.
    eor     v1.16b, v1.16b, v2.16b              // XOR in the previous key ...
    ext     v2.16b, v0.16b, v2.16b, #12         // ... shifted up by one word, zero filled,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... by two words,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... and by three words (prefix XOR).
    eor     v1.16b, v1.16b, v2.16b
    st1     {v1.4s}, [x0], #16                  // Store the new 4-word round key.
    b.ne    .Lenc_key_128_loop
    str     w26, [x0, #64]                      // x0 = ctx + 176 here, so this writes ctx + 240.
    eor     x24, x24, x24                       // Scrub the schedule pointer copy.
    eor     x0, x0, x0
    ldp     x21, x22, [sp, #48]
    ldp     x23, x24, [sp, #32]
    ldp     x25, x26, [sp, #16]
    ldp     x29, x30, [sp], #64                 // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetEncryptKey128, .-SetEncryptKey128


/*
 * void SetDecryptKey128(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Set a decryption key string.
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 *
 * Expands the encryption schedule via .Lenc_key_128, then rewrites the 11 round keys in
 * place in reverse order. The SETDECKEY_* macros are defined outside this file; they
 * presumably load, inverse-mix and store the nine middle keys in v1-v9.
 * x0 is zero on return.
 *
 * AAPCS64 requires the low 64 bits of v8-v15 to be preserved by the callee. v10 is written
 * below and v8/v9 are presumably written by the macros, so d8-d10 are spilled to the frame.
 * Reloading a d register also overwrites the whole vector register, so no round-key
 * material is left behind in v8-v10.
 */
.globl  SetDecryptKey128
.type   SetDecryptKey128, %function
.align  5
SetDecryptKey128:
AARCH64_PACIASP
    stp     x29, x30, [sp, #-64]!
    add     x29, sp, #0
    stp     x25, x28, [sp, #16]
    stp     d8, d9, [sp, #32]
    str     d10, [sp, #48]                      // Callee-saved registers pushed.

    mov     x28, x0                             // Keep ctx; the helper returns x0 = 0.
    bl      .Lenc_key_128
    ld1     {v0.4s}, [x28], #16                 // v0 = round key 0.
    SETDECKEY_LDR_9_BLOCK x28
    ld1     {v10.4s}, [x28]                     // v10 = last round key; x28 stays on its slot.
    mov     x25, #-16                           // Negative stride for the backwards stores.
    SETDECKEY_INVMIX_9_BLOCK
    st1     {v0.4s}, [x28], x25                 // Round key 0 goes into the last slot.
    SETDECKEY_STR_9_BLOCK x28, x25
    st1     {v10.4s}, [x28]                     // Last round key goes into the first slot.
    eor     x28, x28, x28
    eor     x0, x0, x0
    ldr     d10, [sp, #48]
    ldp     d8, d9, [sp, #32]
    ldp     x25, x28, [sp, #16]
    ldp     x29, x30, [sp], #64                 // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetDecryptKey128, .-SetDecryptKey128


/*
 * void SetEncryptKey192(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Generating extended keys.
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 *
 * Copies the 24-byte user key to ctx, then runs 8 iterations that each append 6 words,
 * so 216 bytes of the schedule are written in total. The round count 12 is stored at
 * ctx + 240. x0 is zero on return; x1 is advanced by 16.
 * Register roles: x0 = write cursor, x24 = read cursor (6 words behind x0),
 *                 x23 = round-constant cursor, w25 = loop counter, w26 = round count,
 *                 v0 = all-zero, v1 = words being built, v2 = earlier schedule words.
 * Clobbers v0-v2 and flags; x21-x26 are saved and restored.
 */
.globl  SetEncryptKey192
.type   SetEncryptKey192, %function
.align  5
SetEncryptKey192:
.Lenc_key_192:
AARCH64_PACIASP
    stp     x29, x30, [sp, #-64]!
    add     x29, sp, #0
    stp     x25, x26, [sp, #16]
    stp     x23, x24, [sp, #32]
    stp     x21, x22, [sp, #48]                 // Callee-saved registers pushed.

    mov     x24, x0                             // x24 = read cursor into the key schedule.
    ld1     {v0.16b}, [x1], #16                 // First 128 bits of the user key.
    mov     w26, #12                            // Round count stored into the structure below.
    st1     {v0.4s}, [x0], #16                  // Store the first 128 bits.
    ld1     {v1.8b}, [x1]                       // Last 64 bits of the user key.
    adrp    x23, .g_cron
    add     x23, x23, :lo12:.g_cron             // x23 = address of the round-constant table.
    st1     {v1.2s}, [x0], #8                   // Store the last 64 bits.
    eor     v0.16b, v0.16b, v0.16b              // v0 = 0.
    mov     w25, #8                             // Eight expansion iterations.
.Lenc_key_192_loop:
    dup     v1.4s, v1.s[1]                      // Broadcast the most recent schedule word.
    subs    w25, w25, #1                        // Flags are consumed by b.ne at the loop end.
    ext     v1.16b, v1.16b, v1.16b, #1          // Rotate by one byte (all lanes are equal).
    ldr     w22, [x23], #4                      // Next round constant.
    aese    v1.16b, v0.16b                      // XOR with zero, then the AES byte substitution;
                                                // the row shift is a no-op on equal columns.
    dup     v2.4s, w22                          // Broadcast the round constant.
    eor     v1.16b, v1.16b, v2.16b              // Add the round constant.
    ld1     {v2.4s}, [x24], #16                 // First 4 of the 6 words from one step back.
    eor     v1.16b, v1.16b, v2.16b              // XOR in those words ...
    ext     v2.16b, v0.16b, v2.16b, #12         // ... shifted up by one word, zero filled,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... by two words,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... and by three words (prefix XOR).
    eor     v1.16b, v1.16b, v2.16b
    st1     {v1.4s}, [x0], #16                  // Store the 4 new words.
    ld1     {v2.2s}, [x24], #8                  // Remaining 2 words from one step back.
    dup     v1.2s, v1.s[3]                      // Broadcast the newest word into two lanes.
    eor     v1.8b, v1.8b, v2.8b                 // XOR in the 2 older words ...
    ext     v2.8b, v0.8b, v2.8b, #4             // ... and the same shifted up by one word.
    eor     v1.8b, v1.8b, v2.8b
    st1     {v1.2s}, [x0], #8                   // Store the 2 new words.
    b.ne    .Lenc_key_192_loop
    str     w26, [x0, #24]                      // x0 = ctx + 216 here, so this writes ctx + 240.
    eor     x24, x24, x24                       // Scrub the schedule pointer copy.
    eor     x0, x0, x0
    ldp     x21, x22, [sp, #48]
    ldp     x23, x24, [sp, #32]
    ldp     x25, x26, [sp, #16]
    ldp     x29, x30, [sp], #64                 // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetEncryptKey192, .-SetEncryptKey192


/*
 * void SetDecryptKey192(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Set a decryption key string.
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 *
 * Expands the encryption schedule via .Lenc_key_192, then rewrites the 13 round keys in
 * place in reverse order, applying aesimc to every key except the first and the last.
 * The SETDECKEY_* macros are defined outside this file; they presumably handle the nine
 * keys held in v1-v9. x0 is zero on return.
 *
 * AAPCS64 requires the low 64 bits of v8-v15 to be preserved by the callee. v10-v12 are
 * written below and v8/v9 are presumably written by the macros, so d8-d12 are spilled to
 * the frame. Reloading a d register also overwrites the whole vector register, so no
 * round-key material is left behind in v8-v12.
 */
.globl  SetDecryptKey192
.type   SetDecryptKey192, %function
.align  5
SetDecryptKey192:
AARCH64_PACIASP
    stp     x29, x30, [sp, #-80]!
    add     x29, sp, #0
    stp     x25, x28, [sp, #16]
    stp     d8, d9, [sp, #32]
    stp     d10, d11, [sp, #48]
    str     d12, [sp, #64]                      // Callee-saved registers pushed.

    mov     x28, x0                             // Keep ctx; the helper returns x0 = 0.
    bl      .Lenc_key_192
    mov     x25, #-16                           // Negative stride for the backwards stores.
    ld1     {v0.4s}, [x28], #16                 // v0 = round key 0.
    SETDECKEY_LDR_9_BLOCK x28
    ld1     {v10.4s}, [x28], #16
    ld1     {v11.4s}, [x28], #16
    ld1     {v12.4s}, [x28]                     // v12 = last round key; x28 stays on its slot.
    SETDECKEY_INVMIX_9_BLOCK
    aesimc  v10.16b, v10.16b                    // Inverse MixColumns on the extra middle keys.
    aesimc  v11.16b, v11.16b
    st1     {v0.4s}, [x28], x25                 // Round key 0 goes into the last slot.
    SETDECKEY_STR_9_BLOCK x28, x25
    st1     {v10.4s}, [x28], x25
    st1     {v11.4s}, [x28], x25
    st1     {v12.4s}, [x28]                     // Last round key goes into the first slot.
    eor     x28, x28, x28
    eor     x0, x0, x0
    ldr     d12, [sp, #64]
    ldp     d10, d11, [sp, #48]
    ldp     d8, d9, [sp, #32]
    ldp     x25, x28, [sp, #16]
    ldp     x29, x30, [sp], #80                 // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetDecryptKey192, .-SetDecryptKey192

/*
 * void SetEncryptKey256(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Generating extended keys.
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 *
 * Copies the 32-byte user key to ctx, runs 6 iterations that each append two round keys,
 * then appends one final round key: 15 round keys (240 bytes) in total. The round count
 * 14 is stored at ctx + 240. x0 is zero on return; x1 is advanced by 16.
 * Register roles: x0 = write cursor, x24 = read cursor (8 words behind x0),
 *                 x23 = round-constant cursor, w25 = loop counter, w26 = round count,
 *                 v0 = all-zero, v1 = key being built, v2 = earlier round key.
 * Clobbers v0-v2 and flags; x21-x26 are saved and restored.
 */
.globl  SetEncryptKey256
.type   SetEncryptKey256, %function
.align  5
SetEncryptKey256:
.Lenc_key_256:
AARCH64_PACIASP
    stp     x29, x30, [sp, #-64]!
    add     x29, sp, #0
    stp     x25, x26, [sp, #16]
    stp     x23, x24, [sp, #32]
    stp     x21, x22, [sp, #48]                 // Callee-saved registers pushed.

    adrp    x23, .g_cron
    add     x23, x23, :lo12:.g_cron             // x23 = address of the round-constant table.
    ld1     {v0.16b}, [x1], #16                 // First 128 bits of the user key.
    mov     x24, x0                             // x24 = read cursor into the key schedule.
    st1     {v0.4s}, [x0], #16                  // Store the first 128 bits.
    ld1     {v1.16b}, [x1]                      // Last 128 bits of the user key.
    eor     v0.16b, v0.16b, v0.16b              // v0 = 0.
    st1     {v1.4s}, [x0], #16                  // Store the last 128 bits.
    mov     w26, #14                            // Round count stored into the structure below.
    mov     w25, #6                             // Six double-key iterations; a 7th half follows.
.Lenc_key_256_loop:
    // Even round key: rotate, substitute, add the round constant.
    dup     v1.4s, v1.s[3]                      // Broadcast the last word of the previous key.
    ldr     w22, [x23], #4                      // Next round constant.
    ext     v1.16b, v1.16b, v1.16b, #1          // Rotate by one byte (all lanes are equal).
    aese    v1.16b, v0.16b                      // XOR with zero, then the AES byte substitution;
                                                // the row shift is a no-op on equal columns.
    dup     v2.4s, w22                          // Broadcast the round constant.
    eor     v1.16b, v1.16b, v2.16b              // Add the round constant.
    ld1     {v2.4s}, [x24], #16                 // Round key from two positions back.
    eor     v1.16b, v1.16b, v2.16b              // XOR in that key ...
    ext     v2.16b, v0.16b, v2.16b, #12         // ... shifted up by one word, zero filled,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... by two words,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... and by three words (prefix XOR).
    eor     v1.16b, v1.16b, v2.16b
    st1     {v1.4s}, [x0], #16                  // Store the new 4-word round key.
    subs    w25, w25, #1                        // Flags are consumed by b.ne at the loop end.
    // Odd round key: substitute only, no rotation and no round constant.
    dup     v1.4s, v1.s[3]                      // Broadcast the last word of the key just built.
    ld1     {v2.4s}, [x24], #16                 // Round key from two positions back.
    aese    v1.16b, v0.16b                      // XOR with zero, then the AES byte substitution.
    eor     v1.16b, v1.16b, v2.16b              // XOR in that key ...
    ext     v2.16b, v0.16b, v2.16b, #12         // ... shifted up by one word, zero filled,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... by two words,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... and by three words (prefix XOR).
    eor     v1.16b, v1.16b, v2.16b
    st1     {v1.4s}, [x0], #16                  // Store the new 4-word round key.
    b.ne    .Lenc_key_256_loop

    // Final (15th) round key: same steps as the even half of the loop.
    dup     v1.4s, v1.s[3]                      // Broadcast the last word of the previous key.
    ldr     w22, [x23], #4                      // Next round constant.
    ext     v1.16b, v1.16b, v1.16b, #1          // Rotate by one byte (all lanes are equal).
    aese    v1.16b, v0.16b                      // XOR with zero, then the AES byte substitution.
    dup     v2.4s, w22                          // Broadcast the round constant.
    eor     v1.16b, v1.16b, v2.16b              // Add the round constant.
    ld1     {v2.4s}, [x24], #16                 // Round key from two positions back.
    eor     v1.16b, v1.16b, v2.16b              // XOR in that key ...
    ext     v2.16b, v0.16b, v2.16b, #12         // ... shifted up by one word, zero filled,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... by two words,
    eor     v1.16b, v1.16b, v2.16b
    ext     v2.16b, v0.16b, v2.16b, #12         // ... and by three words (prefix XOR).
    eor     v1.16b, v1.16b, v2.16b
    st1     {v1.4s}, [x0], #16                  // Store the new 4-word round key.
    str     w26, [x0]                           // x0 = ctx + 240 here.
    eor     x24, x24, x24                       // Scrub the schedule pointer copy.
    eor     x0, x0, x0
    ldp     x21, x22, [sp, #48]
    ldp     x23, x24, [sp, #32]
    ldp     x25, x26, [sp, #16]
    ldp     x29, x30, [sp], #64                 // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetEncryptKey256, .-SetEncryptKey256

/*
 * void SetDecryptKey256(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Set a decryption key string.
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 *
 * Expands the encryption schedule via .Lenc_key_256, then rewrites the 15 round keys in
 * place in reverse order, applying aesimc to every key except the first and the last.
 * The SETDECKEY_* macros are defined outside this file; they presumably handle the nine
 * keys held in v1-v9. x0 is zero on return.
 *
 * AAPCS64 requires the low 64 bits of v8-v15 to be preserved by the callee. v10-v14 are
 * written below and v8/v9 are presumably written by the macros, so d8-d14 are spilled to
 * the frame. Reloading a d register also overwrites the whole vector register, so no
 * round-key material is left behind in v8-v14.
 */
.globl  SetDecryptKey256
.type   SetDecryptKey256, %function
.align  5
SetDecryptKey256:
AARCH64_PACIASP
    stp     x29, x30, [sp, #-96]!
    add     x29, sp, #0
    stp     x25, x28, [sp, #16]
    stp     d8, d9, [sp, #32]
    stp     d10, d11, [sp, #48]
    stp     d12, d13, [sp, #64]
    str     d14, [sp, #80]                      // Callee-saved registers pushed.

    mov     x28, x0                             // Keep ctx; the helper returns x0 = 0.
    bl      .Lenc_key_256
    mov     x25, #-16                           // Negative stride for the backwards stores.
    ld1     {v0.4s}, [x28], #16                 // v0 = round key 0.
    SETDECKEY_LDR_9_BLOCK x28
    ld1     {v10.4s}, [x28], #16
    ld1     {v11.4s}, [x28], #16
    ld1     {v12.4s}, [x28], #16
    ld1     {v13.4s}, [x28], #16
    ld1     {v14.4s}, [x28]                     // v14 = last round key; x28 stays on its slot.
    SETDECKEY_INVMIX_9_BLOCK
    aesimc  v10.16b, v10.16b                    // Inverse MixColumns on the extra middle keys.
    aesimc  v11.16b, v11.16b
    aesimc  v12.16b, v12.16b
    aesimc  v13.16b, v13.16b
    st1     {v0.4s}, [x28], x25                 // Round key 0 goes into the last slot.
    SETDECKEY_STR_9_BLOCK x28, x25
    st1     {v10.4s}, [x28], x25
    st1     {v11.4s}, [x28], x25
    st1     {v12.4s}, [x28], x25
    st1     {v13.4s}, [x28], x25
    st1     {v14.4s}, [x28]                     // Last round key goes into the first slot.
    eor     x28, x28, x28
    eor     x0, x0, x0
    ldr     d14, [sp, #80]
    ldp     d12, d13, [sp, #64]
    ldp     d10, d11, [sp, #48]
    ldp     d8, d9, [sp, #32]
    ldp     x25, x28, [sp, #16]
    ldp     x29, x30, [sp], #96                 // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetDecryptKey256, .-SetDecryptKey256

#endif