• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#ifdef HITLS_CRYPTO_AES
18
19#include "crypt_arm.h"
20#include "crypt_aes_macro_armv8.s"
21.file    "crypt_aes_armv8.S"
22.text
23.arch    armv8-a+crypto
24
/* Argument registers, in the order the C prototypes below pass them. */
KEY     .req    x0      // 1st argument: key context pointer
IN      .req    x1      // 2nd argument: input block pointer
OUT     .req    x2      // 3rd argument: output block pointer

/* Scratch registers handed to the AES_ENC_1_BLK / AES_DEC_1_BLK macros. */
ROUNDS  .req    w6      // presumably receives the round count inside the macro
RDK0    .req    v17     // round-key scratch, zeroed before the routines return
RDK1    .req    v18     // round-key scratch, zeroed before the routines return
33
/*
 * Round-constant table read by the SetEncryptKey* routines, one 32-bit
 * entry per key-expansion step (10 entries, 40 bytes).
 */
.section .rodata
.p2align 5
.g_cron:
.long   0x01, 0x02, 0x04, 0x08, 0x10
.long   0x20, 0x40, 0x80, 0x1b, 0x36
.p2align 5
39
/*
 * Return-oriented programming (ROP) and jump-oriented programming (JOP) attacks reuse existing
 * code by corrupting return addresses and branch targets. Arm added pointer authentication to
 * the architecture to mitigate them.
 * ...
 * Whether the combined or the NOP-compatible instructions are emitted depends on the architecture
 * version that the code is built for. When building for Armv8.3-A or later, the compiler uses
 * the combined operations. When building for Armv8.2-A or earlier, it uses the NOP-compatible
 * instructions.
 *
 * The paciasp and autiasp instructions sign and authenticate the return address held in the
 * link register (x30), using SP as the modifier.
 * Pointer authentication was added in Armv8.3 and is supported only by AArch64.
 * The feature is described in Section A2.6.1 of
 * DDI0487H_a_a-profile_architecture_reference_manual.pdf.
 */
54
/*
 * int32_t CRYPT_AES_Encrypt(const CRYPT_AES_Key *ctx,
 *                           const uint8_t *in,
 *                           uint8_t *out,
 *                           uint32_t len);
 *
 * Encrypts exactly one 16-byte block.
 * x0 (KEY) => ctx; x1 (IN) => in; x2 (OUT) => out.
 * The fourth argument (len) is not referenced by the code visible here.
 * Always returns 0 in x0.
 */
.text
.globl  CRYPT_AES_Encrypt
.type   CRYPT_AES_Encrypt, %function
.align  5
CRYPT_AES_Encrypt:
.ecb_aesenc_start:
AARCH64_PACIASP
    stp x29, x30, [sp, #-16]!           // Push frame record.
    mov x29, sp

    ld1 {BLK0.16b}, [IN]                // Fetch the 16-byte input block.
    // The cipher rounds live in a macro from crypt_aes_macro_armv8.s; it is handed
    // RDK0/RDK1 and ROUNDS as working registers.
    AES_ENC_1_BLK KEY BLK0.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    st1 {BLK0.16b}, [OUT]               // Write the 16-byte result.

    movi RDK0.16b, #0                   // Do not leave round-key data in RDK0/RDK1.
    movi RDK1.16b, #0
    mov x0, xzr                         // Return value 0.
    ldp x29, x30, [sp], #16             // Pop frame record.
AARCH64_AUTIASP
    ret
.size   CRYPT_AES_Encrypt, .-CRYPT_AES_Encrypt
82
/*
 * int32_t CRYPT_AES_Decrypt(const CRYPT_AES_Key *ctx,
 *                           const uint8_t *in,
 *                           uint8_t *out,
 *                           uint32_t len);
 *
 * Decrypts exactly one 16-byte block.
 * x0 (KEY) => ctx; x1 (IN) => in; x2 (OUT) => out.
 * The fourth argument (len) is not referenced by the code visible here.
 * Always returns 0 in x0.
 */
.globl  CRYPT_AES_Decrypt
.type   CRYPT_AES_Decrypt, %function
.align  5
CRYPT_AES_Decrypt:
.ecb_aesdec_start:
AARCH64_PACIASP
    stp x29, x30, [sp, #-16]!           // Push frame record.
    mov x29, sp

    ld1 {BLK0.16b}, [IN]                // Fetch the 16-byte input block.
    // The inverse-cipher rounds live in a macro from crypt_aes_macro_armv8.s; it is
    // handed RDK0/RDK1 and ROUNDS as working registers.
    AES_DEC_1_BLK KEY BLK0.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    st1 {BLK0.16b}, [OUT]               // Write the 16-byte result.

    movi RDK0.16b, #0                   // Do not leave round-key data in RDK0/RDK1.
    movi RDK1.16b, #0
    mov x0, xzr                         // Return value 0.
    ldp x29, x30, [sp], #16             // Pop frame record.
AARCH64_AUTIASP
    ret
.size   CRYPT_AES_Decrypt, .-CRYPT_AES_Decrypt
109
/*
 * void SetEncryptKey128(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Expands a 16-byte user key into 11 round keys (176 bytes) stored at ctx,
 * then writes the round count (10) at ctx + 240.
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 * On return x0 is 0, and v0-v2 are zeroed so no key material is left in them.
 * Clobbers: x1 is not modified; v0-v3, flags. x21-x26 are saved and restored.
 * Also entered via `bl .Lenc_key_128` from SetDecryptKey128.
 */
.globl  SetEncryptKey128
.type   SetEncryptKey128, %function
.align  5
SetEncryptKey128:
.Lenc_key_128:
AARCH64_PACIASP
    stp x29, x30, [sp, #-64]!
    add x29, sp, #0
    stp x25, x26, [sp, #16]
    stp x23, x24, [sp, #32]
    stp x21, x22, [sp, #48]             // Callee-saved registers pushed.

    adrp x23, .g_cron
    add x23, x23, :lo12:.g_cron         // x23 -> round-constant table.
    mov x24, x0                         // x24 = read cursor over the key schedule (previous round key).
    ld1 {v1.16b}, [x1]                  // Read the 16-byte user key.
    mov w26, #10                        // Round count stored into the context at the end.
    st1 {v1.4s}, [x0], #16              // Round key 0 is the user key itself.
    eor v0.16b, v0.16b, v0.16b          // v0 = 0: zero round key for aese and zero fill for ext.
    mov w25, #10                        // 10 expansion steps.
.Lenc_key_128_loop:
    ldr w21, [x23], #4                  // Next round constant.
    dup v1.4s, v1.s[3]                  // Broadcast the last word of the previous round key.
    ld1 {v2.4s}, [x24], #16             // v2 = previous round key (4 words).
    ext v1.16b, v1.16b, v1.16b, #1      // Rotate by one byte (all four lanes are identical).
    dup v3.4s, w21                      // Round constant in every lane.
    aese v1.16b, v0.16b                 // XOR with 0, then ShiftRows + SubBytes; ShiftRows is a
                                        // no-op here because all columns are equal.
    subs w25, w25, #1                   // Step counter; flags are consumed by b.ne below.
    eor v1.16b, v1.16b, v3.16b          // XOR in the round constant.
    eor v1.16b, v1.16b, v2.16b          // Running XOR with the previous key (1).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift previous key up one word, zero-filled.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (2).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (3).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (4): v1 is the new round key.
    st1 {v1.4s}, [x0], #16              // Append the new 4-word round key.
    b.ne .Lenc_key_128_loop
    str w26, [x0, #64]                  // x0 = ctx + 176 here, so this writes ctx + 240.
    eor v1.16b, v1.16b, v1.16b          // Wipe key material: v1 = last round key,
    eor v2.16b, v2.16b, v2.16b          // v2 = shifted words of the previous one.
    eor x0, x0, x0
    ldp x21, x22, [sp, #48]
    ldp x23, x24, [sp, #32]
    ldp x25, x26, [sp, #16]
    ldp x29, x30, [sp], #64             // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetEncryptKey128, .-SetEncryptKey128
165
166
/*
 * void SetDecryptKey128(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Builds the decryption key schedule: runs the encryption key expansion, then
 * rewrites the 11 round keys in reverse order. The first and last round keys
 * (v0, v10) are stored unmodified; the 9 keys in between are handled by the
 * SETDECKEY_* macros (presumably v1-v9 with aesimc applied - defined in
 * crypt_aes_macro_armv8.s, not visible here).
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 * On return x0 is 0.
 *
 * AAPCS64 requires the low 64 bits of v8-v15 to be preserved, so d8-d11 are
 * saved and restored around the body (v10 is used here directly; v8/v9 are
 * assumed to be used by the macros).
 */
.globl  SetDecryptKey128
.type   SetDecryptKey128, %function
.align 5
SetDecryptKey128:
AARCH64_PACIASP
    stp x29, x30, [sp, #-64]!
    add x29, sp, #0
    stp x25, x28, [sp, #16]
    stp d8, d9, [sp, #32]
    stp d10, d11, [sp, #48]             // Callee-saved registers pushed.

    mov x28, x0                         // Keep ctx: the expansion routine returns with x0 = 0.
    bl .Lenc_key_128
    ld1 {v0.4s}, [x28], #16             // Round key 0.
    SETDECKEY_LDR_9_BLOCK x28
    ld1 {v10.4s}, [x28]                 // Last round key; x28 stays on its slot.
    mov x25, #-16                       // Post-index step for walking backwards.
    SETDECKEY_INVMIX_9_BLOCK
    st1 {v0.4s}, [x28], x25             // Round key 0 goes into the last slot.
    SETDECKEY_STR_9_BLOCK x28, x25
    st1 {v10.4s}, [x28]                 // Last round key goes into the first slot.

    eor v0.16b, v0.16b, v0.16b          // Wipe key material from the caller-saved
    eor v1.16b, v1.16b, v1.16b          // vector registers. Reloading d8-d11 below
    eor v2.16b, v2.16b, v2.16b          // overwrites the low halves of v8-v11 and
    eor v3.16b, v3.16b, v3.16b          // zeroes their high halves.
    eor v4.16b, v4.16b, v4.16b
    eor v5.16b, v5.16b, v5.16b
    eor v6.16b, v6.16b, v6.16b
    eor v7.16b, v7.16b, v7.16b
    eor x0, x0, x0
    ldp d10, d11, [sp, #48]
    ldp d8, d9, [sp, #32]
    ldp x25, x28, [sp, #16]
    ldp x29, x30, [sp], #64             // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetDecryptKey128, .-SetDecryptKey128
198
199
/*
 * void SetEncryptKey192(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Expands a 24-byte user key. The key is stored first (24 bytes), then 8 steps
 * each append 6 words (24 bytes), for 216 bytes in total; the round count (12)
 * is written at ctx + 240.
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 * On return x0 is 0, and v0-v2 are zeroed so no key material is left in them.
 * Clobbers: x1 (advanced by 16), v0-v2, flags. x21-x26 are saved and restored.
 * Also entered via `bl .Lenc_key_192` from SetDecryptKey192.
 */
.globl  SetEncryptKey192
.type   SetEncryptKey192, %function
.align 5
SetEncryptKey192:
.Lenc_key_192:
AARCH64_PACIASP
    stp x29, x30, [sp, #-64]!
    add x29, sp, #0
    stp x25, x26, [sp, #16]
    stp x23, x24, [sp, #32]
    stp x21, x22, [sp, #48]             // Callee-saved registers pushed.

    mov x24, x0                         // x24 = read cursor over the key schedule.
    ld1 {v0.16b}, [x1], #16             // First 128 bits of the user key.
    mov w26, #12                        // Round count stored into the context at the end.
    st1 {v0.4s}, [x0], #16              // Store the first 128 bits.
    ld1 {v1.8b}, [x1]                   // Last 64 bits of the user key (upper half of v1 zeroed).
    adrp x23, .g_cron
    add x23, x23, :lo12:.g_cron         // x23 -> round-constant table.
    st1 {v1.2s}, [x0], #8               // Store the last 64 bits.
    eor v0.16b, v0.16b, v0.16b          // v0 = 0: zero round key for aese and zero fill for ext.
    mov w25, #8                         // 8 expansion steps.
.Lenc_key_192_loop:
    dup v1.4s, v1.s[1]                  // Broadcast the last word of the previous 6-word group.
    subs w25, w25, #1                   // Step counter; flags are consumed by b.ne below.
    ext v1.16b, v1.16b, v1.16b, #1      // Rotate by one byte (all four lanes are identical).
    ldr w22, [x23], #4                  // Next round constant.
    aese v1.16b, v0.16b                 // XOR with 0, then ShiftRows + SubBytes; ShiftRows is a
                                        // no-op here because all columns are equal.
    dup v2.4s, w22                      // Round constant in every lane.
    eor v1.16b, v1.16b, v2.16b          // XOR in the round constant.
    ld1 {v2.4s}, [x24], #16             // First 4 words of the previous group.
    eor v1.16b, v1.16b, v2.16b          // Running XOR with the previous words (1).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift previous words up one word, zero-filled.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (2).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (3).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (4): first 4 new words.
    st1 {v1.4s}, [x0], #16              // Append the 4 new words.
    ld1 {v2.2s}, [x24], #8              // Last 2 words of the previous group.
    dup v1.2s, v1.s[3]                  // Broadcast the newest word into the low 64 bits.
    eor v1.8b, v1.8b, v2.8b             // Running XOR for the remaining 2 words (1).
    ext v2.8b, v0.8b, v2.8b, #4         // Shift up one word, zero-filled.
    eor v1.8b, v1.8b, v2.8b             // Running XOR (2): last 2 new words.
    st1 {v1.2s}, [x0], #8               // Append the 2 new words.
    b.ne .Lenc_key_192_loop
    str w26, [x0, #24]                  // x0 = ctx + 216 here, so this writes ctx + 240.
    eor v1.16b, v1.16b, v1.16b          // Wipe key material left in v1/v2.
    eor v2.16b, v2.16b, v2.16b
    eor x0, x0, x0
    ldp x21, x22, [sp, #48]
    ldp x23, x24, [sp, #32]
    ldp x25, x26, [sp, #16]
    ldp x29, x30, [sp], #64             // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetEncryptKey192, .-SetEncryptKey192
261
262
/*
 * void SetDecryptKey192(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Builds the decryption key schedule: runs the encryption key expansion, then
 * rewrites the 13 round keys in reverse order. The first and last round keys
 * (v0, v12) are stored unmodified; v10 and v11 get aesimc here, and the 9 keys
 * before them are handled by the SETDECKEY_* macros (presumably v1-v9 with
 * aesimc applied - defined in crypt_aes_macro_armv8.s, not visible here).
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 * On return x0 is 0.
 *
 * AAPCS64 requires the low 64 bits of v8-v15 to be preserved, so d8-d13 are
 * saved and restored around the body (v10-v12 are used here directly; v8/v9
 * are assumed to be used by the macros).
 */
.globl  SetDecryptKey192
.type   SetDecryptKey192, %function
.align 5
SetDecryptKey192:
AARCH64_PACIASP
    stp x29, x30, [sp, #-80]!
    add x29, sp, #0
    stp x25, x28, [sp, #16]
    stp d8, d9, [sp, #32]
    stp d10, d11, [sp, #48]
    stp d12, d13, [sp, #64]             // Callee-saved registers pushed.

    mov x28, x0                         // Keep ctx: the expansion routine returns with x0 = 0.
    bl .Lenc_key_192
    mov x25, #-16                       // Post-index step for walking backwards.
    ld1 {v0.4s}, [x28], #16             // Round key 0.
    SETDECKEY_LDR_9_BLOCK x28
    ld1 {v10.4s}, [x28], #16
    ld1 {v11.4s}, [x28], #16
    ld1 {v12.4s}, [x28]                 // Last round key; x28 stays on its slot.
    SETDECKEY_INVMIX_9_BLOCK
    aesimc v10.16b, v10.16b
    aesimc v11.16b, v11.16b
    st1 {v0.4s}, [x28], x25             // Round key 0 goes into the last slot.
    SETDECKEY_STR_9_BLOCK x28, x25
    st1 {v10.4s}, [x28], x25
    st1 {v11.4s}, [x28], x25
    st1 {v12.4s}, [x28]                 // Last round key goes into the first slot.

    eor v0.16b, v0.16b, v0.16b          // Wipe key material from the caller-saved
    eor v1.16b, v1.16b, v1.16b          // vector registers. Reloading d8-d13 below
    eor v2.16b, v2.16b, v2.16b          // overwrites the low halves of v8-v13 and
    eor v3.16b, v3.16b, v3.16b          // zeroes their high halves.
    eor v4.16b, v4.16b, v4.16b
    eor v5.16b, v5.16b, v5.16b
    eor v6.16b, v6.16b, v6.16b
    eor v7.16b, v7.16b, v7.16b
    eor x0, x0, x0
    ldp d12, d13, [sp, #64]
    ldp d10, d11, [sp, #48]
    ldp d8, d9, [sp, #32]
    ldp x25, x28, [sp, #16]
    ldp x29, x30, [sp], #80             // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetDecryptKey192, .-SetDecryptKey192
300
/*
 * void SetEncryptKey256(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Expands a 32-byte user key into 15 round keys (240 bytes) stored at ctx,
 * then writes the round count (14) at ctx + 240. The loop produces two round
 * keys per pass (6 passes); a final half step produces the 15th.
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 * On return x0 is 0, and v0-v2 are zeroed so no key material is left in them.
 * Clobbers: x1 (advanced by 16), v0-v2, flags. x21-x26 are saved and restored.
 * Also entered via `bl .Lenc_key_256` from SetDecryptKey256.
 */
.globl  SetEncryptKey256
.type   SetEncryptKey256, %function
.align 5
SetEncryptKey256:
.Lenc_key_256:
AARCH64_PACIASP
    stp x29, x30, [sp, #-64]!
    add x29, sp, #0
    stp x25, x26, [sp, #16]
    stp x23, x24, [sp, #32]
    stp x21, x22, [sp, #48]             // Callee-saved registers pushed.

    adrp x23, .g_cron
    add x23, x23, :lo12:.g_cron         // x23 -> round-constant table.
    ld1 {v0.16b}, [x1], #16             // First 128 bits of the user key.
    mov x24, x0                         // x24 = read cursor over the key schedule.
    st1 {v0.4s}, [x0], #16              // Store the first 128 bits.
    ld1 {v1.16b}, [x1]                  // Last 128 bits of the user key.
    eor v0.16b, v0.16b, v0.16b          // v0 = 0: zero round key for aese and zero fill for ext.
    st1 {v1.4s}, [x0], #16              // Store the last 128 bits.
    mov w26, #14                        // Round count stored into the context at the end.
    mov w25, #6                         // 6 full passes; the 7th half step follows the loop.
.Lenc_key_256_loop:
    // First half: rotate + substitute + round constant.
    dup v1.4s, v1.s[3]                  // Broadcast the last word of the previous round key.
    ldr w22, [x23], #4                  // Next round constant.
    ext v1.16b, v1.16b, v1.16b, #1      // Rotate by one byte (all four lanes are identical).
    aese v1.16b, v0.16b                 // XOR with 0, then ShiftRows + SubBytes; ShiftRows is a
                                        // no-op here because all columns are equal.
    dup v2.4s, w22                      // Round constant in every lane.
    eor v1.16b, v1.16b, v2.16b          // XOR in the round constant.
    ld1 {v2.4s}, [x24], #16             // Round key from two positions back.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (1).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one word, zero-filled.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (2).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (3).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (4).
    st1 {v1.4s}, [x0], #16              // Append the new round key.
    subs w25, w25, #1                   // Pass counter; flags are consumed by b.ne below.
    // Second half: substitute only (no rotation, no round constant).
    dup v1.4s, v1.s[3]                  // Broadcast the last word just produced.
    ld1 {v2.4s}, [x24], #16             // Round key from two positions back.
    aese v1.16b, v0.16b                 // XOR with 0, then ShiftRows + SubBytes.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (1).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one word, zero-filled.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (2).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (3).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (4).
    st1 {v1.4s}, [x0], #16              // Append the new round key.
    b.ne .Lenc_key_256_loop

    // Final half step: same as the first half of the loop body.
    dup v1.4s, v1.s[3]                  // Broadcast the last word of the previous round key.
    ldr w22, [x23], #4                  // Next round constant.
    ext v1.16b, v1.16b, v1.16b, #1      // Rotate by one byte.
    aese v1.16b, v0.16b                 // XOR with 0, then ShiftRows + SubBytes.
    dup v2.4s, w22                      // Round constant in every lane.
    eor v1.16b, v1.16b, v2.16b          // XOR in the round constant.
    ld1 {v2.4s}, [x24], #16             // Round key from two positions back.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (1).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one word, zero-filled.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (2).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (3).
    ext v2.16b, v0.16b, v2.16b, #12     // Shift up one more word.
    eor v1.16b, v1.16b, v2.16b          // Running XOR (4).
    st1 {v1.4s}, [x0], #16              // Append the 15th round key.
    str w26, [x0]                       // x0 = ctx + 240 here: write the round count.
    eor v1.16b, v1.16b, v1.16b          // Wipe key material left in v1/v2.
    eor v2.16b, v2.16b, v2.16b
    eor x0, x0, x0
    ldp x21, x22, [sp, #48]
    ldp x23, x24, [sp, #32]
    ldp x25, x26, [sp, #16]
    ldp x29, x30, [sp], #64             // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetEncryptKey256, .-SetEncryptKey256
384
/*
 * void SetDecryptKey256(CRYPT_AES_Key *ctx, const uint8_t *key);
 * Builds the decryption key schedule: runs the encryption key expansion, then
 * rewrites the 15 round keys in reverse order. The first and last round keys
 * (v0, v14) are stored unmodified; v10-v13 get aesimc here, and the 9 keys
 * before them are handled by the SETDECKEY_* macros (presumably v1-v9 with
 * aesimc applied - defined in crypt_aes_macro_armv8.s, not visible here).
 * x0 => CRYPT_AES_Key *ctx; x1 => const uint8_t *key
 * On return x0 is 0.
 *
 * AAPCS64 requires the low 64 bits of v8-v15 to be preserved, so d8-d15 are
 * saved and restored around the body (v10-v14 are used here directly; v8/v9
 * are assumed to be used by the macros).
 */
.globl  SetDecryptKey256
.type   SetDecryptKey256, %function
.align 5
SetDecryptKey256:
AARCH64_PACIASP
    stp x29, x30, [sp, #-96]!
    add x29, sp, #0
    stp x25, x28, [sp, #16]
    stp d8, d9, [sp, #32]
    stp d10, d11, [sp, #48]
    stp d12, d13, [sp, #64]
    stp d14, d15, [sp, #80]             // Callee-saved registers pushed.

    mov x28, x0                         // Keep ctx: the expansion routine returns with x0 = 0.
    bl .Lenc_key_256
    mov x25, #-16                       // Post-index step for walking backwards.
    ld1 {v0.4s}, [x28], #16             // Round key 0.
    SETDECKEY_LDR_9_BLOCK x28
    ld1 {v10.4s}, [x28], #16
    ld1 {v11.4s}, [x28], #16
    ld1 {v12.4s}, [x28], #16
    ld1 {v13.4s}, [x28], #16
    ld1 {v14.4s}, [x28]                 // Last round key; x28 stays on its slot.
    SETDECKEY_INVMIX_9_BLOCK
    aesimc v10.16b, v10.16b
    aesimc v11.16b, v11.16b
    aesimc v12.16b, v12.16b
    aesimc v13.16b, v13.16b
    st1 {v0.4s}, [x28], x25             // Round key 0 goes into the last slot.
    SETDECKEY_STR_9_BLOCK x28, x25
    st1 {v10.4s}, [x28], x25
    st1 {v11.4s}, [x28], x25
    st1 {v12.4s}, [x28], x25
    st1 {v13.4s}, [x28], x25
    st1 {v14.4s}, [x28]                 // Last round key goes into the first slot.

    eor v0.16b, v0.16b, v0.16b          // Wipe key material from the caller-saved
    eor v1.16b, v1.16b, v1.16b          // vector registers. Reloading d8-d15 below
    eor v2.16b, v2.16b, v2.16b          // overwrites the low halves of v8-v15 and
    eor v3.16b, v3.16b, v3.16b          // zeroes their high halves.
    eor v4.16b, v4.16b, v4.16b
    eor v5.16b, v5.16b, v5.16b
    eor v6.16b, v6.16b, v6.16b
    eor v7.16b, v7.16b, v7.16b
    eor x0, x0, x0
    ldp d14, d15, [sp, #80]
    ldp d12, d13, [sp, #64]
    ldp d10, d11, [sp, #48]
    ldp d8, d9, [sp, #32]
    ldp x25, x28, [sp, #16]
    ldp x29, x30, [sp], #96             // Callee-saved registers popped.
AARCH64_AUTIASP
    ret
.size   SetDecryptKey256, .-SetDecryptKey256
428
429#endif
430