/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
15
16#include "hitls_build.h"
17#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)
18
19#include "crypt_arm.h"
20#include "aes_gcm_common_aarch64.S"
21#include "aes128_gcm_aarch64.S"
22#include "aes192_gcm_aarch64.S"
23#include "aes256_gcm_aarch64.S"
24
25.text
26.arch armv8-a+crypto
27
/*
 * AES_GCM_ClearAsm
 *
 * Zeroes the vector registers that the AES-GCM block routines in this file
 * use for round keys (KEY0..KEY13) and for the GHASH registers HASH0..HASH4,
 * so that no key material is left behind in them.
 *
 * KEY10..KEY13 are only loaded on the 192/256-bit key paths of the block
 * routines (ld1 {KEY10, KEY11} / ld1 {KEY12, KEY13}); they are wiped here
 * together with KEY0..KEY9 so that the wipe covers every key size.
 *
 * Takes no inputs and produces no result. Touches only the listed vector
 * registers (each eor xors a register with itself, which yields zero).
 *
 * NOTE(review): the KEYn / HASHn aliases are defined in
 * aes_gcm_common_aarch64.S, which is not visible here. This routine assumes
 * the caller does not need the previous contents of any of these registers;
 * confirm that none of them aliases a callee-saved register (v8..v15).
 */
.globl AES_GCM_ClearAsm
.type AES_GCM_ClearAsm,%function
.align 4
AES_GCM_ClearAsm:
AARCH64_PACIASP
    // Round-key registers shared by all key sizes.
    eor KEY0.16b, KEY0.16b, KEY0.16b
    eor KEY1.16b, KEY1.16b, KEY1.16b
    eor KEY2.16b, KEY2.16b, KEY2.16b
    eor KEY3.16b, KEY3.16b, KEY3.16b
    eor KEY4.16b, KEY4.16b, KEY4.16b
    eor KEY5.16b, KEY5.16b, KEY5.16b
    eor KEY6.16b, KEY6.16b, KEY6.16b
    eor KEY7.16b, KEY7.16b, KEY7.16b
    eor KEY8.16b, KEY8.16b, KEY8.16b
    eor KEY9.16b, KEY9.16b, KEY9.16b
    // Extra round-key registers loaded only for 192/256-bit keys.
    eor KEY10.16b, KEY10.16b, KEY10.16b
    eor KEY11.16b, KEY11.16b, KEY11.16b
    eor KEY12.16b, KEY12.16b, KEY12.16b
    eor KEY13.16b, KEY13.16b, KEY13.16b
    // GHASH registers.
    eor HASH0.16b, HASH0.16b, HASH0.16b
    eor HASH1.16b, HASH1.16b, HASH1.16b
    eor HASH2.16b, HASH2.16b, HASH2.16b
    eor HASH3.16b, HASH3.16b, HASH3.16b
    eor HASH4.16b, HASH4.16b, HASH4.16b
AARCH64_AUTIASP
    ret
.size AES_GCM_ClearAsm,.-AES_GCM_ClearAsm
52
/*
 * AES_GCM_EncryptBlockAsm
 *
 * AES-GCM bulk encryption, processed in 64-byte chunks.
 *
 * All operands are reached through register aliases and macros defined in the
 * included aes*_gcm_aarch64.S files (not visible here). What this file shows:
 *   IVEC0  - base of the GCM state: the 16-byte counter block is read from and
 *            written back to offset 0; offset 16 is used as HTABLE on entry
 *            and receives HASH0 on exit.
 *   KEY00  - round-key schedule; the round count is read from byte offset 240.
 *   INLEN  - input length in bytes; only whole 64-byte chunks are processed.
 * The input/output data pointers are consumed inside the macros.
 *
 * Returns x0 = INLEN & -64, i.e. the number of bytes processed.
 *
 * Control flow: the round count (10 / 12 / other) selects the AES-128 / 192 /
 * 256 path. Each path handles the first chunk inline, then runs its
 * GCM_ENCxxx_LOOP macro until the remaining-chunk test yields "le", and
 * finally joins at .LEnc_end where the last chunk is folded into the hash and
 * the counter and hash are stored back.
 *
 * If INLEN is below 64 the routine returns 0 immediately. Without this guard
 * the first chunk is always processed before the first length test, which
 * would touch 64 bytes regardless of INLEN.
 */
.globl AES_GCM_EncryptBlockAsm
.type AES_GCM_EncryptBlockAsm,%function
.align 4
AES_GCM_EncryptBlockAsm:
AARCH64_PACIASP
    cmp INLEN, #64                              // Not even one full 64-byte chunk?
    b.lo .LEnc_ret                              // Return 0; .LEnc_ret is past OUT_STP, so skipping
                                                // IN_STP keeps the stack balanced
    IN_STP                                      // Register Protection
    ldr ROUNDS, [KEY00, #240]                   // Load the number of rounds
    add HTABLE, IVEC0, #16                      // Sets the gHash start address.
    lsr COUNT, INLEN, #6                        // Divided by 64, count the number of chunks
    cmp ROUNDS, #10                             // Compare the number of rounds with 10
    LOAD_KEY                                    // load AES KEY (the b.eq below consumes the flags of the
                                                // cmp above, so this macro must not change them)
    b.eq .LEnc_128_process                      // go to AES128 processing part
    cmp ROUNDS, #12                             // Compare the number of rounds with 12
    ld1 {KEY10.4s, KEY11.4s}, [KEY00], #32      // two more round keys; KEY00 advances by 32 bytes
    b.eq .LEnc_192_process                      // go to AES192 processing part
    ld1 {KEY12.4s, KEY13.4s}, [KEY00], #32      // two more round keys; KEY00 advances by 32 bytes
    b .LEnc_256_process                         // go to AES256 processing part

.LEnc_128_process:
    ldp KEND0, KEND1, [KEY00]                   // load the final round key (key-10) into general registers
    ldp IV_H, IV_L, [IVEC0]                     // load IV
#ifdef HITLS_BIG_ENDIAN
    ror KEND0, KEND0, #32                       // swap the 32-bit halves of each key word
    ror KEND1, KEND1, #32
    REV_2S IV_H, IV_L
#endif
    lsr IV_C, IV_L, #32                         // upper 32 bits of IV_L
    ld1 {CTR0.16b}, [IVEC0]                                         // CTR block 0
    lsl IVCTR, COUNTW, #2                                           // COUNT * 4 (four 16-byte blocks per chunk)
    LOAD_GHASH_TABLE                            // load gHashTable
    BEFORE_ROUND
    FIRST_ROUND                                 // data preprocessing
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b         // round 7
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b         // round 8
    rev w9, IV_W                                                    // CTR0--Start
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b     // round 9
    orr x9, x11, x9, lsl #32                                        // CTR0 block 4k+8
    add IV_W, IV_W, #1                                              // CTR0++
    eor v17.16b, v17.16b, v9.16b                                    // h4k | h3k
    eor v16.16b, v16.16b, v8.16b                                    // h2k | h1k
    STORE_RESULT                                // data preprocessing
                                                // NOTE(review): the b.le below presumably tests flags
                                                // set inside STORE_RESULT - confirm in the macro.
    b.le .LEnc_end                              // After the first 64-byte processing is complete,
                                                // check the remaining length.
    b .LEnc_128_loop                            // Enter the cyclic processing flow.

.LEnc_192_process:
    ldp KEND0, KEND1, [KEY00]                                       // load the final round key (follows KEY11)
    ldp IV_H, IV_L, [IVEC0]                                         // load IV
#ifdef HITLS_BIG_ENDIAN
    ror KEND0, KEND0, #32                                           // swap the 32-bit halves of each key word
    ror KEND1, KEND1, #32
    REV_2S IV_H, IV_L
#endif
    lsr IV_C, IV_L, #32                                             // upper 32 bits of IV_L
    ld1 {CTR0.16b}, [IVEC0]                                         // CTR block 0
    lsl IVCTR, COUNTW, #2                                           // COUNT * 4 (four 16-byte blocks per chunk)
    LOAD_GHASH_TABLE                                                // load hash table
    BEFORE_ROUND
    FIRST_ROUND                                                     // aes round
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b         // round 7
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b         // round 8
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b         // round 9
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY10.16b        // round 10
    rev w9, IV_W                                                    // CTR0--Start
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY11.16b    // round 11
    orr x9, x11, x9, lsl #32                                        // CTR0 block 4k+8
    add IV_W, IV_W, #1                                              // CTR0++
    eor v17.16b, v17.16b, v9.16b                                    // h4k | h3k
    eor v16.16b, v16.16b, v8.16b                                    // h2k | h1k
    STORE_RESULT
    b.le .LEnc_end                                                  // no chunk left after the first one
    b .LEnc_192_loop

.LEnc_256_process:
    ldp KEND0, KEND1, [KEY00]                                       // load the final round key (follows KEY13)
    ldp IV_H, IV_L, [IVEC0]                                         // load IV
#ifdef HITLS_BIG_ENDIAN
    ror KEND0, KEND0, #32                                           // swap the 32-bit halves of each key word
    ror KEND1, KEND1, #32
    REV_2S IV_H, IV_L
#endif
    lsr IV_C, IV_L, #32                                             // upper 32 bits of IV_L
    ld1 {CTR0.16b}, [IVEC0]                                         // CTR block 0
    lsl IVCTR, COUNTW, #2                                           // COUNT * 4 (four 16-byte blocks per chunk)
    LOAD_GHASH_TABLE
    BEFORE_ROUND
    FIRST_ROUND
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b         // round 7
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b         // round 8
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b         // round 9
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY10.16b        // round 10
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY11.16b        // round 11
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY12.16b        // round 12
    rev w9, IV_W                                                    // CTR0--Start
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY13.16b    // round 13
    orr x9, x11, x9, lsl #32                                        // CTR0 block 4k+8
    add IV_W, IV_W, #1                                              // CTR0++
    eor v17.16b, v17.16b, v9.16b                                    // h4k | h3k
    eor v16.16b, v16.16b, v8.16b                                    // h2k | h1k
    STORE_RESULT
    b.le .LEnc_end                                                  // no chunk left after the first one
    b .LEnc_256_loop

.LEnc_128_loop:
    GCM_ENC128_LOOP                         // Processes 64 bytes.
    b.le .LEnc_end                          // If the number of remaining chunks is <= 0, exit the loop.
    b .LEnc_128_loop                        // Continue the loop

.LEnc_192_loop:
    GCM_ENC192_LOOP                         // Processes 64 bytes.
    b.le .LEnc_end                          // <= 0
    b .LEnc_192_loop

.LEnc_256_loop:
    GCM_ENC256_LOOP                         // Processes 64 bytes.
    b.le .LEnc_end                          // <= 0
    b .LEnc_256_loop

.LEnc_end:
    // Common tail: hash the last four output blocks, then store the counter and hash.
    rev64 OUT0.16b, OUT0.16b                // GHASH block 4k (only t0 is free)
    rev64 OUT1.16b, OUT1.16b                // GHASH block 4k+1 (t0 and t1 free)
    rev64 OUT2.16b, OUT2.16b                // GHASH[2] (t0, t1, and t2 free)
    rev64 OUT3.16b, OUT3.16b                // GHASH[0] (t0, t1, t2 and t3 free)
    GHASH_BLOCK                             // Ghash calculation and encryption/decryption processing
    rev w9, IVCTR                           // CTR[0]: byte-reverse the 32-bit counter
    ext HASH0.16b, HASH0.16b, HASH0.16b, #8 // swap the two 64-bit halves of the hash
    add x6, IVEC0, #16                      // x6 = address of the stored hash
    orr x9, x11, x9, lsl #32                // CTR[0]: counter goes into the upper 32 bits
    fmov d0, x10                            // CTR[0]: low 64 bits (raw d0; presumably CTR0 is v0 - confirm)
    fmov CTR0.d[1], x9                      // CTR[0]--OK
    st1 {CTR0.16b }, [IVEC0]                // write the updated counter block back
    rev64 HASH0.16b, HASH0.16b
    st1 {HASH0.16b }, [x6]                  // out hash
    OUT_STP
.LEnc_ret:
    and x0, INLEN, #-64                     // length of processed data (0 on the short-input path)
AARCH64_AUTIASP
    ret
.size AES_GCM_EncryptBlockAsm,.-AES_GCM_EncryptBlockAsm
192
/*
 * AES_GCM_DecryptBlockAsm
 *
 * AES-GCM bulk decryption, processed in 64-byte chunks. Counterpart of
 * AES_GCM_EncryptBlockAsm with the same overall structure.
 *
 * All operands are reached through register aliases and macros defined in the
 * included aes*_gcm_aarch64.S files (not visible here). What this file shows:
 *   x0     - copied into IVEC0 on entry. The 16-byte counter block is read from
 *            and written back to offset 0; offset 16 is used as HTABLE on
 *            entry and receives HASH0 on exit.
 *   KEY00  - round-key schedule; the round count is read from byte offset 240.
 *   INLEN  - input length in bytes; only whole 64-byte chunks are processed.
 * The input/output data pointers are consumed inside the macros.
 *
 * Returns x0 = INLEN & -64, i.e. the number of bytes processed.
 *
 * Control flow: the round count (10 / 12 / other) selects the AES-128 / 192 /
 * 256 path. Each path runs all AES rounds for the first chunk inline, then
 * runs its GCM_DECxxx_LOOP macro until the remaining-chunk test yields "le",
 * and finally joins at .LDec_end where the hash is finished and the counter
 * and hash are stored back.
 *
 * If INLEN is below 64 the routine returns 0 immediately. Without this guard
 * the first chunk is always processed before the first length test, which
 * would touch 64 bytes regardless of INLEN.
 */
.globl AES_GCM_DecryptBlockAsm
.type AES_GCM_DecryptBlockAsm,%function
.align 4
AES_GCM_DecryptBlockAsm:
AARCH64_PACIASP
    cmp INLEN, #64                                                  // Not even one full 64-byte chunk?
    b.lo .LDec_ret                                                  // Return 0; .LDec_ret is past OUT_STP, so
                                                                    // skipping IN_STP keeps the stack balanced
    IN_STP                                                          // stp
    ldr ROUNDS, [KEY00, #240]                                       // pull rounds
    mov IVEC0, x0                                                   // ctr0
    add HTABLE, IVEC0, #16                                          // htable
    lsr COUNT, INLEN, #6                                            // divided by 64
    cmp ROUNDS, #10
    LOAD_KEY                                                        // the b.eq below consumes the flags of the
                                                                    // cmp above, so this macro must not change them
    b.eq .LDec_128_process
    cmp ROUNDS, #12
    ld1 {KEY10.4s, KEY11.4s}, [KEY00], #32                          // two more round keys; KEY00 advances by 32
    b.eq .LDec_192_process
    ld1 {KEY12.4s, KEY13.4s}, [KEY00], #32                          // two more round keys; KEY00 advances by 32
    b .LDec_256_process

.LDec_128_process:
    ldp KEND0, KEND1, [KEY00]                                       // load the final round key (key-10)
    ldp IV_H, IV_L, [IVEC0]                                         // load IV
#ifdef HITLS_BIG_ENDIAN
    ror KEND0, KEND0, #32                                           // swap the 32-bit halves of each key word
    ror KEND1, KEND1, #32
    REV_2S IV_H, IV_L
#endif
    lsr IV_C, IV_L, #32                                             // upper 32 bits of IV_L
    ld1 {CTR0.16b}, [IVEC0]                                         // CTR[0]
#ifdef HITLS_BIG_ENDIAN
    // NOTE(review): the encrypt routine has no matching REV_2S on KEND0/KEND1 - confirm which is intended.
    REV_2S KEND0, KEND1
#endif
    lsl IVCTR, COUNTW, #2                                           // COUNT * 4 (four 16-byte blocks per chunk)
    LOAD_GHASH_TABLE
    BEFORE_ROUND
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY0.16b         // round 0
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY1.16b         // round 1
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY2.16b         // round 2
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY3.16b         // round 3
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY4.16b         // round 4
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY5.16b         // round 5
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY6.16b         // round 6
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b         // round 7
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b         // round 8
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b     // round 9
    eor v17.16b, v17.16b, v9.16b                                    // h4k | h3k
    eor v16.16b, v16.16b, v8.16b                                    // h2k | h1k
    STORE_DEC_RESULT                                                // NOTE(review): the b.le below presumably tests
                                                                    // flags set inside this macro - confirm.
    b.le .LDec_end                                                  // no chunk left after the first one
    b .LDec_128_loop

.LDec_192_process:
    ldp KEND0, KEND1, [KEY00]                                       // load the final round key (follows KEY11)
    ldp IV_H, IV_L, [IVEC0]                                         // load IV
#ifdef HITLS_BIG_ENDIAN
    ror KEND0, KEND0, #32                                           // swap the 32-bit halves of each key word
    ror KEND1, KEND1, #32
    REV_2S IV_H, IV_L
#endif
    lsr IV_C, IV_L, #32                                             // upper 32 bits of IV_L
    ld1 {CTR0.16b}, [IVEC0]                                         // CTR[0]
#ifdef HITLS_BIG_ENDIAN
    REV_2S KEND0, KEND1
#endif
    lsl IVCTR, COUNTW, #2                                           // COUNT * 4 (four 16-byte blocks per chunk)
    LOAD_GHASH_TABLE
    BEFORE_ROUND
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY0.16b         // round 0
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY1.16b         // round 1
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY2.16b         // round 2
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY3.16b         // round 3
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY4.16b         // round 4
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY5.16b         // round 5
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY6.16b         // round 6
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b         // round 7
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b         // round 8
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b         // round 9
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY10.16b        // round 10
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY11.16b    // round 11
    eor v17.16b, v17.16b, v9.16b                                    // h4k | h3k
    eor v16.16b, v16.16b, v8.16b                                    // h2k | h1k
    STORE_DEC_RESULT
    b.le .LDec_end                                                  // no chunk left after the first one
    b .LDec_192_loop

.LDec_256_process:
    ldp KEND0, KEND1, [KEY00]                                       // load the final round key (follows KEY13)
    ldp IV_H, IV_L, [IVEC0]                                         // load IV
#ifdef HITLS_BIG_ENDIAN
    ror KEND0, KEND0, #32                                           // swap the 32-bit halves of each key word
    ror KEND1, KEND1, #32
    REV_2S IV_H, IV_L
#endif
    lsr IV_C, IV_L, #32                                             // upper 32 bits of IV_L
    ld1 {CTR0.16b}, [IVEC0]                                         // CTR[0]
#ifdef HITLS_BIG_ENDIAN
    REV_2S KEND0, KEND1
#endif
    lsl IVCTR, COUNTW, #2                                           // COUNT * 4 (four 16-byte blocks per chunk)
    LOAD_GHASH_TABLE
    BEFORE_ROUND

    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY0.16b         // round 0
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY1.16b         // round 1
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY2.16b         // round 2
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY3.16b         // round 3
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY4.16b         // round 4
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY5.16b         // round 5
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY6.16b         // round 6
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b         // round 7
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b         // round 8
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b         // round 9
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY10.16b        // round 10
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY11.16b        // round 11
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY12.16b        // round 12
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY13.16b    // round 13
    eor v17.16b, v17.16b, v9.16b                                    // h4k | h3k
    eor v16.16b, v16.16b, v8.16b                                    // h2k | h1k
    STORE_DEC_RESULT
    b.le .LDec_end                                                  // no chunk left after the first one
    b .LDec_256_loop

.LDec_128_loop:
    GCM_DEC128_LOOP                             // one loop iteration (macro body not visible here)
    b.le .LDec_end                              // <=0
    b .LDec_128_loop

.LDec_192_loop:
    GCM_DEC192_LOOP
    b.le .LDec_end                              // <=0
    b .LDec_192_loop

.LDec_256_loop:
    GCM_DEC256_LOOP
    b.le .LDec_end                              // <=0
    b .LDec_256_loop

.LDec_end:
    // Common tail: finish the hash, then store the counter and hash.
    GHASH_DEC_BLOCK
    rev w9, IVCTR                       // CTR[0]: byte-reverse the 32-bit counter
    ext HASH0.16b, HASH0.16b, HASH0.16b, #8 // swap the two 64-bit halves of the hash
    add x6, IVEC0, #16                  // x6 = address of the stored hash
    orr x9, x11, x9, lsl #32            // CTR[0]: counter goes into the upper 32 bits
    fmov d0, x10                        // CTR[0]: low 64 bits (raw d0; presumably CTR0 is v0 - confirm)
    rev64 HASH0.16b, HASH0.16b
    fmov CTR0.d[1], x9                  // CTR[0]--OK
    st1 {CTR0.16b }, [IVEC0]            // write the updated counter block back
    st1 {HASH0.16b }, [x6]              // out hash
    OUT_STP
.LDec_ret:
    and x0, INLEN, #-64                     // length of processed data (0 on the short-input path)
AARCH64_AUTIASP
    ret
.size AES_GCM_DecryptBlockAsm,.-AES_GCM_DecryptBlockAsm
347#endif