• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_CTR)
18
19#include "crypt_arm.h"
20#include "crypt_aes_macro_armv8.s"
21
.file   "crypt_aes_ctr_armv8.S"
.text
.arch   armv8-a+crypto

/* GAS treats the .align operand as a power of two on Arm targets: 2^5 = 32 bytes. */
.align  5

/* Arguments of CRYPT_AES_CTR_Encrypt, in prototype order (ctx, in, out, len, iv). */
KEY     .req    x0
IN      .req    x1
OUT     .req    x2
LEN     .req    x3
IV      .req    x4

/* LTMP: working copy of the byte count.  CTMP: copy of the counter block
 * from which the per-block counters are derived. */
LTMP    .req    x12
CTMP    .req    v27

/* Data blocks: loaded from IN, XORed with the keystream, stored to OUT. */
BLK0    .req    v0
BLK1    .req    v1
BLK2    .req    v2
BLK3    .req    v3
BLK4    .req    v4
BLK5    .req    v5
BLK6    .req    v6
BLK7    .req    v7

/* Counter blocks handed to the AES_ENC_* macros and then XORed into the data. */
CTR0    .req    v19
CTR1    .req    v20
CTR2    .req    v21
CTR3    .req    v22
CTR4    .req    v23
CTR5    .req    v24
CTR6    .req    v25
CTR7    .req    v26

/* Registers passed to the AES_ENC_* macros (defined in crypt_aes_macro_armv8.s).
 * NOTE(review): presumably round-key scratch and the round counter - confirm there. */
RDK0    .req    v17
RDK1    .req    v18
ROUNDS  .req    w6
58
/*
 * ADDCTR lane
 *
 * Advances the running 32-bit counter kept in w11 by one (wrapping modulo
 * 2^32) and writes the new value, byte-reversed relative to its arithmetic
 * form, into the 32-bit vector element named by \lane (e.g. CTR1.s[3]).
 *
 * Big-endian build:    w11 is byte-reversed, incremented, reversed back and
 *                      copied to \lane.  Clobbers w11.
 * Other builds:        w11 is incremented in place, its byte-reversed copy is
 *                      formed in w9 and copied to \lane.  Clobbers w9, w11.
 *
 * The two forms expect w11 in different byte orders; the entry code of
 * CRYPT_AES_CTR_Encrypt byte-reverses w11 only in the non-big-endian build.
 */
.macro ADDCTR lane
#ifdef HITLS_BIG_ENDIAN
    rev w11, w11
    add w11, w11, #1
    rev w11, w11
    mov \lane, w11
#else
    add w11, w11, #1
    rev w9, w11
    mov \lane, w9
#endif
.endm
72
73/*
74 * Vn      -  V0 ~ V31
75 * 8bytes  -  Vn.8B  Vn.4H  Vn.2S  Vn.1D
76 * 16bytes -  Vn.16B Vn.8H  Vn.4S  Vn.2D
77 */
78
/*
 * int32_t CRYPT_AES_CTR_Encrypt(const CRYPT_AES_Key *ctx,
 *                              const uint8_t *in,
 *                              uint8_t *out,
 *                              uint32_t len,
 *                              uint8_t *iv);
 *
 * Encrypts counter blocks with the AES_ENC_*_BLK(S) macros and XORs the
 * result into the input, 16 bytes per block.
 *
 * In:   x0 (KEY) = ctx, x1 (IN) = in, x2 (OUT) = out,
 *       w3 (LEN) = len in bytes, x4 (IV) = 16-byte counter block.
 * Out:  x0 = 0 on every path.
 *       When len != 0 the counter block at [IV] is rewritten with the counter
 *       that follows the last one consumed; when len == 0 it is not written.
 * Writes: x1, x2 (post-incremented), w9 (non-big-endian build), w11, x12, x14,
 *       v0-v7, v17-v27, flags, plus whatever the AES_ENC_* macros use
 *       (ROUNDS / w6 is passed to them).  All of these are caller-saved
 *       registers under AAPCS64.  v17-v26 are zeroed before returning.
 *
 * Length handling: 128-byte groups are processed while at least 128 bytes
 * remain, then one final group of 1 to 7 blocks is selected by 16-byte
 * thresholds.  A remainder of 1..15 bytes beyond a whole block is not
 * processed, and a total length of 1..15 still loads and stores a full
 * 16-byte block.
 * NOTE(review): callers presumably always pass a multiple of 16 - confirm.
 *
 * Counter handling: only the last 32-bit word of the counter block is
 * incremented (modulo 2^32); no carry is propagated into the other 96 bits.
 * NOTE(review): presumably the caller deals with 32-bit counter wrap - confirm.
 */

.globl CRYPT_AES_CTR_Encrypt
.type CRYPT_AES_CTR_Encrypt, %function
CRYPT_AES_CTR_Encrypt:
AARCH64_PACIASP
    ld1 {CTR0.16b}, [IV]              // CTR0 = current counter block.
    mov CTMP.16b, CTR0.16b            // CTMP = template for the following counters.
    mov w11, CTR0.s[3]                // w11 = last 32-bit word of the counter block.
#ifndef HITLS_BIG_ENDIAN
    rev w11, w11                      // Arithmetic form expected by ADDCTR in this build.
#endif
    /*
     * len is a 32-bit argument: AAPCS64 leaves bits 63:32 of x3 unspecified,
     * so copy through the W view, which zero-extends into LTMP (x12).
     */
    mov w12, w3

/* Dispatch on the remaining byte count in LTMP (unsigned comparisons). */
.Lctr_aesenc_start:
    cmp LTMP, #64
    b.hs .Lctr_enc_above_equal_4_blks
    cmp LTMP, #32
    b.hs .Lctr_enc_above_equal_2_blks
    cbz LTMP, .Lctr_len_zero          // Nothing to do: leave [IV] untouched.
    b .Lctr_enc_proc_1_blk

.Lctr_enc_above_equal_2_blks:         // 32 <= LTMP < 64
    cmp LTMP, #48
    b.lo .Lctr_enc_proc_2_blks
    b .Lctr_enc_proc_3_blks

.Lctr_enc_above_equal_4_blks:         // LTMP >= 64
    cmp LTMP, #96
    b.hs .Lctr_enc_above_equal_6_blks
    cmp LTMP, #80
    b.lo .Lctr_enc_proc_4_blks
    b .Lctr_enc_proc_5_blks

.Lctr_enc_above_equal_6_blks:         // LTMP >= 96
    cmp LTMP, #112
    b.lo .Lctr_enc_proc_6_blks
    cmp LTMP, #128
    b.lo .Lctr_enc_proc_7_blks
                                      // LTMP >= 128: fall through.
.Lctr_enc_proc_8_blks:

/* When the length is greater than or equal to 128, the eight-block loop is used. */
.Lctr_aesenc_8_blks_loop:

    /* Derive counters 1..7 from the template; CTR0 already holds counter 0. */
    mov CTR1.16b, CTMP.16b
    mov CTR2.16b, CTMP.16b
    mov CTR3.16b, CTMP.16b
    mov CTR4.16b, CTMP.16b
    mov CTR5.16b, CTMP.16b
    mov CTR6.16b, CTMP.16b
    mov CTR7.16b, CTMP.16b

    ADDCTR CTR1.s[3]
    ADDCTR CTR2.s[3]
    ADDCTR CTR3.s[3]
    ADDCTR CTR4.s[3]
    ADDCTR CTR5.s[3]
    ADDCTR CTR6.s[3]
    ADDCTR CTR7.s[3]

    mov x14, KEY                      // Hand the macro a copy so KEY stays valid for later passes.
    AES_ENC_8_BLKS  x14 CTR0.16b CTR1.16b CTR2.16b CTR3.16b CTR4.16b \
                    CTR5.16b CTR6.16b CTR7.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN], #64
    ld1 {BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [IN], #64

    eor BLK0.16b, BLK0.16b, CTR0.16b
    eor BLK1.16b, BLK1.16b, CTR1.16b
    eor BLK2.16b, BLK2.16b, CTR2.16b
    eor BLK3.16b, BLK3.16b, CTR3.16b
    eor BLK4.16b, BLK4.16b, CTR4.16b
    eor BLK5.16b, BLK5.16b, CTR5.16b
    eor BLK6.16b, BLK6.16b, CTR6.16b
    eor BLK7.16b, BLK7.16b, CTR7.16b

    st1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1 {BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [OUT], #64

    sub LTMP, LTMP, #128
    cbz LTMP, .Lctr_aesenc_finish     // Done: the finish code advances the counter once more.

    ADDCTR CTMP.s[3]                  // Counter 0 of the next group.
    mov CTR0.16b, CTMP.16b

    cmp LTMP, #128
    b.lo .Lctr_aesenc_start           // Fewer than eight blocks left: pick a tail path.
    b .Lctr_aesenc_8_blks_loop

/* Tail paths: each handles the final group and leaves IN/OUT unadvanced past it. */
.Lctr_enc_proc_1_blk:

    AES_ENC_1_BLK KEY CTR0.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    ld1 {BLK0.16b}, [IN]
    eor BLK0.16b, BLK0.16b, CTR0.16b
    st1 {BLK0.16b}, [OUT]
    b .Lctr_aesenc_finish

.Lctr_enc_proc_2_blks:

    mov CTR1.16b, CTMP.16b
    ADDCTR CTR1.s[3]

    AES_ENC_2_BLKS KEY CTR0.16b CTR1.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1 {BLK0.16b, BLK1.16b}, [IN]

    eor BLK0.16b, BLK0.16b, CTR0.16b
    eor BLK1.16b, BLK1.16b, CTR1.16b

    st1 {BLK0.16b, BLK1.16b}, [OUT]
    b .Lctr_aesenc_finish

.Lctr_enc_proc_3_blks:

    mov CTR1.16b, CTMP.16b
    mov CTR2.16b, CTMP.16b

    ADDCTR CTR1.s[3]
    ADDCTR CTR2.s[3]

    AES_ENC_3_BLKS KEY CTR0.16b CTR1.16b CTR2.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1 {BLK0.16b, BLK1.16b, BLK2.16b}, [IN]

    eor BLK0.16b, BLK0.16b, CTR0.16b
    eor BLK1.16b, BLK1.16b, CTR1.16b
    eor BLK2.16b, BLK2.16b, CTR2.16b

    st1 {BLK0.16b, BLK1.16b, BLK2.16b}, [OUT]
    b .Lctr_aesenc_finish

.Lctr_enc_proc_4_blks:

    mov CTR1.16b, CTMP.16b
    mov CTR2.16b, CTMP.16b
    mov CTR3.16b, CTMP.16b

    ADDCTR CTR1.s[3]
    ADDCTR CTR2.s[3]
    ADDCTR CTR3.s[3]

    AES_ENC_4_BLKS KEY CTR0.16b CTR1.16b CTR2.16b CTR3.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN]

    eor BLK0.16b, BLK0.16b, CTR0.16b
    eor BLK1.16b, BLK1.16b, CTR1.16b
    eor BLK2.16b, BLK2.16b, CTR2.16b
    eor BLK3.16b, BLK3.16b, CTR3.16b

    st1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT]
    b .Lctr_aesenc_finish

.Lctr_enc_proc_5_blks:

    mov CTR1.16b, CTMP.16b
    mov CTR2.16b, CTMP.16b
    mov CTR3.16b, CTMP.16b
    mov CTR4.16b, CTMP.16b

    ADDCTR CTR1.s[3]
    ADDCTR CTR2.s[3]
    ADDCTR CTR3.s[3]
    ADDCTR CTR4.s[3]

    AES_ENC_5_BLKS KEY CTR0.16b CTR1.16b CTR2.16b CTR3.16b CTR4.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN], #64
    ld1 {BLK4.16b}, [IN]

    eor BLK0.16b, BLK0.16b, CTR0.16b
    eor BLK1.16b, BLK1.16b, CTR1.16b
    eor BLK2.16b, BLK2.16b, CTR2.16b
    eor BLK3.16b, BLK3.16b, CTR3.16b
    eor BLK4.16b, BLK4.16b, CTR4.16b

    st1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1 {BLK4.16b}, [OUT]
    b .Lctr_aesenc_finish

.Lctr_enc_proc_6_blks:

    mov CTR1.16b, CTMP.16b
    mov CTR2.16b, CTMP.16b
    mov CTR3.16b, CTMP.16b
    mov CTR4.16b, CTMP.16b
    mov CTR5.16b, CTMP.16b

    ADDCTR CTR1.s[3]
    ADDCTR CTR2.s[3]
    ADDCTR CTR3.s[3]
    ADDCTR CTR4.s[3]
    ADDCTR CTR5.s[3]

    AES_ENC_6_BLKS  KEY CTR0.16b CTR1.16b CTR2.16b CTR3.16b CTR4.16b \
                    CTR5.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN], #64
    ld1 {BLK4.16b, BLK5.16b}, [IN]

    eor BLK0.16b, BLK0.16b, CTR0.16b
    eor BLK1.16b, BLK1.16b, CTR1.16b
    eor BLK2.16b, BLK2.16b, CTR2.16b
    eor BLK3.16b, BLK3.16b, CTR3.16b
    eor BLK4.16b, BLK4.16b, CTR4.16b
    eor BLK5.16b, BLK5.16b, CTR5.16b

    st1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1 {BLK4.16b, BLK5.16b}, [OUT]
    b .Lctr_aesenc_finish

.Lctr_enc_proc_7_blks:

    mov CTR1.16b, CTMP.16b
    mov CTR2.16b, CTMP.16b
    mov CTR3.16b, CTMP.16b
    mov CTR4.16b, CTMP.16b
    mov CTR5.16b, CTMP.16b
    mov CTR6.16b, CTMP.16b

    ADDCTR CTR1.s[3]
    ADDCTR CTR2.s[3]
    ADDCTR CTR3.s[3]
    ADDCTR CTR4.s[3]
    ADDCTR CTR5.s[3]
    ADDCTR CTR6.s[3]

    AES_ENC_7_BLKS  KEY CTR0.16b CTR1.16b CTR2.16b CTR3.16b CTR4.16b \
                    CTR5.16b CTR6.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN], #64
    ld1 {BLK4.16b, BLK5.16b, BLK6.16b}, [IN]

    eor BLK0.16b, BLK0.16b, CTR0.16b
    eor BLK1.16b, BLK1.16b, CTR1.16b
    eor BLK2.16b, BLK2.16b, CTR2.16b
    eor BLK3.16b, BLK3.16b, CTR3.16b
    eor BLK4.16b, BLK4.16b, CTR4.16b
    eor BLK5.16b, BLK5.16b, CTR5.16b
    eor BLK6.16b, BLK6.16b, CTR6.16b

    st1 {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1 {BLK4.16b, BLK5.16b, BLK6.16b}, [OUT]
                                      // Fall through to the finish code.
.Lctr_aesenc_finish:
    ADDCTR CTMP.s[3]                  // CTMP = counter following the last one used.
    st1 {CTMP.16b}, [IV]              // Publish it for the next call.

.Lctr_len_zero:
    mov x0, #0                        // Return value: always 0.
    /* Wipe the keystream blocks and the registers passed to the AES macros. */
    eor CTR0.16b, CTR0.16b, CTR0.16b
    eor CTR1.16b, CTR1.16b, CTR1.16b
    eor CTR2.16b, CTR2.16b, CTR2.16b
    eor CTR3.16b, CTR3.16b, CTR3.16b
    eor CTR4.16b, CTR4.16b, CTR4.16b
    eor CTR5.16b, CTR5.16b, CTR5.16b
    eor CTR6.16b, CTR6.16b, CTR6.16b
    eor CTR7.16b, CTR7.16b, CTR7.16b
    eor RDK0.16b, RDK0.16b, RDK0.16b
    eor RDK1.16b, RDK1.16b, RDK1.16b

AARCH64_AUTIASP
    ret
.size CRYPT_AES_CTR_Encrypt, .-CRYPT_AES_CTR_Encrypt
353
354#endif
355