• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)
18
/*
 * GCM_ENC256_LOOP - straight-line body for one pass of the AES-256-GCM
 * encryption loop. Each expansion handles four 16-byte blocks.
 *
 * What one expansion does:
 *   - applies ROUND with KEY0..KEY12 and then a bare aese with KEY13 to each
 *     of CTR0..CTR3;
 *   - reads 4 x 16 bytes from [INPUT] (post-incremented ldp), XORs the two
 *     halves of each block with KEND0/KEND1 in general registers, XORs the
 *     result with the matching CTRn and writes 4 x 16 bytes to [OUT00]
 *     (post-incremented st1);
 *   - multiplies v4..v7, as they are on entry, by HASH4..HASH1 (the middle
 *     Karatsuba terms use v17 and v16), reduces the product with the constant
 *     0xc2 << 56 and leaves the result in HASH0;
 *   - rebuilds the counter blocks as { low = x10, high = x11 | rev(IV_W) << 32 }
 *     and adds 1 to IV_W four times; on exit x9 holds the high half that the
 *     next expansion puts into CTR3;
 *   - decrements COUNT with subs. No instruction written below the subs sets
 *     the condition flags, so they are presumably consumed by a branch placed
 *     after the macro (assumes ROUND leaves the flags alone - TODO confirm).
 *
 * NOTE(review): ROUND and all upper-case operands (CTRn, KEYn, HASHn, OUTn,
 * OUT00, INPUT, IV_W, COUNT, KEND0, KEND1) are defined outside this macro.
 * ROUND is presumably aese + aesmc; KEND0/KEND1 presumably hold the two
 * halves of the last round key (the "round 14" of the comments); v16/v17 are
 * presumably precomputed by the caller. Confirm against their definitions.
 * NOTE(review): CTR0..CTR3 look like aliases of v0..v3 and OUT0..OUT3 of
 * v4..v7 (one half is written through dN, the other through the alias). If
 * so, the v4..v7 hashed at the top are the blocks stored by the previous pass.
 *
 * Written here: x6, x7, x9, x19-x24, d0-d3/CTR0-CTR3, v4-v10, HASH0, IV_W,
 * COUNT, INPUT, OUT00 and the condition flags. x19-x24 and the low halves of
 * v8-v10 are callee-saved under AAPCS64 and are not preserved by this macro,
 * so the enclosing function has to save them.
 *
 * The instruction order is hand-interleaved (AES, GHASH and counter work
 * share scratch registers v4-v10 and x9); keep it when editing.
 */
19.macro GCM_ENC256_LOOP
20    ROUND CTR0.16b, KEY0.16b
21    rev64 v4.16b, v4.16b                            // GHASH block 4k (only t0 is free)
22    ROUND CTR1.16b, KEY0.16b
23    fmov d3, x10                                    // CTR[3]
24    ROUND CTR2.16b, KEY0.16b
25
26    ext HASH0.16b, HASH0.16b, HASH0.16b, #8         // PRE 0
27    ROUND CTR0.16b, KEY1.16b
28    fmov CTR3.d[1], x9                              // CTR[3] - OK (x9 comes from before this expansion)
29
30    ROUND CTR1.16b, KEY1.16b
31    ldp x6, x7, [INPUT], #16                        // AES[0] - load plaintext
32    ROUND CTR2.16b, KEY1.16b
33    ldp x19, x20, [INPUT], #16                      // AES[1] - load plaintext
34    ROUND CTR0.16b, KEY2.16b
35#ifdef HITLS_BIG_ENDIAN
36    rev x6, x6
37    rev x7, x7
38    rev x19, x19
39    rev x20, x20
40#endif
41    eor v4.16b, v4.16b, HASH0.16b                   // PRE 1
42    ROUND CTR1.16b, KEY2.16b
43    ROUND CTR3.16b, KEY0.16b
44    eor x6, x6, KEND0                               // AES[0] - round 14 low
45
46    ROUND CTR0.16b, KEY3.16b
47    mov d10, v17.d[1]                               // GHASH block 4k - mid
48    pmull2 v9.1q, v4.2d, HASH4.2d                   // GHASH block 4k - high
49    eor x7, x7, KEND1                               // AES[0] - round 14 high
50    mov d8, v4.d[1]                                 // GHASH block 4k - mid
51    ROUND CTR3.16b, KEY1.16b
52    rev64 v5.16b, v5.16b                            // GHASH block 4k+1 (t0 and t1 free)
53    ROUND CTR0.16b, KEY4.16b
54    pmull HASH0.1q, v4.1d, HASH4.1d                 // GHASH block 4k - low
55    eor v8.8b, v8.8b, v4.8b                         // GHASH block 4k - mid
56    ROUND CTR2.16b, KEY2.16b
57    ROUND CTR0.16b, KEY5.16b
58    rev64 v7.16b, v7.16b                            // GHASH[0] (t0, t1, t2 and t3 free)
59
60    pmull2 v4.1q, v5.2d, HASH3.2d                   // GHASH block 4k+1 - high
61    pmull v10.1q, v8.1d, v10.1d                     // GHASH block 4k - mid
62    rev64 v6.16b, v6.16b                            // GHASH[2] (t0, t1, and t2 free)
63    pmull v8.1q, v5.1d, HASH3.1d                    // GHASH block 4k+1 - low
64    eor v9.16b, v9.16b, v4.16b                      // GHASH block 4k+1 - high
65    mov d4, v5.d[1]                                 // GHASH block 4k+1 - mid
66    ROUND CTR1.16b, KEY3.16b
67    ROUND CTR3.16b, KEY2.16b
68    eor HASH0.16b, HASH0.16b, v8.16b                // GHASH block 4k+1 - low
69    ROUND CTR2.16b, KEY3.16b
70    ROUND CTR1.16b, KEY4.16b
71    mov d8, v6.d[1]                                 // GHASH[2] - mid
72    ROUND CTR3.16b, KEY3.16b
73    eor v4.8b, v4.8b, v5.8b                         // GHASH block 4k+1 - mid
74    ROUND CTR2.16b, KEY4.16b
75    ROUND CTR0.16b, KEY6.16b
76    eor v8.8b, v8.8b, v6.8b                         // GHASH[2] - mid
77    ROUND CTR3.16b, KEY4.16b
78    pmull v4.1q, v4.1d, v17.1d                      // GHASH block 4k+1 - mid
79    ROUND CTR0.16b, KEY7.16b
80    ROUND CTR3.16b, KEY5.16b
81    ins v8.d[1], v8.d[0]                            // GHASH[2] - mid
82    ROUND CTR1.16b, KEY5.16b
83    ROUND CTR0.16b, KEY8.16b
84    ROUND CTR2.16b, KEY5.16b
85    ROUND CTR1.16b, KEY6.16b
86    eor v10.16b, v10.16b, v4.16b                    // GHASH block 4k+1 - mid
87    pmull2 v4.1q, v6.2d, HASH2.2d                   // GHASH[2] - high
88    pmull v5.1q, v6.1d, HASH2.1d                    // GHASH[2] - low
89    ROUND CTR1.16b, KEY7.16b
90    pmull v6.1q, v7.1d, HASH1.1d                    // GHASH[0] - low
91    eor v9.16b, v9.16b, v4.16b                      // GHASH[2] - high
92    ROUND CTR3.16b, KEY6.16b
93
94    ldp x21, x22, [INPUT], #16                      // AES[2] - load plaintext
95    ROUND CTR1.16b, KEY8.16b
96    mov d4, v7.d[1]                                 // GHASH[0] - mid
97#ifdef HITLS_BIG_ENDIAN
98    rev x21, x21
99    rev x22, x22
100#endif
101    ROUND CTR2.16b, KEY6.16b
102    eor HASH0.16b, HASH0.16b, v5.16b                // GHASH[2] - low
103    pmull2 v8.1q, v8.2d, v16.2d                     // GHASH[2] - mid
104    pmull2 v5.1q, v7.2d, HASH1.2d                   // GHASH[0] - high
105    eor v4.8b, v4.8b, v7.8b                         // GHASH[0] - mid
106    ROUND CTR2.16b, KEY7.16b
107    eor x19, x19, KEND0                             // AES[1] - round 14 low
108    ROUND CTR1.16b, KEY9.16b
109    eor v10.16b, v10.16b, v8.16b                    // GHASH[2] - mid
110    ROUND CTR3.16b, KEY7.16b
111    eor x21, x21, KEND0                             // AES[2] - round 14 low
112    ROUND CTR0.16b, KEY9.16b
113    movi v8.8b, #0xc2                               // mod_constant: every byte of d8 = 0xc2
114    pmull v4.1q, v4.1d, v16.1d                      // GHASH[0] - mid
115    eor v9.16b, v9.16b, v5.16b                      // GHASH[0] - high
116    fmov d5, x19                                    // AES[1] - mov low
117
118    ROUND CTR2.16b, KEY8.16b
119    ldp x23, x24, [INPUT], #16                      // AES[3] - load plaintext
120    ROUND CTR0.16b, KEY10.16b
121    shl d8, d8, #56                                 // mod_constant: d8 = 0xc200000000000000
122#ifdef HITLS_BIG_ENDIAN
123    rev x23, x23
124    rev x24, x24
125#endif
126    ROUND CTR3.16b, KEY8.16b
127    eor HASH0.16b, HASH0.16b, v6.16b                // GHASH[0] - low
128    ROUND CTR2.16b, KEY9.16b
129    ROUND CTR1.16b, KEY10.16b
130    eor v10.16b, v10.16b, v4.16b                    // GHASH[0] - mid
131    ROUND CTR3.16b, KEY9.16b
132    add IV_W, IV_W, #1                                // CTR++
133    ROUND CTR0.16b, KEY11.16b
134    eor v4.16b, HASH0.16b, v9.16b                   // MODULO - karatsuba tidy up
135    ROUND CTR1.16b, KEY11.16b
136
137    pmull v7.1q, v9.1d, v8.1d                       // MODULO - top 64b align with mid
138    rev w9, IV_W                                     // CTR block 4k+8
139    ext v9.16b, v9.16b, v9.16b, #8                  // MODULO - other top alignment
140    ROUND CTR2.16b, KEY10.16b
141    eor x23, x23, KEND0                             // AES[3] - round 14 low
142    ROUND CTR1.16b, KEY12.16b
143    eor v10.16b, v10.16b, v4.16b                    // MODULO - karatsuba tidy up
144    ROUND CTR3.16b, KEY10.16b
145    eor x20, x20, KEND1                             // AES[1] - round 14 high
146
147    fmov d4, x6                                     // AES[0] - mov low
148    orr x9, x11, x9, lsl #32                        // CTR block 4k+8
149    eor v7.16b, v9.16b, v7.16b                      // MODULO - fold into mid
150    ROUND CTR0.16b, KEY12.16b
151    eor x22, x22, KEND1                             // AES[2] - round 14 high
152    ROUND CTR2.16b, KEY11.16b
153    eor x24, x24, KEND1                             // AES[3] - round 14 high
154
155    ROUND CTR3.16b, KEY11.16b
156    add IV_W, IV_W, #1                                // CTR++
157    aese CTR0.16b, KEY13.16b                        // AES[0] - round 13
158    fmov OUT0.d[1], x7                              // AES[0] - mov high
159    eor v10.16b, v10.16b, v7.16b                    // MODULO - fold into mid
160    ROUND CTR2.16b, KEY12.16b
161    fmov d7, x23                                    // AES[3] - mov low
162    aese CTR1.16b, KEY13.16b                        // AES[1] - round 13
163    fmov OUT1.d[1], x20                             // AES[1] - mov high
164    fmov d6, x21                                    // AES[2] - mov low
165
166    subs COUNT, COUNT, #1                           // COUNT-- (sets the flags; nothing written below changes them)
167    fmov OUT2.d[1], x22                             // AES[2] - mov high
168    pmull v9.1q, v10.1d, v8.1d                      // MODULO - mid 64b align with low
169    eor OUT0.16b, OUT0.16b, CTR0.16b                // AES[0] - result
170    fmov d0, x10                                    // CTR[0]
171    fmov CTR0.d[1], x9                                // CTR[0]--OK
172    rev w9, IV_W                                     // CTR[1]
173    add IV_W, IV_W, #1                                // CTR++
174
175    eor OUT1.16b, OUT1.16b, CTR1.16b                // AES[1] - result
176    fmov d1, x10                                    // CTR[1]
177    orr x9, x11, x9, lsl #32                        // CTR[1]
178    ROUND CTR3.16b, KEY12.16b
179    fmov v1.d[1], x9                                // CTR[1]--OK
180    aese CTR2.16b, KEY13.16b                        // AES[2] - round 13
181    rev w9, IV_W                                     // CTR block 4k+10
182
183    st1 {OUT0.16b}, [OUT00], #16                    // AES[0] - store result
184    orr x9, x11, x9, lsl #32                        // CTR block 4k+10
185    eor HASH0.16b, HASH0.16b, v9.16b                // MODULO - fold into low
186    fmov OUT3.d[1], x24                             // AES[3] - mov high
187    ext v10.16b, v10.16b, v10.16b, #8               // MODULO - other mid alignment
188    st1 {OUT1.16b}, [OUT00], #16                    // AES[1] - store result
189    add IV_W, IV_W, #1                                // CTR++
190    aese CTR3.16b, KEY13.16b                        // AES[3] - round 13
191
192    eor OUT2.16b, OUT2.16b, CTR2.16b                // AES[2] - result
193    fmov d2, x10                                    // CTR[2]
194    st1 {OUT2.16b}, [OUT00], #16                    // AES[2] - store result
195    fmov v2.d[1], x9                                // CTR[2]--OK
196    rev w9, IV_W                                     // CTR block 4k+11
197
198    eor OUT3.16b, OUT3.16b, CTR3.16b                // AES[3] - result
199    eor HASH0.16b, HASH0.16b, v10.16b               // MODULO - fold into low
200    orr x9, x11, x9, lsl #32                        // CTR block 4k+11 (left in x9 for the next CTR[3])
201    st1 {OUT3.16b}, [OUT00], #16                    // AES[3] - store result
202.endm
203
/*
 * GCM_DEC256_LOOP - straight-line body for one pass of the AES-256-GCM
 * decryption loop. Each expansion loads four 16-byte blocks and stores four.
 *
 * The work is pipelined across expansions:
 *   - on entry CTR2 is copied to x21:x22 and CTR3 is XORed with OUT3 and
 *     copied to x23:x24; both pairs are XORed with KEND0/KEND1 and stored
 *     with stp to [OUT00]. These are presumably blocks 2 and 3 left
 *     unfinished by the previous pass (or by set-up code) - TODO confirm;
 *   - ROUND with KEY0..KEY12 and a bare aese with KEY13 are applied to each
 *     of CTR0..CTR3 after they have been refilled with fresh counter blocks;
 *   - four 16-byte blocks are loaded from [INPUT] into OUT0..OUT3. Blocks 0
 *     and 1 are XORed with CTR0/CTR1 and KEND0/KEND1 and stored here; block 2
 *     is left in CTR2 (already XORed with OUT2) and block 3 in CTR3 (after
 *     the KEY13 aese only) for whatever runs next;
 *   - v4..v7, as they are on entry, are multiplied by HASH4..HASH1 (middle
 *     Karatsuba terms use v17 and v16), reduced with 0xc2 << 56 and folded
 *     into HASH0. v4 and v5 are used without a rev64 at the top; the rev64 of
 *     OUT0 and v5 at the bottom presumably prepares them for the next pass.
 *     v6 and v7 are byte-reversed inside this pass;
 *   - counter blocks are rebuilt as { low = x10, high = x11 | rev(IV_W) << 32 }
 *     with IV_W incremented four times; on exit x9 holds the high half that
 *     the next expansion puts into CTR[2];
 *   - COUNT is decremented with subs. No instruction written below the subs
 *     sets the condition flags, so they are presumably consumed by a branch
 *     placed after the macro (assumes ROUND leaves the flags alone - TODO
 *     confirm).
 *
 * NOTE(review): ROUND and all upper-case operands (CTRn, KEYn, HASHn, OUTn,
 * OUT00, INPUT, IV_W, COUNT, KEND0, KEND1) are defined outside this macro.
 * ROUND is presumably aese + aesmc; KEND0/KEND1 presumably hold the two
 * halves of the last round key; CTR0..CTR3 look like aliases of v0..v3 and
 * OUT0..OUT3 of v4..v7. Confirm against their definitions.
 *
 * Written here: x6, x7, x9, x19-x24, v0-v3/CTR0-CTR3, v4-v10, OUT0-OUT3,
 * HASH0, IV_W, COUNT, INPUT, OUT00 and the condition flags. x19-x24 and the
 * low halves of v8-v10 are callee-saved under AAPCS64 and are not preserved
 * by this macro, so the enclosing function has to save them.
 *
 * The instruction order is hand-interleaved and scratch registers are reused
 * (v4-v6 serve as GHASH temporaries before the ld1 refills them); keep the
 * order when editing.
 */
204.macro GCM_DEC256_LOOP
205    mov x21, CTR2.d[0]                      // AES[2] block - mov low
206    ext HASH0.16b, HASH0.16b, HASH0.16b, #8 // PRE 0
207    eor CTR3.16b, OUT3.16b, CTR3.16b        // AES[3] block - result
208    ROUND CTR0.16b, KEY0.16b
209    mov x22, CTR2.d[1]                      // AES[2] block - mov high
210
211    ROUND CTR1.16b, KEY0.16b
212    fmov d2, x10                            // CTR[2]
213    fmov v2.d[1], x9                        // CTR[2] (x9 comes from before this expansion)
214    eor v4.16b, v4.16b, HASH0.16b           // PRE 1
215#ifdef HITLS_BIG_ENDIAN
216    rev x21, x21
217    rev x22, x22
218#endif
219    rev w9, IV_W                             // CTR[3]
220    ROUND CTR0.16b, KEY1.16b
221    mov x24, CTR3.d[1]                      // AES[3] block - mov high
222    ROUND CTR1.16b, KEY1.16b
223    mov x23, CTR3.d[0]                      // AES[3] block - mov low
224
225    pmull2 v9.1q, v4.2d, HASH4.2d           // GHASH block 4k - high
226    mov d8, v4.d[1]                         // GHASH block 4k - mid
227    fmov d3, x10                            // CTR[3]
228#ifdef HITLS_BIG_ENDIAN
229    rev x23, x23
230    rev x24, x24
231#endif
232    ROUND CTR0.16b, KEY2.16b
233    orr x9, x11, x9, lsl #32                // CTR[3]
234    ROUND CTR2.16b, KEY0.16b
235    fmov v3.d[1], x9                        // CTR[3]--OK
236    ROUND CTR1.16b, KEY2.16b
237    eor v8.8b, v8.8b, v4.8b                 // GHASH block 4k - mid
238    ROUND CTR0.16b, KEY3.16b
239    eor x22, x22, KEND1                     // AES[2] - round 14 high
240    ROUND CTR2.16b, KEY1.16b
241    mov d10, v17.d[1]                       // GHASH block 4k - mid
242    ROUND CTR1.16b, KEY3.16b
243    rev64 v6.16b, v6.16b                    // GHASH[2]
244    ROUND CTR3.16b, KEY0.16b
245    eor x21, x21, KEND0                     // AES[2] - round 14 low
246    ROUND CTR2.16b, KEY2.16b
247    stp x21, x22, [OUT00], #16              // AES[2] - store result
248    pmull HASH0.1q, v4.1d, HASH4.1d         // GHASH block 4k - low
249    pmull2 v4.1q, v5.2d, HASH3.2d           // GHASH block 4k+1 - high
250    ROUND CTR2.16b, KEY3.16b
251    rev64 v7.16b, v7.16b                    // GHASH[0]
252
253    pmull v10.1q, v8.1d, v10.1d             // GHASH block 4k - mid
254    eor x23, x23, KEND0                     // AES[3] - round 14 low
255    pmull v8.1q, v5.1d, HASH3.1d            // GHASH block 4k+1 - low
256    eor x24, x24, KEND1                     // AES[3] - round 14 high
257    eor v9.16b, v9.16b, v4.16b              // GHASH block 4k+1 - high
258    ROUND CTR2.16b, KEY4.16b
259    ROUND CTR3.16b, KEY1.16b
260    mov d4, v5.d[1]                         // GHASH block 4k+1 - mid
261    ROUND CTR0.16b, KEY4.16b
262    eor HASH0.16b, HASH0.16b, v8.16b        // GHASH block 4k+1 - low
263    ROUND CTR2.16b, KEY5.16b
264    add IV_W, IV_W, #1                        // CTR++
265    ROUND CTR3.16b, KEY2.16b
266    mov d8, v6.d[1]                         // GHASH[2] - mid
267    ROUND CTR1.16b, KEY4.16b
268    eor v4.8b, v4.8b, v5.8b                 // GHASH block 4k+1 - mid
269
270    pmull v5.1q, v6.1d, HASH2.1d            // GHASH[2] - low
271    ROUND CTR3.16b, KEY3.16b
272    eor v8.8b, v8.8b, v6.8b                 // GHASH[2] - mid
273    ROUND CTR1.16b, KEY5.16b
274    ROUND CTR0.16b, KEY5.16b
275    eor HASH0.16b, HASH0.16b, v5.16b        // GHASH[2] - low
276
277    pmull v4.1q, v4.1d, v17.1d              // GHASH block 4k+1 - mid
278    rev w9, IV_W                             // CTR block 4k+8
279    ROUND CTR1.16b, KEY6.16b
280    ins v8.d[1], v8.d[0]                    // GHASH[2] - mid
281    ROUND CTR0.16b, KEY6.16b
282    add IV_W, IV_W, #1                        // CTR block 4k+8
283    ROUND CTR3.16b, KEY4.16b
284    ROUND CTR1.16b, KEY7.16b
285    eor v10.16b, v10.16b, v4.16b            // GHASH block 4k+1 - mid
286    ROUND CTR0.16b, KEY7.16b
287
288    pmull2 v4.1q, v6.2d, HASH2.2d           // GHASH[2] - high
289    mov d6, v7.d[1]                         // GHASH[0] - mid
290    ROUND CTR3.16b, KEY5.16b
291
292    pmull2 v8.1q, v8.2d, v16.2d             // GHASH[2] - mid
293    ROUND CTR0.16b, KEY8.16b
294    eor v9.16b, v9.16b, v4.16b              // GHASH[2] - high
295    ROUND CTR3.16b, KEY6.16b
296
297    pmull v4.1q, v7.1d, HASH1.1d            // GHASH[0] - low
298    orr x9, x11, x9, lsl #32                // CTR block 4k+8
299    eor v10.16b, v10.16b, v8.16b            // GHASH[2] - mid
300
301    pmull2 v5.1q, v7.2d, HASH1.2d           // GHASH[0] - high
302    ROUND CTR0.16b, KEY9.16b
303    eor v6.8b, v6.8b, v7.8b                 // GHASH[0] - mid
304    ROUND CTR1.16b, KEY8.16b
305    ROUND CTR2.16b, KEY6.16b
306    eor v9.16b, v9.16b, v5.16b              // GHASH[0] - high
307    ROUND CTR0.16b, KEY10.16b
308    pmull v6.1q, v6.1d, v16.1d              // GHASH[0] - mid
309    movi v8.8b, #0xc2                       // mod_constant: every byte of d8 = 0xc2
310    ROUND CTR2.16b, KEY7.16b
311    eor HASH0.16b, HASH0.16b, v4.16b        // GHASH[0] - low
312    ROUND CTR0.16b, KEY11.16b
313    ROUND CTR3.16b, KEY7.16b
314    shl d8, d8, #56                         // mod_constant: d8 = 0xc200000000000000
315    ROUND CTR2.16b, KEY8.16b
316    eor v10.16b, v10.16b, v6.16b            // GHASH[0] - mid
317    ROUND CTR0.16b, KEY12.16b
318    pmull v7.1q, v9.1d, v8.1d               // MODULO - top 64b align with mid
319    eor v6.16b, HASH0.16b, v9.16b           // MODULO - karatsuba tidy up
320    ROUND CTR1.16b, KEY9.16b
321
322    ld1 {OUT0.16b}, [INPUT], #16            // AES load[0] ciphertext
323    aese CTR0.16b, KEY13.16b                // AES[0] - round 13
324    ext v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
325    ROUND CTR1.16b, KEY10.16b
326    eor v10.16b, v10.16b, v6.16b            // MODULO - karatsuba tidy up
327    ROUND CTR2.16b, KEY9.16b
328
329    ld1 {OUT1.16b}, [INPUT], #16            // AES load[1] ciphertext
330    ROUND CTR3.16b, KEY8.16b
331    eor CTR0.16b, OUT0.16b, CTR0.16b        // AES[0] block - result
332    ROUND CTR1.16b, KEY11.16b
333
334    stp x23, x24, [OUT00], #16              // AES[3] block - store result
335    ROUND CTR2.16b, KEY10.16b
336    eor v10.16b, v10.16b, v7.16b            // MODULO - fold into mid
337    ROUND CTR3.16b, KEY9.16b
338    ld1 {OUT2.16b}, [INPUT], #16            // AES load[2] ciphertext
339
340    ROUND CTR1.16b, KEY12.16b
341    ld1 {OUT3.16b}, [INPUT], #16            // AES load[3] ciphertext
342    ROUND CTR2.16b, KEY11.16b
343    mov x7, CTR0.d[1]                       // AES[0] block - mov high
344    ROUND CTR3.16b, KEY10.16b
345    eor v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
346    aese CTR1.16b, KEY13.16b                // AES[1] - round 13
347    mov x6, CTR0.d[0]                       // AES[0] block - mov low
348    ROUND CTR2.16b, KEY12.16b
349    fmov d0, x10                            // CTR[0]
350    ROUND CTR3.16b, KEY11.16b
351#ifdef HITLS_BIG_ENDIAN
352    rev x6, x6
353    rev x7, x7
354#endif
355    fmov CTR0.d[1], x9                      // CTR[0]--OK
356    pmull v8.1q, v10.1d, v8.1d              // MODULO - mid 64b align with low
357    eor CTR1.16b, OUT1.16b, CTR1.16b        // AES[1] block - result
358    rev w9, IV_W                             // CTR block 4k+9
359    aese CTR2.16b, KEY13.16b                // AES[2] - round 13
360    orr x9, x11, x9, lsl #32                // CTR block 4k+9
361
362    subs COUNT, COUNT, #1                   // COUNT-- (sets the flags; nothing written below changes them)
363    add IV_W, IV_W, #1                        // CTR++
364    eor x6, x6, KEND0                       // AES[0] block - round 14 low
365    eor x7, x7, KEND1                       // AES[0] block - round 14 high
366
367    mov x20, v1.d[1]                        // AES[1] block - mov high
368    eor CTR2.16b, OUT2.16b, CTR2.16b        // AES[2] block - result (moved out and stored by the code that runs next)
369    eor HASH0.16b, HASH0.16b, v8.16b        // MODULO - fold into low
370    ROUND CTR3.16b, KEY12.16b
371    mov x19, CTR1.d[0]                      // AES[1] block - mov low
372    fmov d1, x10                            // CTR[1]
373    ext v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
374#ifdef HITLS_BIG_ENDIAN
375    rev x20, x20
376    rev x19, x19
377#endif
378    fmov CTR1.d[1], x9                      // CTR[1]--OK
379    rev w9, IV_W                             // CTR block 4k+10
380    add IV_W, IV_W, #1                        // CTR++
381
382    aese CTR3.16b, KEY13.16b                // AES[3] - round 13 (XOR with OUT3 is left to the code that runs next)
383    orr x9, x11, x9, lsl #32                // CTR block 4k+10 (left in x9 for the next CTR[2])
384    rev64 v5.16b, v5.16b                    // GHASH block 4k+1 for the next pass
385    eor x20, x20, KEND1                     // AES[1] block - round 14 high
386    stp x6, x7, [OUT00], #16                // AES[0] block - store result
387    eor x19, x19, KEND0                     // AES[1] block - round 14 low
388    stp x19, x20, [OUT00], #16              // AES[1] block - store result
389    rev64 OUT0.16b, OUT0.16b                // GHASH block 4k for the next pass
390    eor HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
391.endm
392
393#endif
394