• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15#include "hitls_build.h"
16#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)
17
/*
 * GCM_ENC128_LOOP - straight-line body of one pass of the 4-block
 * AES-128-GCM encryption loop (no labels or branches inside the macro).
 *
 * Each expansion:
 *   - byte-reverses (rev64) the four blocks held in OUT0..OUT3 on entry and
 *     folds them into HASH0: products with HASH4..HASH1 (block 0..3), the
 *     "mid" products with v17/v16, then the two MODULO pmull steps;
 *   - applies ROUND with KEY0..KEY8 and a final aese with KEY9 to each of
 *     CTR0..CTR3 (the CTR3 rounds name v19..v25 directly);
 *   - loads 4 x 16 bytes from [INPUT] (post-incremented) into x6/x7,
 *     x19/x20, x21/x22, x23/x24, XORs them with KEND0/KEND1, moves them into
 *     OUT0..OUT3, XORs in the AES output and stores 4 x 16 bytes to [OUT00]
 *     (post-incremented);
 *   - refills CTR3 at the top (from x10 and the x9 value left by the previous
 *     pass) and CTR0..CTR2 afterwards; x9 is rebuilt each time as
 *     x11 | (rev(IV_W) << 32) and IV_W is incremented four times;
 *   - decrements COUNT with subs. No instruction visible after the subs sets
 *     flags; ROUND is defined outside this chunk, so confirm it does not.
 *
 * State left for the next expansion / the code after the loop:
 *   OUT0..OUT3 = the four blocks just written (GHASHed by the next pass),
 *   x9         = high half for the next CTR3 refill.
 *
 * Registers written by raw name: x6, x7, x9, x19-x24, v0-v10 (via d0-d8,
 * v4, v8-v10), v28-v31.
 * NOTE(review): x19-x24 and v8-v10 are callee-saved under AAPCS64; the
 * enclosing function presumably saves them - not visible in this chunk.
 * NOTE(review): raw names and aliases are mixed. The pairs
 * `fmov d0, x10` / `fmov CTR0.d[1], x9` and `fmov d4, x6` /
 * `fmov OUT0.d[1], x7` imply CTRn = v(n) and OUTn = v(4+n), and v19..v25
 * presumably are KEY1..KEY7 - confirm against the register alias definitions.
 *
 * The instruction order is a deliberate interleave and registers are reused
 * (v4 is GHASH input, then a GHASH temporary, then the output block), so do
 * not reorder lines without re-checking every dependency.
 */
18.macro GCM_ENC128_LOOP
19    ldp x6, x7, [INPUT], #16                // AES[0] - load plaintext
20    rev64 OUT0.16b, OUT0.16b                // GHASH block[0]
21    rev64 OUT2.16b, OUT2.16b                // GHASH block[2]
22    ROUND CTR2.16b, KEY0.16b
23#ifdef HITLS_BIG_ENDIAN
24    rev x6, x6                              // big-endian build: byte-swap the loaded words
25    rev x7, x7
26#endif
27
28    fmov d3, x10                            // CTR[3] - low half
29    ext HASH0.16b, HASH0.16b, HASH0.16b, #8 // PRE 0
30    rev64 OUT1.16b, OUT1.16b                // GHASH block[1]
31    ROUND CTR1.16b, KEY0.16b
32
33    add IV_W, IV_W, #1                        // CTR3++
34    fmov CTR3.d[1], x9                      // CTR[3]--OK (x9 was prepared by the previous pass)
35    ROUND CTR0.16b, KEY0.16b
36    mov d31, OUT2.d[1]                      // GHASH block[2.1]
37
38    ROUND CTR2.16b, KEY1.16b
39    mov d30, OUT1.d[1]                      // GHASH block[1.1]
40    ROUND CTR1.16b, KEY1.16b
41    eor v4.16b, OUT0.16b, HASH0.16b         // PRE 1
42
43    ROUND CTR3.16b, KEY0.16b
44    eor x7, x7, KEND1                       // AES[0] - round 10 high
45    pmull2 v28.1q, OUT1.2d, HASH3.2d        // GHASH block 4k+1 - high
46    eor v31.8b, v31.8b, OUT2.8b             // GHASH[2] - mid
47
48    ldp x19, x20, [INPUT], #16              // AES[1] - load plaintext
49    ROUND CTR0.16b, KEY1.16b
50    rev w9, IV_W                             // CTR0--Start
51    eor v30.8b, v30.8b, OUT1.8b             // GHASH block 4k+1 - mid
52#ifdef HITLS_BIG_ENDIAN
53    rev x19, x19
54    rev x20, x20
55#endif
56
57    mov d8, v4.d[1]                         // GHASH block 4k - mid
58    orr x9, x11, x9, lsl #32                // CTR0 block 4k+8
59    pmull2 v9.1q, v4.2d, HASH4.2d           // GHASH block 4k - high
60    add IV_W, IV_W, #1                        // CTR0++
61
62    mov d10, v17.d[1]                       // GHASH block 4k - mid
63    ROUND CTR0.16b, KEY2.16b
64    pmull HASH0.1q, v4.1d, HASH4.1d         // GHASH block 4k - low
65    eor v8.8b, v8.8b, v4.8b                 // GHASH block 4k - mid
66
67    ROUND CTR1.16b, KEY2.16b
68    ROUND CTR0.16b, KEY3.16b
69    eor v9.16b, v9.16b, v28.16b             // GHASH block 4k+1 - high
70    pmull v28.1q, OUT2.1d, HASH2.1d         // GHASH[2] - low
71
72    pmull v10.1q, v8.1d, v10.1d             // GHASH block 4k - mid
73    rev64 OUT3.16b, OUT3.16b                // GHASH[3] (t0, t1, t2 and t3 free)
74    pmull v30.1q, v30.1d, v17.1d            // GHASH block 4k+1 - mid
75    pmull v29.1q, OUT1.1d, HASH3.1d         // GHASH block 4k+1 - low
76
77    ins v31.d[1], v31.d[0]                  // GHASH[2] - mid
78    pmull2 v8.1q, OUT2.2d, HASH2.2d         // GHASH[2] - high
79    eor x20, x20, KEND1                     // AES[1] - round 10 high
80    eor v10.16b, v10.16b, v30.16b           // GHASH block 4k+1 - mid
81
82    mov d30, OUT3.d[1]                      // GHASH[3] - mid
83    ROUND CTR3.16b, v19.16b
84    eor HASH0.16b, HASH0.16b, v29.16b       // GHASH block 4k+1 - low
85    ROUND CTR2.16b, KEY2.16b
86
87    eor x6, x6, KEND0                       // AES[0] - round 10 low
88    ROUND CTR1.16b, KEY3.16b
89    eor v30.8b, v30.8b, OUT3.8b             // GHASH[3] - mid
90    pmull2 v4.1q, OUT3.2d, HASH1.2d         // GHASH[3] - high
91
92    ROUND CTR2.16b, KEY3.16b
93    eor v9.16b, v9.16b, v8.16b              // GHASH[2] - high
94    pmull2 v31.1q, v31.2d, v16.2d           // GHASH[2] - mid
95    pmull v29.1q, OUT3.1d, HASH1.1d         // GHASH[3] - low
96
97    movi v8.8b, #0xc2                       // every byte of d8 = 0xc2; shifted below into the MODULO constant
98    pmull v30.1q, v30.1d, v16.1d            // GHASH[3] - mid
99    eor HASH0.16b, HASH0.16b, v28.16b       // GHASH[2] - low
100    ROUND CTR1.16b, KEY4.16b
101
102    ROUND CTR3.16b, v20.16b
103    shl d8, d8, #56                         // mod_constant: d8 = 0xc2 << 56
104    ROUND CTR0.16b, KEY4.16b
105    eor v9.16b, v9.16b, v4.16b              // GHASH[3] - high
106
107    ROUND CTR1.16b, KEY5.16b
108    ldp x21, x22, [INPUT], #16              // AES[2] - load plaintext
109    ROUND CTR3.16b, v21.16b
110    eor v10.16b, v10.16b, v31.16b           // GHASH[2] - mid
111#ifdef HITLS_BIG_ENDIAN
112    rev x21, x21
113    rev x22, x22
114#endif
115
116    ROUND CTR0.16b, KEY5.16b
117    ldp x23, x24, [INPUT], #16              // AES[3] - load plaintext
118    pmull v31.1q, v9.1d, v8.1d              // MODULO - top 64b align with mid
119    eor HASH0.16b, HASH0.16b, v29.16b       // GHASH[3] - low
120#ifdef HITLS_BIG_ENDIAN
121    rev x23, x23
122    rev x24, x24
123#endif
124
125    ROUND CTR2.16b, KEY4.16b
126    eor x19, x19, KEND0                     // AES[1] - round 10 low
127    ROUND CTR3.16b, v22.16b
128    eor v10.16b, v10.16b, v30.16b           // GHASH[3] - mid
129
130    ROUND CTR1.16b, KEY6.16b
131    eor x23, x23, KEND0                     // AES[3] - round 10 low
132    ROUND CTR2.16b, KEY5.16b
133    eor v30.16b, HASH0.16b, v9.16b          // MODULO - karatsuba tidy up
134
135    fmov d4, x6                             // AES[0] - mov low (v4 is no longer needed by GHASH here)
136    ROUND CTR0.16b, KEY6.16b
137    fmov OUT0.d[1], x7                      // AES[0] - mov high
138    fmov d7, x23                            // AES[3] - mov low
139
140    ext v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
141    ROUND CTR3.16b, v23.16b
142    fmov d5, x19                            // AES[1] - mov low
143    ROUND CTR0.16b, KEY7.16b
144
145    eor v10.16b, v10.16b, v30.16b           // MODULO - karatsuba tidy up
146    ROUND CTR2.16b, KEY6.16b
147    eor x24, x24, KEND1                     // AES[3] - round 10 high
148    ROUND CTR1.16b, KEY7.16b
149
150    fmov OUT1.d[1], x20                     // AES[1] - mov high
151    ROUND CTR0.16b, KEY8.16b
152    fmov OUT3.d[1], x24                     // AES[3] - mov high
153    ROUND CTR3.16b, v24.16b
154
155    subs COUNT, COUNT, #1                          // count-- (subs sets the flags; the loop branch is outside this macro)
156    ROUND CTR1.16b, KEY8.16b
157    eor v10.16b, v10.16b, v31.16b           // MODULO - fold into mid
158    aese CTR0.16b, KEY9.16b                 // AES[0] - round 9
159
160    eor x21, x21, KEND0                     // AES[2] - round 10 low
161    eor x22, x22, KEND1                     // AES[2] - round 10 high
162    ROUND CTR3.16b, v25.16b
163    fmov d6, x21                            // AES[2] - mov low
164
165    aese CTR1.16b, KEY9.16b                 // AES[1] - round 9
166    fmov OUT2.d[1], x22                     // AES[2] - mov high
167    ROUND CTR2.16b, KEY7.16b
168    eor OUT0.16b, OUT0.16b, CTR0.16b        // AES[0] - result
169
170    fmov d0, x10                            // CTR0-0
171    ROUND CTR3.16b, KEY8.16b
172    fmov CTR0.d[1], x9                      // CTR0-1--OK
173    rev w9, IV_W                            // CTR1--start
174
175    eor v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
176    ROUND CTR2.16b, KEY8.16b
177    eor OUT1.16b, OUT1.16b, CTR1.16b        // AES[1] - result
178    add IV_W, IV_W, #1                      // CTR1++
179
180    orr x9, x11, x9, lsl #32                // CTR1 block 4k+9
181    fmov d1, x10                            // CTR1-0
182    pmull v9.1q, v10.1d, v8.1d              // MODULO - mid 64b align with low
183    fmov CTR1.d[1], x9                      // CTR1-1--OK
184
185    rev w9, IV_W                            // CTR2--Start
186    aese CTR2.16b, KEY9.16b                 // AES[2] - round 9
187    st1 {OUT0.16b}, [OUT00], #16            // Write back - OUT0
188    eor OUT2.16b, OUT2.16b, CTR2.16b        // AES[2]-result
189
190    orr x9, x11, x9, lsl #32                // CTR2 block 4k+10
191    aese CTR3.16b, KEY9.16b                 // AES[3] - round 9
192    add IV_W, IV_W, #1                      // CTR2++
193    ext v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
194    fmov d2, x10                            // CTR2-0
195
196    eor HASH0.16b, HASH0.16b, v9.16b        // MODULO - fold into low
197    st1 {OUT1.16b}, [OUT00], #16            // Write back - OUT1
198    fmov CTR2.d[1], x9                      // CTR2-1--OK
199    st1 {OUT2.16b}, [OUT00], #16            // Write back - OUT2
200
201    rev w9, IV_W                            // CTR3--start
202    eor OUT3.16b, OUT3.16b, CTR3.16b        // AES[3]-result
203    orr x9, x11, x9, lsl #32                // CTR3 block 4k+11 (consumed by the CTR3 refill at the top of the next pass)
204    eor HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
205    st1 {OUT3.16b}, [OUT00], #16            // Write back - OUT3
206.endm
207
/*
 * GCM_DEC128_LOOP - straight-line body of one pass of the 4-block
 * AES-128-GCM decryption loop (no labels or branches inside the macro).
 *
 * The macro is software-pipelined across expansions:
 *   - On entry CTR2 already holds the finished XOR for block 2 and CTR3 the
 *     AES output for block 3 from before the macro; the first instruction
 *     XORs CTR3 with OUT3. Both are then moved to x21/x22 and x23/x24,
 *     XORed with KEND0/KEND1 and stored to [OUT00] first.
 *   - The four blocks held in v4..v7 on entry are folded into HASH0
 *     (products with HASH4..HASH1, "mid" products with v17/v16, then the
 *     MODULO pmull steps). v6 (OUT2) and v7 are byte-reversed here; OUT0 and
 *     OUT1 are byte-reversed at the end of the pass that loaded them.
 *   - Four new 16-byte blocks are loaded from [INPUT] (post-incremented)
 *     into OUT0..OUT3 with ld1.
 *   - ROUND with KEY0..KEY8 plus a final aese with KEY9 is applied to each
 *     of CTR0..CTR3. Blocks 0 and 1 are finished and stored in this pass;
 *     block 2 is left XORed in CTR2 and block 3 as raw AES output in CTR3
 *     for whatever follows (the next expansion or the loop tail).
 *   - v2 and v3 are refilled at the top, CTR0 and v1 later; x9 is rebuilt
 *     each time as x11 | (rev(IV_W) << 32), IV_W is incremented four times,
 *     and the final orr leaves x9 ready for the v2 refill of the next pass.
 *   - COUNT is decremented with subs; none of the instructions after it in
 *     this macro set flags, so they are still valid at the macro's end.
 *
 * Per expansion 4 x 16 bytes are stored to [OUT00] (post-incremented), in
 * the order: carried block 2, carried block 3, new block 0, new block 1.
 *
 * Registers written by raw name: x6, x7, x9, x19-x24, v0-v10, v28-v31.
 * NOTE(review): x19-x24 and v8-v10 are callee-saved under AAPCS64; the
 * enclosing function presumably saves them - not visible in this chunk.
 * NOTE(review): raw names v1..v7 are used next to CTRn/OUTn; this macro
 * reads as if CTRn = v(n) and OUTn = v(4+n) - confirm against the register
 * alias definitions.
 *
 * The instruction order is a deliberate interleave with register reuse; do
 * not reorder lines without re-checking every dependency.
 */
208.macro GCM_DEC128_LOOP
209    eor CTR3.16b, OUT3.16b, CTR3.16b            // AES[3] - result
210    ext HASH0.16b, HASH0.16b, HASH0.16b, #8     // PRE 0
211    mov x21, CTR2.d[0]                          // AES[2] - mov low
212    pmull2 v28.1q, v5.2d, HASH3.2d              // GHASH block 4k+1 - high
213    mov x22, CTR2.d[1]                          // AES[2] - mov high
214    ROUND CTR1.16b, KEY0.16b
215    fmov d2, x10                                // CTR[2] - low half (block 2 result was copied out above)
216#ifdef HITLS_BIG_ENDIAN
217    rev x21, x21
218    rev x22, x22
219#endif
220    rev64 OUT2.16b, OUT2.16b                    // GHASH[2]
221    fmov v2.d[1], x9                            // CTR[2] - high half (x9 was prepared by the previous pass)
222    rev w9, IV_W                                // CTR[3]
223    mov x23, CTR3.d[0]                          // AES[3] - mov low
224    eor v4.16b, v4.16b, HASH0.16b               // PRE 1
225    mov d30, v5.d[1]                            // GHASH block 4k+1 - mid
226    ROUND CTR1.16b, KEY1.16b
227    rev64 v7.16b, v7.16b                        // GHASH[3]
228    pmull v29.1q, v5.1d, HASH3.1d               // GHASH block 4k+1 - low
229    mov x24, CTR3.d[1]                          // AES[3] - mov high
230    orr x9, x11, x9, lsl #32                    // CTR[3]
231    pmull HASH0.1q, v4.1d, HASH4.1d             // GHASH block 4k - low
232#ifdef HITLS_BIG_ENDIAN
233    rev x23, x23
234    rev x24, x24
235#endif
236    fmov d3, x10                                // CTR[3] - low half
237    eor v30.8b, v30.8b, v5.8b                   // GHASH block 4k+1 - mid
238    ROUND CTR1.16b, KEY2.16b
239    fmov v3.d[1], x9                            // CTR[3] - high half
240    ROUND CTR2.16b, KEY0.16b
241    mov d10, v17.d[1]                           // GHASH block 4k - mid
242    pmull2 v9.1q, v4.2d, HASH4.2d               // GHASH block 4k - high
243    eor HASH0.16b, HASH0.16b, v29.16b           // GHASH block 4k+1 - low
244    pmull v29.1q, v7.1d, HASH1.1d               // GHASH[3] - low
245    ROUND CTR1.16b, KEY3.16b
246    mov d8, v4.d[1]                             // GHASH block 4k - mid
247    ROUND CTR3.16b, KEY0.16b
248    eor v9.16b, v9.16b, v28.16b                 // GHASH block 4k+1 - high
249    ROUND CTR0.16b, KEY0.16b
250    pmull v28.1q, v6.1d, HASH2.1d               // GHASH[2] - low
251    eor v8.8b, v8.8b, v4.8b                     // GHASH block 4k - mid
252    ROUND CTR3.16b, KEY1.16b
253    eor x23, x23, KEND0                         // AES[3] - round 10 low
254    pmull v30.1q, v30.1d, v17.1d                // GHASH block 4k+1 - mid
255    eor x22, x22, KEND1                         // AES[2] - round 10 high
256    mov d31, v6.d[1]                            // GHASH[2] - mid
257    ROUND CTR0.16b, KEY1.16b
258    eor HASH0.16b, HASH0.16b, v28.16b           // GHASH[2] - low
259    pmull v10.1q, v8.1d, v10.1d                 // GHASH block 4k - mid
260    ROUND CTR3.16b, KEY2.16b
261    eor v31.8b, v31.8b, v6.8b                   // GHASH[2] - mid
262    ROUND CTR0.16b, KEY2.16b
263    ROUND CTR1.16b, KEY4.16b
264    eor v10.16b, v10.16b, v30.16b               // GHASH block 4k+1 - mid
265    pmull2 v8.1q, v6.2d, HASH2.2d               // GHASH[2] - high
266    ROUND CTR0.16b, KEY3.16b
267    ins v31.d[1], v31.d[0]                      // GHASH[2] - mid
268    pmull2 v4.1q, v7.2d, HASH1.2d               // GHASH[3] - high
269    ROUND CTR2.16b, KEY1.16b
270    mov d30, v7.d[1]                            // GHASH[3] - mid
271    ROUND CTR0.16b, KEY4.16b
272    eor v9.16b, v9.16b, v8.16b                  // GHASH[2] - high
273    pmull2 v31.1q, v31.2d, v16.2d               // GHASH[2] - mid
274    eor x24, x24, KEND1                         // AES[3] - round 10 high
275    ROUND CTR2.16b, KEY2.16b
276    eor v30.8b, v30.8b, v7.8b                   // GHASH[3] - mid
277    ROUND CTR1.16b, KEY5.16b
278    eor x21, x21, KEND0                         // AES[2] - round 10 low
279    ROUND CTR0.16b, KEY5.16b
280    movi v8.8b, #0xc2                           // every byte of d8 = 0xc2; shifted below into the MODULO constant
281    ROUND CTR2.16b, KEY3.16b
282    eor HASH0.16b, HASH0.16b, v29.16b           // GHASH[3] - low
283    ROUND CTR1.16b, KEY6.16b
284    ROUND CTR0.16b, KEY6.16b
285    eor v10.16b, v10.16b, v31.16b               // GHASH[2] - mid
286    ROUND CTR2.16b, KEY4.16b
287    stp x21, x22, [OUT00], #16                  // AES[2] - store result (block carried in from before the macro)
288    pmull v30.1q, v30.1d, v16.1d                // GHASH[3] - mid
289    eor v9.16b, v9.16b, v4.16b                  // GHASH[3] - high
290
291    ld1 {OUT0.16b}, [INPUT], #16                // AES[0] - load ciphertext
292
293    ROUND CTR1.16b, KEY7.16b
294    add IV_W, IV_W, #1                            // CTR++
295    ROUND CTR0.16b, KEY7.16b
296    shl d8, d8, #56                             // mod_constant: d8 = 0xc2 << 56
297    ROUND CTR2.16b, KEY5.16b
298    eor v10.16b, v10.16b, v30.16b               // GHASH[3] - mid
299    ROUND CTR1.16b, KEY8.16b
300    stp x23, x24, [OUT00], #16                  // AES[3] - store result (block carried in from before the macro)
301    ROUND CTR0.16b, KEY8.16b
302    eor v30.16b, HASH0.16b, v9.16b              // MODULO - karatsuba tidy up
303    ROUND CTR3.16b, KEY3.16b
304    rev w9, IV_W                                 // CTR block 4k+8
305    pmull v31.1q, v9.1d, v8.1d                  // MODULO - top 64b align with mid
306    ld1 {OUT1.16b}, [INPUT], #16                // AES[1] - load
307    ext v9.16b, v9.16b, v9.16b, #8              // MODULO - other top alignment
308    aese CTR0.16b, KEY9.16b                     // AES[0] - round 9
309    orr x9, x11, x9, lsl #32                    // CTR block 4k+8
310    ROUND CTR3.16b, KEY4.16b
311    eor v10.16b, v10.16b, v30.16b               // MODULO - karatsuba tidy up
312    aese CTR1.16b, KEY9.16b                     // AES[1] - round 9
313    ROUND CTR2.16b, KEY6.16b
314
315    eor CTR0.16b, OUT0.16b, CTR0.16b            // AES[0] - result
316    ROUND CTR3.16b, KEY5.16b
317    ld1 {OUT2.16b}, [INPUT], #16                // AES[2] - load
318    add IV_W, IV_W, #1                            // CTR++
319    eor v10.16b, v10.16b, v31.16b               // MODULO - fold into mid
320    eor CTR1.16b, OUT1.16b, CTR1.16b            // AES[1] - result
321    ROUND CTR2.16b, KEY7.16b
322    ld1 {OUT3.16b}, [INPUT], #16                // AES[3] - load
323    ROUND CTR3.16b, KEY6.16b
324
325    rev64 OUT1.16b, OUT1.16b                    // GHASH block[1] (prepared for the next pass)
326    eor v10.16b, v10.16b, v9.16b                // MODULO - fold into mid
327    mov x7, CTR0.d[1]                           // AES[0] - mov high
328    ROUND CTR2.16b, KEY8.16b
329    mov x6, CTR0.d[0]                           // AES[0] - mov low
330
331    ROUND CTR3.16b, KEY7.16b
332    fmov d0, x10                                // CTR[0]
333#ifdef HITLS_BIG_ENDIAN
334    rev x7, x7
335    rev x6, x6
336#endif
337    pmull v8.1q, v10.1d, v8.1d                  // MODULO - mid 64b align with low
338    fmov CTR0.d[1], x9                            // CTR[0] - OK
339    rev w9, IV_W                                 // CTR block 4k+9
340    aese CTR2.16b, KEY9.16b                     // AES[2] - round 9
341    orr x9, x11, x9, lsl #32                    // CTR block 4k+9
342
343    ext v10.16b, v10.16b, v10.16b, #8           // MODULO - other mid alignment
344
345    ROUND CTR3.16b, KEY8.16b
346
347    eor x7, x7, KEND1                           // AES[0] - round 10 high
348    eor HASH0.16b, HASH0.16b, v8.16b            // MODULO - fold into low
349    mov x20, CTR1.d[1]                          // AES[1] - mov high
350    eor x6, x6, KEND0                           // AES[0] - round 10 low
351    eor CTR2.16b, OUT2.16b, CTR2.16b            // AES[2] - result (stored by the next pass / loop tail)
352    mov x19, CTR1.d[0]                          // AES[1] - mov low
353    add IV_W, IV_W, #1                            // CTR++
354    aese CTR3.16b, KEY9.16b                     // AES[3] - round 9 (XOR with OUT3 happens at the top of the next pass)
355    fmov d1, x10                                // CTR[1]
356#ifdef HITLS_BIG_ENDIAN
357    rev x20, x20
358    rev x19, x19
359#endif
360    subs COUNT, COUNT, #1                       // COUNT-- (sets the flags; nothing below in this macro changes them)
361    rev64 OUT0.16b, OUT0.16b                    // GHASH block[0] (prepared for the next pass)
362    eor HASH0.16b, HASH0.16b, v10.16b           // MODULO - fold into low
363
364    fmov v1.d[1], x9                            // CTR[1] - OK
365    rev w9, IV_W                                 // CTR block 4k+10
366    add IV_W, IV_W, #1                            // CTR block 4k+10
367
368    eor x20, x20, KEND1                         // AES[1] - round 10 high
369    stp x6, x7, [OUT00], #16                    // AES[0] - store result
370    eor x19, x19, KEND0                         // AES[1] - round 10 low
371    stp x19, x20, [OUT00], #16                  // AES[1] - store result
372    orr x9, x11, x9, lsl #32                    // CTR block 4k+10 (consumed by the v2 refill at the top of the next pass)
373.endm
374
375#endif
376