• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)
18
/*
 * GCM_ENC192_LOOP - one pass of the interleaved AES-192 CTR encryption and
 * GHASH loop body. AES, GHASH and counter-rebuild instructions are
 * deliberately interleaved; do not reorder them casually.
 *
 * What a single expansion does, as far as this macro itself shows:
 *   - reads four 16-byte plaintext blocks from [INPUT] (post-incremented)
 *     into x6/x7, x19/x20, x21/x22 and x23/x24 (byte-swapped with rev when
 *     HITLS_BIG_ENDIAN is defined);
 *   - passes CTR0..CTR3 through ROUND with KEY0..KEY10, followed by a bare
 *     aese with KEY11; the "round 12" key is applied on the plaintext side
 *     by XORing KEND0/KEND1 into the general-purpose registers;
 *   - XORs plaintext and keystream and stores four blocks to [OUT00]
 *     (post-incremented);
 *   - multiplies v4..v7 by HASH4..HASH1 (middle terms via v17/v16),
 *     accumulates into v9 (high), v10 (mid) and HASH0 (low), then reduces
 *     into HASH0 with the 0xc2 << 56 constant built in v8;
 *   - adds 1 to IV_W four times and rebuilds counter blocks from x10 (low
 *     half) and x9 = x11 | (rev(IV_W) << 32) (high half); the last x9
 *     computed here is consumed by "fmov CTR3.d[1], x9" at the top of the
 *     macro;
 *   - decrements COUNT with subs.
 *
 * NOTE(review): ROUND, the CTRn/KEYn/OUTn/HASHn aliases, KEND0/KEND1, INPUT,
 * OUT00, IV_W and COUNT are defined outside this file. The pairing of
 * "fmov dN, x.." with "fmov OUTn.d[1], x.." below suggests OUT0..OUT3 alias
 * v4..v7; if so, the GHASH work of a pass covers the four blocks produced by
 * the previous pass - confirm against the alias definitions.
 *
 * Registers written besides the named aliases: x6, x7, x9, x19-x24, d0-d3,
 * v4-v10, v30, v31. x19-x24 and v8-v10 are callee-saved under AAPCS64 and
 * are not saved or restored here.
 * No instruction visible after the subs sets the condition flags
 * (assuming ROUND does not either - TODO confirm).
 */
19.macro GCM_ENC192_LOOP
20    ROUND CTR2.16b, KEY0.16b
21    rev64 OUT1.16b, OUT1.16b                // GHASH block 4k+1 (t0 and t1 free)
22    ROUND CTR1.16b, KEY0.16b
23    ldp x6, x7, [INPUT], #16                // AES[0] - load plaintext
24    ext HASH0.16b, HASH0.16b, HASH0.16b, #8 // PRE 0
25
26    fmov d3, x10                            // CTR[3]
27    rev64 OUT0.16b, OUT0.16b                // GHASH block 4k (only t0 is free)
28    ROUND CTR2.16b, KEY1.16b
29    fmov CTR3.d[1], x9                      // CTR[3]--OK
30#ifdef HITLS_BIG_ENDIAN
31    rev x6, x6
32    rev x7, x7
33#endif
34    pmull2 v30.1q, v5.2d, HASH3.2d          // GHASH block 4k+1 - high
35    rev64 OUT3.16b, OUT3.16b                // GHASH[0] (t0, t1, t2 and t3 free)
36    ldp x19, x20, [INPUT], #16              // AES[1] - load plaintext
37    ROUND CTR0.16b, KEY0.16b
38    ldp x21, x22, [INPUT], #16              // AES[2] - load plaintext
39    pmull v31.1q, v5.1d, HASH3.1d           // GHASH block 4k+1 - low
40    eor v4.16b, v4.16b, HASH0.16b           // PRE 1
41#ifdef HITLS_BIG_ENDIAN
42    rev x19, x19
43    rev x20, x20
44    rev x21, x21
45    rev x22, x22
46#endif
47    ROUND CTR1.16b, KEY1.16b
48    ROUND CTR0.16b, KEY1.16b
49    rev64 OUT2.16b, OUT2.16b                // GHASH[2] (t0, t1, and t2 free)
50    ROUND CTR3.16b, KEY0.16b
51    eor x7, x7, KEND1                       // AES[0] - round 12 high
52    pmull HASH0.1q, v4.1d, HASH4.1d         // GHASH block 4k - low
53    mov d8, v4.d[1]                         // GHASH block 4k - mid
54    ROUND CTR0.16b, KEY2.16b
55    ROUND CTR3.16b, KEY1.16b
56
57    eor x6, x6, KEND0                       // AES[0] - round 12 low
58    eor v8.8b, v8.8b, v4.8b                 // GHASH block 4k - mid
59    eor HASH0.16b, HASH0.16b, v31.16b       // GHASH block 4k+1 - low
60    ROUND CTR0.16b, KEY3.16b
61    eor x19, x19, KEND0                     // AES[1] - round 12 low
62    ROUND CTR1.16b, KEY2.16b
63    mov d31, v6.d[1]                        // GHASH[2] - mid
64    pmull2 v9.1q, v4.2d, HASH4.2d           // GHASH block 4k - high
65    mov d4, v5.d[1]                         // GHASH block 4k+1 - mid
66
67    ROUND CTR2.16b, KEY2.16b
68    ROUND CTR1.16b, KEY3.16b
69    mov d10, v17.d[1]                       // GHASH block 4k - mid
70    eor v9.16b, v9.16b, v30.16b             // GHASH block 4k+1 - high
71    ROUND CTR3.16b, KEY2.16b
72    eor v31.8b, v31.8b, v6.8b               // GHASH[2] - mid
73    pmull2 v30.1q, v6.2d, HASH2.2d          // GHASH[2] - high
74    ROUND CTR0.16b, KEY4.16b
75    eor v4.8b, v4.8b, v5.8b                 // GHASH block 4k+1 - mid
76    ROUND CTR3.16b, KEY3.16b
77    pmull2 v5.1q, v7.2d, HASH1.2d           // GHASH[0] - high
78    eor x20, x20, KEND1                     // AES[1] - round 12 high
79
80    ins v31.d[1], v31.d[0]                  // GHASH[2] - mid
81    ROUND CTR0.16b, KEY5.16b
82    add IV_W, IV_W, #1                        // CTR++
83    ROUND CTR3.16b, KEY4.16b
84    eor v9.16b, v9.16b, v30.16b             // GHASH[2] - high
85    pmull v4.1q, v4.1d, v17.1d              // GHASH block 4k+1 - mid
86    eor x22, x22, KEND1                     // AES[2] - round 12 high
87
88    pmull2 v31.1q, v31.2d, v16.2d           // GHASH[2] - mid
89    eor x21, x21, KEND0                     // AES[2] - round 12 low
90    mov d30, v7.d[1]                        // GHASH[0] - mid
91    pmull v10.1q, v8.1d, v10.1d             // GHASH block 4k - mid
92    rev w9, IV_W                             // CTR[0]
93    pmull v8.1q, v6.1d, HASH2.1d            // GHASH[2] - low
94    orr x9, x11, x9, lsl #32                // CTR[0]
95    ROUND CTR2.16b, KEY3.16b
96    eor v30.8b, v30.8b, v7.8b               // GHASH[0] - mid
97    ROUND CTR1.16b, KEY4.16b
98
99    ldp x23, x24, [INPUT], #16              // AES[3] - load plaintext
100    ROUND CTR0.16b, KEY6.16b
101    eor HASH0.16b, HASH0.16b, v8.16b        // GHASH[2] - low
102    ROUND CTR2.16b, KEY4.16b
103#ifdef HITLS_BIG_ENDIAN
104    rev x23, x23
105    rev x24, x24
106#endif
107    ROUND CTR1.16b, KEY5.16b
108    movi v8.8b, #0xc2                       // mod_constant base; moved to the top byte by the shl below
109    pmull v6.1q, v7.1d, HASH1.1d            // GHASH[0] - low
110    eor x24, x24, KEND1                     // AES[3] - round 12 high
111    eor v10.16b, v10.16b, v4.16b            // GHASH block 4k+1 - mid
112    ROUND CTR2.16b, KEY5.16b
113    eor x23, x23, KEND0                     // AES[3] - round 12 low
114
115    ROUND CTR1.16b, KEY6.16b
116    shl d8, d8, #56                         // mod_constant
117    ROUND CTR3.16b, KEY5.16b
118    eor v9.16b, v9.16b, v5.16b              // GHASH[0] - high
119    ROUND CTR0.16b, KEY7.16b
120    fmov d5, x19                            // AES[1] - mov low
121    ROUND CTR1.16b, KEY7.16b
122    eor v10.16b, v10.16b, v31.16b           // GHASH[2] - mid
123    ROUND CTR3.16b, KEY6.16b
124    fmov OUT1.d[1], x20                     // AES[1] - mov high
125
126    ROUND CTR0.16b, KEY8.16b
127    eor HASH0.16b, HASH0.16b, v6.16b        // GHASH[0] - low
128    pmull v30.1q, v30.1d, v16.1d            // GHASH[0] - mid
129
130    subs COUNT, COUNT, #1                          // count-- (sets the condition flags)
131    fmov d4, x6                             // AES[0] - mov low
132    ROUND CTR2.16b, KEY6.16b
133    fmov OUT0.d[1], x7                      // AES[0] - mov high
134
135    ROUND CTR1.16b, KEY8.16b
136    fmov d7, x23                            // AES[3] - mov low
137    eor v10.16b, v10.16b, v30.16b           // GHASH[0] - mid
138    eor v30.16b, HASH0.16b, v9.16b          // MODULO - karatsuba tidy up
139    add IV_W, IV_W, #1                        // CTR++
140    ROUND CTR2.16b, KEY7.16b
141    fmov OUT3.d[1], x24                     // AES[3] - mov high
142
143    pmull v31.1q, v9.1d, v8.1d              // MODULO - top 64b align with mid
144    ext v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
145    fmov d6, x21                            // AES[2] - mov low
146    ROUND CTR3.16b, KEY7.16b
147    ROUND CTR0.16b, KEY9.16b
148    eor v10.16b, v10.16b, v30.16b           // MODULO - karatsuba tidy up
149    ROUND CTR2.16b, KEY8.16b
150    ROUND CTR3.16b, KEY8.16b
151    ROUND CTR1.16b, KEY9.16b
152    ROUND CTR0.16b, KEY10.16b
153    eor v10.16b, v10.16b, v31.16b           // MODULO - fold into mid
154    ROUND CTR3.16b, KEY9.16b
155    ROUND CTR2.16b, KEY9.16b
156    aese CTR0.16b, KEY11.16b                // AES[0] - round 11
157
158    ROUND CTR1.16b, KEY10.16b
159    eor v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
160    ROUND CTR2.16b, KEY10.16b
161
162    eor OUT0.16b, OUT0.16b, CTR0.16b        // AES[0] - result
163    fmov d0, x10                            // CTR[0]
164    aese CTR1.16b, KEY11.16b                // AES[1] - round 11
165    fmov CTR0.d[1], x9                      // CTR[0]--OK
166
167    rev w9, IV_W                             // CTR[1]
168    pmull v9.1q, v10.1d, v8.1d              // MODULO - mid 64b align with low
169    fmov OUT2.d[1], x22                     // AES[2] - mov high
170    st1 {OUT0.16b}, [OUT00], #16            // AES[0] - store result
171
172    ROUND CTR3.16b, KEY10.16b
173    orr x9, x11, x9, lsl #32                // CTR[1]
174    eor OUT1.16b, OUT1.16b, CTR1.16b        // AES[1] - result
175    add IV_W, IV_W, #1                        // CTR++
176    fmov d1, x10                            // CTR[1]
177    aese CTR2.16b, KEY11.16b                // AES[2] - round 11
178
179    fmov v1.d[1], x9                        // CTR[1]--OK
180    rev w9, IV_W                             // CTR[2]
181    add IV_W, IV_W, #1                        // CTR++
182    ext v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
183    orr x9, x11, x9, lsl #32                // CTR[2]
184    st1 {OUT1.16b}, [OUT00], #16            // AES[1] - store result
185
186    eor HASH0.16b, HASH0.16b, v9.16b        // MODULO - fold into low
187    aese CTR3.16b, KEY11.16b                // AES[3] - round 11
188    eor OUT2.16b, OUT2.16b, CTR2.16b        // AES[2] - result
189    fmov d2, x10                            // CTR[2]
190    st1 {OUT2.16b}, [OUT00], #16            // AES[2] - store result
191
192    fmov CTR2.d[1], x9                      // CTR[2]--OK
193    rev w9, IV_W                             // CTR[3]
194    eor OUT3.16b, OUT3.16b, CTR3.16b        // AES[3] - result
195    eor HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
196    orr x9, x11, x9, lsl #32                // CTR[3] - high half, consumed at the top of the macro
197    st1 {OUT3.16b}, [OUT00], #16            // AES[3] - store result
198.endm
199
/*
 * GCM_DEC192_LOOP - one pass of the interleaved AES-192 CTR decryption and
 * GHASH loop body. AES, GHASH and counter-rebuild instructions are
 * deliberately interleaved; do not reorder them casually.
 *
 * What a single expansion does, as far as this macro itself shows:
 *   - finishes blocks 2 and 3 from state present at macro entry: CTR2 is
 *     moved to x21/x22, CTR3 is XORed with OUT3 and moved to x23/x24, both
 *     are XORed with KEND0/KEND1 ("round 12") and stored to [OUT00] first;
 *   - multiplies v4..v7 by HASH4..HASH1 (middle terms via v17/v16),
 *     accumulates into v9 (high), v10 (mid) and HASH0 (low), then reduces
 *     into HASH0 with the 0xc2 << 56 constant built in v8;
 *   - passes CTR0..CTR3 through ROUND with KEY0..KEY10, followed by a bare
 *     aese with KEY11;
 *   - loads four 16-byte ciphertext blocks from [INPUT] (post-incremented)
 *     into OUT0..OUT3;
 *   - XORs OUT0/OUT1 with CTR0/CTR1, applies KEND0/KEND1 in x6/x7 and
 *     x19/x20, and stores blocks 0 and 1 to [OUT00] after blocks 2 and 3;
 *   - leaves CTR2 = OUT2 ^ CTR2 and CTR3 after its KEY11 aese, which is the
 *     state the top of the macro consumes;
 *   - adds 1 to IV_W four times and rebuilds counter blocks from x10 (low
 *     half) and x9 = x11 | (rev(IV_W) << 32) (high half);
 *   - decrements COUNT with subs.
 *
 * NOTE(review): ROUND, the CTRn/KEYn/OUTn/HASHn aliases, KEND0/KEND1, INPUT,
 * OUT00, IV_W and COUNT are defined outside this file. OUT1 and v5 are both
 * used as the "block 4k+1" GHASH operand below, which suggests OUT0..OUT3
 * alias v4..v7 - confirm against the alias definitions.
 *
 * Registers written besides the named aliases: x6, x7, x9, x19-x24, d0-d3,
 * v4-v10, v30, v31. x19-x24 and v8-v10 are callee-saved under AAPCS64 and
 * are not saved or restored here.
 * No instruction visible after the subs sets the condition flags
 * (assuming ROUND does not either - TODO confirm).
 */
200.macro GCM_DEC192_LOOP
201    ROUND CTR1.16b, KEY0.16b
202    ext HASH0.16b, HASH0.16b, HASH0.16b, #8     // PRE 0
203    pmull v31.1q, OUT1.1d, HASH3.1d             // GHASH block 4k+1 - low
    // CTR2/CTR3 are read here before being reloaded with fresh counters below.
204    mov x21, CTR2.d[0]                          // AES[2] block - mov low
205    mov x22, CTR2.d[1]                          // AES[2] block - mov high
206    eor CTR3.16b, OUT3.16b, CTR3.16b            // AES[3] block - result
207
208    rev64 v7.16b, v7.16b                        // GHASH[0]
209    ROUND CTR1.16b, KEY1.16b
210    fmov d2, x10                                // CTR[2] block
211    ROUND CTR0.16b, KEY0.16b
212#ifdef HITLS_BIG_ENDIAN
213    rev x21, x21
214    rev x22, x22
215#endif
216    eor v4.16b, v4.16b, HASH0.16b               // PRE 1
217    pmull2 v30.1q, v5.2d, HASH3.2d              // GHASH block 4k+1 - high
218    fmov CTR2.d[1], x9                          // CTR[2]--OK
219
220    ROUND CTR1.16b, KEY2.16b
221    mov x24, CTR3.d[1]                          // AES[3] block - mov high
222    ROUND CTR0.16b, KEY1.16b
223    mov x23, CTR3.d[0]                          // AES[3] block  - mov low
224
225    pmull2 v9.1q, v4.2d, HASH4.2d               // GHASH block 4k - high
226    fmov d3, x10                                // CTR[3]
227    mov d8, v4.d[1]                             // GHASH block 4k - mid
228    pmull HASH0.1q, v4.1d, HASH4.1d             // GHASH block 4k - low
229#ifdef HITLS_BIG_ENDIAN
230    rev x23, x23
231    rev x24, x24
232#endif
233    mov d10, v17.d[1]                           // GHASH block 4k - mid
234    rev w9, IV_W                                 // CTR[3]
235    ROUND CTR2.16b, KEY0.16b
236    orr x9, x11, x9, lsl #32                    // CTR[3]
237    fmov CTR3.d[1], x9                          // CTR[3]--OK
238
239    eor v8.8b, v8.8b, v4.8b                     // GHASH block 4k - mid
240    mov d4, v5.d[1]                             // GHASH block 4k+1 - mid
241    ROUND CTR1.16b, KEY3.16b
242    ROUND CTR0.16b, KEY2.16b
243    eor x22, x22, KEND1                         // AES[2] block - round 12 high
244
245    ROUND CTR2.16b, KEY1.16b
246    eor v4.8b, v4.8b, v5.8b                     // GHASH block 4k+1 - mid
247    pmull v10.1q, v8.1d, v10.1d                 // GHASH block 4k - mid
248    ROUND CTR3.16b, KEY0.16b
249    rev64 v6.16b, v6.16b                        // GHASH[2]
250    ROUND CTR2.16b, KEY2.16b
251    pmull v4.1q, v4.1d, v17.1d                  // GHASH block 4k+1 - mid
252    eor HASH0.16b, HASH0.16b, v31.16b           // GHASH block 4k+1 - low
253    eor x21, x21, KEND0                         // AES[2] block  - round 12 low
254
255    ROUND CTR1.16b, KEY4.16b
256    ROUND CTR0.16b, KEY3.16b
257    eor v10.16b, v10.16b, v4.16b                // GHASH block 4k+1 - mid
258    mov d31, v6.d[1]                            // GHASH[2] - mid
259    ROUND CTR3.16b, KEY1.16b
260    eor v9.16b, v9.16b, v30.16b                 // GHASH block 4k+1 - high
261    ROUND CTR0.16b, KEY4.16b
262    pmull2 v30.1q, v6.2d, HASH2.2d              // GHASH[2] - high
263    eor v31.8b, v31.8b, v6.8b                   // GHASH[2] - mid
264    pmull v8.1q, v6.1d, HASH2.1d                // GHASH[2] - low
265    ROUND CTR0.16b, KEY5.16b
266    eor v9.16b, v9.16b, v30.16b                 // GHASH[2] - high
267    mov d30, v7.d[1]                            // GHASH[0] - mid
268    ROUND CTR1.16b, KEY5.16b
269    pmull2 v5.1q, v7.2d, HASH1.2d               // GHASH[0] - high
270    ROUND CTR3.16b, KEY2.16b
271    eor v30.8b, v30.8b, v7.8b                   // GHASH[0] - mid
272    ROUND CTR1.16b, KEY6.16b
273    ROUND CTR0.16b, KEY6.16b
274    ins v31.d[1], v31.d[0]                      // GHASH[2] - mid
275    ROUND CTR3.16b, KEY3.16b
276    pmull v30.1q, v30.1d, v16.1d                // GHASH[0] - mid
277    eor HASH0.16b, HASH0.16b, v8.16b            // GHASH[2] - low
278    ROUND CTR0.16b, KEY7.16b
279    pmull2 v31.1q, v31.2d, v16.2d               // GHASH[2] - mid
280    eor v9.16b, v9.16b, v5.16b                  // GHASH[0] - high
281    ROUND CTR1.16b, KEY7.16b
282    ROUND CTR0.16b, KEY8.16b
283    movi v8.8b, #0xc2                           // mod_constant base; moved to the top byte by the shl below
284    pmull v6.1q, v7.1d, HASH1.1d                // GHASH[0] - low
285    ROUND CTR1.16b, KEY8.16b
286    eor v10.16b, v10.16b, v31.16b               // GHASH[2] - mid
287    ROUND CTR2.16b, KEY3.16b
288    ROUND CTR0.16b, KEY9.16b
289    eor HASH0.16b, HASH0.16b, v6.16b            // GHASH[0] - low
290    ROUND CTR3.16b, KEY4.16b
291    ROUND CTR2.16b, KEY4.16b
292    eor v10.16b, v10.16b, v30.16b               // GHASH[0] - mid
293    ROUND CTR0.16b, KEY10.16b
294    ROUND CTR1.16b, KEY9.16b
295    eor v30.16b, HASH0.16b, v9.16b              // MODULO - karatsuba tidy up
296    ROUND CTR2.16b, KEY5.16b
297    ROUND CTR3.16b, KEY5.16b
298    shl d8, d8, #56                             // mod_constant
299    ROUND CTR1.16b, KEY10.16b
300    ROUND CTR2.16b, KEY6.16b
301    ld1 {OUT0.16b}, [INPUT], #16                // AES load[0] ciphertext
302    ROUND CTR3.16b, KEY6.16b
303    eor v10.16b, v10.16b, v30.16b               // MODULO - karatsuba tidy up
304    pmull v31.1q, v9.1d, v8.1d                  // MODULO - top 64b align with mid
305    ld1 {OUT1.16b}, [INPUT], #16                // AES load[1] ciphertext
306    eor x23, x23, KEND0                         // AES[3] block - round 12 low
307    ROUND CTR2.16b, KEY7.16b
308    ext v9.16b, v9.16b, v9.16b, #8              // MODULO - other top alignment
309    aese CTR0.16b, KEY11.16b                    // AES[0] - round 11
310    add IV_W, IV_W, #1                            // CTR++
311    ROUND CTR3.16b, KEY7.16b
312    eor v10.16b, v10.16b, v31.16b               // MODULO - fold into mid
313    ld1 {OUT2.16b}, [INPUT], #16                // AES load[2] ciphertext
314    ROUND CTR2.16b, KEY8.16b
315    aese CTR1.16b, KEY11.16b                    // AES[1] - round 11
316    ld1 {OUT3.16b}, [INPUT], #16                // AES load[3] ciphertext
317    rev w9, IV_W                                 // CTR block 4k+8
318    ROUND CTR3.16b, KEY8.16b
319
320    stp x21, x22, [OUT00], #16                  // AES[2] block - store result
321    ROUND CTR2.16b, KEY9.16b
322    eor v10.16b, v10.16b, v9.16b                // MODULO - fold into mid
323
324    subs COUNT, COUNT, #1                       // COUNT-- (sets the condition flags)
325    eor CTR0.16b, OUT0.16b, CTR0.16b            // AES[0] block - result
326    eor x24, x24, KEND1                         // AES[3] block - round 12 high
327    eor CTR1.16b, OUT1.16b, CTR1.16b            // AES[1] block - result
328    ROUND CTR2.16b, KEY10.16b
329    orr x9, x11, x9, lsl #32                    // CTR block 4k+8
330    ROUND CTR3.16b, KEY9.16b
331    pmull v8.1q, v10.1d, v8.1d                  // MODULO - mid 64b align with low
332    mov x19, CTR1.d[0]                          // AES[1] block - mov low
333    mov x6, CTR0.d[0]                           // AES[0] block - mov low
334
335    stp x23, x24, [OUT00], #16                  // AES[3] - store result
336    rev64 v5.16b, v5.16b                        // GHASH block 4k+1 - byte swap (v5 is the 4k+1 operand above)
337    aese CTR2.16b, KEY11.16b                    // AES[2] - round 11
338    mov x7, CTR0.d[1]                           // AES[0] block - mov high
339    ROUND CTR3.16b, KEY10.16b
340    mov x20, CTR1.d[1]                          // AES[1] block - mov high
341#ifdef HITLS_BIG_ENDIAN
342    rev x6, x6
343    rev x7, x7
344    rev x19, x19
345    rev x20, x20
346#endif
347    fmov d0, x10                                // CTR[0]
348    add IV_W, IV_W, #1                            // CTR++
349    ext v10.16b, v10.16b, v10.16b, #8           // MODULO - other mid alignment
350    eor CTR2.16b, OUT2.16b, CTR2.16b            // AES[2] block - result
351    fmov CTR0.d[1], x9                            // CTR[0]--OK
352    rev w9, IV_W                                 // CTR block 4k+9
353    eor x6, x6, KEND0                           // AES[0] block - round 12 low
354    orr x9, x11, x9, lsl #32                    // CTR block 4k+9
355    eor HASH0.16b, HASH0.16b, v8.16b            // MODULO - fold into low
356    fmov d1, x10                                // CTR[1]
357    add IV_W, IV_W, #1                            // CTR++
358    eor x19, x19, KEND0                         // AES[1] block - round 12 low
359    fmov CTR1.d[1], x9                          // CTR[1]--OK
360    rev w9, IV_W                                 // CTR block 4k+10
361    eor x20, x20, KEND1                         // AES[1] - round 12 high
362    eor x7, x7, KEND1                           // AES[0] - round 12 high
363
364    stp x6, x7, [OUT00], #16                    // AES[0] block - store result
365    eor HASH0.16b, HASH0.16b, v10.16b           // MODULO - fold into low
366    add IV_W, IV_W, #1                            // CTR++
367    rev64 v4.16b, v4.16b                        // GHASH block 4k - byte swap (v4 is the 4k operand above)
368    orr x9, x11, x9, lsl #32                    // CTR block 4k+10 - high half, consumed by "fmov CTR2.d[1], x9" at the top
369    aese CTR3.16b, KEY11.16b                    // AES[3] round 11
370    stp x19, x20, [OUT00], #16                  // AES[1] block - store result
371.endm
372
373#endif
374