• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)
18
19.text
// Register aliases shared by the macros in this file.
// Some names are two views of one physical register:
//   KEY00/IVCTR = x4/w4, COUNT/COUNTW = x15/w15, IV_C/IV_W = x12/w12,
//   IV_CX/IV_CW = x9/w9, HASH1/HASH1_2 = v12, HASH2/MULL_C2 = v13.
// NOTE(review): x0-x5 are presumably the caller's arguments; the function
// entry points are not part of this section, so confirm against them.
20INPUT  .req x1
21OUT00  .req x2
22INLEN  .req x3
23KEY00  .req x4
24IVCTR  .req w4      // 32-bit view of the register also named KEY00
25HTABLE .req x5
26IVEC0  .req x0
27ROUNDS .req w8
28COUNT  .req x15
29COUNTW .req w15     // 32-bit view of COUNT
30IV_H   .req x10     // high 64 bits
31IV_L   .req x11     // lower 64 bits
32IV_C   .req x12
33IV_W   .req w12     // 32-bit view of IV_C
34IV_CW  .req w9      // 32-bit view of IV_CX
35IV_CX  .req x9
// Counter blocks (v0-v3) and data blocks (v4-v7).
36CTR0  .req v0
37CTR1  .req v1
38CTR2  .req v2
39CTR3  .req v3
40OUT0  .req v4
41OUT1  .req v5
42OUT2  .req v6
43OUT3  .req v7
// Round-key vectors (v18-v31).
44KEY0  .req v18
45KEY1  .req v19
46KEY2  .req v20
47KEY3  .req v21
48KEY4  .req v22
49KEY5  .req v23
50KEY6  .req v24
51KEY7  .req v25
52KEY8  .req v26
53KEY9  .req v27
54KEY10 .req v28
55KEY11 .req v29
56KEY12 .req v30
57KEY13 .req v31
// Last round key held in two general-purpose registers.
58KEND0 .req x13
59KEND1 .req x14
// GHASH state and table entries (v11-v15).
60HASH0 .req v11
61HASH1 .req v12
62HASH2 .req v13
63HASH3 .req v14
64HASH4 .req v15
65MULL_C2 .req v13    // same register as HASH2
66HASH1_2 .req v12    // same register as HASH1
67
// Prologue helper: open a 112-byte stack frame and spill x19-x24 and d8-d15
// into it. Frame layout (offsets from the new sp):
//   0: x19,x20   16: x21,x22   32: x23,x24
//  48: d8,d9     64: d10,d11   80: d12,d13   96: d14,d15
.macro IN_STP
    stp x19, x20, [sp, #-112]!          // pre-index: sp -= 112, then store
    stp d14, d15, [sp, #96]
    stp d12, d13, [sp, #80]
    stp d10, d11, [sp, #64]
    stp d8, d9, [sp, #48]
    stp x23, x24, [sp, #32]
    stp x21, x22, [sp, #16]
.endm
77
// Epilogue helper: reload x19-x24 and d8-d15 from the 112-byte frame written
// by IN_STP, then release the frame.
.macro OUT_STP
    ldp d14, d15, [sp, #96]
    ldp d12, d13, [sp, #80]
    ldp d10, d11, [sp, #64]
    ldp d8, d9, [sp, #48]
    ldp x23, x24, [sp, #32]
    ldp x21, x22, [sp, #16]
    ldp x19, x20, [sp], #112            // post-index: load, then sp += 112
.endm
87
// Byte-reverse two general-purpose registers in place.
//   REG0, REG1 - registers to reverse (each is both source and destination).
// Used in this file under HITLS_BIG_ENDIAN after moving data between memory
// and x-registers.
88.macro REV_2S REG0, REG1
89    rev \REG0, \REG0
90    rev \REG1, \REG1
91.endm
92
// Load ten consecutive 16-byte round keys from [KEY00] into KEY0..KEY9.
// Each ld1 post-increments KEY00 by 32, so KEY00 ends 160 bytes past its
// starting value.
93.macro LOAD_KEY
94    ld1 {KEY0.4s, KEY1.4s}, [KEY00], #32                          // load key-0-1
95    ld1 {KEY2.4s, KEY3.4s}, [KEY00], #32                          // load key-2-3
96    ld1 {KEY4.4s, KEY5.4s}, [KEY00], #32                          // load key-4-5
97    ld1 {KEY6.4s, KEY7.4s}, [KEY00], #32                          // load key-6-7
98    ld1 {KEY8.4s, KEY9.4s}, [KEY00], #32                          // load key-8-9
99.endm
100
// Load the GHASH state and four table entries from [HTABLE].
// Byte offsets read, relative to the incoming HTABLE:
//   0 -> HASH0, 16 -> HASH1, 48 -> HASH2, 64 -> HASH3, 96 -> HASH4.
// The 16-byte entries at offsets 32 and 80 are skipped.
// NOTE(review): the skipped entries are presumably precomputed Karatsuba
// terms - confirm against the table layout.
// On exit HTABLE points at offset 96 (the final load does not post-increment).
101.macro LOAD_GHASH_TABLE
102    ld1 {HASH0.16b}, [HTABLE], #16                                // load ghash
103    ld1 {HASH1.2d}, [HTABLE], #16                                 // load h^1
104    add HTABLE, HTABLE, #16                                       // skip one 16-byte entry
105    ld1 {HASH2.2d}, [HTABLE], #16                                 // load h^2
106    ld1 {HASH3.2d}, [HTABLE], #16                                 // load h^3
107    add HTABLE, HTABLE, #16                                       // skip one 16-byte entry
108    ld1 {HASH4.2d}, [HTABLE]                                      // load h^4
109.endm
110
// One full AES encryption round (AESE followed by AESMC) with round key \KEY,
// applied to four state vectors in the order BLOCK0, BLOCK1, BLOCK2, BLOCK3.
// Expressed through the single-block ROUND macro; the emitted instruction
// stream is the same aese/aesmc pair per block.
.macro ROUND4 BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY
    ROUND \BLOCK0, \KEY
    ROUND \BLOCK1, \KEY
    ROUND \BLOCK2, \KEY
    ROUND \BLOCK3, \KEY
.endm
121
// AESE only (no AESMC) with round key \KEY on four state vectors, in the
// order BLOCK0, BLOCK1, BLOCK2, BLOCK3.
.macro ROUND4_END BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY
    .irp STATE, \BLOCK0, \BLOCK1, \BLOCK2, \BLOCK3
    aese \STATE, \KEY
    .endr
.endm
128
// One full AES encryption round on a single state vector.
//   BLOCK - state vector operand (e.g. CTR0.16b), updated in place.
//   KEY   - round key vector operand.
// aese and aesmc are kept adjacent and operate on the same register.
129.macro ROUND BLOCK, KEY
130    aese \BLOCK, \KEY
131    aesmc \BLOCK, \BLOCK
132.endm
133
// Build one counter block in a vector register and advance the counter.
//   DI - D-register view of the target vector (receives IV_H).
//   VI - the .d[1] element of the target vector
//        (receives IV_L | byte-reversed IV_W << 32).
// Callers in this file pass matching views of one register, e.g. d1 with
// CTR1.d[1]. Clobbers x9 (IV_CX/IV_CW); IV_W is incremented by 1.
134.macro LOAD_CTR DI, VI
135    rev IV_CW, IV_W                         // w9 = byte-reversed 32-bit counter
136    fmov \DI, IV_H                          // set h64
137    orr IV_CX, IV_L, IV_CX, lsl #32         // x9 = IV_L | (w9 << 32)
138    add IV_W, IV_W, #1                      // CTR++
139    fmov \VI, IV_CX                         // set l64
140.endm
141
// Setup before the 4-block loop:
//   - swaps the 64-bit halves of HASH0..HASH4 and byte-reverses HASH0 within
//     each 64-bit lane;
//   - byte-reverses the 32-bit counter in IV_W and advances it;
//   - builds counter blocks CTR1..CTR3 through LOAD_CTR;
//   - packs the table entries into v8/v16 (from HASH1/HASH2) and v9/v17
//     (from HASH3/HASH4) with trn1/trn2.
// CTR0 is not written here.
// NOTE(review): IVCTR is w4, the low half of KEY00 (x4); the add below
// overwrites it. Confirm x4 no longer needs to hold the key pointer here.
142.macro BEFORE_ROUND
143    ext HASH0.16b, HASH0.16b, HASH0.16b, #8                         // xi
144    ext HASH1.16b, HASH1.16b, HASH1.16b, #8                         // h^1
145    rev IV_W, IV_W                                               // rev_ctr32
146    ext HASH2.16b, HASH2.16b, HASH2.16b, #8                         // h^2
147    ext HASH3.16b, HASH3.16b, HASH3.16b, #8                         // h^3
148    add IVCTR, IV_W, IVCTR                                          // w4 = w12 + w4
149    ext HASH4.16b, HASH4.16b, HASH4.16b, #8                         // h^4
150    add IV_W, IV_W, #1                                              // ctr++
151    rev64 HASH0.16b, HASH0.16b                                      // byte-reverse each 64-bit lane
152    orr w11, w11, w11                                               // W-register write clears bits 63:32 of x11 (IV_L)
153    trn2 v17.2d, HASH3.2d, HASH4.2d                                 // h4l | h3l
154    LOAD_CTR d1, CTR1.d[1]                                          // CTR block 1
155    trn1 v9.2d, HASH3.2d, HASH4.2d                                  // h4h | h3h
156    LOAD_CTR d2, CTR2.d[1]                                          // CTR block 2
157    trn2 v16.2d, HASH1.2d, HASH2.2d                                 // h2l | h1l
158    LOAD_CTR d3, CTR3.d[1]                                          // CTR block 3
159    trn1 v8.2d, HASH1.2d, HASH2.2d                                  // h2h | h1h
160.endm
161
// Encrypt-path front half for four blocks:
//   - runs AES rounds 0..6 (KEY0..KEY6) on CTR0..CTR3;
//   - interleaved with that, loads 64 bytes from [INPUT] into x6/x7,
//     x19/x20, x21/x22, x23/x24 (byte-reversed under HITLS_BIG_ENDIAN),
//     XORs each pair with KEND0/KEND1, and moves the results into OUT0..OUT3.
// INPUT is not advanced here. Clobbers x6, x7, x19-x24.
162.macro FIRST_ROUND
163    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY0.16b         // round 0
164    ldp x6, x7, [INPUT, #0]                                         // load INPUT 0
165#ifdef HITLS_BIG_ENDIAN
166    REV_2S x6, x7
167#endif
168    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY1.16b         // round 1
169    ldp x19, x20, [INPUT, #16]                                      // AES[1] - load plaintext
170#ifdef HITLS_BIG_ENDIAN
171    REV_2S x19, x20
172#endif
173    eor x6, x6, KEND0                                               // round 10 low
174    eor x7, x7, KEND1                                               // round 10 high
175    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY2.16b         // round 2
176    ldp x21, x22, [INPUT, #32]                                      // AES[2] - load plaintext
177#ifdef HITLS_BIG_ENDIAN
178    REV_2S x21, x22
179#endif
180    eor x19, x19, KEND0                                             // AES[1] - round 10 low
181    eor x20, x20, KEND1                                             // AES[1] - round 10 high
182    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY3.16b         // round 3
183    ldp x23, x24, [INPUT, #48]                                      // AES[3] - load plaintext
184#ifdef HITLS_BIG_ENDIAN
185    REV_2S x23, x24
186#endif
187    eor x21, x21, KEND0                                             // AES[2] - round 10 low
188    eor x22, x22, KEND1                                             // AES[2] - round 10 high
189    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY4.16b         // round 4
190    eor x23, x23, KEND0                                             // AES[3] - round 10 low
191    eor x24, x24, KEND1                                             // AES[3] - round 10 high
192    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY5.16b         // round 5
193    fmov d4, x6                                                     // INPUT 0 - mov low
194    fmov d5, x19                                                    // AES[1] - mov low
195    fmov d6, x21                                                    // AES[2] - mov low
196    fmov d7, x23                                                    // AES[3] - mov low
197    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY6.16b         // round 6
198    fmov OUT0.d[1], x7                                              // AES[0] - mov high
199    fmov OUT1.d[1], x20                                             // AES[1] - mov high
200    fmov OUT2.d[1], x22                                             // AES[2] - mov high
201    fmov OUT3.d[1], x24                                             // AES[3] - mov high
202.endm
203
// Encrypt-path back half for four blocks:
//   - OUTn ^= CTRn for n = 0..3 and stores the 64 result bytes to [OUT00]
//     (OUT00 advances by 64);
//   - advances INPUT by 64;
//   - decrements COUNT with subs, so the flags reflect the new COUNT for
//     whatever branch follows the macro;
//   - rebuilds CTR0 from x10 and the incoming x9, then CTR1 and CTR2 from
//     x10/x11 and IV_W (IV_W is incremented twice);
//   - leaves x9 holding the high half for CTR3 without writing CTR3.
// NOTE(review): CTR3 is presumably completed by the code after this macro -
// confirm at the call site.
204.macro STORE_RESULT
205    add INPUT, INPUT, #64                                           // AES input_ptr update
206    eor OUT0.16b, OUT0.16b, CTR0.16b                                // AES[0] - result
207    eor OUT1.16b, OUT1.16b, CTR1.16b                                // AES[1] - result
208    eor OUT2.16b, OUT2.16b, CTR2.16b                                // AES[2] - result
209    fmov d0, x10                                                    // CTR[0]
210    eor OUT3.16b, OUT3.16b, CTR3.16b                                // AES[3] - result
211    subs COUNT, COUNT, #1                                           // count--
212    fmov CTR0.d[1], x9                                              // CTR[0]--OK
213    rev w9, IV_W                                                    // CTR[1]--Start
214    st1 {OUT0.16b}, [OUT00], #16                                    // AES[0] - store result
215    orr x9, x11, x9, lsl #32                                        // CTR[1]
216    st1 {OUT1.16b}, [OUT00], #16                                    // AES[1] - store result
217    add IV_W, IV_W, #1                                              // CTR++
218    fmov d1, x10                                                    // CTR[1]
219    st1 {OUT2.16b}, [OUT00], #16                                    // AES[2] - store result
220    fmov v1.d[1], x9                                                // CTR[1]--OK
221    rev w9, IV_W                                                    // CTR[2]--Start
222    st1 {OUT3.16b}, [OUT00], #16                                    // AES[3] - store result
223    orr x9, x11, x9, lsl #32                                        // CTR[2]
224    add IV_W, IV_W, #1                                              // CTR++
225    fmov d2, x10                                                    // CTR2-0
226    fmov v2.d[1], x9                                                // CTR[2]--OK
227    rev w9, IV_W                                                    // CTR[3]--Start
228    orr x9, x11, x9, lsl #32                                        // CTR[3]
229.endm
230
// Decrypt-path step for four blocks (first half of the output):
//   - loads 64 input bytes into OUT0..OUT3 (INPUT advances by 64);
//   - CTR0..CTR2 ^= OUT0..OUT2 (CTR3 is not combined here);
//   - moves CTR0/CTR1 to x6/x7 and x19/x20, XORs them with KEND0/KEND1 and
//     stores 32 bytes to [OUT00] (OUT00 advances by 32);
//   - rebuilds CTR0 and CTR1 from x10/x11/IV_W, and leaves x9 holding the
//     high half for CTR2 (IV_W is incremented three times in total);
//   - byte-reverses OUT0 and OUT1 within each 64-bit lane;
//   - decrements COUNT with subs (flags are updated).
231.macro STORE_DEC_RESULT
232    ld1 {OUT0.16b}, [INPUT], #16
233    ld1 {OUT1.16b}, [INPUT], #16
234    ld1 {OUT2.16b}, [INPUT], #16
235    eor CTR0.16b, CTR0.16b, OUT0.16b
236    ld1 {OUT3.16b}, [INPUT], #16
237    eor CTR1.16b, CTR1.16b, OUT1.16b
238    eor CTR2.16b, CTR2.16b, OUT2.16b
239    mov	x6, CTR0.d[0]
240    mov	x7, CTR0.d[1]
241    mov	x19, CTR1.d[0]
242    mov	x20, CTR1.d[1]
243#ifdef HITLS_BIG_ENDIAN
244    REV_2S x6, x7
245    REV_2S x19, x20
246#endif
247    rev w9, IV_W                         // CTR[0]
248    eor x6, x6, KEND0
249    orr x9, x11, x9, lsl #32            // CTR[0]
250    eor x7, x7, KEND1
251    add IV_W, IV_W, #1                    // CTR++
252    fmov d0, x10                        // CTR[0]
253    eor x19, x19, KEND0
254    fmov CTR0.d[1], x9                    // CTR[0]--OK
255    rev w9, IV_W                         // CTR[1]
256    eor x20, x20, KEND1
257    orr x9, x11, x9, lsl #32            // CTR[1]
258    subs COUNT, COUNT, #1                                           // count--
259    add IV_W, IV_W, #1                    // CTR++
260    fmov d1, x10                        // CTR[1]
261    stp x6, x7, [OUT00], #16            // block 0 - store result
262    fmov v1.d[1], x9                    // CTR[1]--OK
263    stp x19, x20, [OUT00], #16          // block 1 - store result
264    rev w9, IV_W                         // CTR[2]
265    rev64 OUT0.16b, OUT0.16b            // byte-reverse each 64-bit lane
266    add IV_W, IV_W, #1                    // CTR++
267    rev64 OUT1.16b, OUT1.16b            // byte-reverse each 64-bit lane
268    orr x9, x11, x9, lsl #32            // CTR[2]
269.endm
270
// Fold four data blocks (OUT0..OUT3) into the GHASH accumulator HASH0.
//   Reads:  HASH0, OUT0..OUT3, HASH1..HASH4, v16, v17.
//   Writes: HASH0 (result), OUT0, v8, v9, v10, v28, v29, v30, v31.
// v28-v31 are the registers also named KEY10-KEY13, so those aliases do not
// survive this macro.
// Structure: per block, a high product (pmull2), a low product (pmull) and a
// middle product of the XORed halves; the sums are then reduced with the
// constant 0xc2 << 56 held in d8.
271.macro GHASH_BLOCK
272    ext HASH0.16b, HASH0.16b, HASH0.16b, #8                         // PRE 0
273    mov d30, OUT1.d[1]                                              // GHASH block 4k+1 - mid
274    mov d31, OUT2.d[1]                                              // GHASH[2] - mid
275    eor OUT0.16b, OUT0.16b, HASH0.16b                               // PRE 1 tag ^ out
276    pmull2 v28.1q, OUT1.2d, HASH3.2d                                // GHASH block 4k+1 - high
277    eor v30.8b, v30.8b, OUT1.8b                                     // GHASH block 4k+1 - mid
278    eor v31.8b, v31.8b, OUT2.8b                                     // GHASH[2] - mid
279    mov d8, OUT0.d[1]                                               // GHASH block 4k - mid
280    mov d10, v17.d[1]                                               // GHASH block 4k - mid
281    pmull2 v9.1q, OUT0.2d, HASH4.2d                                 // GHASH block 4k - high
282    pmull HASH0.1q, OUT0.1d, HASH4.1d                               // GHASH block 4k - low
283    eor v8.8b, v8.8b, OUT0.8b                                       // GHASH block 4k - mid
284    eor v9.16b, v9.16b, v28.16b                                     // GHASH block 4k+1 - high
285    pmull v29.1q, OUT1.1d, HASH3.1d                                 // GHASH block 4k+1 - low
286    pmull v28.1q, OUT2.1d, HASH2.1d                                 // GHASH[2] - low
287    pmull v10.1q, v8.1d, v10.1d                                     // GHASH block 4k - mid
288    pmull v30.1q, v30.1d, v17.1d                                    // GHASH block 4k+1 - mid
289    ins v31.d[1], v31.d[0]                                          // GHASH[2] - mid
290    pmull2 v8.1q, OUT2.2d, HASH2.2d                                 // GHASH[2] - high
291    eor v10.16b, v10.16b, v30.16b                                   // GHASH block 4k+1 - mid
292    mov d30, OUT3.d[1]                                              // GHASH[0] - mid
293    eor HASH0.16b, HASH0.16b, v29.16b                               // GHASH block 4k+1 - low
294    eor v30.8b, v30.8b, OUT3.8b                                     // GHASH[0] - mid
295    pmull2 OUT0.1q, OUT3.2d, HASH1.2d                               // GHASH[0] - high
296    eor v9.16b, v9.16b, v8.16b                                      // GHASH[2] - high
297    pmull2 v31.1q, v31.2d, v16.2d                                   // GHASH[2] - mid
298    pmull v29.1q, OUT3.1d, HASH1.1d                                 // GHASH[0] - low
299    movi v8.8b, #0xc2                                               // every byte of d8 = 0xc2
300    pmull v30.1q, v30.1d, v16.1d                                    // GHASH[0] - mid
301    eor HASH0.16b, HASH0.16b, v28.16b                               // GHASH[2] - low
302    shl d8, d8, #56                                                 // mod_constant
303    eor v9.16b, v9.16b, OUT0.16b                                    // GHASH[0] - high
304    eor v10.16b, v10.16b, v31.16b                                   // GHASH[2] - mid
305    pmull v31.1q, v9.1d, v8.1d                                      // MODULO - top 64b align with mid
306    eor HASH0.16b, HASH0.16b, v29.16b                               // GHASH[0] - low
307    eor v10.16b, v10.16b, v30.16b                                   // GHASH[0] - mid
308    eor v30.16b, HASH0.16b, v9.16b                                  // MODULO - karatsuba tidy up
309    ext v9.16b, v9.16b, v9.16b, #8                                  // MODULO - other top alignment
310    eor v10.16b, v10.16b, v30.16b                                   // MODULO - karatsuba tidy up
311    eor v10.16b, v10.16b, v31.16b                                   // MODULO - fold into mid
312    eor v10.16b, v10.16b, v9.16b                                    // MODULO - fold into mid
313    pmull v9.1q, v10.1d, v8.1d                                      // MODULO - mid 64b align with low
314    ext v10.16b, v10.16b, v10.16b, #8                               // MODULO - other mid alignment
315    eor HASH0.16b, HASH0.16b, v9.16b                                // MODULO - fold into low
316    eor HASH0.16b, HASH0.16b, v10.16b                               // MODULO - fold into low
317.endm
318
// Decrypt-path companion to STORE_DEC_RESULT. It interleaves two jobs:
//   1. Finishes output blocks 2 and 3: v2 (CTR2) and OUT3 ^ CTR3 are moved to
//      x21/x22 and x23/x24, XORed with KEND0/KEND1, and 32 bytes are stored
//      to [OUT00] (OUT00 advances by 32).
//   2. Folds the four input blocks held in v4..v7 into the GHASH accumulator
//      HASH0. v6 and v7 are byte-reversed per 64-bit lane here; v4 and v5 are
//      used as they arrive.
//   Reads:  HASH0, v2, v4..v7, CTR3, KEND0/KEND1, HASH1..HASH4, v16, v17.
//   Writes: HASH0 (result), CTR3, v4..v10, x21..x24.
319.macro GHASH_DEC_BLOCK
320    ext HASH0.16b, HASH0.16b, HASH0.16b, #8 // PRE 0
321    mov x21, v2.d[0]                        // AES[2] block - mov low
322    mov x22, v2.d[1]                        // AES[2] block - mov high
323    rev64 v6.16b, v6.16b                    // GHASH[2]
324#ifdef HITLS_BIG_ENDIAN
325    REV_2S x21, x22
326#endif
327    eor v4.16b, v4.16b, HASH0.16b           // PRE 1
328    eor CTR3.16b, OUT3.16b, CTR3.16b        // AES[3] block - result
329    eor x21, x21, KEND0                     // AES[2] - xor KEND0 (low)
330    eor x22, x22, KEND1                     // AES[2] - xor KEND1 (high)
331    pmull2 v9.1q, v4.2d, HASH4.2d           // GHASH block 4k - high
332    mov d8, v4.d[1]                         // GHASH block 4k - mid
333    mov d10, v17.d[1]                       // GHASH block 4k - mid
334    mov x24, CTR3.d[1]                      // AES[3] block - mov high
335    pmull HASH0.1q, v4.1d, HASH4.1d         // GHASH block 4k - low
336    eor v8.8b, v8.8b, v4.8b                 // GHASH block 4k - mid
337    pmull2 v4.1q, v5.2d, HASH3.2d           // GHASH block 4k+1 - high
338    mov x23, CTR3.d[0]                      // AES[3] block - mov low
339    rev64 v7.16b, v7.16b                    // GHASH[0]
340#ifdef HITLS_BIG_ENDIAN
341    REV_2S x24, x23
342#endif
343    pmull v10.1q, v8.1d, v10.1d             // GHASH block 4k - mid
344    eor x23, x23, KEND0                     // AES[3] block - xor KEND0 (low)
345    pmull v8.1q, v5.1d, HASH3.1d            // GHASH block 4k+1 - low
346    eor x24, x24, KEND1                     // AES[3] block - xor KEND1 (high)
347    eor v9.16b, v9.16b, v4.16b              // GHASH block 4k+1 - high
348    mov d4, v5.d[1]                         // GHASH block 4k+1 - mid
349    eor HASH0.16b, HASH0.16b, v8.16b        // GHASH block 4k+1 - low
350    mov d8, v6.d[1]                         // GHASH[2] - mid
351    eor v4.8b, v4.8b, v5.8b                 // GHASH block 4k+1 - mid
352    pmull v5.1q, v6.1d, HASH2.1d            // GHASH[2] - low
353    eor v8.8b, v8.8b, v6.8b                 // GHASH[2] - mid
354    eor HASH0.16b, HASH0.16b, v5.16b        // GHASH[2] - low
355    pmull v4.1q, v4.1d, v17.1d              // GHASH block 4k+1 - mid
356    ins v8.d[1], v8.d[0]                    // GHASH[2] - mid
357    eor v10.16b, v10.16b, v4.16b            // GHASH block 4k+1 - mid
358    pmull2 v4.1q, v6.2d, HASH2.2d           // GHASH[2] - high
359    mov d6, v7.d[1]                         // GHASH[0] - mid
360    pmull2 v8.1q, v8.2d, v16.2d             // GHASH[2] - mid
361    eor v9.16b, v9.16b, v4.16b              // GHASH[2] - high
362    pmull v4.1q, v7.1d, HASH1.1d            // GHASH[0] - low
363    eor v10.16b, v10.16b, v8.16b            // GHASH[2] - mid
364    pmull2 v5.1q, v7.2d, HASH1.2d           // GHASH[0] - high
365    eor v6.8b, v6.8b, v7.8b                 // GHASH[0] - mid
366    eor v9.16b, v9.16b, v5.16b              // GHASH[0] - high
367    pmull v6.1q, v6.1d, v16.1d              // GHASH[0] - mid
368    movi v8.8b, #0xc2                       // every byte of d8 = 0xc2
369    eor HASH0.16b, HASH0.16b, v4.16b        // GHASH[0] - low
370    shl d8, d8, #56                         // mod_constant
371    eor v10.16b, v10.16b, v6.16b            // GHASH[0] - mid
372    pmull v7.1q, v9.1d, v8.1d               // MODULO - top 64b align with mid
373    eor v6.16b, HASH0.16b, v9.16b           // MODULO - karatsuba tidy up
374    ext v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
375    eor v10.16b, v10.16b, v6.16b            // MODULO - karatsuba tidy up
376    eor v10.16b, v10.16b, v7.16b            // MODULO - fold into mid
377    eor v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
378    pmull v8.1q, v10.1d, v8.1d              // MODULO - mid 64b align with low
379    eor HASH0.16b, HASH0.16b, v8.16b        // MODULO - fold into low
380    stp x21, x22, [OUT00], #16              // AES[2] block - store result
381    ext v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
382    stp x23, x24, [OUT00], #16              // AES[3] block - store result
383    eor HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
384.endm
385
// Single-block path: run nine full AES rounds (AESE + AESMC) on CTR0,
// using round keys KEY0 through KEY8 in order.
.macro FIRST16_ROUND
    .irp RKEY, KEY0.16b, KEY1.16b, KEY2.16b, KEY3.16b, KEY4.16b, KEY5.16b, KEY6.16b, KEY7.16b, KEY8.16b
    ROUND CTR0.16b, \RKEY
    .endr
.endm
397
// Decrypt-path handling of one 16-byte block:
//   - loads 16 input bytes into OUT0 (INPUT advances by 16);
//   - CTR0 ^= OUT0, moved to x6/x7, XORed with KEND0/KEND1 and stored to
//     [OUT00] (OUT00 advances by 16);
//   - decrements COUNT with subs (flags are updated);
//   - rebuilds CTR0 from x10/x11/IV_W, increments IV_W, copies CTR0 to CTR1;
//   - folds the loaded block (byte-reversed per 64-bit lane) into the GHASH
//     accumulator using HASH1_2 (v12) and v16, then reduces with 0xc2 << 56.
// v11 is HASH0: it is zeroed after its half-swapped copy has been taken into
// v8, and is then rebuilt as the new accumulator.
// Clobbers x6, x7, x9, v4..v10.
398.macro DEC16_BLOCK
399    ld1 {OUT0.16b}, [INPUT], #16
400    eor CTR0.16b, CTR0.16b, OUT0.16b        // data->out[i] = data->in[i] ^ data->ctr[i];
401    subs COUNT, COUNT, #1                   // COUNT--
402    mov	x6, CTR0.d[0]
403    mov	x7, CTR0.d[1]
404#ifdef HITLS_BIG_ENDIAN
405    REV_2S x6, x7
406#endif
407    rev w9, IV_W                            // CTR[0]
408    eor x6, x6, KEND0
409    orr x9, x11, x9, lsl #32                // CTR[0]
410    eor x7, x7, KEND1
411    stp x6, x7, [OUT00], #16                // OUT OK
412    add IV_W, IV_W, #1                      // CTR++
413    fmov d0, x10                            // CTR[0]
414    fmov CTR0.d[1], x9                      // CTR[0]--OK
415    ext	v8.16b, HASH0.16b, HASH0.16b, #8    // prepare final partial tag
416    movi v11.8b, #0                         // clear HASH0 (v11); its swapped copy is in v8
417    movi v9.8b, #0                          // clear high accumulator
418    movi v10.8b, #0                         // clear mid accumulator
419    rev64 v4.16b, OUT0.16b                  // GHASH final block
420    mov CTR1.16b, CTR0.16b                  // CTR1 = copy of the new CTR0
421    eor	v4.16b, v4.16b, v8.16b              // feed in partial tag
422    mov	d8, v4.d[1]                         // GHASH final block - mid
423    pmull v6.1q, v4.1d, HASH1_2.1d          // GHASH final block - low
424    eor	v8.8b, v8.8b, v4.8b                 // GHASH final block - mid
425    pmull2 v5.1q, v4.2d, HASH1_2.2d         // GHASH final block - high
426    pmull v8.1q, v8.1d, v16.1d              // GHASH final block - mid
427    eor	HASH0.16b, HASH0.16b, v6.16b        // GHASH final block - low
428    eor	v9.16b, v9.16b, v5.16b              // GHASH final block - high
429    eor	v10.16b, v10.16b, v8.16b            // GHASH final block - mid
430    movi v8.8b, #0xc2                       // every byte of d8 = 0xc2
431    eor	v7.16b, HASH0.16b, v9.16b           // MODULO - karatsuba tidy up
432    shl	d8, d8, #56                         // mod_constant
433    eor	v10.16b, v10.16b, v7.16b            // MODULO - karatsuba tidy up
434    pmull v5.1q, v9.1d, v8.1d               // MODULO - top 64b align with mid
435    ext	v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
436    eor	v10.16b, v10.16b, v5.16b            // MODULO - fold into mid
437    eor	v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
438    pmull v9.1q, v10.1d, v8.1d              // MODULO - mid 64b align with low
439    ext	v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
440    eor	HASH0.16b, HASH0.16b, v9.16b        // MODULO - fold into low
441    eor	HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
442.endm
443
// Encrypt-path handling of one 16-byte block:
//   - on entry x6/x7 hold the 16 input bytes; they are XORed with
//     KEND0/KEND1, moved into OUT0, XORed with CTR0 and stored to [OUT00]
//     (OUT00 advances by 16);
//   - rebuilds CTR0 from x10/x11/IV_W, increments IV_W, copies CTR0 to CTR1;
//   - folds the stored block (byte-reversed per 64-bit lane) into the GHASH
//     accumulator using HASH1_2 (v12) and v16, then reduces with 0xc2 << 56.
// Unlike DEC16_BLOCK, this macro touches neither INPUT nor COUNT.
// v11 is HASH0: it is zeroed after its half-swapped copy has been taken into
// v8, and is then rebuilt as the new accumulator.
// Clobbers x6, x7, x9, v4..v10.
444.macro ENC16_BLOCK
445    eor x6, x6, KEND0                       // round 10 low
446    eor x7, x7, KEND1                       // round 10 high
447    rev w9, IV_W                            // CTR[0]
448    fmov d4, x6                             // INPUT 0 - mov low
449    fmov OUT0.d[1], x7                      // AES[0] - mov high
450    orr x9, x11, x9, lsl #32                // CTR[0]
451    add IV_W, IV_W, #1                      // CTR++
452    eor OUT0.16b, OUT0.16b, CTR0.16b        // AES[0] - result
453    st1 {OUT0.16b}, [OUT00], #16            // AES[0] - store result
454    fmov d0, x10                            // CTR[0]
455    fmov CTR0.d[1], x9                      // CTR[0]--OK
456    ext	v8.16b, HASH0.16b, HASH0.16b, #8    // prepare final partial tag
457    movi v11.8b, #0                         // clear HASH0 (v11); its swapped copy is in v8
458    movi v9.8b, #0                          // clear high accumulator
459    movi v10.8b, #0                         // clear mid accumulator
460    rev64 v4.16b, OUT0.16b                  // GHASH final block
461    mov CTR1.16b, CTR0.16b                  // CTR1 = copy of the new CTR0
462    eor	v4.16b, v4.16b, v8.16b              // feed in partial tag
463    mov	d8, v4.d[1]                         // GHASH final block - mid
464    pmull v6.1q, v4.1d, HASH1_2.1d          // GHASH final block - low
465    eor	v8.8b, v8.8b, v4.8b                 // GHASH final block - mid
466    pmull2 v5.1q, v4.2d, HASH1_2.2d         // GHASH final block - high
467    pmull v8.1q, v8.1d, v16.1d              // GHASH final block - mid
468    eor	HASH0.16b, HASH0.16b, v6.16b        // GHASH final block - low
469    eor	v9.16b, v9.16b, v5.16b              // GHASH final block - high
470    eor	v10.16b, v10.16b, v8.16b            // GHASH final block - mid
471    movi v8.8b, #0xc2                       // every byte of d8 = 0xc2
472    eor	v7.16b, HASH0.16b, v9.16b           // MODULO - karatsuba tidy up
473    shl	d8, d8, #56                         // mod_constant
474    eor	v10.16b, v10.16b, v7.16b            // MODULO - karatsuba tidy up
475    pmull v5.1q, v9.1d, v8.1d               // MODULO - top 64b align with mid
476    ext	v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
477    eor	v10.16b, v10.16b, v5.16b            // MODULO - fold into mid
478    eor	v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
479    pmull v9.1q, v10.1d, v8.1d              // MODULO - mid 64b align with low
480    ext	v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
481    eor	HASH0.16b, HASH0.16b, v9.16b        // MODULO - fold into low
482    eor	HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
483.endm
484
// Setup for the single-block path:
//   - swaps the 64-bit halves of HASH0, HASH1, HASH2 and byte-reverses HASH0
//     within each 64-bit lane;
//   - loads KEND0/KEND1 from [KEY00] (KEY00 is not advanced);
//   - loads the 16-byte IV from [IVEC0] into IV_H/IV_L and into CTR0;
//   - extracts the top 32 bits of IV_L into IV_C, byte-reverses them and
//     increments the counter;
//   - clears the upper 32 bits of x11 (IV_L);
//   - leaves v8 = trn1(HASH1, HASH2) and v16 = trn2(HASH1, HASH2) ^ v8.
485.macro BEFORE16_ROUND
486    ext HASH0.16b, HASH0.16b, HASH0.16b, #8                         // xi
487    ext HASH1.16b, HASH1.16b, HASH1.16b, #8                         // h^1
488    ext HASH2.16b, HASH2.16b, HASH2.16b, #8                         // h^2
489    ldp KEND0, KEND1, [KEY00]                                       // load key-10
490#ifdef HITLS_BIG_ENDIAN
491    ror KEND0, KEND0, #32                                           // swap the two 32-bit halves
492    ror KEND1, KEND1, #32                                           // swap the two 32-bit halves
493#endif
494    ldp IV_H, IV_L, [IVEC0]                                         // load IV
495#ifdef HITLS_BIG_ENDIAN
496    rev IV_H, IV_H
497    rev IV_L, IV_L
498#endif
499    lsr IV_C, IV_L, #32                                             // x12 = top 32 bits of IV_L
500    ld1 {CTR0.16b}, [IVEC0]                                         // CTR[0]
501    rev IV_W, IV_W                                                  // rev_ctr32
502    trn1 v8.2d, HASH1.2d, HASH2.2d                                  // h2h | h1h
503    trn2 v16.2d, HASH1.2d, HASH2.2d                                 // h2l | h1l
504    orr w11, w11, w11                                               // W-register write clears bits 63:32 of x11 (IV_L)
505    rev64 HASH0.16b, HASH0.16b                                      // byte-reverse each 64-bit lane
506    add IV_W, IV_W, #1                                              // ctr++
507    eor	v16.16b, v16.16b, v8.16b                                    // h2k | h1k
508.endm
509
510#endif