/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)

.text

.balign 16
g_byteSwapMask:
.byte   0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
.byte   0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
.size   g_byteSwapMask, .-g_byteSwapMask
.balign 16
g_poly:
.byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
.size   g_poly, .-g_poly
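
/*
 * g_byteSwapMask: vpshufb control that reverses all 16 bytes of an XMM
 * register, converting between the byte order of the data stream and the
 * reflected representation used by the GHASH multiply below.
 * g_poly: 0xC2 in the top byte is the standard reflected reduction constant
 * for the GHASH field polynomial x^128 + x^7 + x^2 + x + 1.
 */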

/*
 * void AES_GCM_Encrypt16BlockAsm(MODES_GCM_Ctx *ctx, const uint8_t *in,
 *                                       uint8_t *out, uint32_t len, void *key);
 * ctx  %rdi
 * in   %rsi
 * out  %rdx
 * len  %rcx
 * key  %r8
 */
.globl  AES_GCM_Encrypt16BlockAsm
.type   AES_GCM_Encrypt16BlockAsm, @function
.align 32
AES_GCM_Encrypt16BlockAsm:
.cfi_startproc
    leaq g_byteSwapMask(%rip), %r11
    shrl $4, %ecx                           // len / 16 = number of blocks = loop count
    mov 240(%r8), %r9d                      // number of AES rounds (10/12/14)
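
    /*
     * Per-block loop: each iteration increments the 32-bit big-endian counter
     * in the last four bytes of the counter block, encrypts the counter block,
     * XORs the keystream with one 16-byte plaintext block, and folds the
     * resulting ciphertext block into the GHASH accumulator kept at 16(%rdi).
     */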
.Lenc_loop:
    mov 12(%rdi), %eax                      // load the big-endian 32-bit counter word
    addl $0x1000000, %eax                   // increment the low counter byte (at offset 15)
    vmovdqu (%rdi), %xmm0                   // load the 16-byte counter block (IV)
    jc .Lenc_ctr_carry                      // low counter byte wrapped around
    jmp .Lenc_aes_cipher
.Lenc_ctr_carry:
    bswap %eax                              // to native byte order
    addl $0x100, %eax                       // propagate the carry through the remaining counter bytes
    bswap %eax                              // back to big-endian
    jmp .Lenc_aes_cipher
.Lenc_aes_cipher:
    mov %eax, 12(%rdi)                      // store the updated counter word
    vmovdqu (%r8), %xmm1                    // round key 0
    vpxor %xmm1, %xmm0, %xmm0               // AddRoundKey 0

    vmovdqu 0x10(%r8), %xmm2                // key1
    lea 0xa0(%r8), %r10                     // r10 -> last round key for AES-128 (key 10)
    vmovdqu 0x20(%r8), %xmm3                // key2
    vaesenc %xmm2, %xmm0, %xmm0

    vmovdqu 0x30(%r8), %xmm4                // key3
    vaesenc %xmm3, %xmm0, %xmm0

    vmovdqu 0x40(%r8), %xmm5                // key4
    vaesenc %xmm4, %xmm0, %xmm0

    vmovdqu 0x50(%r8), %xmm6                // key5
    vaesenc %xmm5, %xmm0, %xmm0

    vmovdqu 0x60(%r8), %xmm7                // key6
    vaesenc %xmm6, %xmm0, %xmm0

    vmovdqu 0x70(%r8), %xmm8                // key7
    vaesenc %xmm7, %xmm0, %xmm0

    vmovdqu 0x80(%r8), %xmm9                // key8
    vaesenc %xmm8, %xmm0, %xmm0

    vmovdqu 0x90(%r8), %xmm10               // key9
    vaesenc %xmm9, %xmm0, %xmm0

    vaesenc %xmm10, %xmm0, %xmm0
    cmp $12, %r9d                           // compare rounds with 12: below = AES-128,
                                            // equal = AES-192, above = AES-256

    jb .Lenc_aes_end
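
    /*
     * r9d is 10, 12, or 14 (AES-128/192/256). AES-128 needs no further rounds
     * before the final vaesenclast; AES-192 runs two more rounds (keys 10-11)
     * and AES-256 four more (keys 10-13), advancing r10 past the keys already
     * consumed so that (%r10) always ends up pointing at the last round key.
     */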

    vmovdqu (%r10), %xmm1                   // key10
    vaesenc %xmm1, %xmm0, %xmm0

    vmovdqu 0x10(%r10), %xmm2               // key11
    vaesenc %xmm2, %xmm0, %xmm0
    lea 0x20(%r10), %r10

    je .Lenc_aes_end

    vmovdqu (%r10), %xmm1                   // key12
    vaesenc %xmm1, %xmm0, %xmm0

    vmovdqu 0x10(%r10), %xmm2               // key13
    vaesenc %xmm2, %xmm0, %xmm0
    lea 0x20(%r10), %r10
    jmp .Lenc_aes_end
.Lenc_aes_end:
    vmovdqu (%r10), %xmm1                   // key last
    vpxor (%rsi), %xmm1, %xmm1              // XOR the plaintext block into the last round key in advance,
                                            // so vaesenclast yields the ciphertext directly
    vaesenclast %xmm1, %xmm0, %xmm0
    vmovdqu %xmm0, (%rdx)                   // out: write ciphertext block

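    /*
     * GHASH update: XOR the new ciphertext block into the accumulator at
     * 16(%rdi), byte-reflect it, and multiply by the hash key H at 32(%rdi)
     * in GF(2^128) using Karatsuba (three vpclmulqdq: high, low, and a middle
     * term formed with the "H^1_2" value at 64(%rdi), presumably the XOR of
     * the two 64-bit halves of H), followed by a two-phase reduction with the
     * g_poly constant.
     */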
    vmovdqu 16(%rdi), %xmm1                 // ghash accumulator
    vmovdqa (%r11), %xmm15                  // g_byteSwapMask
    vpxor %xmm1, %xmm0, %xmm0               // input for ghash operation
    vmovdqu 32(%rdi), %xmm1                 // Hash key H^1
    vpshufb %xmm15, %xmm0, %xmm0            // byte-reflect for GHASH
    vmovdqu 32+32(%rdi), %xmm2              // Hash key H^1_2

    vpalignr $8, %xmm0, %xmm0, %xmm3        // swap 64-bit halves

    vpclmulqdq $0x11, %xmm1, %xmm0, %xmm5   // Karatsuba Multiply
    vpxor %xmm0, %xmm3, %xmm3
    vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0
    vpxor %xmm0, %xmm5, %xmm1
    vpclmulqdq $0x00, %xmm2, %xmm3, %xmm3
    vpxor %xmm1, %xmm3, %xmm3

    vpslldq $8, %xmm3, %xmm4
    vpsrldq $8, %xmm3, %xmm3
    vpxor %xmm4, %xmm0, %xmm0
    vpxor %xmm3, %xmm5, %xmm5

    vmovdqa 0x10(%r11), %xmm14              // g_poly
    vpalignr $8, %xmm0, %xmm0, %xmm2        // 1st phase of reduction
    vpclmulqdq $0x10, %xmm14, %xmm0, %xmm0
    vpxor %xmm2, %xmm0, %xmm0

    vpalignr $8, %xmm0, %xmm0, %xmm2        // 2nd phase of reduction
    vpclmulqdq $0x10, %xmm14, %xmm0, %xmm0
    vpxor %xmm5, %xmm2, %xmm2
    vpxor %xmm2, %xmm0, %xmm0

    vpshufb %xmm15, %xmm0, %xmm0            // reflect the result back to stream byte order
    lea 0x10(%rsi), %rsi                    // advance input pointer
    vmovdqu %xmm0, 16(%rdi)                 // store the updated ghash accumulator
    lea 0x10(%rdx), %rdx                    // advance output pointer
    dec %ecx
    jnz .Lenc_loop
    ret
.cfi_endproc
.size   AES_GCM_Encrypt16BlockAsm, .-AES_GCM_Encrypt16BlockAsm

/*
 * void AES_GCM_Decrypt16BlockAsm(MODES_GCM_Ctx *ctx, const uint8_t *in,
 *                                       uint8_t *out, uint32_t len, void *key);
 * ctx  %rdi
 * in   %rsi
 * out  %rdx
 * len  %rcx
 * key  %r8
 */
.globl  AES_GCM_Decrypt16BlockAsm
.type   AES_GCM_Decrypt16BlockAsm, @function
.balign 32
AES_GCM_Decrypt16BlockAsm:
.cfi_startproc
    leaq g_byteSwapMask(%rip), %r11
    vmovdqu 16(%rdi), %xmm10                // ghash accumulator
    shrl $4, %ecx                           // len / 16 = number of blocks = loop count
    vmovdqa (%r11), %xmm15                  // g_byteSwapMask
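
    /*
     * Decrypt loop: GHASH absorbs the incoming ciphertext, which is already
     * available before decryption, so each iteration interleaves the GF(2^128)
     * multiply and reduction with the AES rounds that generate the keystream,
     * placing a few GHASH instructions between consecutive vaesenc steps.
     */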
.Ldec_loop:
    mov 12(%rdi), %eax                      // load the big-endian 32-bit counter word
    addl $0x1000000, %eax                   // increment the low counter byte (at offset 15)
    mov 240(%r8), %r9d                      // number of AES rounds (10/12/14)
    vmovdqu (%rdi), %xmm0                   // load the 16-byte counter block (IV)
    jc .Ldec_ctr_carry                      // low counter byte wrapped around
    jmp .Ldec_aes_cipher
.Ldec_ctr_carry:
    bswap %eax                              // to native byte order
    addl $0x100, %eax                       // propagate the carry through the remaining counter bytes
    bswap %eax                              // back to big-endian
    jmp .Ldec_aes_cipher
.balign 32
.Ldec_aes_cipher:
    mov %eax, 12(%rdi)                      // store the updated counter word
    cmp $12, %r9d                           // compare rounds with 12: below = AES-128,
                                            // equal = AES-192, above = AES-256
    vmovdqu (%r8), %xmm1                    // key 0
    vpxor (%rsi), %xmm10, %xmm10            // XOR ciphertext block into the ghash accumulator
    lea 0xa0(%r8), %r10                     // r10 -> last round key for AES-128 (key 10)
    vpxor %xmm1, %xmm0, %xmm0               // AddRoundKey 0
    vmovdqu 0x10(%r8), %xmm1                // key 1
    vmovdqu 32(%rdi), %xmm11                // Hash key H^1
    vmovdqu 32+32(%rdi), %xmm12             // Hash key H^1_2

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x20(%r8), %xmm1                // key 2
    vpshufb %xmm15, %xmm10, %xmm10          // byte-reflect for GHASH
    vpshufd $0x4e, %xmm10, %xmm13           // swap 64-bit halves

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x30(%r8), %xmm1                // key 3
    vpclmulqdq $0x11, %xmm11, %xmm10, %xmm14 // Karatsuba Multiply
    vpxor %xmm10, %xmm13, %xmm13

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x40(%r8), %xmm1                // key 4
    vpclmulqdq $0x00, %xmm11, %xmm10, %xmm10
    vpxor %xmm10, %xmm14, %xmm11

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x50(%r8), %xmm1                // key 5
    vpclmulqdq $0x00, %xmm12, %xmm13, %xmm13
    vpxor %xmm11, %xmm13, %xmm13

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x60(%r8), %xmm1                // key 6
    vpslldq $8, %xmm13, %xmm11
    vpsrldq $8, %xmm13, %xmm13
    vpxor %xmm11, %xmm10, %xmm10
    vpxor %xmm13, %xmm14, %xmm14

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x70(%r8), %xmm1                // key 7
    vmovdqa 0x10(%r11), %xmm13              // g_poly
    vpalignr $8, %xmm10, %xmm10, %xmm12     // 1st phase of reduction

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x80(%r8), %xmm1                // key 8
    vpclmulqdq $0x10, %xmm13, %xmm10, %xmm10
    vpxor %xmm12, %xmm10, %xmm10

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x90(%r8), %xmm1                // key 9
    vpalignr $8, %xmm10, %xmm10, %xmm12     // 2nd phase of reduction
    vpclmulqdq $0x10, %xmm13, %xmm10, %xmm10

    vaesenc %xmm1, %xmm0, %xmm0
    vpxor %xmm14, %xmm12, %xmm12

    jb .Ldec_ending
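
    /*
     * AES-128 (10 rounds) is already complete; AES-192 runs round keys 10-11
     * and AES-256 round keys 10-13 below, advancing r10 so that (%r10) points
     * at the last round key on entry to .Ldec_ending.
     */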

    vmovdqu (%r10), %xmm1                   // key 10
    vmovdqu 0x10(%r10), %xmm2               // key 11
    lea 0x20(%r10), %r10
    vaesenc %xmm1, %xmm0, %xmm0
    vaesenc %xmm2, %xmm0, %xmm0

    je .Ldec_ending

    vmovdqu (%r10), %xmm1                   // key 12
    vmovdqu 0x10(%r10), %xmm2               // key 13
    lea 0x20(%r10), %r10
    vaesenc %xmm1, %xmm0, %xmm0
    vaesenc %xmm2, %xmm0, %xmm0

    jmp .Ldec_ending

.Ldec_ending:
    vmovdqu (%r10), %xmm1                   // key last
    vpxor %xmm12, %xmm10, %xmm10            // complete the 2nd phase of the ghash reduction
    vpxor (%rsi), %xmm1, %xmm1              // XOR the ciphertext block into the last round key in advance,
                                            // so vaesenclast yields the plaintext directly
    vaesenclast %xmm1, %xmm0, %xmm0
    vpshufb %xmm15, %xmm10, %xmm10          // reflect the ghash result back to stream byte order
    vmovdqu %xmm0, (%rdx)                   // out: write plaintext block
    lea 0x10(%rsi), %rsi                    // advance input pointer
    lea 0x10(%rdx), %rdx                    // advance output pointer
    dec %ecx
    jnz .Ldec_loop
    vmovdqu %xmm10, 16(%rdi)                // store the updated ghash accumulator
    ret
.cfi_endproc
.size   AES_GCM_Decrypt16BlockAsm, .-AES_GCM_Decrypt16BlockAsm

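/*
 * AES_GCM_ClearAsm: clears xmm1-xmm12, which hold round keys and GHASH
 * intermediates after the routines above, so that no key material is left
 * behind in vector registers.
 */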
.globl  AES_GCM_ClearAsm
.type   AES_GCM_ClearAsm, @function
.balign 32
AES_GCM_ClearAsm:
.cfi_startproc
    vpxor %xmm1, %xmm1, %xmm1
    vpxor %xmm2, %xmm2, %xmm2
    vpxor %xmm3, %xmm3, %xmm3
    vpxor %xmm4, %xmm4, %xmm4
    vpxor %xmm5, %xmm5, %xmm5
    vpxor %xmm6, %xmm6, %xmm6
    vpxor %xmm7, %xmm7, %xmm7
    vpxor %xmm8, %xmm8, %xmm8
    vpxor %xmm9, %xmm9, %xmm9
    vpxor %xmm10, %xmm10, %xmm10
    vpxor %xmm11, %xmm11, %xmm11
    vpxor %xmm12, %xmm12, %xmm12
    ret
.cfi_endproc
.size   AES_GCM_ClearAsm, .-AES_GCM_ClearAsm
#endif