• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_CCM)
18
19.text
20
/*
 * 16-byte constants for the AES-CCM routines in this file.
 * They are pure data, so they live in .rodata instead of the executable .text section;
 * .popsection restores whichever section was active before.
 *
 * Keep g_one directly after g_byteSwapMask (both 16 bytes, 16-byte aligned):
 * code may address it as g_byteSwapMask + 0x10.
 */
.pushsection .rodata
.balign 16
.type   g_byteSwapMask, @object
g_byteSwapMask:                     // vpshufb control that reverses all 16 bytes of a register
.byte   0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
.byte   0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
.size   g_byteSwapMask, .-g_byteSwapMask
.balign 16
.type   g_one, @object
g_one:                              // 1 in the low 64-bit lane, 0 in the high lane
.byte   0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.byte   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.size   g_one, .-g_one
.popsection
31
/*
 * void AesCcmEncryptAsm(void *key, uint8_t *nonce, const uint8_t *in, uint8_t *out, uint32_t len)
 *
 * Bulk AES-CCM encryption. For every whole 16-byte block of input:
 *     tag     = AES(key, tag ^ in)         CBC-MAC step
 *     last    = AES(key, counter)          CTR keystream block
 *     out     = in ^ last
 *     counter = counter + 1
 * Both AES computations of a block run interleaved in the same round loop.
 * Only len / 16 blocks are processed; if that is zero, nothing is read or written.
 *
 * rdi *key     round keys, 16 bytes each, starting at offset 0; 32-bit round count at offset 0xf0
 * rsi *nonce   state buffer, updated in place before returning:
 *                  0x00  counter block (read and written)
 *                  0x10  running tag (read and written)
 *                  0x20  last keystream block (written only)
 * rdx *in
 * rcx *out
 * r8  len      byte count
 *
 * The counter is incremented by byte-reversing the block, adding 1 to the low 64-bit lane and
 * reversing back, i.e. the final 8 bytes of the block are a big-endian 64-bit counter and no
 * carry propagates into the first 8 bytes.
 *
 * Clobbers: rcx, rdx, r8-r11, flags, xmm0-xmm3, xmm8, xmm9, xmm14, xmm15.
 * xmm0-xmm3, xmm8 and xmm9 are zeroed before returning.
 */
.globl  AesCcmEncryptAsm
.type   AesCcmEncryptAsm, @function
.balign 16
AesCcmEncryptAsm:
.cfi_startproc
    shr $4, %r8d                    // r8d = number of whole 16-byte blocks
    jz .Lenc_ret                    // less than one block: leave the state untouched
    mov 0xf0(%rdi), %r9d            // key->rounds
    vmovdqa g_byteSwapMask(%rip), %xmm15
    sub $1, %r9d                    // rounds - 1 vaesenc rounds, followed by one vaesenclast
    vmovdqa g_one(%rip), %xmm14     // addressed by its own symbol, not as g_byteSwapMask + 0x10
    vmovdqu (%rsi), %xmm0           // counter block
    vmovdqu 0x10(%rsi), %xmm8       // running tag
                                    // "last" (xmm9) is produced by every iteration, so it is
                                    // not loaded from the state buffer
.balign 16
.Lenc_outer_loop:
    mov %r9d, %r10d                 // r10d = vaesenc rounds left for this block
    vpxor (%rdx), %xmm8, %xmm8      // tag ^= in
    vmovdqu (%rdi), %xmm1           // round key 0
    lea 0x10(%rdi), %r11            // r11 -> round key 1
    vpxor %xmm0, %xmm1, %xmm2       // CTR lane: counter ^ key0
    vpshufb %xmm15, %xmm0, %xmm0    // counter to byte-reversed form for the increment below
    vpxor %xmm8, %xmm1, %xmm3       // CBC-MAC lane: tag ^ key0
.balign 16
.Lenc_aes_loop:
    vmovdqu (%r11), %xmm1
    vaesenc %xmm1, %xmm2, %xmm2
    vaesenc %xmm1, %xmm3, %xmm3
    lea 0x10(%r11), %r11            // advance to the next round key
    dec %r10d
    jnz .Lenc_aes_loop
    vmovdqu (%r11), %xmm1           // final round key
    vpaddq %xmm14, %xmm0, %xmm0     // byte-reversed counter + 1
    vaesenclast %xmm1, %xmm2, %xmm9 // last = keystream block
    vaesenclast %xmm1, %xmm3, %xmm8 // tag = AES(tag ^ in)
    vpxor (%rdx), %xmm9, %xmm2      // out = in ^ last
    vpshufb %xmm15, %xmm0, %xmm0    // counter back to its in-memory byte order
    lea 0x10(%rdx), %rdx            // next input block
    vmovdqu %xmm2, (%rcx)           // store output block

    lea 0x10(%rcx), %rcx            // next output block
    dec %r8d
    jnz .Lenc_outer_loop

    /* Write the state back and wipe every register that held key or data material. */
    vpxor %xmm1, %xmm1, %xmm1
    vpxor %xmm2, %xmm2, %xmm2
    vpxor %xmm3, %xmm3, %xmm3
    vmovdqu %xmm0, (%rsi)           // store counter
    vpxor %xmm0, %xmm0, %xmm0
    vmovdqu %xmm8, 0x10(%rsi)       // store tag
    vpxor %xmm8, %xmm8, %xmm8
    vmovdqu %xmm9, 0x20(%rsi)       // store last keystream block
    vpxor %xmm9, %xmm9, %xmm9
.Lenc_ret:
    ret
.cfi_endproc
.size   AesCcmEncryptAsm, .-AesCcmEncryptAsm
97
/*
 * void AesCcmDecryptAsm(void *key, uint8_t *nonce, const uint8_t *in, uint8_t *out, uint32_t len)
 *
 * Bulk AES-CCM decryption. For every whole 16-byte block of input:
 *     last    = AES(key, counter)          CTR keystream block
 *     out     = in ^ last
 *     tag     = AES(key, tag ^ out)        CBC-MAC step over the recovered block
 *     counter = counter + 1
 * The CBC-MAC step of block i needs the output of block i, so it is paired with the keystream
 * computation of block i + 1:
 *     1. first block:      keystream only
 *     2. pipelined loop:   keystream of block i + 1 together with the MAC of block i
 *     3. final step:       MAC of the last block only
 * Only len / 16 blocks are processed; if that is zero, nothing is read or written.
 *
 * rdi *key     round keys, 16 bytes each, starting at offset 0; 32-bit round count at offset 0xf0
 * rsi *nonce   state buffer, updated in place before returning:
 *                  0x00  counter block (read and written)
 *                  0x10  running tag (read and written)
 *                  0x20  last keystream block (written only)
 * rdx *in
 * rcx *out
 * r8  len      byte count
 *
 * The counter is incremented by byte-reversing the block, adding 1 to the low 64-bit lane and
 * reversing back, i.e. the final 8 bytes of the block are a big-endian 64-bit counter and no
 * carry propagates into the first 8 bytes.
 *
 * Clobbers: rcx, rdx, r8-r11, flags, xmm0-xmm4, xmm8, xmm14, xmm15.
 * xmm0-xmm4 and xmm8 are zeroed before returning.
 */
.globl  AesCcmDecryptAsm
.type   AesCcmDecryptAsm, @function
.balign 16
AesCcmDecryptAsm:
.cfi_startproc
    shr $4, %r8d                    // r8d = number of whole 16-byte blocks
    jz .Ldec_ret                    // less than one block: leave the state untouched
    mov 0xf0(%rdi), %r9d            // key->rounds
    vmovdqa g_byteSwapMask(%rip), %xmm15
    sub $1, %r9d                    // rounds - 1 vaesenc rounds, followed by one vaesenclast
    vmovdqa g_one(%rip), %xmm14     // addressed by its own symbol, not as g_byteSwapMask + 0x10
    vmovdqu (%rsi), %xmm0           // counter block
    vmovdqu 0x10(%rsi), %xmm8       // running tag

    /* 1. First block: keystream only. */
    mov %r9d, %r10d                 // r10d = vaesenc rounds left
    lea 0x10(%rdi), %r11            // r11 -> round key 1
    vmovdqu (%rdi), %xmm1           // round key 0
    vpxor %xmm0, %xmm1, %xmm2       // CTR lane: counter ^ key0
.Ldec_aes_loop:
    vmovdqu (%r11), %xmm1
    vaesenc %xmm1, %xmm2, %xmm2
    lea 0x10(%r11), %r11            // advance to the next round key
    dec %r10d
    jnz .Ldec_aes_loop
    vmovdqu (%r11), %xmm1           // final round key
    vaesenclast %xmm1, %xmm2, %xmm4 // last = keystream block
    vpxor (%rdx), %xmm4, %xmm2      // out = in ^ last
    vpxor %xmm2, %xmm8, %xmm8       // tag ^= out (encrypted in step 2 or 3)
    vmovdqu %xmm2, (%rcx)           // store output block
    lea 0x10(%rdx), %rdx            // next input block
    lea 0x10(%rcx), %rcx            // next output block
    vpshufb %xmm15, %xmm0, %xmm0    // counter to byte-reversed form
    vpaddq %xmm14, %xmm0, %xmm0     // byte-reversed counter + 1
    vpshufb %xmm15, %xmm0, %xmm0    // counter back to its in-memory byte order
    cmp $2, %r8d
    jb .Ldec_last_mac               // single block: only its MAC step remains

    /*
     * 2. Pipelined loop. On entry r8d >= 2 and counts the blocks not yet decrypted plus one;
     *    the loop is left with r8d == 1.
     */
.Ldec_parallel_loop:
    mov %r9d, %r10d                 // r10d = vaesenc rounds left
    lea 0x10(%rdi), %r11            // r11 -> round key 1
    vmovdqu (%rdi), %xmm1           // round key 0
    vpxor %xmm0, %xmm1, %xmm2       // CTR lane: counter ^ key0
    vpxor %xmm8, %xmm1, %xmm3       // CBC-MAC lane: (tag ^ previous out) ^ key0
.Ldec_parallel_inner_loop:
    vmovdqu (%r11), %xmm1
    vaesenc %xmm1, %xmm2, %xmm2
    lea 0x10(%r11), %r11            // advance to the next round key
    vaesenc %xmm1, %xmm3, %xmm3
    dec %r10d
    jnz .Ldec_parallel_inner_loop
    vmovdqu (%r11), %xmm1           // final round key
    vaesenclast %xmm1, %xmm2, %xmm4 // last = keystream block
    vaesenclast %xmm1, %xmm3, %xmm8 // tag = AES(tag ^ previous out)
    vpxor (%rdx), %xmm4, %xmm2      // out = in ^ last
    vpxor %xmm2, %xmm8, %xmm8       // tag ^= out
    vmovdqu %xmm2, (%rcx)           // store output block
    lea 0x10(%rdx), %rdx            // next input block
    lea 0x10(%rcx), %rcx            // next output block
    vpshufb %xmm15, %xmm0, %xmm0    // counter to byte-reversed form
    vpaddq %xmm14, %xmm0, %xmm0     // byte-reversed counter + 1
    vpshufb %xmm15, %xmm0, %xmm0    // counter back to its in-memory byte order
    dec %r8d
    cmp $2, %r8d
    jae .Ldec_parallel_loop

    /*
     * 3. MAC step of the last block. r8d is always 1 here, so every block has been decrypted
     *    and no branch back to the start is required.
     */
.Ldec_last_mac:
    mov %r9d, %r10d                 // r10d = vaesenc rounds left
    lea 0x10(%rdi), %r11            // r11 -> round key 1
    vmovdqu (%rdi), %xmm1           // round key 0
    vpxor %xmm8, %xmm1, %xmm3       // CBC-MAC lane: (tag ^ last out) ^ key0
.Ldec_last_mac_loop:
    vmovdqu (%r11), %xmm1
    vaesenc %xmm1, %xmm3, %xmm3
    lea 0x10(%r11), %r11            // advance to the next round key
    dec %r10d
    jnz .Ldec_last_mac_loop
    vmovdqu (%r11), %xmm1           // final round key
    vaesenclast %xmm1, %xmm3, %xmm8 // tag = AES(tag ^ last out)

    /* Write the state back once and wipe every register that held key or data material. */
    vmovdqu %xmm0, (%rsi)           // store counter
    vpxor %xmm0, %xmm0, %xmm0
    vpxor %xmm1, %xmm1, %xmm1
    vpxor %xmm2, %xmm2, %xmm2
    vmovdqu %xmm8, 0x10(%rsi)       // store tag
    vpxor %xmm8, %xmm8, %xmm8
    vpxor %xmm3, %xmm3, %xmm3
    vmovdqu %xmm4, 0x20(%rsi)       // store last keystream block
    vpxor %xmm4, %xmm4, %xmm4
.Ldec_ret:
    ret
.cfi_endproc
.size   AesCcmDecryptAsm, .-AesCcmDecryptAsm
204#endif
205