• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#ifdef HITLS_CRYPTO_AES
18
19 #include "crypt_aes_macro_x86_64.s"
20
21 .file "crypt_aes_x86_64.S"
22 .text
23
/* Integer argument registers, in System V AMD64 order. */
.set    ARG1,   %rdi
.set    ARG2,   %rsi
.set    ARG3,   %rdx
.set    ARG4,   %rcx
.set    ARG5,   %r8
.set    ARG6,   %r9

/* 32-bit integer return value. */
.set    RET,    %eax

/* Vector scratch registers used by the routines in this file. */
.set    XM0,    %xmm0
.set    XM1,    %xmm1
.set    XM2,    %xmm2
.set    XM3,    %xmm3
.set    XM4,    %xmm4
.set    XM5,    %xmm5
38
/**
 * aes128 macro for key extension processing.
 *   xm0: in/out, current 128-bit key word group; on exit it holds
 *        xm0 ^ (xm0 << 4 bytes) ^ (xm0 << 8 bytes) ^ (xm0 << 12 bytes) ^ broadcast(xm1[dword 3]).
 *   xm1: in, result of aeskeygenassist; its highest dword is broadcast to all four lanes (clobbered).
 *   xm2: scratch register holding the progressively shifted copy of xm0 (clobbered).
 */
.macro KEY_EXPANSION_HELPER_128 xm0 xm1 xm2
    vpermilps $0xff, \xm1, \xm1
    vpslldq $4, \xm0, \xm2
    vpxor \xm2, \xm0, \xm0
    /* Two more byte-shift/xor passes fold in the 8- and 12-byte shifted copies. */
    .rept 2
    vpslldq $4, \xm2, \xm2
    vpxor \xm2, \xm0, \xm0
    .endr
    vpxor \xm1, \xm0, \xm0
.endm
52
/**
 * aes192 macro for key extension processing.
 *   xm1: in/out; on exit it holds
 *        xm1 ^ (xm1 << 4 bytes) ^ (xm1 << 8 bytes) ^ (xm1 << 12 bytes).
 *   xm3: scratch register holding the progressively shifted copy of xm1 (clobbered).
 */
.macro KEY_EXPANSION_HELPER_192 xm1 xm3
    vpslldq $4, \xm1, \xm3
    vpxor \xm3, \xm1, \xm1
    /* Two more byte-shift/xor passes fold in the 8- and 12-byte shifted copies. */
    .rept 2
    vpslldq $4, \xm3, \xm3
    vpxor \xm3, \xm1, \xm1
    .endr
.endm
64
/**
 * Local helper for SetEncryptKey128: derives the next round key from the one held in XM0,
 * using round constant rcon, and stores it at byte offset off of the key structure (ARG1).
 * Clobbers XM1 and XM2; XM0 holds the new round key on exit.
 */
.macro AES128_ENC_NEXT_KEY rcon, off
    aeskeygenassist $\rcon, XM0, XM1
    KEY_EXPANSION_HELPER_128 XM0, XM1, XM2
    movdqu XM0, \off(ARG1)
.endm

/**
 *  Function description: Sets the AES encryption key. Key length: 128 bits.
 *  Function prototype: void SetEncryptKey128(CRYPT_AES_Key *ctx, const uint8_t *key);
 *  Input register:
 *        rdi (ARG1): Pointer to the output key structure.
 *        rsi (ARG2): Pointer to the input key.
 *  Change register: xmm0-xmm2 (all zeroed before returning).
 *  Output register: None.
 *  Function/Macro Call: KEY_EXPANSION_HELPER_128.
 */
    .globl SetEncryptKey128
    .type SetEncryptKey128, @function
SetEncryptKey128:
    .cfi_startproc

    /* The round count (10) is written at byte offset 240 of the key structure. */
    movl $10, 240(ARG1)

    /* Round key 0 is the user key itself. */
    movdqu (ARG2), XM0
    movdqu XM0, (ARG1)

    /* Round keys 1..10, stored at consecutive 16-byte slots. */
    AES128_ENC_NEXT_KEY 0x01, 16
    AES128_ENC_NEXT_KEY 0x02, 32
    AES128_ENC_NEXT_KEY 0x04, 48
    AES128_ENC_NEXT_KEY 0x08, 64
    AES128_ENC_NEXT_KEY 0x10, 80
    AES128_ENC_NEXT_KEY 0x20, 96
    AES128_ENC_NEXT_KEY 0x40, 112
    AES128_ENC_NEXT_KEY 0x80, 128
    AES128_ENC_NEXT_KEY 0x1b, 144
    AES128_ENC_NEXT_KEY 0x36, 160

    /* Do not leave key material behind in the vector registers. */
    .irp reg, XM0, XM1, XM2
    vpxor \reg, \reg, \reg
    .endr

    ret
    .cfi_endproc
    .size SetEncryptKey128, .-SetEncryptKey128
131
/**
 * Local helper for SetDecryptKey128: derives the next round key from the one held in XM0,
 * using round constant rcon, applies aesimc to it and stores the result at byte offset off
 * of the key structure (ARG1). Clobbers XM1-XM3; XM0 holds the untransformed new key on exit.
 */
.macro AES128_DEC_NEXT_KEY rcon, off
    aeskeygenassist $\rcon, XM0, XM1
    KEY_EXPANSION_HELPER_128 XM0, XM1, XM2
    aesimc  XM0, XM3
    movdqu XM3, \off(ARG1)
.endm

/**
 *  Function description: Sets the AES decryption key. Key length: 128 bits.
 *  Function prototype: void SetDecryptKey128(CRYPT_AES_Key *ctx, const uint8_t *key);
 *  Input register:
 *        rdi (ARG1): Pointer to the output key structure.
 *        rsi (ARG2): Pointer to the input key.
 *  Change register: xmm0-xmm3 (all zeroed before returning).
 *  Output register: None.
 *  Function/Macro Call: KEY_EXPANSION_HELPER_128.
 */
    .globl SetDecryptKey128
    .type SetDecryptKey128, @function
SetDecryptKey128:
    .cfi_startproc

    /* The round count (10) is written at byte offset 240 of the key structure. */
    movl $10, 240(ARG1)

    /* The schedule is laid out in reverse: the user key goes into the last slot. */
    movdqu (ARG2), XM0
    movdqu XM0, 160(ARG1)

    /* Intermediate keys are stored after aesimc, walking down from offset 144 to 16. */
    AES128_DEC_NEXT_KEY 0x01, 144
    AES128_DEC_NEXT_KEY 0x02, 128
    AES128_DEC_NEXT_KEY 0x04, 112
    AES128_DEC_NEXT_KEY 0x08, 96
    AES128_DEC_NEXT_KEY 0x10, 80
    AES128_DEC_NEXT_KEY 0x20, 64
    AES128_DEC_NEXT_KEY 0x40, 48
    AES128_DEC_NEXT_KEY 0x80, 32
    AES128_DEC_NEXT_KEY 0x1b, 16

    /* The final expanded key is stored in slot 0 without aesimc. */
    aeskeygenassist $0x36, XM0, XM1
    KEY_EXPANSION_HELPER_128 XM0, XM1, XM2
    movdqu XM0, (ARG1)

    /* Do not leave key material behind in the vector registers. */
    .irp reg, XM0, XM1, XM2, XM3
    vpxor \reg, \reg, \reg
    .endr

    ret
    .cfi_endproc
    .size SetDecryptKey128, .-SetDecryptKey128
208
/**
 * Local helpers for SetEncryptKey192. Each one computes the next 16-byte chunk of the
 * schedule into dst, given that last holds the chunk produced immediately before and dst
 * still holds the chunk produced before that. XM1, XM3 (and XM4 for the SPLIT form) are scratch.
 */

/* Chunk whose derivation starts with aeskeygenassist on the previous chunk. */
.macro AES192_ENC_CHUNK_ASSIST rcon, last, dst
    vshufps $0x4e, \last, \dst, XM1
    aeskeygenassist $\rcon, \last, \dst
    KEY_EXPANSION_HELPER_192 XM1, XM3
    vpermilps $0xff, \dst, \dst
    vpxor XM1, \dst, \dst
.endm

/* Chunk whose low half is finished first and then fed to aeskeygenassist for the high half. */
.macro AES192_ENC_CHUNK_SPLIT rcon, last, dst
    vshufps $0x4e, \last, \dst, XM1
    vpslldq $8, XM1, \dst
    vpslldq $4, \dst, XM3
    vpxor  XM3, \dst, \dst
    vpermilps $0xff, \last, XM3
    vpxor  XM3, \dst, \dst
    aeskeygenassist $\rcon, \dst, XM3
    vpermilps $0xff, XM3, XM3
    vpsrldq $8, XM1, XM4
    vpslldq $12, XM4, XM4
    vpxor  XM4, XM1, XM1
    vpxor  XM3, XM1, XM1
    vshufps $0xee, XM1, \dst, \dst
.endm

/* Chunk that needs no aeskeygenassist: only shift/xor chaining plus a broadcast of last. */
.macro AES192_ENC_CHUNK_PLAIN last, dst
    vshufps $0x4e, \last, \dst, XM1
    KEY_EXPANSION_HELPER_192 XM1, XM3
    vpermilps $0xff, \last, \dst
    vpxor XM1, \dst, \dst
.endm

/**
 *  Function description: Sets the AES encryption key. Key length: 192 bits.
 *  Function prototype: void SetEncryptKey192(CRYPT_AES_Key *ctx, const uint8_t *key);
 *  Input register:
 *        rdi (ARG1): Pointer to the output key structure.
 *        rsi (ARG2): Pointer to the input key.
 *  Change register: xmm0-xmm4 (all zeroed before returning).
 *  Output register: None.
 *  Function/Macro Call: KEY_EXPANSION_HELPER_192.
 */
    .globl SetEncryptKey192
    .type SetEncryptKey192, @function
SetEncryptKey192:
    .cfi_startproc

    /* The round count (12) is written at byte offset 240 of the key structure. */
    movl $12, 240(ARG1)

    /* XM0 = key bytes 0..15, XM1 = key bytes 8..23; chunk 0 is stored unchanged. */
    movdqu (ARG2), XM0
    movdqu 8(ARG2), XM1
    movdqu XM0, (ARG1)

    /* Chunk at offset 16. */
    vpxor XM4, XM4, XM4
    vshufps $0x40, XM0, XM4, XM2
    aeskeygenassist $0x01, XM1, XM0
    vshufps $0xf0, XM0, XM4, XM0
    vpslldq $0x04, XM2, XM3
    vpxor XM3, XM2, XM2
    vpxor XM2, XM0, XM0
    vshufps $0xee, XM0, XM1, XM0
    movdqu XM0, 16(ARG1)

    /* Chunk at offset 32: shift/xor chain over XM1, then xor with the top dword of XM0. */
    movdqu XM1, XM2
    KEY_EXPANSION_HELPER_192 XM2, XM3
    vpermilps $0xff, XM0, XM3
    vpxor XM3, XM2, XM2
    movdqu XM2, 32(ARG1)

    /* Remaining chunks alternate between XM0 and XM2 as destination. */
    AES192_ENC_CHUNK_ASSIST 0x02, XM2, XM0
    movdqu XM0, 48(ARG1)

    AES192_ENC_CHUNK_SPLIT 0x04, XM0, XM2
    movdqu XM2, 64(ARG1)

    AES192_ENC_CHUNK_PLAIN XM2, XM0
    movdqu XM0, 80(ARG1)

    AES192_ENC_CHUNK_ASSIST 0x08, XM0, XM2
    movdqu XM2, 96(ARG1)

    AES192_ENC_CHUNK_SPLIT 0x10, XM2, XM0
    movdqu XM0, 112(ARG1)

    AES192_ENC_CHUNK_PLAIN XM0, XM2
    movdqu XM2, 128(ARG1)

    AES192_ENC_CHUNK_ASSIST 0x20, XM2, XM0
    movdqu XM0, 144(ARG1)

    AES192_ENC_CHUNK_SPLIT 0x40, XM0, XM2
    movdqu XM2, 160(ARG1)

    AES192_ENC_CHUNK_PLAIN XM2, XM0
    movdqu XM0, 176(ARG1)

    AES192_ENC_CHUNK_ASSIST 0x80, XM0, XM2
    movdqu XM2, 192(ARG1)

    /* Do not leave key material behind in the vector registers. */
    .irp reg, XM0, XM1, XM2, XM3, XM4
    vpxor \reg, \reg, \reg
    .endr

    ret
    .cfi_endproc
    .size SetEncryptKey192, .-SetEncryptKey192
350
/**
 * Local helpers for SetDecryptKey192. Each one computes the next 16-byte chunk of the
 * schedule into dst, given that last holds the chunk produced immediately before and dst
 * still holds the chunk produced before that. XM1, XM3 (and XM4 for the SPLIT form) are scratch.
 */

/* Chunk whose derivation starts with aeskeygenassist on the previous chunk. */
.macro AES192_DEC_CHUNK_ASSIST rcon, last, dst
    vshufps $0x4e, \last, \dst, XM1
    aeskeygenassist $\rcon, \last, \dst
    KEY_EXPANSION_HELPER_192 XM1, XM3
    vpermilps $0xff, \dst, \dst
    vpxor XM1, \dst, \dst
.endm

/* Chunk whose low half is finished first and then fed to aeskeygenassist for the high half. */
.macro AES192_DEC_CHUNK_SPLIT rcon, last, dst
    vshufps $0x4e, \last, \dst, XM1
    vpslldq $8, XM1, \dst
    vpslldq $4, \dst, XM3
    vpxor  XM3, \dst, \dst
    vpermilps $0xff, \last, XM3
    vpxor  XM3, \dst, \dst
    aeskeygenassist $\rcon, \dst, XM3
    vpermilps $0xff, XM3, XM3
    vpsrldq $8, XM1, XM4
    vpslldq $12, XM4, XM4
    vpxor  XM4, XM1, XM1
    vpxor  XM3, XM1, XM1
    vshufps $0xee, XM1, \dst, \dst
.endm

/* Chunk that needs no aeskeygenassist: only shift/xor chaining plus a broadcast of last. */
.macro AES192_DEC_CHUNK_PLAIN last, dst
    vshufps $0x4e, \last, \dst, XM1
    KEY_EXPANSION_HELPER_192 XM1, XM3
    vpermilps $0xff, \last, \dst
    vpxor XM1, \dst, \dst
.endm

/* Applies aesimc to src (via XM5) and stores the result at byte offset off of ARG1. */
.macro AES192_DEC_STORE_IMC src, off
    aesimc  \src, XM5
    movdqu XM5, \off(ARG1)
.endm

/**
 *  Function description: Sets the AES decryption key. Key length: 192 bits.
 *  Function prototype: void SetDecryptKey192(CRYPT_AES_Key *ctx, const uint8_t *key);
 *  Input register:
 *        rdi (ARG1): Pointer to the output key structure.
 *        rsi (ARG2): Pointer to the input key.
 *  Change register: xmm0-xmm5 (all zeroed before returning).
 *  Output register: None.
 *  Function/Macro Call: KEY_EXPANSION_HELPER_192.
 */
    .globl SetDecryptKey192
    .type SetDecryptKey192, @function
SetDecryptKey192:
    .cfi_startproc

    /* The round count (12) is written at byte offset 240 of the key structure. */
    movl $12, 240(ARG1)

    /* XM0 = key bytes 0..15, XM1 = key bytes 8..23. The schedule is laid out in */
    /* reverse, so the first chunk goes unchanged into the last slot (offset 192). */
    movdqu (ARG2), XM0
    movdqu 8(ARG2), XM1
    movdqu XM0, 192(ARG1)

    /* Chunk for offset 176. */
    vpxor XM4, XM4, XM4
    vshufps $0x40, XM0, XM4, XM2
    aeskeygenassist $0x01, XM1, XM0
    vshufps $0xf0, XM0, XM4, XM0
    vpslldq $0x04, XM2, XM3
    vpxor XM3, XM2, XM2
    vpxor XM2, XM0, XM0
    vshufps $0xee, XM0, XM1, XM0
    AES192_DEC_STORE_IMC XM0, 176

    /* Chunk for offset 160: shift/xor chain over XM1, then xor with the top dword of XM0. */
    movdqu XM1, XM2
    KEY_EXPANSION_HELPER_192 XM2, XM3
    vpermilps $0xff, XM0, XM3
    vpxor XM3, XM2, XM2
    AES192_DEC_STORE_IMC XM2, 160

    /* Remaining chunks alternate between XM0 and XM2 as destination. */
    AES192_DEC_CHUNK_ASSIST 0x02, XM2, XM0
    AES192_DEC_STORE_IMC XM0, 144

    AES192_DEC_CHUNK_SPLIT 0x04, XM0, XM2
    AES192_DEC_STORE_IMC XM2, 128

    AES192_DEC_CHUNK_PLAIN XM2, XM0
    AES192_DEC_STORE_IMC XM0, 112

    AES192_DEC_CHUNK_ASSIST 0x08, XM0, XM2
    AES192_DEC_STORE_IMC XM2, 96

    AES192_DEC_CHUNK_SPLIT 0x10, XM2, XM0
    AES192_DEC_STORE_IMC XM0, 80

    AES192_DEC_CHUNK_PLAIN XM0, XM2
    AES192_DEC_STORE_IMC XM2, 64

    AES192_DEC_CHUNK_ASSIST 0x20, XM2, XM0
    AES192_DEC_STORE_IMC XM0, 48

    AES192_DEC_CHUNK_SPLIT 0x40, XM0, XM2
    AES192_DEC_STORE_IMC XM2, 32

    AES192_DEC_CHUNK_PLAIN XM2, XM0
    AES192_DEC_STORE_IMC XM0, 16

    /* The final chunk is stored in slot 0 without aesimc. */
    AES192_DEC_CHUNK_ASSIST 0x80, XM0, XM2
    movdqu XM2, (ARG1)

    /* Do not leave key material behind in the vector registers. */
    .irp reg, XM0, XM1, XM2, XM3, XM4, XM5
    vpxor \reg, \reg, \reg
    .endr

    ret
    .cfi_endproc
    .size SetDecryptKey192, .-SetDecryptKey192
504
/**
 * Local helper for SetEncryptKey256: computes one 16-byte round key and stores it at
 * byte offset off of the key structure (ARG1).
 *   rcon: immediate for aeskeygenassist.
 *   sel:  vpermilps selector (0xff or 0xAA) choosing which dword of the assist result
 *         is broadcast.
 *   last: register holding the most recently produced round key (read only).
 *   dst:  register receiving the new round key.
 *   old:  register holding the round key produced two steps earlier (clobbered).
 * XM3 is scratch.
 */
.macro AES256_ENC_NEXT_KEY rcon, sel, last, dst, old, off
    aeskeygenassist $\rcon, \last, \dst
    vpermilps $\sel, \dst, \dst
    vpslldq $4, \old, XM3
    vpxor XM3, \old, \old
    .rept 2
    vpslldq $4, XM3, XM3
    vpxor XM3, \old, \old
    .endr
    vpxor \old, \dst, \dst
    movdqu \dst, \off(ARG1)
.endm

/**
 *  Function description: Sets the AES encryption key. Key length: 256 bits.
 *  Function prototype: void SetEncryptKey256(CRYPT_AES_Key *ctx, const uint8_t *key);
 *  Input register:
 *        rdi (ARG1): Pointer to the output key structure.
 *        rsi (ARG2): Pointer to the input key.
 *  Change register: xmm0-xmm3 (all zeroed before returning).
 *  Output register: None.
 *  Function/Macro Call: None outside this block.
 */
    .globl SetEncryptKey256
    .type SetEncryptKey256, @function
SetEncryptKey256:
    .cfi_startproc

    /* The round count (14) is written at byte offset 240 of the key structure. */
    movl $14, 240(ARG1)

    /* Round keys 0 and 1 are the two halves of the user key. */
    movdqu (ARG2), XM0
    movdqu 16(ARG2), XM1
    movdqu XM0, (ARG1)
    movdqu XM1, 16(ARG1)

    /* Round keys 2..14; the three key registers rotate roles on every step. */
    /*1*/
    AES256_ENC_NEXT_KEY 0x01, 0xff, XM1, XM2, XM0, 32
    AES256_ENC_NEXT_KEY 0x01, 0xAA, XM2, XM0, XM1, 48
    /*2*/
    AES256_ENC_NEXT_KEY 0x02, 0xff, XM0, XM1, XM2, 64
    AES256_ENC_NEXT_KEY 0x02, 0xAA, XM1, XM2, XM0, 80
    /*3*/
    AES256_ENC_NEXT_KEY 0x04, 0xff, XM2, XM0, XM1, 96
    AES256_ENC_NEXT_KEY 0x04, 0xAA, XM0, XM1, XM2, 112
    /*4*/
    AES256_ENC_NEXT_KEY 0x08, 0xff, XM1, XM2, XM0, 128
    AES256_ENC_NEXT_KEY 0x08, 0xAA, XM2, XM0, XM1, 144
    /*5*/
    AES256_ENC_NEXT_KEY 0x10, 0xff, XM0, XM1, XM2, 160
    AES256_ENC_NEXT_KEY 0x10, 0xAA, XM1, XM2, XM0, 176
    /*6*/
    AES256_ENC_NEXT_KEY 0x20, 0xff, XM2, XM0, XM1, 192
    AES256_ENC_NEXT_KEY 0x20, 0xAA, XM0, XM1, XM2, 208
    /*7*/
    AES256_ENC_NEXT_KEY 0x40, 0xff, XM1, XM2, XM0, 224

    /* Do not leave key material behind in the vector registers. */
    .irp reg, XM0, XM1, XM2, XM3
    vpxor \reg, \reg, \reg
    .endr

    ret
    .cfi_endproc
    .size SetEncryptKey256, .-SetEncryptKey256
677
678
679 /**
680 *  Function description: Sets the AES encryption key. Key length: 192 bits.
681 *  Function prototype: void SetDecryptKey256(CRYPT_AES_Key *ctx, const uint8_t *key);
682 *  Input register:
683 *        x0:Pointer to the output key structure.
684 *        x1:Pointer to the input key.
685 *  Change register: xmm0-xmm4.
686 *  Output register: None.
687 *  Function/Macro Call: None.
688 */
689     .globl SetDecryptKey256
690    .type SetDecryptKey256, @function
691SetDecryptKey256:
692    .cfi_startproc
693
694    movl $14, 240(ARG1)
695    movdqu (ARG2), XM0
696    movdqu 16(ARG2), XM1
697    movdqu XM0, 224(ARG1)
698
699    aesimc  XM1, XM4
700    movdqu XM4, 208(ARG1)
701
702    aeskeygenassist $0x01, XM1, XM2
703    vpermilps $0xff, XM2, XM2
704    vpslldq $4, XM0, XM3
705    vpxor XM3, XM0, XM0
706    vpslldq $4, XM3, XM3
707    vpxor XM3, XM0, XM0
708    vpslldq $4, XM3, XM3
709    vpxor XM3, XM0, XM0
710    vpxor XM0, XM2, XM2
711    aesimc  XM2, XM4
712    movdqu XM4, 192(ARG1)
713
714    aeskeygenassist $0x01, XM2, XM0
715    vpermilps $0xAA, XM0, XM0
716    vpslldq $4, XM1, XM3
717    vpxor XM3, XM1, XM1
718    vpslldq $4, XM3, XM3
719    vpxor XM3, XM1, XM1
720    vpslldq $4, XM3, XM3
721    vpxor XM3, XM1, XM1
722    vpxor XM1, XM0, XM0
723    aesimc  XM0, XM4
724    movdqu XM4, 176(ARG1)
725    /*2*/
726    aeskeygenassist $0x02, XM0, XM1
727    vpermilps $0xff, XM1, XM1
728    vpslldq $4, XM2, XM3
729    vpxor XM3, XM2, XM2
730    vpslldq $4, XM3, XM3
731    vpxor XM3, XM2, XM2
732    vpslldq $4, XM3, XM3
733    vpxor XM3, XM2, XM2
734    vpxor XM2, XM1, XM1
735    aesimc  XM1, XM4
736    movdqu XM4, 160(ARG1)
737
738    aeskeygenassist $0x02, XM1, XM2
739    vpermilps $0xAA, XM2, XM2
740    vpslldq $4, XM0, XM3
741    vpxor XM3, XM0, XM0
742    vpslldq $4, XM3, XM3
743    vpxor XM3, XM0, XM0
744    vpslldq $4, XM3, XM3
745    vpxor XM3, XM0, XM0
746    vpxor XM0, XM2, XM2
747    aesimc  XM2, XM4
748    movdqu XM4, 144(ARG1)
749    /*3*/
750    aeskeygenassist $0x04, XM2, XM0
751    vpermilps $0xff, XM0, XM0
752    vpslldq $4, XM1, XM3
753    vpxor XM3, XM1, XM1
754    vpslldq $4, XM3, XM3
755    vpxor XM3, XM1, XM1
756    vpslldq $4, XM3, XM3
757    vpxor XM3, XM1, XM1
758    vpxor XM1, XM0, XM0
759    aesimc  XM0, XM4
760    movdqu XM4, 128(ARG1)
761
762    aeskeygenassist $0x04, XM0, XM1
763    vpermilps $0xAA, XM1, XM1
764    vpslldq $4, XM2, XM3
765    vpxor XM3, XM2, XM2
766    vpslldq $4, XM3, XM3
767    vpxor XM3, XM2, XM2
768    vpslldq $4, XM3, XM3
769    vpxor XM3, XM2, XM2
770    vpxor XM2, XM1, XM1
771    aesimc  XM1, XM4
772    movdqu XM4, 112(ARG1)
773    /*4*/
774    aeskeygenassist $0x08, XM1, XM2
775    vpermilps $0xff, XM2, XM2
776    vpslldq $4, XM0, XM3
777    vpxor XM3, XM0, XM0
778    vpslldq $4, XM3, XM3
779    vpxor XM3, XM0, XM0
780    vpslldq $4, XM3, XM3
781    vpxor XM3, XM0, XM0
782    vpxor XM0, XM2, XM2
783    aesimc  XM2, XM4
784    movdqu XM4, 96(ARG1)
785
786    aeskeygenassist $0x08, XM2, XM0
787    vpermilps $0xAA, XM0, XM0
788    vpslldq $4, XM1, XM3
789    vpxor XM3, XM1, XM1
790    vpslldq $4, XM3, XM3
791    vpxor XM3, XM1, XM1
792    vpslldq $4, XM3, XM3
793    vpxor XM3, XM1, XM1
794    vpxor XM1, XM0, XM0
795    aesimc  XM0, XM4
796    movdqu XM4, 80(ARG1)
797    /*5*/
798    aeskeygenassist $0x10, XM0, XM1
799    vpermilps $0xff, XM1, XM1
800    vpslldq $4, XM2, XM3
801    vpxor XM3, XM2, XM2
802    vpslldq $4, XM3, XM3
803    vpxor XM3, XM2, XM2
804    vpslldq $4, XM3, XM3
805    vpxor XM3, XM2, XM2
806    vpxor XM2, XM1, XM1
807    aesimc  XM1, XM4
808    movdqu XM4, 64(ARG1)
809
810    aeskeygenassist $0x10, XM1, XM2
811    vpermilps $0xAA, XM2, XM2
812    vpslldq $4, XM0, XM3
813    vpxor XM3, XM0, XM0
814    vpslldq $4, XM3, XM3
815    vpxor XM3, XM0, XM0
816    vpslldq $4, XM3, XM3
817    vpxor XM3, XM0, XM0
818    vpxor XM0, XM2, XM2
819    aesimc  XM2, XM4
820    movdqu XM4, 48(ARG1)
821    /*6*/
822    aeskeygenassist $0x20, XM2, XM0
823    vpermilps $0xff, XM0, XM0
824    vpslldq $4, XM1, XM3
825    vpxor XM3, XM1, XM1
826    vpslldq $4, XM3, XM3
827    vpxor XM3, XM1, XM1
828    vpslldq $4, XM3, XM3
829    vpxor XM3, XM1, XM1
830    vpxor XM1, XM0, XM0
831    aesimc  XM0, XM4
832    movdqu XM4, 32(ARG1)
833
834    aeskeygenassist $0x20, XM0, XM1
835    vpermilps $0xAA, XM1, XM1
836    vpslldq $4, XM2, XM3
837    vpxor XM3, XM2, XM2
838    vpslldq $4, XM3, XM3
839    vpxor XM3, XM2, XM2
840    vpslldq $4, XM3, XM3
841    vpxor XM3, XM2, XM2
842    vpxor XM2, XM1, XM1
843    aesimc  XM1, XM4
844    movdqu XM4, 16(ARG1)
845    /*7*/
846    aeskeygenassist $0x40, XM1, XM2
847    vpermilps $0xff, XM2, XM2
848    vpslldq $4, XM0, XM3
849    vpxor XM3, XM0, XM0
850    vpslldq $4, XM3, XM3
851    vpxor XM3, XM0, XM0
852    vpslldq $4, XM3, XM3
853    vpxor XM3, XM0, XM0
854    vpxor XM0, XM2, XM2
855    movdqu XM2, (ARG1)
856
857    vpxor XM0, XM0, XM0
858    vpxor XM1, XM1, XM1
859    vpxor XM2, XM2, XM2
860    vpxor XM3, XM3, XM3
861    vpxor XM4, XM4, XM4
862
863    ret
864    .cfi_endproc
865    .size SetDecryptKey256, .-SetDecryptKey256
866
/**
 *  Function description: Encrypts one 16-byte block with AES-NI using an expanded key schedule.
 *  Function prototype: int32_t CRYPT_AES_Encrypt(const CRYPT_AES_Key *ctx, const uint8_t *in, uint8_t *out, uint32_t len);
 *  Input register:
 *        rdi (ARG1): Pointer to the input key structure (round keys at 16-byte slots,
 *                    round count at byte offset 240).
 *        rsi (ARG2): Points to the 128-bit input data.
 *        rdx (ARG3): Points to the 128-bit output data.
 *        rcx (ARG4): Length of the data block; not read by this routine.
 *  Change register: xmm0-xmm1 (both zeroed before returning).
 *  Output register: eax (always 0).
 *  Function/Macro Call: None.
 */
    .globl CRYPT_AES_Encrypt
    .type CRYPT_AES_Encrypt, @function
CRYPT_AES_Encrypt:
    .cfi_startproc
    .set    ROUNDS,%eax

    movdqu (ARG2), XM0
    movl 240(ARG1),ROUNDS

    /* Initial whitening with round key 0. */
    vpxor (ARG1), XM0, XM0

    /* Rounds 1..9 are common to every key size. */
    .irp off, 16, 32, 48, 64, 80, 96, 112, 128, 144
    movdqu \off(ARG1), XM1
    aesenc  XM1, XM0
    .endr

    cmpl $10,ROUNDS
    je  .Laesenc_128

    /* Rounds 10..11 for 192- and 256-bit keys. */
    .irp off, 160, 176
    movdqu \off(ARG1), XM1
    aesenc  XM1, XM0
    .endr

    cmpl $12,ROUNDS
    je  .Laesenc_192

    /* Rounds 12..13 for 256-bit keys. */
    .irp off, 192, 208
    movdqu \off(ARG1), XM1
    aesenc  XM1, XM0
    .endr

    cmpl $14,ROUNDS
    je  .Laesenc_256
    /* Any other round count falls through into the 128-bit tail below. */

.Laesenc_128:
    movdqu 160(ARG1), XM1
    aesenclast XM1, XM0
    jmp  .Laesenc_end

.Laesenc_192:
    movdqu 192(ARG1), XM1
    aesenclast XM1, XM0
    jmp  .Laesenc_end

.Laesenc_256:
    movdqu 224(ARG1), XM1
    aesenclast XM1, XM0

.Laesenc_end:
    /* Clear the round key, write the result, then clear the data register. */
    vpxor XM1, XM1, XM1
    movdqu XM0,(ARG3)
    vpxor XM0, XM0, XM0
    movl $0,RET
    ret
    .cfi_endproc
    .size CRYPT_AES_Encrypt, .-CRYPT_AES_Encrypt
960
/**
 *  Function description: Decrypts one 16-byte block with AES-NI using an expanded decryption key schedule.
 *  Function prototype: int32_t CRYPT_AES_Decrypt(const CRYPT_AES_Key *ctx, const uint8_t *in, uint8_t *out, uint32_t len);
 *  Input register:
 *        rdi (ARG1): Pointer to the input key structure (round keys at 16-byte slots,
 *                    round count at byte offset 240).
 *        rsi (ARG2): Points to the 128-bit input data.
 *        rdx (ARG3): Points to the 128-bit output data.
 *        rcx (ARG4): Length of the data block; not read by this routine.
 *  Change register: xmm0-xmm1 (both zeroed before returning).
 *  Output register: eax (always 0).
 *  Function/Macro Call: None.
 *
 *  Branch targets use the .L prefix so that they stay assembler-local and are not
 *  emitted into the object file's symbol table (matching CRYPT_AES_Encrypt).
 */
    .globl CRYPT_AES_Decrypt
    .type CRYPT_AES_Decrypt, @function
CRYPT_AES_Decrypt:
    .cfi_startproc
    .set    ROUNDS,%eax

    movdqu (ARG2), XM0
    movl 240(ARG1),ROUNDS

    /* Initial whitening with the key in slot 0. */
    vpxor (ARG1), XM0, XM0

    /* Rounds 1..9 are common to every key size. */
    .irp off, 16, 32, 48, 64, 80, 96, 112, 128, 144
    movdqu \off(ARG1), XM1
    aesdec  XM1, XM0
    .endr

    cmpl $10,ROUNDS
    je  .Laesdec_128

    /* Rounds 10..11 for 192- and 256-bit keys. */
    .irp off, 160, 176
    movdqu \off(ARG1), XM1
    aesdec  XM1, XM0
    .endr

    cmpl $12,ROUNDS
    je  .Laesdec_192

    /* Rounds 12..13 for 256-bit keys. */
    .irp off, 192, 208
    movdqu \off(ARG1), XM1
    aesdec  XM1, XM0
    .endr

    cmpl $14,ROUNDS
    je  .Laesdec_256
    /* Any other round count falls through into the 128-bit tail below. */

.Laesdec_128:
    movdqu 160(ARG1), XM1
    aesdeclast XM1, XM0
    jmp  .Laesdec_end

.Laesdec_192:
    movdqu 192(ARG1), XM1
    aesdeclast XM1, XM0
    jmp  .Laesdec_end

.Laesdec_256:
    movdqu 224(ARG1), XM1
    aesdeclast XM1, XM0

.Laesdec_end:
    /* Clear the round key, write the result, then clear the data register. */
    vpxor XM1, XM1, XM1
    movdqu XM0,(ARG3)
    vpxor XM0, XM0, XM0
    movl $0,RET

    ret
    .cfi_endproc
    .size CRYPT_AES_Decrypt, .-CRYPT_AES_Decrypt
1055
1056#endif
1057