/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
15
16#include "hitls_build.h"
17#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_CBC)
18
19#include "crypt_aes_macro_x86_64.s"
20
21.file   "crypt_aes_cbc_x86_64.S"
22.text
23
/*
 * Symbolic register names shared by the CBC encrypt and decrypt routines.
 * Several names deliberately map to the same physical register; the groups
 * below call out every overlap.
 */

/* Argument registers, in prototype order. ARG4 is the 32-bit view (%ecx). */
.set    ARG1,    %rdi
.set    ARG2,    %rsi
.set    ARG3,    %rdx
.set    ARG4,    %ecx
.set    ARG5,    %r8
.set    ARG6,    %r9

/* Scalar working registers. KEY is ARG1, KTMP is ARG6, ROUNDS and RET are both %eax. */
.set    KEY,     %rdi
.set    KTMP,    %r9
.set    ROUNDS,  %eax
.set    RET,     %eax

/* Scratch round-key registers. RDK and KEYTEMP are both %xmm3. */
.set    RDK,     %xmm3
.set    KEYTEMP, %xmm3

/* Chaining values. */
.set    IV0,     %xmm0
.set    IV1,     %xmm7
.set    IV2,     %xmm8
.set    IV3,     %xmm9

/* Data blocks. */
.set    BLK0,    %xmm1
.set    BLK1,    %xmm4
.set    BLK2,    %xmm5
.set    BLK3,    %xmm6
.set    BLK4,    %xmm10
.set    BLK5,    %xmm11
.set    BLK6,    %xmm12
.set    BLK7,    %xmm13

/*
 * Resident round keys. KEY1-KEY7 occupy the same registers as BLK1-BLK7 and
 * KEY11-KEY13 the same registers as IV1-IV3.
 */
.set    KEY1,    %xmm4
.set    KEY2,    %xmm5
.set    KEY3,    %xmm6
.set    KEY4,    %xmm10
.set    KEY5,    %xmm11
.set    KEY6,    %xmm12
.set    KEY7,    %xmm13
.set    KEY8,    %xmm14
.set    KEY9,    %xmm15
.set    KEY10,   %xmm2
.set    KEY11,   %xmm7
.set    KEY12,   %xmm8
.set    KEY13,   %xmm9
64
/**
 *  Function description: AES encryption in CBC mode, accelerated with AES-NI.
 *  Function prototype: int32_t CRYPT_AES_CBC_Encrypt(const CRYPT_AES_Key *ctx,
 *                      const uint8_t *in,
 *                      uint8_t *out,
 *                      uint32_t len,
 *                      uint8_t *iv);
 *  Input register:
 *        rdi: pointer to the key structure (round keys read from offset 0,
 *             round count read from offset 240)
 *        rsi: pointer to the input data
 *        rdx: pointer to the output data
 *        ecx: length of the input data in bytes, expected to be a multiple of 16.
 *             Only whole 16-byte blocks are processed; a trailing partial block
 *             is left untouched. If len < 16 nothing is read or written.
 *        r8:  pointer to the 16-byte IV; overwritten with the last ciphertext
 *             block when at least one block was processed
 *  Change register: eax, ecx, rsi, rdx, flags, xmm0-xmm15
 *  Output register: eax (always 0)
 *  Function/Macro Call: None
 */
    .globl CRYPT_AES_CBC_Encrypt
    .type CRYPT_AES_CBC_Encrypt, @function
    .align 16                           // align the entry point itself, so no padding is executed
CRYPT_AES_CBC_Encrypt:
    .cfi_startproc
    cmpl    $16, ARG4
    jb      .Laescbcend_end             // less than one block: nothing to do
    movl    240(KEY), ROUNDS
    vmovdqu (ARG5), IV0
    // Round keys 0..10 are needed by every key size; keep them resident.
    vmovdqu (KEY), KEY1
    vmovdqu 16(KEY), KEY2
    vmovdqu 32(KEY), KEY3
    vmovdqu 48(KEY), KEY4
    vmovdqu 64(KEY), KEY5
    vmovdqu 80(KEY), KEY6
    vmovdqu 96(KEY), KEY7
    vmovdqu 112(KEY), KEY8
    vmovdqu 128(KEY), KEY9
    vmovdqu 144(KEY), KEY10
    vmovdqu 160(KEY), KEY11
    // Dispatch on the round count: below 12 -> 128-bit path, 12 -> 192-bit path,
    // above 12 -> fall through to the 256-bit path.
    cmpl    $12, ROUNDS
    jb      .Laes_128_cbc_start
    je      .Laes_192_cbc_start

.align 16
.Laes_256_cbc_start:
    vmovdqu 176(KEY), KEY12
    vmovdqu 192(KEY), KEY13
.Laes_256_cbc_loop:
    vpxor   (ARG2), IV0, BLK0           // BLK0 = plaintext ^ previous ciphertext (or IV)
    vmovdqu 208(KEY), KEYTEMP           // all 16 xmm registers are in use, so the last
                                        // two round keys are reloaded every iteration
    vpxor   BLK0, KEY1, BLK0            // initial AddRoundKey
    vaesenc KEY2, BLK0, BLK0
    vaesenc KEY3, BLK0, BLK0
    vaesenc KEY4, BLK0, BLK0
    vaesenc KEY5, BLK0, BLK0
    vaesenc KEY6, BLK0, BLK0
    vaesenc KEY7, BLK0, BLK0
    vaesenc KEY8, BLK0, BLK0
    vaesenc KEY9, BLK0, BLK0
    vaesenc KEY10, BLK0, BLK0
    vaesenc KEY11, BLK0, BLK0
    vaesenc KEY12, BLK0, BLK0
    vaesenc KEY13, BLK0, BLK0
    vaesenc KEYTEMP, BLK0, BLK0
    vmovdqu 224(KEY), KEYTEMP
    vaesenclast KEYTEMP, BLK0, BLK0
    leaq    16(ARG2), ARG2
    vmovdqu BLK0, (ARG3)
    vmovdqa BLK0, IV0                   // ciphertext chains into the next block
    leaq    16(ARG3), ARG3
    subl    $16, ARG4
    cmpl    $16, ARG4
    jae     .Laes_256_cbc_loop          // continue only while a whole block remains
    // Wipe the round keys that only this path loaded.
    vpxor   KEY12, KEY12, KEY12
    vpxor   KEY13, KEY13, KEY13
    vpxor   KEYTEMP, KEYTEMP, KEYTEMP
    jmp     .Laescbcenc_finish

.align 16
.Laes_192_cbc_start:
    vmovdqu 176(KEY), KEY12
    vmovdqu 192(KEY), KEY13
.Laes_192_cbc_loop:
    vpxor   (ARG2), IV0, BLK0
    vpxor   BLK0, KEY1, BLK0
    vaesenc KEY2, BLK0, BLK0
    vaesenc KEY3, BLK0, BLK0
    vaesenc KEY4, BLK0, BLK0
    vaesenc KEY5, BLK0, BLK0
    vaesenc KEY6, BLK0, BLK0
    vaesenc KEY7, BLK0, BLK0
    vaesenc KEY8, BLK0, BLK0
    vaesenc KEY9, BLK0, BLK0
    vaesenc KEY10, BLK0, BLK0
    vaesenc KEY11, BLK0, BLK0
    vaesenc KEY12, BLK0, BLK0
    vaesenclast KEY13, BLK0, BLK0
    leaq    16(ARG2), ARG2
    vmovdqu BLK0, (ARG3)
    vmovdqa BLK0, IV0
    leaq    16(ARG3), ARG3
    subl    $16, ARG4
    // Compare against a full block instead of testing for zero: with a length
    // that is not a multiple of 16 the counter would otherwise wrap below zero
    // and the loop would run far past the end of both buffers.
    cmpl    $16, ARG4
    jae     .Laes_192_cbc_loop
    vpxor   KEY12, KEY12, KEY12
    vpxor   KEY13, KEY13, KEY13
    jmp     .Laescbcenc_finish

.align 16
.Laes_128_cbc_start:
    vpxor   (ARG2), IV0, BLK0
    vpxor   BLK0, KEY1, BLK0
    vaesenc KEY2, BLK0, BLK0
    vaesenc KEY3, BLK0, BLK0
    vaesenc KEY4, BLK0, BLK0
    vaesenc KEY5, BLK0, BLK0
    vaesenc KEY6, BLK0, BLK0
    vaesenc KEY7, BLK0, BLK0
    vaesenc KEY8, BLK0, BLK0
    vaesenc KEY9, BLK0, BLK0
    vaesenc KEY10, BLK0, BLK0
    vaesenclast KEY11, BLK0, BLK0
    leaq    16(ARG2), ARG2
    vmovdqu BLK0, (ARG3)
    vmovdqa BLK0, IV0
    leaq    16(ARG3), ARG3
    subl    $16, ARG4
    cmpl    $16, ARG4                   // same whole-block guard as the other key sizes
    jae     .Laes_128_cbc_start
    // fall through

.Laescbcenc_finish:
    vmovdqu BLK0, (ARG5)                // hand the last ciphertext block back as the next IV
    // Wipe the round keys from the vector registers before returning.
    vpxor   KEY1, KEY1, KEY1
    vpxor   KEY2, KEY2, KEY2
    vpxor   KEY3, KEY3, KEY3
    vpxor   KEY4, KEY4, KEY4
    vpxor   KEY5, KEY5, KEY5
    vpxor   KEY6, KEY6, KEY6
    vpxor   KEY7, KEY7, KEY7
    vpxor   KEY8, KEY8, KEY8
    vpxor   KEY9, KEY9, KEY9
    vpxor   KEY10, KEY10, KEY10
    vpxor   KEY11, KEY11, KEY11
.Laescbcend_end:
    xorl    RET, RET                    // return 0
    ret
    .cfi_endproc
    .size CRYPT_AES_CBC_Encrypt, .-CRYPT_AES_CBC_Encrypt
209
/**
 *  Function description: AES decryption in CBC mode, accelerated with AES-NI.
 *                        Up to eight blocks are decrypted per batch.
 *  Function prototype: int32_t CRYPT_AES_CBC_Decrypt(const CRYPT_AES_Key *ctx,
 *                      const uint8_t *in,
 *                      uint8_t *out,
 *                      uint32_t len,
 *                      uint8_t *iv);
 *  Input register:
 *        rdi: pointer to the key structure (round keys read from offset 0,
 *             round count read from offset 240)
 *        rsi: pointer to the input data
 *        rdx: pointer to the output data
 *        ecx: length of the input data in bytes, expected to be a multiple of 16.
 *             Only whole 16-byte blocks are processed; fewer than 16 remaining
 *             bytes are left untouched.
 *        r8:  pointer to the 16-byte IV; rewritten on return with the last
 *             ciphertext block processed (with its original value if no block
 *             was processed)
 *  Change register: eax, ecx, rsi, rdx, r9, flags, xmm0-xmm13, plus anything the
 *                   AES_DEC_* macros modify (defined outside this file section)
 *  Output register: eax (always 0)
 *  Function/Macro Call: AES_DEC_1_BLK, AES_DEC_2_BLKS ... AES_DEC_8_BLKS
 *
 *  Within each batch every read of the input precedes the first store to the
 *  output, so the chaining ciphertext is still available when in == out.
 */
    .globl CRYPT_AES_CBC_Decrypt
    .type CRYPT_AES_CBC_Decrypt, @function
    .align 16                           // align the entry point itself, so no padding is executed
CRYPT_AES_CBC_Decrypt:
    .cfi_startproc
    vmovdqu (ARG5), IV0

    // Dispatch on the number of whole blocks left (ARG4 = remaining bytes).
.Laes_cbc_dec_start:
    cmpl    $64, ARG4
    jae     .Labove_equal_4_blks
    cmpl    $32, ARG4
    jae     .Labove_equal_2_blks
    cmpl    $16, ARG4
    jb      .Laes_cbc_dec_finish        // no whole block left: never touch a partial block
    jmp     .Lproc_1_blk

.Labove_equal_2_blks:
    cmpl    $48, ARG4
    jb      .Lproc_2_blks
    jmp     .Lproc_3_blks

.Labove_equal_4_blks:
    cmpl    $96, ARG4
    jae     .Labove_equal_6_blks
    cmpl    $80, ARG4
    jb      .Lproc_4_blks
    jmp     .Lproc_5_blks

.Labove_equal_6_blks:
    cmpl    $112, ARG4
    jb      .Lproc_6_blks
    cmpl    $128, ARG4
    jb      .Lproc_7_blks
    // 128 bytes or more: fall through into the 8-block loop

.align 16
.Lproc_8_blks:
.Laescbcdec_8_blks_loop:
    vmovdqu (ARG2), BLK0
    vmovdqu 16(ARG2), BLK1
    vmovdqu 32(ARG2), BLK2
    // Keep ciphertext blocks 0..2 in registers as chaining values for blocks 1..3;
    // the chaining values for blocks 4..7 are re-read from the input later.
    vmovdqa BLK0, IV1
    vmovdqa BLK1, IV2
    vmovdqa BLK2, IV3
    // The macro is handed a copy of the key pointer because KEY is needed again
    // on the next iteration.
    // NOTE(review): presumably the macro advances its key-pointer argument - confirm
    // against crypt_aes_macro_x86_64.s.
    movq    KEY, KTMP
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   BLK0, RDK, BLK0             // initial AddRoundKey on all eight blocks
    vpxor   BLK1, RDK, BLK1
    vpxor   BLK2, RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    vpxor   96(ARG2), RDK, BLK6
    vpxor   112(ARG2), RDK, BLK7
    decl    ROUNDS
    AES_DEC_8_BLKS    KTMP ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6 BLK7
    // CBC chaining: plaintext[i] = decrypted[i] ^ ciphertext[i-1] (IV0 for i = 0).
    vpxor   BLK0, IV0, BLK0
    vpxor   BLK1, IV1, BLK1
    vpxor   BLK2, IV2, BLK2
    vpxor   BLK3, IV3, BLK3
    vpxor   48(ARG2), BLK4, BLK4
    vpxor   64(ARG2), BLK5, BLK5
    vpxor   80(ARG2), BLK6, BLK6
    vpxor   96(ARG2), BLK7, BLK7
    vmovdqu 112(ARG2), IV0              // last ciphertext block chains into the next batch
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    vmovdqu BLK6, 96(ARG3)
    vmovdqu BLK7, 112(ARG3)
    subl    $128, ARG4
    leaq    128(ARG2), ARG2
    leaq    128(ARG3), ARG3
    cmpl    $128, ARG4
    jae     .Laescbcdec_8_blks_loop     // another full batch of eight
    jmp     .Laes_cbc_dec_start         // 0..7 blocks left: re-dispatch

.align 16
.Lproc_1_blk:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    decl    ROUNDS
    AES_DEC_1_BLK    KEY ROUNDS RDK BLK0
    vpxor   BLK0, IV0, BLK0
    vmovdqu (ARG2), IV0
    vmovdqu BLK0, (ARG3)
    jmp     .Laes_cbc_dec_finish

.align 16
.Lproc_2_blks:
    vmovdqu (ARG2), BLK0
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vmovdqa BLK0, IV1
    vpxor   BLK0, RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    decl    ROUNDS
    AES_DEC_2_BLKS    KEY ROUNDS RDK BLK0 BLK1
    vpxor   BLK0, IV0, BLK0
    vpxor   BLK1, IV1, BLK1
    vmovdqu 16(ARG2), IV0
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    jmp     .Laes_cbc_dec_finish

.align 16
.Lproc_3_blks:
    vmovdqu (ARG2), BLK0
    vmovdqu 16(ARG2), BLK1
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vmovdqa BLK0, IV1
    vmovdqa BLK1, IV2
    vpxor   BLK0, RDK, BLK0
    vpxor   BLK1, RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    decl    ROUNDS
    AES_DEC_3_BLKS    KEY ROUNDS RDK BLK0 BLK1 BLK2
    vpxor   BLK0, IV0, BLK0
    vpxor   BLK1, IV1, BLK1
    vpxor   BLK2, IV2, BLK2
    vmovdqu 32(ARG2), IV0
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    jmp     .Laes_cbc_dec_finish

.align 16
.Lproc_4_blks:
    vmovdqu (ARG2), BLK0
    vmovdqu 16(ARG2), BLK1
    vmovdqu 32(ARG2), BLK2
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vmovdqa BLK0, IV1
    vmovdqa BLK1, IV2
    vmovdqa BLK2, IV3
    vpxor   BLK0, RDK, BLK0
    vpxor   BLK1, RDK, BLK1
    vpxor   BLK2, RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    decl    ROUNDS
    AES_DEC_4_BLKS    KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3
    vpxor   BLK0, IV0, BLK0
    vpxor   BLK1, IV1, BLK1
    vpxor   BLK2, IV2, BLK2
    vpxor   BLK3, IV3, BLK3
    vmovdqu 48(ARG2), IV0
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    jmp     .Laes_cbc_dec_finish

.align 16
.Lproc_5_blks:
    vmovdqu (ARG2), BLK0
    vmovdqu 16(ARG2), BLK1
    vmovdqu 32(ARG2), BLK2
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vmovdqa BLK0, IV1
    vmovdqa BLK1, IV2
    vmovdqa BLK2, IV3
    vpxor   BLK0, RDK, BLK0
    vpxor   BLK1, RDK, BLK1
    vpxor   BLK2, RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    decl    ROUNDS
    AES_DEC_5_BLKS    KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4
    vpxor   BLK0, IV0, BLK0
    vpxor   BLK1, IV1, BLK1
    vpxor   BLK2, IV2, BLK2
    vpxor   BLK3, IV3, BLK3
    vpxor   48(ARG2), BLK4, BLK4
    vmovdqu 64(ARG2), IV0
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    jmp     .Laes_cbc_dec_finish

.align 16
.Lproc_6_blks:
    vmovdqu (ARG2), BLK0
    vmovdqu 16(ARG2), BLK1
    vmovdqu 32(ARG2), BLK2
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vmovdqa BLK0, IV1
    vmovdqa BLK1, IV2
    vmovdqa BLK2, IV3
    vpxor   BLK0, RDK, BLK0             // blocks 0..2 are already in registers
    vpxor   BLK1, RDK, BLK1
    vpxor   BLK2, RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    decl    ROUNDS
    AES_DEC_6_BLKS    KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5
    vpxor   BLK0, IV0, BLK0
    vpxor   BLK1, IV1, BLK1
    vpxor   BLK2, IV2, BLK2
    vpxor   BLK3, IV3, BLK3
    vpxor   48(ARG2), BLK4, BLK4
    vpxor   64(ARG2), BLK5, BLK5
    vmovdqu 80(ARG2), IV0
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    jmp     .Laes_cbc_dec_finish

.align 16
.Lproc_7_blks:
    vmovdqu (ARG2), BLK0
    vmovdqu 16(ARG2), BLK1
    vmovdqu 32(ARG2), BLK2
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vmovdqa BLK0, IV1
    vmovdqa BLK1, IV2
    vmovdqa BLK2, IV3
    vpxor   BLK0, RDK, BLK0             // blocks 0..2 are already in registers
    vpxor   BLK1, RDK, BLK1
    vpxor   BLK2, RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    vpxor   96(ARG2), RDK, BLK6
    decl    ROUNDS
    AES_DEC_7_BLKS    KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6
    vpxor   BLK0, IV0, BLK0
    vpxor   BLK1, IV1, BLK1
    vpxor   BLK2, IV2, BLK2
    vpxor   BLK3, IV3, BLK3
    vpxor   48(ARG2), BLK4, BLK4
    vpxor   64(ARG2), BLK5, BLK5
    vpxor   80(ARG2), BLK6, BLK6
    vmovdqu 96(ARG2), IV0
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    vmovdqu BLK6, 96(ARG3)
    // fall through

.align 16
.Laes_cbc_dec_finish:
    vmovdqu IV0, (ARG5)                 // hand the chaining value back to the caller
    // Wipe plaintext and round-key material from the vector registers.
    vpxor   BLK0, BLK0, BLK0
    vpxor   BLK1, BLK1, BLK1
    vpxor   BLK2, BLK2, BLK2
    vpxor   BLK3, BLK3, BLK3
    vpxor   BLK4, BLK4, BLK4
    vpxor   BLK5, BLK5, BLK5
    vpxor   BLK6, BLK6, BLK6
    vpxor   BLK7, BLK7, BLK7
    vpxor   RDK, RDK, RDK
    xorl    RET, RET                    // return 0
    ret
    .cfi_endproc
    .size CRYPT_AES_CBC_Decrypt,  .-CRYPT_AES_CBC_Decrypt
498
499#endif
500