• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_CBC)
18
19#include "crypt_arm.h"
20#include "crypt_aes_macro_armv8.s"
21
22.file    "crypt_aes_cbc_armv8.S"
23.text
24.arch	armv8-a+crypto
25
/*
 * Register aliases shared by CRYPT_AES_CBC_Encrypt and CRYPT_AES_CBC_Decrypt.
 */

/* The five function arguments, in the order they arrive (x0-x4). */
KEY         .req    x0      // key structure
IN          .req    x1      // input data
OUT         .req    x2      // output data
LEN         .req    x3      // remaining length in bytes
P_IV        .req    x4      // IV buffer

/* General-purpose scratch registers. */
KTMP        .req    x5      // running pointer into the round keys
ROUNDS      .req    w6      // round count read from the key structure

/* Data blocks processed in parallel. */
BLK0        .req    v0
BLK1        .req    v1
BLK2        .req    v2
BLK3        .req    v3
BLK4        .req    v4
BLK5        .req    v5
BLK6        .req    v6
BLK7        .req    v7

/*
 * Encryption: the complete key schedule is kept in registers.
 * KEY0_END holds (first round key XOR last round key).
 */
KEY0_END    .req    v16
KEY0        .req    v17
KEY1        .req    v18
KEY2        .req    v19
KEY3        .req    v20
KEY4        .req    v21
KEY5        .req    v22
KEY6        .req    v23
KEY7        .req    v24
KEY8        .req    v25
KEY9        .req    v26
KEY10       .req    v27
KEY11       .req    v28
KEY12       .req    v29
KEY13       .req    v30
KEY14       .req    v31

/*
 * The aliases below deliberately reuse registers already named above:
 *   IVENC            is v1       (same register as BLK1)
 *   IV0-IV7, IVT     are v17-v25 (same registers as KEY0-KEY8)
 *   RDK0-RDK2        are v26-v28 (same registers as KEY9-KEY11)
 * This is safe because the encrypt routine only uses BLK0, IVENC and KEY*,
 * while the decrypt routine only uses BLK0-BLK7, IV*, IVT and RDK*.
 */

/* Encryption: running CBC state (IV / previous ciphertext XOR plaintext). */
IVENC       .req    v1

/* Decryption: value XORed into each decrypted block, and the next IV. */
IV0         .req    v17
IV1         .req    v18
IV2         .req    v19
IV3         .req    v20
IV4         .req    v21
IV5         .req    v22
IV6         .req    v23
IV7         .req    v24
IVT         .req    v25

/* Decryption: round keys streamed from memory. */
RDK0        .req    v26
RDK1        .req    v27
RDK2        .req    v28
75
/*
 * ROUND block, key
 * One full (non-final) AES encryption round on a single 128-bit state.
 *   block: vector register holding the state; it is updated in place.
 *   key:   vector register holding the round key for this round.
 * aese adds the round key and applies SubBytes/ShiftRows, aesmc then
 * applies MixColumns.
 */
.macro ROUND block, key
    aese    \block, \key
    aesmc   \block, \block
.endm
85
/*
 * DEC8 rdk0s, rdk0, blk0 ... blk7, ktmp
 * One full (non-final) AES decryption round on eight blocks, followed by a
 * reload of the round-key register that was just used.
 *   rdk0s:     round-key register written as a load operand (e.g. v26.4s).
 *   rdk0:      the same register written as an AES operand (e.g. v26.16b).
 *   blk0-blk7: the eight states; each is updated in place.
 *   ktmp:      pointer to the next round key to fetch; advanced by 16 bytes.
 * On exit the register named by rdk0s/rdk0 holds the key loaded from ktmp.
 * The condition flags are not modified.
 */
.macro DEC8 rdk0s rdk0 blk0 blk1 blk2 blk3 blk4 blk5 blk6 blk7 ktmp
    /* Emit aesd + aesimc per block, in the order 0, 5, 1, 6, 2, 3, 4, 7. */
    .irp state, \blk0, \blk5, \blk1, \blk6, \blk2, \blk3, \blk4, \blk7
    aesd    \state, \rdk0
    aesimc  \state, \state
    .endr
    ld1     {\rdk0s}, [\ktmp], #16      // fetch the next round key into the register just consumed
.endm
111
/**
 * Function description: AES encryption in CBC mode, accelerated with the ARMv8 Crypto Extensions.
 * int32_t CRYPT_AES_CBC_Encrypt(const CRYPT_AES_Key *ctx,
 *                          const uint8_t *in,
 *                          uint8_t *out,
 *                          uint32_t len,
 *                          uint8_t *iv);
 * Input register:
 *        x0: Pointer to the key structure (round keys from offset 0, round count at offset 240)
 *        x1: Points to the input (plaintext) data
 *        x2: Points to the output (ciphertext) data
 *        x3: Length of the input data in bytes, which must be a multiple of 16.
 *            Only w3 is used: the prototype declares a 32-bit length and the
 *            procedure call standard leaves bits 63:32 of x3 unspecified.
 *        x4: Points to the 16-byte CBC IV; on return it holds the last ciphertext block
 * Change register: x0-x3, x6, x7, v0, v1, v16-v31
 * Output register: x0 (always 0)
 * Function/Macro Call: ROUND
 * Notes:
 *   - If len is smaller than 16 (including 0) the function returns 0 without
 *     reading in/iv or writing out/iv.
 *   - For a valid len the input is never read beyond in + len: once the last
 *     block has been fetched the input pointer stops advancing, and the
 *     look-ahead load of the final iteration re-reads that last block.
 *   - The round keys held in v16-v31, and v1 (which together with the last
 *     ciphertext block would reveal the last round key), are cleared before return.
 */
IN_STEP .req    x7                           // post-index step for IN: 16, or 0 once the last block is loaded

.globl    CRYPT_AES_CBC_Encrypt
.type    CRYPT_AES_CBC_Encrypt, %function
CRYPT_AES_CBC_Encrypt:
AARCH64_PACIASP
    mov w3, w3                               // zero-extend the 32-bit len into LEN (x3)
    mov IN_STEP, #16
    subs LEN, LEN, #16                       // LEN = bytes that follow the current block
    b.lo .Lescbcenc_empty                    // no complete block: nothing to do
    csel IN_STEP, xzr, IN_STEP, eq           // single block: keep IN on it
    ld1 {IVENC.16b}, [P_IV]                  // load IV
    ldr ROUNDS, [KEY, #240]                  // load rounds
    ld1 {BLK0.16b}, [IN], IN_STEP            // load in
    ld1 {KEY0.4s, KEY1.4s}, [KEY], #32       // load keys
    cmp ROUNDS, #12                          // flags stay live until b.lt / b.eq below
    ld1 {KEY2.4s, KEY3.4s}, [KEY], #32
    ld1 {KEY4.4s, KEY5.4s}, [KEY], #32
    ld1 {KEY6.4s, KEY7.4s}, [KEY], #32
    ld1 {KEY8.4s, KEY9.4s}, [KEY], #32
    eor IVENC.16b, IVENC.16b, BLK0.16b       // iv + in
    b.lt .Laes_cbc_128_start                 // rounds < 12

    ld1 {KEY10.4s, KEY11.4s}, [KEY], #32
    b.eq .Laes_cbc_192_start                 // rounds == 12
    ld1 {KEY12.4s, KEY13.4s}, [KEY], #32

.Laes_cbc_256_start:
    ld1 {KEY14.4s}, [KEY]
    ROUND IVENC.16b, KEY0.16b
    eor KEY0_END.16b, KEY0.16b, KEY14.16b    // key0 + keyEnd
    b .Laes_cbc_256_round_loop

    /*
     * Loop invariant at .Laes_cbc_*_round_loop:
     *   LEN = bytes that follow the block being encrypted;
     *   IN  = next block if there is one, otherwise the current (last) block.
     * Inside the loop the KEY0 register is reused for the look-ahead value
     * (next plaintext + key0 + keyEnd). IVENC is left one final key addition
     * short of the ciphertext, so the aese at the top of the next pass adds
     * ciphertext + next plaintext + key0 in a single step.
     */
.Laes_cbc_256_loop:
    ROUND IVENC.16b, KEY0.16b
    st1 {BLK0.16b}, [OUT], #16

.Laes_cbc_256_round_loop:
    ROUND IVENC.16b, KEY1.16b
    ROUND IVENC.16b, KEY2.16b
    subs LEN, LEN, #16                       // hs: another block follows, eq: it is the last one
    ROUND IVENC.16b, KEY3.16b
    ROUND IVENC.16b, KEY4.16b
    csel IN_STEP, xzr, IN_STEP, eq           // stop advancing IN after the last block
    ROUND IVENC.16b, KEY5.16b
    ld1 {KEY0.16b}, [IN], IN_STEP            // load IN
    ROUND IVENC.16b, KEY6.16b
    ROUND IVENC.16b, KEY7.16b
    ROUND IVENC.16b, KEY8.16b
    ROUND IVENC.16b, KEY9.16b
    ROUND IVENC.16b, KEY10.16b
    ROUND IVENC.16b, KEY11.16b
    ROUND IVENC.16b, KEY12.16b
    aese IVENC.16b, KEY13.16b
    eor KEY0.16b, KEY0.16b, KEY0_END.16b     // IN + KEY0 + KEYEND
    eor BLK0.16b, IVENC.16b, KEY14.16b       // enc OK
    b.hs .Laes_cbc_256_loop
    b .Lescbcenc_finish

.Laes_cbc_128_start:
    ld1 {KEY10.4s}, [KEY]
    ROUND IVENC.16b, KEY0.16b
    eor KEY0_END.16b, KEY0.16b, KEY10.16b    // key0 + keyEnd
    b .Laes_cbc_128_round_loop

.Laes_cbc_128_loop:
    ROUND IVENC.16b, KEY0.16b
    st1 {BLK0.16b}, [OUT], #16

.Laes_cbc_128_round_loop:
    ROUND IVENC.16b, KEY1.16b
    ROUND IVENC.16b, KEY2.16b
    subs LEN, LEN, #16                       // hs: another block follows, eq: it is the last one
    ROUND IVENC.16b, KEY3.16b
    ROUND IVENC.16b, KEY4.16b
    csel IN_STEP, xzr, IN_STEP, eq           // stop advancing IN after the last block
    ROUND IVENC.16b, KEY5.16b
    ld1 {KEY0.16b}, [IN], IN_STEP            // load IN
    ROUND IVENC.16b, KEY6.16b
    ROUND IVENC.16b, KEY7.16b
    ROUND IVENC.16b, KEY8.16b
    aese IVENC.16b, KEY9.16b
    eor KEY0.16b, KEY0.16b, KEY0_END.16b     // IN + KEY0 + KEYEND
    eor BLK0.16b, IVENC.16b, KEY10.16b       // enc OK
    b.hs .Laes_cbc_128_loop
    b .Lescbcenc_finish

.Laes_cbc_192_start:
    ld1 {KEY12.4s}, [KEY]
    ROUND IVENC.16b, KEY0.16b
    eor KEY0_END.16b, KEY0.16b, KEY12.16b    // key0 + keyEnd
    b .Laes_cbc_192_round_loop

.Laes_cbc_192_loop:
    ROUND IVENC.16b, KEY0.16b
    st1 {BLK0.16b}, [OUT], #16

.Laes_cbc_192_round_loop:
    ROUND IVENC.16b, KEY1.16b
    ROUND IVENC.16b, KEY2.16b
    subs LEN, LEN, #16                       // hs: another block follows, eq: it is the last one
    ROUND IVENC.16b, KEY3.16b
    ROUND IVENC.16b, KEY4.16b
    csel IN_STEP, xzr, IN_STEP, eq           // stop advancing IN after the last block
    ROUND IVENC.16b, KEY5.16b
    ld1 {KEY0.16b}, [IN], IN_STEP            // load IN
    ROUND IVENC.16b, KEY6.16b
    ROUND IVENC.16b, KEY7.16b
    ROUND IVENC.16b, KEY8.16b
    ROUND IVENC.16b, KEY9.16b
    ROUND IVENC.16b, KEY10.16b
    aese IVENC.16b, KEY11.16b
    eor KEY0.16b, KEY0.16b, KEY0_END.16b     // IN + KEY0 + KEYEND
    eor BLK0.16b, IVENC.16b, KEY12.16b       // enc OK
    b.hs .Laes_cbc_192_loop

.Lescbcenc_finish:
    st1 {BLK0.16b}, [OUT], #16               // last ciphertext block
    st1 {BLK0.16b}, [P_IV]                   // ... which is also the next IV

    /* Clear key material from the vector registers. */
    eor IVENC.16b, IVENC.16b, IVENC.16b      // IVENC + BLK0 equals the last round key
    eor KEY0_END.16b, KEY0_END.16b, KEY0_END.16b
    eor KEY0.16b, KEY0.16b, KEY0.16b
    eor KEY1.16b, KEY1.16b, KEY1.16b
    eor KEY2.16b, KEY2.16b, KEY2.16b
    eor KEY3.16b, KEY3.16b, KEY3.16b
    eor KEY4.16b, KEY4.16b, KEY4.16b
    eor KEY5.16b, KEY5.16b, KEY5.16b
    eor KEY6.16b, KEY6.16b, KEY6.16b
    eor KEY7.16b, KEY7.16b, KEY7.16b
    eor KEY8.16b, KEY8.16b, KEY8.16b
    eor KEY9.16b, KEY9.16b, KEY9.16b
    eor KEY10.16b, KEY10.16b, KEY10.16b
    eor KEY11.16b, KEY11.16b, KEY11.16b
    eor KEY12.16b, KEY12.16b, KEY12.16b
    eor KEY13.16b, KEY13.16b, KEY13.16b
    eor KEY14.16b, KEY14.16b, KEY14.16b

.Lescbcenc_empty:
    mov x0, #0
AARCH64_AUTIASP
    ret
.size CRYPT_AES_CBC_Encrypt, .-CRYPT_AES_CBC_Encrypt
242
/**
 * Function description: AES decryption in CBC mode, accelerated with the ARMv8 Crypto Extensions.
 * int32_t CRYPT_AES_CBC_Decrypt(const CRYPT_AES_Key *ctx,
 *                          const uint8_t *in,
 *                          uint8_t *out,
 *                          uint32_t len,
 *                          uint8_t *iv);
 * Input register:
 *        x0: Pointer to the key structure (round keys from offset 0, round count at offset 240)
 *        x1: Points to the input (ciphertext) data
 *        x2: Points to the output (plaintext) data
 *        x3: Length of the input data in bytes, which must be a multiple of 16.
 *            Only w3 is used: the prototype declares a 32-bit length and the
 *            procedure call standard leaves bits 63:32 of x3 unspecified.
 *        x4: Points to the 16-byte CBC IV; on return it holds the last ciphertext block
 *            (unchanged when len is 0)
 * Change register: x0-x3, x5, x6, v0-v7, v17-v28. The AES_DEC_n_BLK(S) macros come from
 *            crypt_aes_macro_armv8.s and are not visible here; they may clobber more.
 * Output register: x0 (always 0)
 * Function/Macro Call: DEC8, AES_DEC_1_BLK, AES_DEC_2_BLKS, AES_DEC_3_BLKS,
 *             AES_DEC_4_BLKS, AES_DEC_5_BLKS, AES_DEC_6_BLKS, AES_DEC_7_BLKS
 * Notes:
 *   - Every path reads the ciphertext it needs as the next IV before storing
 *     the corresponding plaintext.
 *   - On exit RDK0-RDK2 and IV1-IV7 are cleared. After the 8-block path
 *     IV1-IV7 hold ciphertext XOR last round key, so clearing RDK2 alone
 *     would not remove the last round key from the register file.
 */
.globl	CRYPT_AES_CBC_Decrypt
.type	CRYPT_AES_CBC_Decrypt, %function
CRYPT_AES_CBC_Decrypt:
AARCH64_PACIASP
    mov w3, w3                               // zero-extend the 32-bit len into LEN (x3)
    ld1	{IV0.16b}, [P_IV]

    /* Dispatch on the remaining length: >= 128 bytes go to the 8-block loop, 16..112 to a tail handler. */
.Lcbc_aesdec_start:
    cmp    LEN, #64
    b.ge      .Lcbc_dec_above_equal_4_blks
    cmp    LEN, #32
    b.ge      .Lcbc_dec_above_equal_2_blks
    cmp    LEN, #0
    b.eq      .Lcbc_aesdec_finish
    b     .Lcbc_dec_proc_1_blk

.Lcbc_dec_above_equal_2_blks:
    cmp    LEN, #48
    b.lt      .Lcbc_dec_proc_2_blks
    b     .Lcbc_dec_proc_3_blks

.Lcbc_dec_above_equal_4_blks:
    cmp    LEN, #96
    b.ge      .Lcbc_dec_above_equal_6_blks
    cmp    LEN, #80
    b.lt      .Lcbc_dec_proc_4_blks
    b     .Lcbc_dec_proc_5_blks

.Lcbc_dec_above_equal_6_blks:
    cmp    LEN, #112
    b.lt      .Lcbc_dec_proc_6_blks
    cmp    LEN, #128
    b.lt      .Lcbc_dec_proc_7_blks

.align	4
.Lcbc_aesdec_8_blks_loop:
    ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN], #64
    mov KTMP, KEY                            // walk the round keys with KTMP so KEY stays intact
    ldr ROUNDS, [KEY, #240]
    ld1	{BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [IN], #64

    /* Keep the ciphertext: block i is the chaining value of block i + 1. */
    mov	IV1.16b, BLK0.16b
    mov	IV2.16b, BLK1.16b
    mov	IV3.16b, BLK2.16b
    ld1 {RDK0.4s, RDK1.4s}, [KTMP], #32      // round keys 0 and 1
    mov	IV4.16b, BLK3.16b
    mov	IV5.16b, BLK4.16b
    mov	IV6.16b, BLK5.16b
    mov	IV7.16b, BLK6.16b
    mov	IVT.16b, BLK7.16b                    // IV for the blocks after these eight

    /* Rounds with keys 0..7. Each DEC8 reloads the key register it has just used. */
    DEC8 RDK0.4s, RDK0.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    DEC8 RDK1.4s, RDK1.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    DEC8 RDK0.4s, RDK0.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    DEC8 RDK1.4s, RDK1.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    DEC8 RDK0.4s, RDK0.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    DEC8 RDK1.4s, RDK1.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    DEC8 RDK0.4s, RDK0.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    DEC8 RDK1.4s, RDK1.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP

    cmp ROUNDS, #12                          // DEC8 leaves the flags alone, so b.eq below reuses this compare
    b.lt .Ldec_8_blks_last                   // rounds < 12
    DEC8 RDK0.4s, RDK0.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    DEC8 RDK1.4s, RDK1.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    b.eq .Ldec_8_blks_last                   // rounds == 12
    DEC8 RDK0.4s, RDK0.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP
    DEC8 RDK1.4s, RDK1.16b, BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b, BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b, KTMP

    /*
     * Last two rounds. RDK0/RDK1 hold the two keys before the last one and
     * RDK2 receives the last round key. The last key is folded into the
     * chaining values first, so one eor per block applies both the final key
     * addition and the CBC XOR.
     */
.Ldec_8_blks_last:
    ld1 {RDK2.4s}, [KTMP]
    aesd BLK0.16b, RDK0.16b
    aesimc BLK0.16b, BLK0.16b
    aesd BLK1.16b, RDK0.16b
    aesimc BLK1.16b, BLK1.16b
    aesd BLK2.16b, RDK0.16b
    aesimc BLK2.16b, BLK2.16b
    eor IV0.16b, IV0.16b, RDK2.16b
    aesd BLK3.16b, RDK0.16b
    aesimc BLK3.16b, BLK3.16b
    eor IV1.16b, IV1.16b, RDK2.16b
    aesd BLK4.16b, RDK0.16b
    aesimc BLK4.16b, BLK4.16b
    eor IV2.16b, IV2.16b, RDK2.16b
    aesd BLK5.16b, RDK0.16b
    aesimc BLK5.16b, BLK5.16b
    eor IV3.16b, IV3.16b, RDK2.16b
    aesd BLK6.16b, RDK0.16b
    aesimc BLK6.16b, BLK6.16b
    eor IV4.16b, IV4.16b, RDK2.16b
    aesd BLK7.16b, RDK0.16b
    aesimc BLK7.16b, BLK7.16b
    eor IV5.16b, IV5.16b, RDK2.16b

    aesd BLK0.16b, RDK1.16b
    aesd BLK1.16b, RDK1.16b
    eor IV6.16b, IV6.16b, RDK2.16b
    aesd BLK2.16b, RDK1.16b
    aesd BLK3.16b, RDK1.16b
    eor IV7.16b, IV7.16b, RDK2.16b
    aesd BLK4.16b, RDK1.16b
    aesd BLK5.16b, RDK1.16b
    aesd BLK6.16b, RDK1.16b
    aesd BLK7.16b, RDK1.16b

    sub	LEN, LEN, #128
    eor	BLK0.16b, BLK0.16b, IV0.16b
    eor	BLK1.16b, BLK1.16b, IV1.16b
    eor	BLK2.16b, BLK2.16b, IV2.16b
    eor	BLK3.16b, BLK3.16b, IV3.16b
    st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    eor	BLK4.16b, BLK4.16b, IV4.16b
    eor	BLK5.16b, BLK5.16b, IV5.16b
    cmp LEN, #0
    eor	BLK6.16b, BLK6.16b, IV6.16b
    eor	BLK7.16b, BLK7.16b, IV7.16b
    mov	IV0.16b, IVT.16b                     // last ciphertext block becomes the running IV
    st1	{BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [OUT], #64
    b.eq  .Lcbc_aesdec_finish
    cmp LEN, #128
    b.lt    .Lcbc_aesdec_start               // fewer than 8 blocks left: use a tail handler
    b   .Lcbc_aesdec_8_blks_loop

    /*
     * Tail handlers for 1..7 blocks. Each loads the blocks to decrypt, loads
     * the same ciphertext again as chaining values (IV1..), decrypts with the
     * matching AES_DEC_n macro, applies the CBC XOR, reloads IV0 from the last
     * ciphertext block and only then stores the plaintext.
     */
.Lcbc_dec_proc_1_blk:
    ld1	{BLK0.16b}, [IN]
    AES_DEC_1_BLK KEY BLK0.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    eor	BLK0.16b, BLK0.16b, IV0.16b
    ld1	{IV0.16b}, [IN]                      // next IV = this ciphertext block
    st1	{BLK0.16b}, [OUT]
    b		.Lcbc_aesdec_finish

.Lcbc_dec_proc_2_blks:
    ld1	{BLK0.16b, BLK1.16b}, [IN]
    ld1	{IV1.16b}, [IN], #16
    AES_DEC_2_BLKS KEY BLK0.16b BLK1.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    eor	BLK0.16b, BLK0.16b, IV0.16b
    eor	BLK1.16b, BLK1.16b, IV1.16b
    ld1	{IV0.16b}, [IN]                      // next IV = last ciphertext block
    st1	{BLK0.16b, BLK1.16b}, [OUT]
    b		.Lcbc_aesdec_finish

.Lcbc_dec_proc_3_blks:
    ld1	{BLK0.16b, BLK1.16b, BLK2.16b}, [IN]
    ld1	{IV1.16b, IV2.16b}, [IN], #32
    AES_DEC_3_BLKS KEY BLK0.16b BLK1.16b BLK2.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    eor	BLK0.16b, BLK0.16b, IV0.16b
    eor	BLK1.16b, BLK1.16b, IV1.16b
    eor	BLK2.16b, BLK2.16b, IV2.16b
    ld1	{IV0.16b}, [IN]                      // next IV = last ciphertext block
    st1	{BLK0.16b, BLK1.16b, BLK2.16b}, [OUT]
    b		.Lcbc_aesdec_finish

.Lcbc_dec_proc_4_blks:
    ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN]
    ld1	{IV1.16b, IV2.16b, IV3.16b}, [IN], #48
    AES_DEC_4_BLKS KEY BLK0.16b BLK1.16b BLK2.16b BLK3.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    eor	BLK0.16b, BLK0.16b, IV0.16b
    eor	BLK1.16b, BLK1.16b, IV1.16b
    eor	BLK2.16b, BLK2.16b, IV2.16b
    eor	BLK3.16b, BLK3.16b, IV3.16b
    ld1	{IV0.16b}, [IN]                      // next IV = last ciphertext block
    st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT]
    b		.Lcbc_aesdec_finish

.Lcbc_dec_proc_5_blks:
    ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN]
    ld1	{IV1.16b, IV2.16b, IV3.16b, IV4.16b}, [IN], #64
    ld1	{BLK4.16b}, [IN]
    AES_DEC_5_BLKS KEY BLK0.16b BLK1.16b BLK2.16b BLK3.16b BLK4.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    eor	BLK0.16b, BLK0.16b, IV0.16b
    eor	BLK1.16b, BLK1.16b, IV1.16b
    eor	BLK2.16b, BLK2.16b, IV2.16b
    eor	BLK3.16b, BLK3.16b, IV3.16b
    eor	BLK4.16b, BLK4.16b, IV4.16b
    ld1	{IV0.16b}, [IN]                      // next IV = last ciphertext block
    st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1	{BLK4.16b}, [OUT]
    b		.Lcbc_aesdec_finish

.Lcbc_dec_proc_6_blks:
    ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN]
    ld1	{IV1.16b, IV2.16b, IV3.16b, IV4.16b}, [IN], #64
    ld1	{BLK4.16b, BLK5.16b}, [IN]
    ld1	{IV5.16b}, [IN], #16
    AES_DEC_6_BLKS KEY BLK0.16b BLK1.16b BLK2.16b BLK3.16b BLK4.16b BLK5.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    eor	BLK0.16b, BLK0.16b, IV0.16b
    eor	BLK1.16b, BLK1.16b, IV1.16b
    eor	BLK2.16b, BLK2.16b, IV2.16b
    eor	BLK3.16b, BLK3.16b, IV3.16b
    eor	BLK4.16b, BLK4.16b, IV4.16b
    eor	BLK5.16b, BLK5.16b, IV5.16b
    ld1	{IV0.16b}, [IN]                      // next IV = last ciphertext block
    st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1	{BLK4.16b, BLK5.16b}, [OUT]
    b		.Lcbc_aesdec_finish

.Lcbc_dec_proc_7_blks:
    ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN]
    ld1	{IV1.16b, IV2.16b, IV3.16b, IV4.16b}, [IN], #64
    ld1	{BLK4.16b, BLK5.16b, BLK6.16b}, [IN]
    ld1	{IV5.16b, IV6.16b}, [IN], #32
    AES_DEC_7_BLKS KEY BLK0.16b BLK1.16b BLK2.16b BLK3.16b BLK4.16b BLK5.16b BLK6.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    eor	BLK0.16b, BLK0.16b, IV0.16b
    eor	BLK1.16b, BLK1.16b, IV1.16b
    eor	BLK2.16b, BLK2.16b, IV2.16b
    eor	BLK3.16b, BLK3.16b, IV3.16b
    eor	BLK4.16b, BLK4.16b, IV4.16b
    eor	BLK5.16b, BLK5.16b, IV5.16b
    eor	BLK6.16b, BLK6.16b, IV6.16b

    ld1	{IV0.16b}, [IN]                      // next IV = last ciphertext block
    st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1	{BLK4.16b, BLK5.16b, BLK6.16b}, [OUT]

.Lcbc_aesdec_finish:
    st1	{IV0.16b}, [P_IV]                    // hand the running IV back to the caller
    mov x0, #0

    /* Clear the round-key registers. */
    eor RDK0.16b, RDK0.16b, RDK0.16b
    eor RDK1.16b, RDK1.16b, RDK1.16b
    eor RDK2.16b, RDK2.16b, RDK2.16b

    /* IV1-IV7 may still hold ciphertext XOR last round key from the 8-block path. */
    eor IV1.16b, IV1.16b, IV1.16b
    eor IV2.16b, IV2.16b, IV2.16b
    eor IV3.16b, IV3.16b, IV3.16b
    eor IV4.16b, IV4.16b, IV4.16b
    eor IV5.16b, IV5.16b, IV5.16b
    eor IV6.16b, IV6.16b, IV6.16b
    eor IV7.16b, IV7.16b, IV7.16b
AARCH64_AUTIASP
    ret
.size	CRYPT_AES_CBC_Decrypt, .-CRYPT_AES_CBC_Decrypt
482
483#endif
484