/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */

13	.text
14	.align		4
15
/*
 * There are several ways to instantiate this code:
 * - no interleave, all inline
 * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
 * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
 * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
 * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
 *
 * Macros imported by this code:
 * - enc_prepare	- setup NEON registers for encryption
 * - dec_prepare	- setup NEON registers for decryption
 * - enc_switch_key	- change to new key after having prepared for encryption
 * - encrypt_block	- encrypt a single block
 * - decrypt_block	- decrypt a single block
 * - encrypt_block2x	- encrypt 2 blocks in parallel (if INTERLEAVE == 2)
 * - decrypt_block2x	- decrypt 2 blocks in parallel (if INTERLEAVE == 2)
 * - encrypt_block4x	- encrypt 4 blocks in parallel (if INTERLEAVE == 4)
 * - decrypt_block4x	- decrypt 4 blocks in parallel (if INTERLEAVE == 4)
 */

/*
 * Selection of how the N-way block primitives are reached.
 *
 * Every mode routine below invokes do_{en,de}crypt_block{2,4}x with the
 * blocks in v0-v1 (2x) or v0-v3 (4x), the round count in w3 and the round
 * key pointer in x2.  x6 and w7 are also handed to the imported macros.
 * NOTE(review): x6/w7 are presumably temporaries the macros may overwrite -
 * confirm against the definitions in aes-ce.S / aes-neon.S.
 */
#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
/*
 * Out-of-line variant: the N-way primitive is emitted once as a local
 * subroutine and reached with 'bl'.  'bl' overwrites x30, so callers
 * bracket their body with FRAME_PUSH / FRAME_POP, which save and restore
 * x29/x30 on the stack.  (';' is a statement separator here, not a comment.)
 */
#define FRAME_PUSH	stp x29, x30, [sp,#-16]! ; mov x29, sp
#define FRAME_POP	ldp x29, x30, [sp],#16

#if INTERLEAVE == 2

aes_encrypt_block2x:
	encrypt_block2x	v0, v1, w3, x2, x6, w7
	ret
ENDPROC(aes_encrypt_block2x)

aes_decrypt_block2x:
	decrypt_block2x	v0, v1, w3, x2, x6, w7
	ret
ENDPROC(aes_decrypt_block2x)

#elif INTERLEAVE == 4

aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
	ret
ENDPROC(aes_encrypt_block4x)

aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
	ret
ENDPROC(aes_decrypt_block4x)

#else
#error INTERLEAVE should equal 2 or 4
#endif

	/*
	 * All four wrappers are defined regardless of INTERLEAVE; only the
	 * ones matching the configured width are expanded by the code below,
	 * so the unused pair never references its (undefined) subroutine.
	 */
	.macro		do_encrypt_block2x
	bl		aes_encrypt_block2x
	.endm

	.macro		do_decrypt_block2x
	bl		aes_decrypt_block2x
	.endm

	.macro		do_encrypt_block4x
	bl		aes_encrypt_block4x
	.endm

	.macro		do_decrypt_block4x
	bl		aes_decrypt_block4x
	.endm

#else
/*
 * Inline variant (or no interleave at all): no 'bl' is issued, x30 stays
 * intact, so the frame macros expand to nothing.
 */
#define FRAME_PUSH
#define FRAME_POP

	.macro		do_encrypt_block2x
	encrypt_block2x	v0, v1, w3, x2, x6, w7
	.endm

	.macro		do_decrypt_block2x
	decrypt_block2x	v0, v1, w3, x2, x6, w7
	.endm

	.macro		do_encrypt_block4x
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
	.endm

	.macro		do_decrypt_block4x
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
	.endm

#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, int first)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, int first)
	 */

/*
 * aes_ecb_encrypt - encrypt 'blocks' consecutive 16-byte blocks, each one
 * independently of the others.
 *
 * Registers as used below:
 *   x0  out     - written 16 bytes per block, post-incremented
 *   x1  in      - read 16 bytes per block, post-incremented
 *   x2  rk      - passed to enc_prepare / the cipher macros
 *   w3  rounds  - passed to enc_prepare / the cipher macros
 *   w4  blocks  - remaining block count, counted down to zero
 *   w5  first   - non-zero: run enc_prepare before processing
 *
 * x5/w6 (and x6/w7 in the N-way path) are handed to the imported cipher
 * macros.  NOTE(review): presumably as temporaries - confirm against the
 * macro definitions.
 *
 * blocks == 0 returns without touching 'in' or 'out' in every build flavour.
 */
AES_ENTRY(aes_ecb_encrypt)
	FRAME_PUSH
	cbz		w5, .LecbencloopNx	/* first == 0: skip key setup */

	enc_prepare	w3, x2, x5

.LecbencloopNx:
#if INTERLEAVE >= 2
	subs		w4, w4, #INTERLEAVE
	bmi		.Lecbenc1x		/* fewer than INTERLEAVE left */
#if INTERLEAVE == 2
	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
	do_encrypt_block2x
	st1		{v0.16b-v1.16b}, [x0], #32
#else
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	do_encrypt_block4x
	st1		{v0.16b-v3.16b}, [x0], #64
#endif
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #INTERLEAVE	/* undo the last subtraction */
	beq		.Lecbencout		/* nothing left over */
#else
	/*
	 * Without the N-way loop nothing has checked the count yet; the
	 * loop below tests it only after processing a block, so a zero
	 * count would wrap around.
	 */
	cbz		w4, .Lecbencout
#endif
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	FRAME_POP
	ret
AES_ENDPROC(aes_ecb_encrypt)


/*
 * aes_ecb_decrypt - decrypt 'blocks' consecutive 16-byte blocks, each one
 * independently of the others.
 *
 * Registers as used below:
 *   x0  out     - written 16 bytes per block, post-incremented
 *   x1  in      - read 16 bytes per block, post-incremented
 *   x2  rk      - passed to dec_prepare / the cipher macros
 *   w3  rounds  - passed to dec_prepare / the cipher macros
 *   w4  blocks  - remaining block count, counted down to zero
 *   w5  first   - non-zero: run dec_prepare before processing
 *
 * x5/w6 (and x6/w7 in the N-way path) are handed to the imported cipher
 * macros.  NOTE(review): presumably as temporaries - confirm against the
 * macro definitions.
 *
 * blocks == 0 returns without touching 'in' or 'out' in every build flavour.
 */
AES_ENTRY(aes_ecb_decrypt)
	FRAME_PUSH
	cbz		w5, .LecbdecloopNx	/* first == 0: skip key setup */

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
#if INTERLEAVE >= 2
	subs		w4, w4, #INTERLEAVE
	bmi		.Lecbdec1x		/* fewer than INTERLEAVE left */
#if INTERLEAVE == 2
	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
	do_decrypt_block2x
	st1		{v0.16b-v1.16b}, [x0], #32
#else
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	do_decrypt_block4x
	st1		{v0.16b-v3.16b}, [x0], #64
#endif
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #INTERLEAVE	/* undo the last subtraction */
	beq		.Lecbdecout		/* nothing left over */
#else
	/*
	 * Without the N-way loop nothing has checked the count yet; the
	 * loop below tests it only after processing a block, so a zero
	 * count would wrap around.
	 */
	cbz		w4, .Lecbdecout
#endif
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	FRAME_POP
	ret
AES_ENDPROC(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[], int first)
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[], int first)
	 */

/*
 * aes_cbc_encrypt - CBC encryption: each plaintext block is XORed with the
 * previous ciphertext block (the iv for the very first one) before being
 * encrypted.  The chain is inherently serial, so there is no N-way path
 * and no frame is needed.
 *
 * Registers as used below:
 *   x0  out     - written 16 bytes per block, post-incremented
 *   x1  in      - read 16 bytes per block, post-incremented
 *   x2  rk, w3 rounds - passed to enc_prepare / encrypt_block
 *   w4  blocks  - remaining block count
 *   x5  iv      - read when first != 0; always written on return
 *   w6  first   - non-zero: load the iv into v0 and run enc_prepare
 *
 * v0 carries the chaining value.  When first == 0 it is NOT reloaded, so
 * the routine relies on v0 still holding the value left by the previous
 * call.
 */
AES_ENTRY(aes_cbc_encrypt)
	cbz		w6, .Lcbcencstart

	ld1		{v0.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencstart:
	/*
	 * The loop tests the count only after processing a block, so a zero
	 * count would wrap around; skip straight to the iv write-back.
	 */
	cbz		w4, .Lcbcencout
.Lcbcencloop:
	ld1		{v1.16b}, [x1], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v0.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)


/*
 * aes_cbc_decrypt - CBC decryption: each ciphertext block is decrypted and
 * then XORed with the previous ciphertext block (the iv for the first one).
 * Decryption of separate blocks is independent, so the N-way primitives
 * can be used.
 *
 * Registers as used below:
 *   x0  out     - written 16 bytes per block, post-incremented
 *   x1  in      - read 16 bytes per block, post-incremented
 *   x2  rk, w3 rounds - passed to dec_prepare / the cipher macros
 *   w4  blocks  - remaining block count
 *   x5  iv      - read when first != 0; always written on return
 *   w6  first   - non-zero: load the iv into v7 and run dec_prepare
 *
 * v7 carries the chaining value (previous ciphertext block).  When
 * first == 0 it is NOT reloaded, so the routine relies on v7 still holding
 * the value left by the previous call.
 */
AES_ENTRY(aes_cbc_decrypt)
	FRAME_PUSH
	cbz		w6, .LcbcdecloopNx

	ld1		{v7.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
#if INTERLEAVE >= 2
	subs		w4, w4, #INTERLEAVE
	bmi		.Lcbcdec1x		/* fewer than INTERLEAVE left */
#if INTERLEAVE == 2
	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
	mov		v2.16b, v0.16b			/* keep ct0 */
	mov		v3.16b, v1.16b			/* keep ct1 */
	do_decrypt_block2x
	eor		v0.16b, v0.16b, v7.16b		/* pt0 = D(ct0) ^ iv */
	eor		v1.16b, v1.16b, v2.16b		/* pt1 = D(ct1) ^ ct0 */
	mov		v7.16b, v3.16b			/* ct1 is next iv */
	st1		{v0.16b-v1.16b}, [x0], #32
#else
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	mov		v4.16b, v0.16b			/* keep ct0..ct2 */
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	do_decrypt_block4x
	/*
	 * ct3 was not copied to a register before decryption; step the
	 * input pointer back one block and reload it as the next iv once
	 * the old iv in v7 has been consumed.
	 */
	sub		x1, x1, #16
	eor		v0.16b, v0.16b, v7.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
#endif
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #INTERLEAVE	/* undo the last subtraction */
	beq		.Lcbcdecout		/* nothing left over */
#else
	/*
	 * Without the N-way loop nothing has checked the count yet; the
	 * loop below tests it only after processing a block, so a zero
	 * count would wrap around.
	 */
	cbz		w4, .Lcbcdecout
#endif
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
	mov		v7.16b, v1.16b			/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	FRAME_POP
	st1		{v7.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_decrypt)


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[], int first)
	 */

/*
 * aes_ctr_encrypt - CTR mode: encrypt successive counter blocks and XOR the
 * result into the input.
 *
 * Registers as used below:
 *   x0  out     - written, post-incremented
 *   x1  in      - read, post-incremented
 *   x2  rk, w3 rounds - passed to enc_prepare / the cipher macros
 *   w4  blocks  - > 0: number of full 16-byte blocks
 *                 < 0: a single partial block; only 8 bytes of input are
 *                      read and 8 bytes of output written (.Lctrhalfblock)
 *                 = 0: nothing is processed
 *   x5  ctr     - counter block; read when first != 0, written back on
 *                 return except on the partial-block path
 *   w6  first   - non-zero: run enc_prepare and load the counter into v4
 *
 * v4 holds the counter block as it appears in memory.  x8 holds the last
 * 8 bytes of it byte-reversed ("swabbed"), so that plain integer adds can
 * step it.  When first == 0, v4 is NOT reloaded, so the routine relies on
 * v4 still holding the value left by the previous call.
 */
AES_ENTRY(aes_ctr_encrypt)
	FRAME_PUSH
	cbz		w6, .Lctrnotfirst	/* 1st time around? */
	enc_prepare	w3, x2, x6
	ld1		{v4.16b}, [x5]

.Lctrnotfirst:
	umov		x8, v4.d[1]		/* keep swabbed ctr in reg */
	rev		x8, x8
#if INTERLEAVE >= 2
	/*
	 * The N-way path steps the counter without propagating a carry
	 * (the 4x variant only touches the low 32 bits).  If adding 'blocks'
	 * to the low word would carry, fall back to the 1x loop, which
	 * handles carries explicitly.
	 */
	cmn		w8, w4			/* 32 bit overflow? */
	bcs		.Lctrloop
.LctrloopNx:
	subs		w4, w4, #INTERLEAVE
	bmi		.Lctr1x			/* fewer than INTERLEAVE left */
#if INTERLEAVE == 2
	mov		v0.8b, v4.8b		/* first 8 bytes of the ctr block */
	mov		v1.8b, v4.8b
	rev		x7, x8			/* ctr + 0, back in memory order */
	add		x8, x8, #1
	ins		v0.d[1], x7
	rev		x7, x8			/* ctr + 1 */
	add		x8, x8, #1
	ins		v1.d[1], x7
	ld1		{v2.16b-v3.16b}, [x1], #32	/* get 2 input blocks */
	do_encrypt_block2x
	eor		v0.16b, v0.16b, v2.16b
	eor		v1.16b, v1.16b, v3.16b
	st1		{v0.16b-v1.16b}, [x0], #32
#else
	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
	dup		v7.4s, w8		/* low ctr word in every lane */
	mov		v0.16b, v4.16b		/* block 0 uses ctr as is */
	add		v7.4s, v7.4s, v8.4s	/* ctr+1, ctr+2, ctr+3 */
	mov		v1.16b, v4.16b
	rev32		v8.16b, v7.16b		/* back to memory byte order */
	mov		v2.16b, v4.16b
	mov		v3.16b, v4.16b
	mov		v1.s[3], v8.s[0]
	mov		v2.s[3], v8.s[1]
	mov		v3.s[3], v8.s[2]
	/* v7 is free again from here; v0-v3 are occupied by the ctr blocks */
	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
	do_encrypt_block4x
	eor		v0.16b, v5.16b, v0.16b
	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
	eor		v1.16b, v6.16b, v1.16b
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	add		x8, x8, #INTERLEAVE
#endif
	rev		x7, x8			/* publish the stepped ctr in v4 */
	ins		v4.d[1], x7
	cbz		w4, .Lctrout
	b		.LctrloopNx
.Lctr1x:
	adds		w4, w4, #INTERLEAVE	/* undo the last subtraction */
	beq		.Lctrout		/* nothing left over */
#else
	/*
	 * Without the N-way loop nothing has checked the count yet, and the
	 * 1x loop would treat a zero count as the partial-block case after
	 * its decrement.
	 */
	cbz		w4, .Lctrout
#endif
.Lctrloop:
	mov		v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	/* rev/ins leave the flags alone, so bcs still sees the adds carry */
	adds		x8, x8, #1		/* increment BE ctr */
	rev		x7, x8
	ins		v4.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w4, w4, #1
	bmi		.Lctrhalfblock		/* blocks < 0 means 1/2 block */
	ld1		{v3.16b}, [x1], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x0], #16
	bne		.Lctrloop		/* flags still from the subs above */

.Lctrout:
	st1		{v4.16b}, [x5]		/* return next CTR value */
	FRAME_POP
	ret

.Lctrhalfblock:
	/* 8 bytes in, 8 bytes out; the counter is not written back here */
	ld1		{v3.8b}, [x1]
	eor		v3.8b, v0.8b, v3.8b
	st1		{v3.8b}, [x0]
	FRAME_POP
	ret

.Lctrcarry:
	umov		x7, v4.d[0]		/* load upper word of ctr  */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		v4.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
	.ltorg				/* literal pool for the 'ldr q8, =' above */


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 */

376	.macro		next_tweak, out, in, const, tmp
377	sshr		\tmp\().2d,  \in\().2d,   #63
378	and		\tmp\().16b, \tmp\().16b, \const\().16b
379	add		\out\().2d,  \in\().2d,   \in\().2d
380	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
381	eor		\out\().16b, \out\().16b, \tmp\().16b
382	.endm
383
/*
 * 128-bit constant loaded into v7 with 'ldr q7' and used as the \const
 * operand of next_tweak: one 64-bit half is 1, the other is 0x87.
 * NOTE(review): CPU_LE()/CPU_BE() are defined outside this file;
 * presumably each emits its argument only when building for the matching
 * endianness, so that exactly one .quad pair is assembled - confirm.
 */
.Lxts_mul_x:
CPU_LE(	.quad		1, 0x87		)
CPU_BE(	.quad		0x87, 1		)

/*
 * aes_xts_encrypt - XTS encryption: every block is XORed with its tweak,
 * encrypted, and XORed with the same tweak again; the tweak is advanced
 * with next_tweak from block to block.
 *
 * Registers as used below:
 *   x0  out     - written 16 bytes per block, post-incremented
 *   x1  in      - read 16 bytes per block, post-incremented
 *   x2  rk1     - key used for the data blocks
 *   w3  rounds  - passed to the prepare / cipher macros
 *   w4  blocks  - remaining block count
 *   x5  rk2     - key used once, to encrypt the iv into the first tweak
 *   x6  iv      - read when first != 0; never written
 *   w7  first   - non-zero: derive the first tweak and set up the keys
 *
 * v4 holds the tweak of the block being processed and, on return, the
 * last tweak that was used.  It is not stored to memory, so a call with
 * first == 0 relies on v4 still holding the value left by the previous
 * call and starts by advancing it.
 * v7 holds the .Lxts_mul_x constant, v8 is scratch for next_tweak.
 */
AES_ENTRY(aes_xts_encrypt)
	FRAME_PUSH
	cbz		w7, .LxtsencloopNx

	ld1		{v4.16b}, [x6]
	enc_prepare	w3, x5, x6
	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
	enc_switch_key	w3, x2, x6
	ldr		q7, .Lxts_mul_x
	b		.LxtsencNx

.LxtsencloopNx:
	/* (re)load the constant: the 4x path below overwrites v7 */
	ldr		q7, .Lxts_mul_x
	next_tweak	v4, v4, v7, v8
.LxtsencNx:
#if INTERLEAVE >= 2
	subs		w4, w4, #INTERLEAVE
	bmi		.Lxtsenc1x		/* fewer than INTERLEAVE left */
#if INTERLEAVE == 2
	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
	next_tweak	v5, v4, v7, v8
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	do_encrypt_block2x
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	st1		{v0.16b-v1.16b}, [x0], #32
	cbz		w4, .LxtsencoutNx
	next_tweak	v4, v5, v7, v8
	b		.LxtsencNx
.LxtsencoutNx:
	mov		v4.16b, v5.16b		/* leave last used tweak in v4 */
	b		.Lxtsencout
#else
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v7, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8		/* 4th tweak replaces the constant */
	eor		v3.16b, v3.16b, v7.16b
	do_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b		/* leave last used tweak in v4 */
	cbz		w4, .Lxtsencout
	b		.LxtsencloopNx
#endif
.Lxtsenc1x:
	adds		w4, w4, #INTERLEAVE	/* undo the last subtraction */
	beq		.Lxtsencout		/* nothing left over */
#else
	/*
	 * Without the N-way loop nothing has checked the count yet; the
	 * loop below tests it only after processing a block, so a zero
	 * count would wrap around.
	 */
	cbz		w4, .Lxtsencout
#endif
.Lxtsencloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsencout
	next_tweak	v4, v4, v7, v8
	b		.Lxtsencloop
.Lxtsencout:
	FRAME_POP
	ret
AES_ENDPROC(aes_xts_encrypt)


/*
 * aes_xts_decrypt - XTS decryption: every block is XORed with its tweak,
 * decrypted, and XORed with the same tweak again; the tweak is advanced
 * with next_tweak from block to block.
 *
 * Registers as used below:
 *   x0  out     - written 16 bytes per block, post-incremented
 *   x1  in      - read 16 bytes per block, post-incremented
 *   x2  rk1     - key used for the data blocks (via dec_prepare)
 *   w3  rounds  - passed to the prepare / cipher macros
 *   w4  blocks  - remaining block count
 *   x5  rk2     - key used once, to ENcrypt the iv into the first tweak
 *   x6  iv      - read when first != 0; never written
 *   w7  first   - non-zero: derive the first tweak and set up the keys
 *
 * v4 holds the tweak of the block being processed and, on return, the
 * last tweak that was used.  It is not stored to memory, so a call with
 * first == 0 relies on v4 still holding the value left by the previous
 * call and starts by advancing it.
 * v7 holds the .Lxts_mul_x constant, v8 is scratch for next_tweak.
 */
AES_ENTRY(aes_xts_decrypt)
	FRAME_PUSH
	cbz		w7, .LxtsdecloopNx

	ld1		{v4.16b}, [x6]
	enc_prepare	w3, x5, x6
	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
	dec_prepare	w3, x2, x6
	ldr		q7, .Lxts_mul_x
	b		.LxtsdecNx

.LxtsdecloopNx:
	/* (re)load the constant: the 4x path below overwrites v7 */
	ldr		q7, .Lxts_mul_x
	next_tweak	v4, v4, v7, v8
.LxtsdecNx:
#if INTERLEAVE >= 2
	subs		w4, w4, #INTERLEAVE
	bmi		.Lxtsdec1x		/* fewer than INTERLEAVE left */
#if INTERLEAVE == 2
	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
	next_tweak	v5, v4, v7, v8
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	do_decrypt_block2x
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	st1		{v0.16b-v1.16b}, [x0], #32
	cbz		w4, .LxtsdecoutNx
	next_tweak	v4, v5, v7, v8
	b		.LxtsdecNx
.LxtsdecoutNx:
	mov		v4.16b, v5.16b		/* leave last used tweak in v4 */
	b		.Lxtsdecout
#else
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v7, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8		/* 4th tweak replaces the constant */
	eor		v3.16b, v3.16b, v7.16b
	do_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b		/* leave last used tweak in v4 */
	cbz		w4, .Lxtsdecout
	b		.LxtsdecloopNx
#endif
.Lxtsdec1x:
	adds		w4, w4, #INTERLEAVE	/* undo the last subtraction */
	beq		.Lxtsdecout		/* nothing left over */
#else
	/*
	 * Without the N-way loop nothing has checked the count yet; the
	 * loop below tests it only after processing a block, so a zero
	 * count would wrap around.
	 */
	cbz		w4, .Lxtsdecout
#endif
.Lxtsdecloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsdecout
	next_tweak	v4, v4, v7, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	FRAME_POP
	ret
AES_ENDPROC(aes_xts_decrypt)
