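@ AES for 32-bit ARM using the ARMv8 Crypto Extension instructions
@ (aese/aesd/aesmc/aesimc): key setup, single-block encrypt/decrypt,
@ and ECB, CBC and CTR32 bulk routines.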
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
@ The AES instructions are emitted as raw bytes through the INST macro
@ because the file is assembled as armv7-a (see .arch above).  The macro
@ arguments a,b,c,d are the four bytes of the ARM-mode encoding in memory
@ order.  The Thumb-2 variant emits them as c, d|0xc, a, b instead.
#ifdef	__thumb2__
.syntax	unified
.thumb
# define INST(a,b,c,d)	.byte	c,d|0xc,a,b
#else
.code	32
# define INST(a,b,c,d)	.byte	a,b,c,d
#endif

.text
.align	5
@ Constants for the key schedule, three rows of four 32-bit words:
@   row 0: initial round constant 0x01 (loaded into q1, doubled per round)
@   row 1: byte-index mask used with vtbl (loaded into q2)
@   row 2: round constant 0x1b, reloaded into q1 by the 128-bit path once
@          the 8-bit shift of row 0 has run its eight iterations
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@ aes_v8_set_encrypt_key: expand a user key into an encryption key schedule.
@ In:	r0 = pointer to the user key bytes
@	r1 = key length in bits (128, 192 or 256)
@	r2 = pointer to the output key schedule; round keys are written from
@	     offset 0 and the round count (10, 12 or 14) at offset 240
@ Out:	r0 = 0 on success, -1 if r0 or r2 is zero, -2 if r1 is not one of
@	     the three accepted sizes
@ Clobbers r1, r3, r12, q0-q3, q8-q10 and the flags.  On success r2 is left
@ pointing at the round-count word (schedule + 240) and r12 holds the round
@ count; aes_v8_set_decrypt_key enters at .Lenc_key and relies on both.
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1			@ error code for a null pointer
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2			@ error code for a bad key size
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f		@ must be a multiple of 64
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags are consumed by the three-way
					@ branch below; nothing in between
					@ modifies them

	veor	q0,q0,q0		@ q0 = 0, used as the aese round key
	vld1.8	{q3},[r0]!		@ first 16 key bytes
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = vtbl mask

	blt	.Loop128
	beq	.L192
	b	.L256

@ 128-bit key: eight looped rounds, then two unrolled rounds that use the
@ 0x1b row of .Lrcon.  Eleven round keys are stored in total.
.align	4
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
	subs	r1,r1,#1		@ flags consumed by bne at loop end

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ q1 = 0x1b row of .Lrcon

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, no post-increment
	add	r2,r2,#0x50		@ r2 = schedule + 160 + 0x50 = + 240

	mov	r12,#10
	b	.Ldone

@ 192-bit key: eight iterations, each storing 8 + 16 bytes.
.align	4
.L192:
	vld1.8	{d16},[r0]!		@ remaining 8 key bytes
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
#ifdef __ARMEB__
	vst1.32	{q8},[r2]!
	sub	r2,r2,#8
#else
	vst1.32	{d16},[r2]!
#endif
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
	subs	r1,r1,#1		@ flags consumed by bne at loop end

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule + 208 + 0x20 = + 240
	b	.Ldone

@ 256-bit key: seven iterations; the last one leaves through the beq in the
@ middle of the loop, at which point r2 is already schedule + 240.
.align	4
.L256:
	vld1.8	{q8},[r0]		@ second 16 key bytes
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
	subs	r1,r1,#1		@ flags consumed by beq .Ldone below

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule + 240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

@ aes_v8_set_decrypt_key: build a decryption key schedule.
@ In:	r0, r1, r2 as for aes_v8_set_encrypt_key
@ Out:	r0 = 0 on success, otherwise the error code returned by the
@	     encryption key expansion (the schedule is then left as is)
@ The encryption schedule is generated first, then the order of the round
@ keys is reversed in place.  The first and last keys are only swapped;
@ every key in between additionally goes through aesimc.
@ Clobbers what the key expansion clobbers, plus r0-r2 as working pointers.
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key		@ returns with r2 = schedule + 240,
					@ r12 = round count

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ post-index step for the upper pointer
	add	r0,r2,r12,lsl#4	@ end of key schedule

	vld1.32	{q0},[r2]		@ swap first and last round keys
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

.Loop_imc:				@ r2 walks up, r0 walks down
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	vld1.32	{q0},[r2]		@ middle round key, transformed in place
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
@ aes_v8_encrypt: encrypt one 16-byte block.
@ In:	r0 = pointer to the input block
@	r1 = pointer to the output block
@	r2 = pointer to the key schedule (round count read from offset 240)
@ Out:	16 bytes stored at [r1]; no value is returned in r0
@ Clobbers r2, r3, q0-q2 and the flags.
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	r3,[r2,#240]		@ r3 = number of rounds
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = state
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:				@ two rounds per iteration
	INST(0x00,0x43,0xb0,0xf3)	@ aese q2,q0
	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	INST(0x02,0x43,0xb0,0xf3)	@ aese q2,q1
	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

	INST(0x00,0x43,0xb0,0xf3)	@ aese q2,q0
	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
	vld1.32	{q0},[r2]		@ last round key
	INST(0x02,0x43,0xb0,0xf3)	@ aese q2,q1
	veor	q2,q2,q0		@ final round has no aesmc

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_encrypt,.-aes_v8_encrypt
@ aes_v8_decrypt: decrypt one 16-byte block.
@ In:	r0 = pointer to the input block
@	r1 = pointer to the output block
@	r2 = pointer to the key schedule (round count read from offset 240);
@	     presumably one produced by aes_v8_set_decrypt_key - confirm
@	     against callers
@ Out:	16 bytes stored at [r1]; no value is returned in r0
@ Clobbers r2, r3, q0-q2 and the flags.
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	r3,[r2,#240]		@ r3 = number of rounds
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = state
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:				@ two rounds per iteration
	INST(0x40,0x43,0xb0,0xf3)	@ aesd q2,q0
	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	INST(0x42,0x43,0xb0,0xf3)	@ aesd q2,q1
	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

	INST(0x40,0x43,0xb0,0xf3)	@ aesd q2,q0
	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
	vld1.32	{q0},[r2]		@ last round key
	INST(0x42,0x43,0xb0,0xf3)	@ aesd q2,q1
	veor	q2,q2,q0		@ final round has no aesimc

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_decrypt,.-aes_v8_decrypt
@ aes_v8_ecb_encrypt: ECB-mode bulk encryption or decryption.
@ In:	r0 = input pointer
@	r1 = output pointer
@	r2 = length in bytes; fewer than 16 returns immediately, and a
@	     trailing partial block is ignored
@	r3 = pointer to the key schedule (round count at offset 240)
@	first stack word = direction: zero selects the decrypt path,
@	     non-zero the encrypt path
@ Out:	processed blocks stored at [r1]; nothing is returned in r0
@ Saves and restores r4-r8, lr and d8-d15.  The second stack word is read
@ into r5 together with the first but is overwritten before any use.
@ Register roles in the main loops: q0, q1, q10 = three data blocks;
@ q8, q9 = current pair of round keys; q12-q15 and q7 = last five round
@ keys; r6 = round counter; r7 = round-key pointer.  q10 and q11 are also
@ loaded from the tail of the schedule but are then reused for data.
.globl	aes_v8_ecb_encrypt
.type	aes_v8_ecb_encrypt,%function
.align	5
aes_v8_ecb_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}			@ ABI specification says so
	ldmia	ip,{r4,r5}			@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lecb_done
	it	eq
	moveq	r8,#0				@ single block: do not advance r0

	cmp	r4,#0					@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]				@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4				@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lecb_dec			@ flags still from cmp r4 above

	vld1.8	{q1},[r0]!
	subs	r2,r2,#32				@ bias
	add	r6,r5,#2
	vorr	q3,q1,q1
	vorr	q10,q1,q1
	vorr	q1,q0,q0
	blo	.Lecb_enc_tail			@ one or two blocks only

	vorr	q1,q3,q3
	vld1.8	{q10},[r0]!
.Loop3x_ecb_enc:				@ three blocks, two rounds per pass
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ecb_enc

	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	subs	r2,r2,#0x30			@ flags consumed by movlo and by
						@ bhs at the bottom of the loop
	it	lo
	movlo	r6,r2				@ r6 is zero at this point
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	add	r0,r0,r6			@ r0 is adjusted in such way that
						@ at exit from the loop q1-q10
						@ are loaded with last "words"
	mov	r7,r3
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x28,0x43,0xf0,0xf3)	@ aese q10,q12
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q2},[r0]!
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2a,0x43,0xf0,0xf3)	@ aese q10,q13
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2c,0x43,0xf0,0xf3)	@ aese q10,q14
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q11},[r0]!
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
	INST(0x2e,0x43,0xf0,0xf3)	@ aese q10,q15
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q7,q0
	veor	q5,q7,q1
	veor	q10,q10,q7
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_ecb_enc

	cmn	r2,#0x30			@ r2 == -0x30: nothing left
	beq	.Lecb_done
	nop

.Lecb_enc_tail:				@ last one or two blocks (q1, q10)
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lecb_enc_tail

	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x28,0x43,0xf0,0xf3)	@ aese q10,q12
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	cmn	r2,#0x20			@ eq: exactly one block left;
						@ flags consumed by beq below
	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2a,0x43,0xf0,0xf3)	@ aese q10,q13
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2c,0x43,0xf0,0xf3)	@ aese q10,q14
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
	INST(0x2e,0x43,0xf0,0xf3)	@ aese q10,q15
	beq	.Lecb_enc_one
	veor	q5,q7,q1
	veor	q9,q7,q10
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lecb_done

.Lecb_enc_one:
	veor	q5,q7,q10
	vst1.8	{q5},[r1]!
	b	.Lecb_done
.align	5
.Lecb_dec:				@ mirror of the encrypt path with aesd/aesimc
	vld1.8	{q1},[r0]!
	subs	r2,r2,#32			@ bias
	add	r6,r5,#2
	vorr	q3,q1,q1
	vorr	q10,q1,q1
	vorr	q1,q0,q0
	blo	.Lecb_dec_tail

	vorr	q1,q3,q3
	vld1.8	{q10},[r0]!
.Loop3x_ecb_dec:
	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ecb_dec

	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	subs	r2,r2,#0x30			@ flags consumed by movlo and by
						@ bhs at the bottom of the loop
	it	lo
	movlo	r6,r2				@ r6 is zero at this point
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	add	r0,r0,r6 			@ r0 is adjusted in such way that
						@ at exit from the loop q1-q10
						@ are loaded with last "words"
	mov	r7,r3
	INST(0x68,0x03,0xb0,0xf3)	@ aesd q0,q12
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
	INST(0x6a,0x03,0xb0,0xf3)	@ aesd q0,q13
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
	INST(0x6c,0x03,0xb0,0xf3)	@ aesd q0,q14
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
	INST(0x6e,0x03,0xb0,0xf3)	@ aesd q0,q15
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	vld1.32	{q8},[r7]!			@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q7,q0
	veor	q5,q7,q1
	veor	q10,q10,q7
	vld1.32	{q9},[r7]!			@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_ecb_dec

	cmn	r2,#0x30			@ r2 == -0x30: nothing left
	beq	.Lecb_done
	nop

.Lecb_dec_tail:				@ last one or two blocks (q1, q10)
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lecb_dec_tail

	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	cmn	r2,#0x20			@ eq: exactly one block left;
						@ flags consumed by beq below
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	beq	.Lecb_dec_one
	veor	q5,q7,q1
	veor	q9,q7,q10
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lecb_done

.Lecb_dec_one:
	veor	q5,q7,q10
	vst1.8	{q5},[r1]!

.Lecb_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt
@ aes_v8_cbc_encrypt: CBC-mode bulk encryption or decryption.
@ In:	r0 = input pointer
@	r1 = output pointer
@	r2 = length in bytes; fewer than 16 returns immediately without
@	     touching the IV, and a trailing partial block is ignored
@	r3 = pointer to the key schedule (round count at offset 240)
@	first stack word  = pointer to the 16-byte IV (kept in r4)
@	second stack word = direction: zero selects the decrypt path,
@	     non-zero the encrypt path
@ Out:	processed blocks stored at [r1]; the IV buffer is overwritten with
@	the chaining value for a following call; nothing is returned in r0
@ Saves and restores r4-r8, lr and d8-d15.  r12 and lr are used as extra
@ round-key pointers on the 192/256-bit encrypt path.
@ Register roles: q6 = chaining value; q7 = last round key; q8, q9 =
@ current pair of round keys.  On the encrypt path q10-q15 hold round
@ keys; on the decrypt path q10 and q11 are reused for data.
.globl	aes_v8_cbc_encrypt
.type	aes_v8_cbc_encrypt,%function
.align	5
aes_v8_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	it	eq
	moveq	r8,#0			@ single block: do not advance r0

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from cmp r5 above

	cmp	r5,#2			@ r5 = rounds - 8, so 2 means 10 rounds
	veor	q0,q0,q6
	veor	q5,q8,q7		@ q5 = rndkey[0] ^ rndkey[last]; XORed
					@ into each following input block so the
					@ first aese of the next iteration also
					@ applies the chaining XOR
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x04,0x03,0xb0,0xf3)	@ aese q0,q2
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ r5 = rounds - 8, so 4 means 12 rounds
	INST(0x06,0x03,0xb0,0xf3)	@ aese q0,q3
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q8},[r14]
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags consumed by moveq and by bhs
					@ at the bottom of the loop
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	it	eq
	moveq	r8,#0
	INST(0x24,0x03,0xb0,0xf3)	@ aese q0,q10
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x26,0x03,0xb0,0xf3)	@ aese q0,q11
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	veor	q8,q8,q5
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
.Lcbc_enc128:				@ 10-round path, all keys in registers
	vld1.32	{q2,q3},[r7]
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags consumed by moveq and by bhs
					@ at the bottom of the loop
	INST(0x04,0x03,0xb0,0xf3)	@ aese q0,q2
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	it	eq
	moveq	r8,#0
	INST(0x06,0x03,0xb0,0xf3)	@ aese q0,q3
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x24,0x03,0xb0,0xf3)	@ aese q0,q10
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x26,0x03,0xb0,0xf3)	@ aese q0,q11
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	veor	q8,q8,q5
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
.Lcbc_dec:				@ three blocks at a time; q2, q3, q11
					@ keep the ciphertext copies needed
					@ for chaining
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail		@ one or two blocks only

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10
.Loop3x_cbc_dec:
	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	veor	q4,q6,q7		@ chaining value ^ last round key
	subs	r2,r2,#0x30		@ flags consumed by movlo and by bhs
					@ at the bottom of the loop
	veor	q5,q2,q7
	it	lo
	movlo	r6,r2			@ r6 is zero at this point
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11		@ new chaining value = last ciphertext
	mov	r7,r3
	INST(0x68,0x03,0xb0,0xf3)	@ aesd q0,q12
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
	INST(0x6a,0x03,0xb0,0xf3)	@ aesd q0,q13
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
	INST(0x6c,0x03,0xb0,0xf3)	@ aesd q0,q14
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
	INST(0x6e,0x03,0xb0,0xf3)	@ aesd q0,q15
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec

	cmn	r2,#0x30		@ r2 == -0x30: nothing left
	beq	.Lcbc_done
	nop

.Lcbc_dec_tail:			@ last one or two blocks (q1, q10)
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	cmn	r2,#0x20		@ eq: exactly one block left;
					@ flags consumed by beq below
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	veor	q5,q6,q7
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	veor	q9,q3,q7
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write back the chaining value
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
@ aes_v8_ctr32_encrypt_blocks: CTR-mode keystream XOR with a 32-bit counter.
@ In:	r0 = input pointer
@	r1 = output pointer
@	r2 = number of 16-byte blocks; zero returns immediately
@	r3 = pointer to the key schedule (round count at offset 240)
@	first stack word = pointer to the 16-byte counter block (kept in r4)
@ Out:	r2 blocks stored at [r1]; nothing is returned in r0.  The counter
@	block in memory is only read, never written back.
@ The 32-bit word at offset 12 of the counter block is treated as a
@ big-endian counter: it is byte-reversed on little-endian builds,
@ incremented in r8, reversed again and inserted into lane d13[1] of q6.
@ Saves and restores r4-r10, lr and d8-d15; r12 is used as scratch.
@ Register roles: q6 = counter block template; q0, q1, q10 = counter
@ blocks being encrypted; q8, q9 = current pair of round keys; q12-q15 and
@ q7 = last five round keys; r6 = round counter; r7 = round-key pointer.
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	cmp	r2,#0			@ zero blocks: nothing to do.  Without
	beq	.Lctr32_done		@ this guard the tail path below would
					@ still read and write two blocks.
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
#ifdef __ARMEB__
	vld1.8	{q0},[r4]
#else
	vld1.32	{q0},[r4]
#endif
	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags consumed by movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	it	lo
	movlo	r12,#0			@ single block: do not advance r0
					@ between the two tail loads
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d13[1],r10
	add	r8, r8, #2
	vorr	q1,q6,q6
	bls	.Lctr32_tail		@ one or two blocks only
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:				@ three counter blocks, two rounds per pass
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	@ Remaining rounds.  The state moves to q4, q5 and q9 so that q0, q1
	@ and q10 can be refilled with the next three counter blocks.
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x83,0xb0,0xf3)	@ aesmc q4,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0xa3,0xb0,0xf3)	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
	INST(0x22,0x83,0xb0,0xf3)	@ aese q4,q9
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	INST(0x22,0xa3,0xb0,0xf3)	@ aese q5,q9
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x23,0xf0,0xf3)	@ aesmc q9,q10
	INST(0x28,0x83,0xb0,0xf3)	@ aese q4,q12
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	INST(0x28,0xa3,0xb0,0xf3)	@ aese q5,q12
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	veor	q2,q2,q7		@ fold the last round key into the input
	add	r10,r8,#2
	INST(0x28,0x23,0xf0,0xf3)	@ aese q9,q12
	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
	INST(0x2a,0x83,0xb0,0xf3)	@ aese q4,q13
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	INST(0x2a,0xa3,0xb0,0xf3)	@ aese q5,q13
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	veor	q11,q11,q7
	vmov.32	d13[1], r9
	INST(0x2a,0x23,0xf0,0xf3)	@ aese q9,q13
	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
	INST(0x2c,0x83,0xb0,0xf3)	@ aese q4,q14
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
	INST(0x2c,0xa3,0xb0,0xf3)	@ aese q5,q14
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
	INST(0x2c,0x23,0xf0,0xf3)	@ aese q9,q14
	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3		@ flags consumed by bhs at loop end
	INST(0x2e,0x83,0xb0,0xf3)	@ aese q4,q15
	INST(0x2e,0xa3,0xb0,0xf3)	@ aese q5,q15
	INST(0x2e,0x23,0xf0,0xf3)	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0-2)
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	it	eq
	moveq	r12,#0

.Lctr32_tail:				@ last one or two blocks (q0, q1)
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12		@ r12 is 0 when only one block is
					@ left, so the next load rereads it
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.8	{q3},[r0]
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	veor	q2,q2,q7
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	veor	q3,q3,q7
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ one block: skip the second store
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif	/* __ARM_MAX_ARCH__>=7 */