• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#include "arm_arch.h"
2
3#if __ARM_MAX_ARCH__>=7
@ Assembler setup: everything below is placed in .text and assembled as
@ 32-bit ARM code (.code 32) for armv7-a with NEON enabled.  The AES
@ instructions used later in this file are emitted as raw .byte sequences
@ (the intended mnemonic is given in the trailing comment of each one).
4.text
5.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
6.fpu	neon
7.code	32
8#undef	__thumb2__
9.align	5
@ Constant table read by the key-expansion code through "adr r3,.Lrcon":
@   row 0 (0x01 in every word)       - loaded into q1 as the initial round
@                                      constant; the code doubles it with
@                                      vshl.u8 after each use
@   row 1 (0x0c0f0e0d in every word) - loaded into q2 and used as the
@                                      vtbl.8 byte-index mask
@   row 2 (0x1b in every word)       - reloaded into q1 once the 128-bit
@                                      loop has finished
10.Lrcon:
11.long	0x01,0x01,0x01,0x01
12.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
13.long	0x1b,0x1b,0x1b,0x1b
14
@-----------------------------------------------------------------------
@ aes_v8_set_encrypt_key - expand a user key into an encryption schedule
@
@ In:    r0 = pointer to the user key bytes (rejected if 0)
@        r1 = key length in bits; accepted values are 128, 192 and 256
@        r2 = pointer to the output key schedule (rejected if 0)
@ Out:   r0 =  0 on success
@             -1 if r0 or r2 is 0
@             -2 if r1 is below 128, above 256, or not a multiple of 64
@ On success the round keys are written from r2 upwards and the round
@ count (10, 12 or 14, kept in r12) is stored as a word 240 bytes past
@ the start of the schedule; r2 is left pointing at that word.
@ Writes: r1, r3, r12, q0-q3, q8-q10 and the flags.
@
@ .Lenc_key is a second entry label for the same code; it is the target
@ of the "bl" in aes_v8_set_decrypt_key, which relies on r2 and r12 as
@ left by this routine.
@-----------------------------------------------------------------------
15.globl	aes_v8_set_encrypt_key
16.type	aes_v8_set_encrypt_key,%function
17.align	5
18aes_v8_set_encrypt_key:
19.Lenc_key:
@ Argument validation; r3 carries the prospective return value.
20	mov	r3,#-1
21	cmp	r0,#0
22	beq	.Lenc_key_abort
23	cmp	r2,#0
24	beq	.Lenc_key_abort
25	mov	r3,#-2
26	cmp	r1,#128
27	blt	.Lenc_key_abort
28	cmp	r1,#256
29	bgt	.Lenc_key_abort
30	tst	r1,#0x3f
31	bne	.Lenc_key_abort
32
@ The flags set by "cmp r1,#192" are consumed by the blt/beq below; none
@ of the instructions in between updates the flags.
33	adr	r3,.Lrcon
34	cmp	r1,#192
35
@ q0 = all zeroes, q3 = first 16 key bytes, q1 = round constant,
@ q2 = vtbl index mask (rows 0 and 1 of .Lrcon); r3 is left pointing at
@ row 2 of the table.
36	veor	q0,q0,q0
37	vld1.8	{q3},[r0]!
38	mov	r1,#8		@ reuse r1
39	vld1.32	{q1,q2},[r3]!
40
41	blt	.Loop128
42	beq	.L192
43	b	.L256
44
@ ---- 128-bit key: 8 loop iterations plus 2 unrolled ones ----
@ Each step stores the current round key (q3) and derives the next one:
@ vtbl.8 picks bytes of q3 according to the mask in q2, aese against the
@ all-zero q0 is used for its byte substitution, and the vext.8/veor
@ pairs against q0 shift the previous key by one word at a time and
@ accumulate the running XOR of its words.
45.align	4
46.Loop128:
47	vtbl.8	d20,{q3},d4
48	vtbl.8	d21,{q3},d5
49	vext.8	q9,q0,q3,#12
50	vst1.32	{q3},[r2]!
51.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
52	subs	r1,r1,#1
53
54	veor	q3,q3,q9
55	vext.8	q9,q0,q9,#12
56	veor	q3,q3,q9
57	vext.8	q9,q0,q9,#12
58	veor	q10,q10,q1
59	veor	q3,q3,q9
60	vshl.u8	q1,q1,#1
61	veor	q3,q3,q10
62	bne	.Loop128
63
@ r3 points at the third .Lrcon row here, so q1 becomes 0x1b in every
@ word for the last two steps.
64	vld1.32	{q1},[r3]
65
66	vtbl.8	d20,{q3},d4
67	vtbl.8	d21,{q3},d5
68	vext.8	q9,q0,q3,#12
69	vst1.32	{q3},[r2]!
70.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
71
72	veor	q3,q3,q9
73	vext.8	q9,q0,q9,#12
74	veor	q3,q3,q9
75	vext.8	q9,q0,q9,#12
76	veor	q10,q10,q1
77	veor	q3,q3,q9
78	vshl.u8	q1,q1,#1
79	veor	q3,q3,q10
80
81	vtbl.8	d20,{q3},d4
82	vtbl.8	d21,{q3},d5
83	vext.8	q9,q0,q3,#12
84	vst1.32	{q3},[r2]!
85.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
86
87	veor	q3,q3,q9
88	vext.8	q9,q0,q9,#12
89	veor	q3,q3,q9
90	vext.8	q9,q0,q9,#12
91	veor	q10,q10,q1
92	veor	q3,q3,q9
93	veor	q3,q3,q10
@ The final round key is stored without write-back, so r2 still points
@ at it (offset 160); adding 0x50 moves r2 to offset 240.
94	vst1.32	{q3},[r2]
95	add	r2,r2,#0x50
96
97	mov	r12,#10
98	b	.Ldone
99
@ ---- 192-bit key ----
@ d16 receives the remaining 8 key bytes.  The mask in q2 is lowered by 8
@ in every byte so that vtbl.8 indexes into q8 instead of q3.
100.align	4
101.L192:
102	vld1.8	{d16},[r0]!
103	vmov.i8	q10,#8			@ borrow q10
104	vst1.32	{q3},[r2]!
105	vsub.i8	q2,q2,q10	@ adjust the mask
106
@ 8 iterations (r1 = 8); each one emits 8 bytes (d16) followed by 16
@ bytes (q3).
107.Loop192:
108	vtbl.8	d20,{q8},d4
109	vtbl.8	d21,{q8},d5
110	vext.8	q9,q0,q3,#12
@ Big-endian builds store all of q8 and then step r2 back by 8, so the
@ net advance is 8 bytes in both variants.
111#ifdef __ARMEB__
112	vst1.32	{q8},[r2]!
113	sub	r2,r2,#8
114#else
115	vst1.32	{d16},[r2]!
116#endif
117.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
118	subs	r1,r1,#1
119
120	veor	q3,q3,q9
121	vext.8	q9,q0,q9,#12
122	veor	q3,q3,q9
123	vext.8	q9,q0,q9,#12
124	veor	q3,q3,q9
125
126	vdup.32	q9,d7[1]
127	veor	q9,q9,q8
128	veor	q10,q10,q1
129	vext.8	q8,q0,q8,#12
130	vshl.u8	q1,q1,#1
131	veor	q8,q8,q9
132	veor	q3,q3,q10
133	veor	q8,q8,q10
134	vst1.32	{q3},[r2]!
135	bne	.Loop192
136
@ 16 + 8*24 = 208 bytes have been written; +0x20 moves r2 to offset 240.
137	mov	r12,#12
138	add	r2,r2,#0x20
139	b	.Ldone
140
@ ---- 256-bit key ----
@ q8 receives the second 16 key bytes.  The loop runs with r1 = 7 and is
@ left through the "beq .Ldone" in its middle on the seventh pass, at
@ which point 16 + 7*32 = 240 bytes have been written.
141.align	4
142.L256:
143	vld1.8	{q8},[r0]
144	mov	r1,#7
145	mov	r12,#14
146	vst1.32	{q3},[r2]!
147
148.Loop256:
149	vtbl.8	d20,{q8},d4
150	vtbl.8	d21,{q8},d5
151	vext.8	q9,q0,q3,#12
152	vst1.32	{q8},[r2]!
153.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
154	subs	r1,r1,#1
155
156	veor	q3,q3,q9
157	vext.8	q9,q0,q9,#12
158	veor	q3,q3,q9
159	vext.8	q9,q0,q9,#12
160	veor	q10,q10,q1
161	veor	q3,q3,q9
162	vshl.u8	q1,q1,#1
163	veor	q3,q3,q10
164	vst1.32	{q3},[r2]!
165	beq	.Ldone
166
@ Second half of the pass: update q8 from the word just produced in q3
@ (no round constant and no vtbl mask are applied in this half).
167	vdup.32	q10,d7[1]
168	vext.8	q9,q0,q8,#12
169.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
170
171	veor	q8,q8,q9
172	vext.8	q9,q0,q9,#12
173	veor	q8,q8,q9
174	vext.8	q9,q0,q9,#12
175	veor	q8,q8,q9
176
177	veor	q8,q8,q10
178	b	.Loop256
179
@ Common success exit: record the round count after the round keys.
180.Ldone:
181	str	r12,[r2]
182	mov	r3,#0
183
184.Lenc_key_abort:
185	mov	r0,r3			@ return value
186
187	bx	lr
188.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
189
@-----------------------------------------------------------------------
@ aes_v8_set_decrypt_key - build a decryption key schedule
@
@ Takes the same r0/r1/r2 arguments as aes_v8_set_encrypt_key, which it
@ calls through the .Lenc_key label.  If that call returns non-zero the
@ value is returned unchanged.  Otherwise the schedule is reversed in
@ place: the first and last round keys are swapped as they are, every
@ other round key is passed through aesimc while being swapped with its
@ mirror position, and the middle key is transformed where it stands.
@ Out:    r0 = 0 on success, otherwise the key-expansion error code
@ Saves:  r4 and lr on the stack (restored on exit).
@ Writes: r0, r2 plus everything written by the key expansion, q0, q1.
@-----------------------------------------------------------------------
190.globl	aes_v8_set_decrypt_key
191.type	aes_v8_set_decrypt_key,%function
192.align	5
193aes_v8_set_decrypt_key:
194	stmdb	sp!,{r4,lr}
195	bl	.Lenc_key
196
197	cmp	r0,#0
198	bne	.Ldec_key_abort
199
@ After a successful expansion r2 points 240 bytes past the start of the
@ schedule and r12 holds the round count, so r0 = start + 16*rounds is
@ the address of the last round key.  r4 = -16 is the post-index step
@ that walks r0 downwards.
200	sub	r2,r2,#240		@ restore original r2
201	mov	r4,#-16
202	add	r0,r2,r12,lsl#4	@ end of key schedule
203
@ Swap the first and last round keys without transforming them.
204	vld1.32	{q0},[r2]
205	vld1.32	{q1},[r0]
206	vst1.32	{q0},[r0],r4
207	vst1.32	{q1},[r2]!
208
@ Walk inwards from both ends, applying aesimc to each pair of keys and
@ swapping them, while r0 is still (unsigned) above r2.
209.Loop_imc:
210	vld1.32	{q0},[r2]
211	vld1.32	{q1},[r0]
212.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
213.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
214	vst1.32	{q0},[r0],r4
215	vst1.32	{q1},[r2]!
216	cmp	r0,r2
217	bhi	.Loop_imc
218
@ Remaining middle round key: loaded through r2, transformed, stored
@ through r0.
219	vld1.32	{q0},[r2]
220.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
221	vst1.32	{q0},[r0]
222
223	eor	r0,r0,r0		@ return value
224.Ldec_key_abort:
225	ldmia	sp!,{r4,pc}
226.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
@-----------------------------------------------------------------------
@ aes_v8_encrypt - encrypt one 16-byte block
@
@ In:     r0 = pointer to the 16 input bytes
@         r1 = pointer to the 16 output bytes
@         r2 = pointer to the key schedule; the round count is read from
@              the word at offset 240
@ Writes: r2, r3, q0-q2 and the flags.  Nothing is returned in r0.
@-----------------------------------------------------------------------
227.globl	aes_v8_encrypt
228.type	aes_v8_encrypt,%function
229.align	5
230aes_v8_encrypt:
231	ldr	r3,[r2,#240]
232	vld1.32	{q0},[r2]!
233	vld1.8	{q2},[r0]
234	sub	r3,r3,#2
235	vld1.32	{q1},[r2]!
236
@ Two rounds per iteration; q0 and q1 alternate as the current round key
@ and are refilled from the schedule while the other one is in use.
237.Loop_enc:
238.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
239.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
240	vld1.32	{q0},[r2]!
241	subs	r3,r3,#2
242.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
243.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
244	vld1.32	{q1},[r2]!
245	bgt	.Loop_enc
246
@ Last two rounds: the final aese is not followed by aesmc, and the last
@ round key is applied with a plain veor.
247.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
248.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
249	vld1.32	{q0},[r2]
250.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
251	veor	q2,q2,q0
252
253	vst1.8	{q2},[r1]
254	bx	lr
255.size	aes_v8_encrypt,.-aes_v8_encrypt
@-----------------------------------------------------------------------
@ aes_v8_decrypt - decrypt one 16-byte block
@
@ Same register interface and structure as aes_v8_encrypt, using
@ aesd/aesimc in place of aese/aesmc:
@ In:     r0 = pointer to the 16 input bytes
@         r1 = pointer to the 16 output bytes
@         r2 = pointer to the key schedule; the round count is read from
@              the word at offset 240
@ Writes: r2, r3, q0-q2 and the flags.  Nothing is returned in r0.
@ NOTE(review): presumably r2 must be a schedule produced by
@ aes_v8_set_decrypt_key - confirm against callers.
@-----------------------------------------------------------------------
256.globl	aes_v8_decrypt
257.type	aes_v8_decrypt,%function
258.align	5
259aes_v8_decrypt:
260	ldr	r3,[r2,#240]
261	vld1.32	{q0},[r2]!
262	vld1.8	{q2},[r0]
263	sub	r3,r3,#2
264	vld1.32	{q1},[r2]!
265
@ Two rounds per iteration; q0 and q1 alternate as the current round key.
266.Loop_dec:
267.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
268.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
269	vld1.32	{q0},[r2]!
270	subs	r3,r3,#2
271.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
272.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
273	vld1.32	{q1},[r2]!
274	bgt	.Loop_dec
275
@ Last two rounds: the final aesd is not followed by aesimc, and the last
@ round key is applied with a plain veor.
276.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
277.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
278	vld1.32	{q0},[r2]
279.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
280	veor	q2,q2,q0
281
282	vst1.8	{q2},[r1]
283	bx	lr
284.size	aes_v8_decrypt,.-aes_v8_decrypt
@-----------------------------------------------------------------------
@ aes_v8_cbc_encrypt - CBC-mode encryption or decryption
@
@ In:     r0       = input pointer
@         r1       = output pointer
@         r2       = length in bytes; values below 16 return at once
@                    without touching memory, larger values are processed
@                    in whole 16-byte blocks (the count is masked with -16
@                    after the first 16 have been subtracted)
@         r3       = key schedule; round count read from offset 240
@         [sp]     = pointer to the 16-byte IV (read on entry, written
@                    back at .Lcbc_done)
@         [sp,#4]  = direction flag: zero selects the decrypt path,
@                    non-zero the encrypt path
@ Saves:  r4-r8, lr and d8-d15 on the stack (restored on exit).
@ Writes: r0-r3, r12, q0-q3, q8-q15 in addition to the saved registers.
@
@ Register roles after the prologue:
@         q6       = IV / chaining value
@         q8, q9   = first two round keys
@         q10-q15  = six of the last seven round keys, q7 = the last one
@         r5       = round count minus 8, r6 = working round counter
@         r7       = pointer into the schedule
@         r8       = post-index step for the input pointer (16, or 0 once
@                    the remaining length has reached zero, so that r0
@                    stops advancing)
@-----------------------------------------------------------------------
285.globl	aes_v8_cbc_encrypt
286.type	aes_v8_cbc_encrypt,%function
287.align	5
288aes_v8_cbc_encrypt:
289	mov	ip,sp
290	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
291	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
292	ldmia	ip,{r4,r5}		@ load remaining args
293	subs	r2,r2,#16
294	mov	r8,#16
295	blo	.Lcbc_abort
296	moveq	r8,#0
297
@ The flags from "cmp r5,#0" stay live until "beq .Lcbc_dec" below; no
@ instruction in between updates them.
298	cmp	r5,#0			@ en- or decrypting?
299	ldr	r5,[r3,#240]
300	and	r2,r2,#-16
301	vld1.8	{q6},[r4]
302	vld1.8	{q0},[r0],r8
303
304	vld1.32	{q8,q9},[r3]		@ load key schedule...
305	sub	r5,r5,#6
306	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
307	sub	r5,r5,#2
308	vld1.32	{q10,q11},[r7]!
309	vld1.32	{q12,q13},[r7]!
310	vld1.32	{q14,q15},[r7]!
311	vld1.32	{q7},[r7]
312
313	add	r7,r3,#32
314	mov	r6,r5
315	beq	.Lcbc_dec
316
@ ---- encryption ----
@ q0 = first plaintext block XOR IV.  q5 = rndkey[0] XOR last round key;
@ it is folded into each following plaintext block so that the chaining
@ XOR is absorbed by the first aese of the next block.
@ r5 == 2 (10 rounds) selects the dedicated 128-bit loop.
317	cmp	r5,#2
318	veor	q0,q0,q6
319	veor	q5,q8,q7
320	beq	.Lcbc_enc128
321
@ 192/256-bit path: q2,q3 = rndkey[2],[3]; r7, r6, r12, r14 and r3 are
@ turned into pointers to rndkey[1], [4], [5], [6] and [7].  r14 (lr) is
@ free for this because the return address was saved on the stack.
322	vld1.32	{q2,q3},[r7]
323	add	r7,r3,#16
324	add	r6,r3,#16*4
325	add	r12,r3,#16*5
326.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
327.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
328	add	r14,r3,#16*6
329	add	r3,r3,#16*7
330	b	.Lenter_cbc_enc
331
332.align	4
333.Loop_cbc_enc:
334.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
335.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
336	vst1.8	{q6},[r1]!
337.Lenter_cbc_enc:
338.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
339.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
340.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
341.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
342	vld1.32	{q8},[r6]
343	cmp	r5,#4
344.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
345.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
346	vld1.32	{q9},[r12]
@ r5 == 4 (12 rounds) skips the two extra rounds below.
347	beq	.Lcbc_enc192
348
349.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
350.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
351	vld1.32	{q8},[r14]
352.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
353.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
354	vld1.32	{q9},[r3]
355	nop
356
@ Common tail of the 192/256-bit rounds.  The scalar and load
@ instructions interleaved here fetch the next input block into q8 and
@ pre-mix it with q5; "subs r2" sets the flags tested by the final bhs.
357.Lcbc_enc192:
358.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
359.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
360	subs	r2,r2,#16
361.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
362.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
363	moveq	r8,#0
364.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
365.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
366.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
367.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
368	vld1.8	{q8},[r0],r8
369.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
370.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
371	veor	q8,q8,q5
372.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
373.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
374	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
375.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
376.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
377.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
@ q6 = finished ciphertext block (also the next chaining value).
378	veor	q6,q0,q7
379	bhs	.Loop_cbc_enc
380
381	vst1.8	{q6},[r1]!
382	b	.Lcbc_done
383
@ ---- encryption, 128-bit key ----
@ All round keys stay in registers: q8, q9, q2, q3, q10-q15, q7.
384.align	5
385.Lcbc_enc128:
386	vld1.32	{q2,q3},[r7]
387.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
388.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
389	b	.Lenter_cbc_enc128
390.Loop_cbc_enc128:
391.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
392.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
393	vst1.8	{q6},[r1]!
394.Lenter_cbc_enc128:
395.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
396.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
397	subs	r2,r2,#16
398.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
399.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
400	moveq	r8,#0
401.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
402.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
403.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
404.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
405.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
406.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
407	vld1.8	{q8},[r0],r8
408.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
409.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
410.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
411.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
412.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
413.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
414	veor	q8,q8,q5
415.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
416	veor	q6,q0,q7
417	bhs	.Loop_cbc_enc128
418
419	vst1.8	{q6},[r1]!
420	b	.Lcbc_done
@ ---- decryption ----
@ Up to three blocks are processed in parallel in q0, q1 and q10, while
@ q2, q3 and q11 keep copies of the ciphertext inputs that are needed for
@ the chaining XOR and as the next IV.  r2 is biased by a further 32
@ here; a borrow means fewer than three blocks are available and the
@ tail code handles them.
421.align	5
422.Lcbc_dec:
423	vld1.8	{q10},[r0]!
424	subs	r2,r2,#32		@ bias
425	add	r6,r5,#2
426	vorr	q3,q0,q0
427	vorr	q1,q0,q0
428	vorr	q11,q10,q10
429	blo	.Lcbc_dec_tail
430
431	vorr	q1,q10,q10
432	vld1.8	{q10},[r0]!
433	vorr	q2,q0,q0
434	vorr	q3,q1,q1
435	vorr	q11,q10,q10
436
@ Rounds taken from the schedule through r7, two per iteration, on all
@ three blocks.
437.Loop3x_cbc_dec:
438.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
439.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
440.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
441.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
442.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
443.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
444	vld1.32	{q8},[r7]!
445	subs	r6,r6,#2
446.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
447.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
448.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
449.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
450.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
451.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
452	vld1.32	{q9},[r7]!
453	bgt	.Loop3x_cbc_dec
454
@ Remaining rounds with the register-resident keys.  Interleaved with
@ them: q4, q5 and q9 are prepared as (previous ciphertext XOR last round
@ key) for the three blocks, r2 is stepped down by 0x30, and the next
@ three input blocks are loaded into q2, q3 and q11.
455.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
456.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
457.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
458.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
459.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
460.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
461	veor	q4,q6,q7
462	subs	r2,r2,#0x30
463	veor	q5,q2,q7
464	movlo	r6,r2			@ r6 is zero at this point
465.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
466.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
467.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
468.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
469.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
470.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
471	veor	q9,q3,q7
472	add	r0,r0,r6		@ r0 is adjusted in such way that
473					@ at exit from the loop q1-q10
474					@ are loaded with last "words"
475	vorr	q6,q11,q11
476	mov	r7,r3
477.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
478.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
479.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
480.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
481.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
482.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
483	vld1.8	{q2},[r0]!
484.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
485.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
486.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
487.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
488.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
489.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
490	vld1.8	{q3},[r0]!
491.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
492.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
493.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
494.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
495.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
496.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
497	vld1.8	{q11},[r0]!
498.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
499.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
500.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
501	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
502	add	r6,r5,#2
503	veor	q4,q4,q0
504	veor	q5,q5,q1
505	veor	q10,q10,q9
506	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
507	vst1.8	{q4},[r1]!
508	vorr	q0,q2,q2
509	vst1.8	{q5},[r1]!
510	vorr	q1,q3,q3
511	vst1.8	{q10},[r1]!
512	vorr	q10,q11,q11
@ The vector instructions above leave the flags alone, so this bhs still
@ tests the result of "subs r2,r2,#0x30".
513	bhs	.Loop3x_cbc_dec
514
@ r2 == -0x30 means nothing is left; otherwise one or two blocks remain.
515	cmn	r2,#0x30
516	beq	.Lcbc_done
517	nop
518
@ Tail: one or two remaining blocks, processed in q1 and q10.
519.Lcbc_dec_tail:
520.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
521.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
522.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
523.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
524	vld1.32	{q8},[r7]!
525	subs	r6,r6,#2
526.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
527.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
528.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
529.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
530	vld1.32	{q9},[r7]!
531	bgt	.Lcbc_dec_tail
532
533.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
534.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
535.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
536.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
537.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
538.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
539.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
540.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
541.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
542.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
543.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
544.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
@ r2 == -0x20 means exactly one block remains; the result of this cmn is
@ consumed by "beq .Lcbc_dec_one" further down.
545	cmn	r2,#0x20
546.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
547.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
548.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
549.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
550	veor	q5,q6,q7
551.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
552.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
553.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
554.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
555	veor	q9,q3,q7
556.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
557.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
558	beq	.Lcbc_dec_one
@ Two blocks: q1 and q10 both hold results.
559	veor	q5,q5,q1
560	veor	q9,q9,q10
561	vorr	q6,q11,q11
562	vst1.8	{q5},[r1]!
563	vst1.8	{q9},[r1]!
564	b	.Lcbc_done
565
@ One block: only q10 holds a result that is used.
566.Lcbc_dec_one:
567	veor	q5,q5,q10
568	vorr	q6,q11,q11
569	vst1.8	{q5},[r1]!
570
@ Write the updated chaining value back through the IV pointer.
571.Lcbc_done:
572	vst1.8	{q6},[r4]
573.Lcbc_abort:
574	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
575	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
576.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
@-----------------------------------------------------------------------
@ aes_v8_ctr32_encrypt_blocks - CTR mode with a 32-bit counter
@
@ In:     r0    = input pointer
@         r1    = output pointer
@         r2    = number of 16-byte blocks (compared against 2 and
@                 stepped down by 3 per main-loop pass)
@         r3    = key schedule; round count read from offset 240
@         [sp]  = pointer to the 16-byte counter block; the word at
@                 offset 12 is the counter.  It is byte-reversed on
@                 little-endian builds before being incremented and
@                 reversed again before being inserted into a block.
@ The counter block is only read; this routine does not store through
@ the pointer taken from [sp].
@ NOTE(review): there is no early exit for r2 == 0; that case falls into
@ the tail code, which stores output - confirm callers never pass 0.
@ Saves:  r4-r10, lr and d8-d15 on the stack (restored on exit).
@ Writes: r0-r2, r12, q0-q3, q8-q15 in addition to the saved registers.
@
@ Register roles after the prologue:
@         q6          = counter block template; d13[1] is its last word
@         r8          = counter value in host byte order
@         q0, q1, q10 = counter blocks being encrypted
@         q8, q9      = first two round keys
@         q12-q15     = four of the last five round keys, q7 = the last
@         r5          = round count minus 6, r6 = working round counter
@         r7          = pointer into the schedule
@-----------------------------------------------------------------------
577.globl	aes_v8_ctr32_encrypt_blocks
578.type	aes_v8_ctr32_encrypt_blocks,%function
579.align	5
580aes_v8_ctr32_encrypt_blocks:
581	mov	ip,sp
582	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
583	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
584	ldr	r4, [ip]		@ load remaining arg
585	ldr	r5,[r3,#240]
586
587	ldr	r8, [r4, #12]
588#ifdef __ARMEB__
589	vld1.8	{q0},[r4]
590#else
591	vld1.32	{q0},[r4]
592#endif
593	vld1.32	{q8,q9},[r3]		@ load key schedule...
594	sub	r5,r5,#4
595	mov	r12,#16
@ The flags from "cmp r2,#2" are used by both "movlo" and "bls" below;
@ nothing in between updates them.
596	cmp	r2,#2
597	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
598	sub	r5,r5,#2
599	vld1.32	{q12,q13},[r7]!
600	vld1.32	{q14,q15},[r7]!
601	vld1.32	{q7},[r7]
602	add	r7,r3,#32
603	mov	r6,r5
@ With fewer than two blocks r12 (the input step used in the tail) is 0.
604	movlo	r12,#0
605#ifndef __ARMEB__
606	rev	r8, r8
607#endif
@ Build the second counter block (counter+1) in q1 via the template q6;
@ with more than two blocks, also build counter+2 in q10.
608	add	r10, r8, #1
609	vorr	q6,q0,q0
610	rev	r10, r10
611	vmov.32	d13[1],r10
612	add	r8, r8, #2
613	vorr	q1,q6,q6
614	bls	.Lctr32_tail
615	rev	r12, r8
616	vmov.32	d13[1],r12
617	sub	r2,r2,#3		@ bias
618	vorr	q10,q6,q6
619	b	.Loop3x_ctr32
620
@ ---- main loop: three blocks per pass ----
@ Rounds taken from the schedule through r7, two per iteration.
621.align	4
622.Loop3x_ctr32:
623.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
624.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
625.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
626.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
627.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
628.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
629	vld1.32	{q8},[r7]!
630	subs	r6,r6,#2
631.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
632.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
633.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
634.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
635.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
636.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
637	vld1.32	{q9},[r7]!
638	bgt	.Loop3x_ctr32
639
@ Remaining rounds.  The state moves from q0/q1/q10 into q4/q5/q9 so
@ that q0, q1 and q10 can be refilled with the next three counter blocks
@ (counter+1, +2, +3 inserted into the q6 template) while the current
@ blocks are still being finished.  The three input blocks are loaded
@ into q2, q3, q11 and pre-XORed with the last round key (q7).
640.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
641.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
642.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
643.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
644	vld1.8	{q2},[r0]!
645	add	r9,r8,#1
646.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
647.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
648	vld1.8	{q3},[r0]!
649	rev	r9,r9
650.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
651.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
652.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
653.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
654	vld1.8	{q11},[r0]!
655	mov	r7,r3
656.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
657.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
658.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
659.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
660.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
661.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
662	veor	q2,q2,q7
663	add	r10,r8,#2
664.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
665.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
666	veor	q3,q3,q7
667	add	r8,r8,#3
668.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
669.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
670.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
671.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
672	veor	q11,q11,q7
673	vmov.32	d13[1], r9
674.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
675.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
676	vorr	q0,q6,q6
677	rev	r10,r10
678.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
679.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
680	vmov.32	d13[1], r10
681	rev	r12,r8
682.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
683.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
684	vorr	q1,q6,q6
685	vmov.32	d13[1], r12
686.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
687.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
688	vorr	q10,q6,q6
@ The flags from this subs are tested by "bhs .Loop3x_ctr32" below.
689	subs	r2,r2,#3
690.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
691.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
692.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
693
@ output = (input XOR last round key) XOR final aese state
694	veor	q2,q2,q4
695	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
696	vst1.8	{q2},[r1]!
697	veor	q3,q3,q5
698	mov	r6,r5
699	vst1.8	{q3},[r1]!
700	veor	q11,q11,q9
701	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
702	vst1.8	{q11},[r1]!
703	bhs	.Loop3x_ctr32
704
@ Undo the bias: r2 = number of blocks still to do (0, 1 or 2).  r12 is
@ the input step between the two tail loads, 0 when only one remains.
705	adds	r2,r2,#3
706	beq	.Lctr32_done
707	cmp	r2,#1
708	mov	r12,#16
709	moveq	r12,#0
710
@ ---- tail: one or two blocks, processed in q0 and q1 ----
711.Lctr32_tail:
712.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
713.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
714.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
715.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
716	vld1.32	{q8},[r7]!
717	subs	r6,r6,#2
718.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
719.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
720.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
721.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
722	vld1.32	{q9},[r7]!
723	bgt	.Lctr32_tail
724
725.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
726.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
727.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
728.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
729.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
730.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
731.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
732.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
@ With r12 == 0 the second load below reads the same 16 bytes again
@ instead of stepping past the single remaining block.
733	vld1.8	{q2},[r0],r12
734.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
735.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
736.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
737.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
738	vld1.8	{q3},[r0]
739.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
740.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
741.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
742.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
743	veor	q2,q2,q7
744.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
745.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
746.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
747.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
748	veor	q3,q3,q7
749.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
750.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
751
@ The first block is always stored; the second store is skipped when
@ r2 == 1.
752	cmp	r2,#1
753	veor	q2,q2,q0
754	veor	q3,q3,q1
755	vst1.8	{q2},[r1]!
756	beq	.Lctr32_done
757	vst1.8	{q3},[r1]
758
759.Lctr32_done:
760	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
761	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
762.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
763#endif
764