• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if !defined(OPENSSL_NO_ASM)
11#if defined(__arm__)
12#if defined(BORINGSSL_PREFIX)
13#include <boringssl_prefix_symbols_asm.h>
14#endif
15#include <openssl/arm_arch.h>
16
17#if __ARM_MAX_ARCH__>=7
18.text
19.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
20.fpu	neon
21.code	32
22#undef	__thumb2__
23.align	5
@ Constant table for the key expansion in aes_hw_set_encrypt_key, which
@ reaches it through adr r3,.Lrcon:
@   row 0: 0x01 in every 32-bit lane. Loaded into q1 as the starting round
@          constant; the expansion loops double it with vshl.u8 q1,q1,#1.
@   row 1: byte-index mask for vtbl.8, loaded into q2 together with row 0.
@   row 2: 0x1b in every 32-bit lane. The 128-bit path reloads q1 from here
@          once its eight-iteration loop has finished.
24.Lrcon:
25.long	0x01,0x01,0x01,0x01
26.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
27.long	0x1b,0x1b,0x1b,0x1b
28
29.text
30
@-----------------------------------------------------------------------
@ aes_hw_set_encrypt_key: expand a 128-, 192- or 256-bit AES key into an
@ encryption key schedule.
@
@ In:   r0 = pointer to the user key bytes
@       r1 = key length in bits (128, 192 or 256)
@       r2 = pointer to the key schedule to fill in
@ Out:  r0 = 0 on success
@            -1 if r0 or r2 is NULL
@            -2 if r1 is below 128, above 256 or not a multiple of 64
@       On success the round keys are written from the original r2 upwards
@       and the round count (10, 12 or 14) is written at byte offset 240.
@       Nothing is written on the error paths.
@ Clobbers: r1, r3, r12 (success only), flags, q0-q3, q8-q10.
@
@ .Lenc_key is a second entry point used by aes_hw_set_decrypt_key. That
@ caller relies on two exit conditions of the success path: r2 ends up
@ exactly 240 bytes past its entry value, and r12 holds the round count.
@-----------------------------------------------------------------------
31.globl	aes_hw_set_encrypt_key
32.hidden	aes_hw_set_encrypt_key
33.type	aes_hw_set_encrypt_key,%function
34.align	5
35aes_hw_set_encrypt_key:
36.Lenc_key:
@ Argument validation. r3 carries the pending return code.
37	mov	r3,#-1
38	cmp	r0,#0
39	beq	.Lenc_key_abort
40	cmp	r2,#0
41	beq	.Lenc_key_abort
42	mov	r3,#-2
43	cmp	r1,#128
44	blt	.Lenc_key_abort
45	cmp	r1,#256
46	bgt	.Lenc_key_abort
47	tst	r1,#0x3f
48	bne	.Lenc_key_abort
49
@ From here r3 is reused as the pointer into the constant table.
50	adr	r3,.Lrcon
@ The flags set by this compare are consumed by the blt/beq further down;
@ none of the instructions in between updates the flags.
51	cmp	r1,#192
52
@ q0 = 0 (zero round key for aese and zero fill for vext)
@ q3 = first 16 key bytes, q1 = round constant, q2 = vtbl mask
53	veor	q0,q0,q0
54	vld1.8	{q3},[r0]!
55	mov	r1,#8		@ reuse r1
56	vld1.32	{q1,q2},[r3]!
57
58	blt	.Loop128
59	beq	.L192
60	b	.L256
61
@ ---- 128-bit key: eight loop iterations plus two unrolled rounds ----
@ Per iteration: vtbl picks and replicates the rotated last word of q3,
@ aese with the all-zero q0 runs it through the AES round substitution,
@ the vext/veor chain xors each word of q3 with all lower words, and the
@ result is combined with the round constant in q1.
62.align	4
63.Loop128:
64	vtbl.8	d20,{q3},d4
65	vtbl.8	d21,{q3},d5
66	vext.8	q9,q0,q3,#12
67	vst1.32	{q3},[r2]!
68.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
69	subs	r1,r1,#1
70
71	veor	q3,q3,q9
72	vext.8	q9,q0,q9,#12
73	veor	q3,q3,q9
74	vext.8	q9,q0,q9,#12
75	veor	q10,q10,q1
76	veor	q3,q3,q9
77	vshl.u8	q1,q1,#1
78	veor	q3,q3,q10
79	bne	.Loop128
80
@ r3 was post-incremented past the first two table rows, so this loads the
@ 0x1b row as the next round constant.
81	vld1.32	{q1},[r3]
82
83	vtbl.8	d20,{q3},d4
84	vtbl.8	d21,{q3},d5
85	vext.8	q9,q0,q3,#12
86	vst1.32	{q3},[r2]!
87.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
88
89	veor	q3,q3,q9
90	vext.8	q9,q0,q9,#12
91	veor	q3,q3,q9
92	vext.8	q9,q0,q9,#12
93	veor	q10,q10,q1
94	veor	q3,q3,q9
95	vshl.u8	q1,q1,#1
96	veor	q3,q3,q10
97
98	vtbl.8	d20,{q3},d4
99	vtbl.8	d21,{q3},d5
100	vext.8	q9,q0,q3,#12
101	vst1.32	{q3},[r2]!
102.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
103
104	veor	q3,q3,q9
105	vext.8	q9,q0,q9,#12
106	veor	q3,q3,q9
107	vext.8	q9,q0,q9,#12
108	veor	q10,q10,q1
109	veor	q3,q3,q9
110	veor	q3,q3,q10
@ Final round key is stored without write-back (r2 is at offset 160 here);
@ adding 0x50 brings r2 to offset 240 for the round-count store at .Ldone.
111	vst1.32	{q3},[r2]
112	add	r2,r2,#0x50
113
114	mov	r12,#10
115	b	.Ldone
116
@ ---- 192-bit key: eight iterations, 24 bytes of schedule each ----
@ d16 receives key bytes 16..23. The vtbl mask is lowered by 8 per byte so
@ that it indexes the last word of d16 instead of the last word of q3.
117.align	4
118.L192:
119	vld1.8	{d16},[r0]!
120	vmov.i8	q10,#8			@ borrow q10
121	vst1.32	{q3},[r2]!
122	vsub.i8	q2,q2,q10	@ adjust the mask
123
124.Loop192:
125	vtbl.8	d20,{q8},d4
126	vtbl.8	d21,{q8},d5
127	vext.8	q9,q0,q3,#12
128	vst1.32	{d16},[r2]!
129.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
130	subs	r1,r1,#1
131
132	veor	q3,q3,q9
133	vext.8	q9,q0,q9,#12
134	veor	q3,q3,q9
135	vext.8	q9,q0,q9,#12
136	veor	q3,q3,q9
137
138	vdup.32	q9,d7[1]
139	veor	q9,q9,q8
140	veor	q10,q10,q1
141	vext.8	q8,q0,q8,#12
142	vshl.u8	q1,q1,#1
143	veor	q8,q8,q9
144	veor	q3,q3,q10
145	veor	q8,q8,q10
146	vst1.32	{q3},[r2]!
147	bne	.Loop192
148
@ r2 is at offset 208 after the loop; 0x20 more brings it to offset 240.
149	mov	r12,#12
150	add	r2,r2,#0x20
151	b	.Ldone
152
@ ---- 256-bit key: q3 and q8 hold the two key halves ----
@ The loop exits through the beq in its middle on the seventh pass, at which
@ point r2 has reached offset 240.
153.align	4
154.L256:
155	vld1.8	{q8},[r0]
156	mov	r1,#7
157	mov	r12,#14
158	vst1.32	{q3},[r2]!
159
160.Loop256:
161	vtbl.8	d20,{q8},d4
162	vtbl.8	d21,{q8},d5
163	vext.8	q9,q0,q3,#12
164	vst1.32	{q8},[r2]!
165.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
166	subs	r1,r1,#1
167
168	veor	q3,q3,q9
169	vext.8	q9,q0,q9,#12
170	veor	q3,q3,q9
171	vext.8	q9,q0,q9,#12
172	veor	q10,q10,q1
173	veor	q3,q3,q9
174	vshl.u8	q1,q1,#1
175	veor	q3,q3,q10
176	vst1.32	{q3},[r2]!
@ Flags still come from the subs above (the NEON ops do not touch them).
177	beq	.Ldone
178
@ Second half of the pass: update q8 from the last word of the new q3.
@ No vtbl rotation and no round constant are applied on this half.
179	vdup.32	q10,d7[1]
180	vext.8	q9,q0,q8,#12
181.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
182
183	veor	q8,q8,q9
184	vext.8	q9,q0,q9,#12
185	veor	q8,q8,q9
186	vext.8	q9,q0,q9,#12
187	veor	q8,q8,q9
188
189	veor	q8,q8,q10
190	b	.Loop256
191
@ Common success exit: r2 is at byte offset 240 of the schedule on every
@ path, so this stores the round count there.
192.Ldone:
193	str	r12,[r2]
194	mov	r3,#0
195
196.Lenc_key_abort:
197	mov	r0,r3			@ return value
198
199	bx	lr
200.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
201
@-----------------------------------------------------------------------
@ aes_hw_set_decrypt_key: build a decryption key schedule.
@
@ Runs the encryption key expansion through the .Lenc_key entry point and
@ then reverses the order of the round keys in place, applying aesimc to
@ every round key except the first and the last.
@
@ In:   r0, r1, r2 as for aes_hw_set_encrypt_key
@ Out:  r0 = 0 on success, otherwise the non-zero code returned by the
@            key expansion (the schedule is not reversed in that case)
@ Saves/restores r4; returns by popping the saved lr into pc.
@ Depends on .Lenc_key leaving r2 at schedule+240 and the round count in
@ r12.
@-----------------------------------------------------------------------
202.globl	aes_hw_set_decrypt_key
203.hidden	aes_hw_set_decrypt_key
204.type	aes_hw_set_decrypt_key,%function
205.align	5
206aes_hw_set_decrypt_key:
207	stmdb	sp!,{r4,lr}
208	bl	.Lenc_key
209
210	cmp	r0,#0
211	bne	.Ldec_key_abort
212
213	sub	r2,r2,#240		@ restore original r2
@ r4 = -16 is the post-index step that walks r0 backwards one round key at
@ a time while r2 walks forwards.
214	mov	r4,#-16
215	add	r0,r2,r12,lsl#4	@ end of key schedule
216
@ Swap the first and last round keys unchanged.
217	vld1.32	{q0},[r2]
218	vld1.32	{q1},[r0]
219	vst1.32	{q0},[r0],r4
220	vst1.32	{q1},[r2]!
221
@ Swap the remaining pairs from the outside in, transforming both keys with
@ aesimc, until the two pointers meet.
222.Loop_imc:
223	vld1.32	{q0},[r2]
224	vld1.32	{q1},[r0]
225.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
226.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
227	vst1.32	{q0},[r0],r4
228	vst1.32	{q1},[r2]!
229	cmp	r0,r2
230	bhi	.Loop_imc
231
@ r0 and r2 now point at the same middle round key; transform it in place.
232	vld1.32	{q0},[r2]
233.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
234	vst1.32	{q0},[r0]
235
236	eor	r0,r0,r0		@ return value
237.Ldec_key_abort:
238	ldmia	sp!,{r4,pc}
239.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@-----------------------------------------------------------------------
@ aes_hw_encrypt: encrypt a single 16-byte block.
@
@ In:   r0 = pointer to the input block
@       r1 = pointer to the output block
@       r2 = key schedule; the round count is read from byte offset 240
@ Out:  16 bytes written to [r1]; no value is placed in r0
@ Clobbers: r2 (advanced through the schedule), r3, flags, q0-q2.
@-----------------------------------------------------------------------
240.globl	aes_hw_encrypt
241.hidden	aes_hw_encrypt
242.type	aes_hw_encrypt,%function
243.align	5
244aes_hw_encrypt:
@ r3 = round count, q0/q1 = first two round keys, q2 = data block
245	ldr	r3,[r2,#240]
246	vld1.32	{q0},[r2]!
247	vld1.8	{q2},[r0]
248	sub	r3,r3,#2
249	vld1.32	{q1},[r2]!
250
@ Two rounds per iteration, alternating between q0 and q1 while the next
@ round key is fetched into whichever register was just used.
251.Loop_enc:
252.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
253.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
254	vld1.32	{q0},[r2]!
255	subs	r3,r3,#2
256.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
257.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
258	vld1.32	{q1},[r2]!
259	bgt	.Loop_enc
260
@ Last two rounds: the final round has no aesmc and ends with an xor of the
@ last round key, which is loaded into q0 here.
261.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
262.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
263	vld1.32	{q0},[r2]
264.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
265	veor	q2,q2,q0
266
267	vst1.8	{q2},[r1]
268	bx	lr
269.size	aes_hw_encrypt,.-aes_hw_encrypt
@-----------------------------------------------------------------------
@ aes_hw_decrypt: decrypt a single 16-byte block.
@
@ In:   r0 = pointer to the input block
@       r1 = pointer to the output block
@       r2 = key schedule; the round count is read from byte offset 240
@            (presumably a schedule produced by aes_hw_set_decrypt_key,
@            since the rounds use aesd/aesimc -- confirm against callers)
@ Out:  16 bytes written to [r1]; no value is placed in r0
@ Clobbers: r2 (advanced through the schedule), r3, flags, q0-q2.
@-----------------------------------------------------------------------
270.globl	aes_hw_decrypt
271.hidden	aes_hw_decrypt
272.type	aes_hw_decrypt,%function
273.align	5
274aes_hw_decrypt:
@ r3 = round count, q0/q1 = first two round keys, q2 = data block
275	ldr	r3,[r2,#240]
276	vld1.32	{q0},[r2]!
277	vld1.8	{q2},[r0]
278	sub	r3,r3,#2
279	vld1.32	{q1},[r2]!
280
@ Two rounds per iteration, alternating between q0 and q1 while the next
@ round key is fetched into whichever register was just used.
281.Loop_dec:
282.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
283.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
284	vld1.32	{q0},[r2]!
285	subs	r3,r3,#2
286.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
287.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
288	vld1.32	{q1},[r2]!
289	bgt	.Loop_dec
290
@ Last two rounds: the final round has no aesimc and ends with an xor of the
@ last round key, which is loaded into q0 here.
291.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
292.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
293	vld1.32	{q0},[r2]
294.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
295	veor	q2,q2,q0
296
297	vst1.8	{q2},[r1]
298	bx	lr
299.size	aes_hw_decrypt,.-aes_hw_decrypt
@-----------------------------------------------------------------------
@ aes_hw_cbc_encrypt: AES-CBC encrypt or decrypt a buffer.
@
@ In:   r0 = input pointer
@       r1 = output pointer
@       r2 = length in bytes. Only whole 16-byte blocks are processed; if
@            the length is below 16 the function returns without reading
@            or writing any data.
@       r3 = key schedule; the round count is read from byte offset 240
@       [sp]   = pointer to the 16-byte IV (read on entry, rewritten at
@                .Lcbc_done with the chaining value for a following call)
@       [sp+4] = direction: zero selects decryption, non-zero encryption
@ Saves/restores r4-r8, lr and d8-d15; r12 is used as scratch.
@
@ Register roles after the common setup:
@   q8,q9    round keys 0 and 1
@   q10-q15  six round keys preceding the last one
@   q7       last round key
@   q6       IV / running chaining value
@   r5       round count minus 8 (2, 4 or 6 for 10, 12 or 14 rounds)
@   r8       input post-increment step: 16, or 0 once no further block
@            may be fetched
@-----------------------------------------------------------------------
300.globl	aes_hw_cbc_encrypt
301.hidden	aes_hw_cbc_encrypt
302.type	aes_hw_cbc_encrypt,%function
303.align	5
304aes_hw_cbc_encrypt:
@ ip keeps the entry sp so the stacked arguments can be fetched after the
@ pushes below.
305	mov	ip,sp
306	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
307	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
308	ldmia	ip,{r4,r5}		@ load remaining args
309	subs	r2,r2,#16
310	mov	r8,#16
311	blo	.Lcbc_abort
@ Exactly one block: do not advance r0 past it.
312	moveq	r8,#0
313
@ The flags from this compare are consumed by the beq .Lcbc_dec below; the
@ instructions in between do not update the flags.
314	cmp	r5,#0			@ en- or decrypting?
315	ldr	r5,[r3,#240]
316	and	r2,r2,#-16
317	vld1.8	{q6},[r4]
318	vld1.8	{q0},[r0],r8
319
320	vld1.32	{q8,q9},[r3]		@ load key schedule...
321	sub	r5,r5,#6
322	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
323	sub	r5,r5,#2
324	vld1.32	{q10,q11},[r7]!
325	vld1.32	{q12,q13},[r7]!
326	vld1.32	{q14,q15},[r7]!
327	vld1.32	{q7},[r7]
328
329	add	r7,r3,#32
330	mov	r6,r5
331	beq	.Lcbc_dec
332
@ ---------------- encryption ----------------
@ q0 = first plaintext block xor IV.
@ q5 = round key 0 xor last round key. Each following plaintext block is
@ xored with q5 and fed to the first aese as its key operand, while q0
@ still lacks the final xor with q7; the two q7 terms cancel, so the first
@ round effectively sees plaintext xor previous ciphertext xor round key 0.
333	cmp	r5,#2
334	veor	q0,q0,q6
335	veor	q5,q8,q7
336	beq	.Lcbc_enc128
337
@ 12 or 14 rounds: q2,q3 = round keys 2,3. r7 points at round key 1 and
@ r6, r12, r14, r3 point at round keys 4, 5, 6, 7, which are reloaded into
@ q8/q9 on every block.
338	vld1.32	{q2,q3},[r7]
339	add	r7,r3,#16
340	add	r6,r3,#16*4
341	add	r12,r3,#16*5
342.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
343.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
344	add	r14,r3,#16*6
345	add	r3,r3,#16*7
346	b	.Lenter_cbc_enc
347
348.align	4
349.Loop_cbc_enc:
350.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
351.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
@ Store the ciphertext block finished by the previous iteration.
352	vst1.8	{q6},[r1]!
353.Lenter_cbc_enc:
354.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
355.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
356.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
357.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
358	vld1.32	{q8},[r6]
359	cmp	r5,#4
360.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
361.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
362	vld1.32	{q9},[r12]
@ 12 rounds: skip the two extra rounds that only the 14-round case needs.
363	beq	.Lcbc_enc192
364
365.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
366.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
367	vld1.32	{q8},[r14]
368.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
369.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
370	vld1.32	{q9},[r3]
371	nop
372
373.Lcbc_enc192:
374.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
375.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
@ The flags from this subs drive both the moveq below and the bhs at the
@ end of the iteration.
376	subs	r2,r2,#16
377.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
378.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
@ Once the remaining count reaches zero, stop advancing r0 so that the
@ look-ahead load below does not step past the input.
379	moveq	r8,#0
380.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
381.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
382.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
383.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
@ q8 = next plaintext block, turned into the first-round operand via q5.
384	vld1.8	{q8},[r0],r8
385.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
386.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
387	veor	q8,q8,q5
388.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
389.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
390	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
391.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
392.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
393.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
@ q6 = finished ciphertext block (also the next chaining value).
394	veor	q6,q0,q7
395	bhs	.Loop_cbc_enc
396
397	vst1.8	{q6},[r1]!
398	b	.Lcbc_done
399
@ 10 rounds: every round key stays resident (q8,q9,q2,q3,q10-q15,q7), so
@ nothing has to be reloaded inside the loop except the input.
400.align	5
401.Lcbc_enc128:
402	vld1.32	{q2,q3},[r7]
403.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
404.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
405	b	.Lenter_cbc_enc128
406.Loop_cbc_enc128:
407.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
408.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
409	vst1.8	{q6},[r1]!
410.Lenter_cbc_enc128:
411.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
412.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
413	subs	r2,r2,#16
414.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
415.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
416	moveq	r8,#0
417.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
418.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
419.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
420.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
421.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
422.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
423	vld1.8	{q8},[r0],r8
424.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
425.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
426.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
427.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
428.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
429.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
430	veor	q8,q8,q5
431.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
432	veor	q6,q0,q7
433	bhs	.Loop_cbc_enc128
434
435	vst1.8	{q6},[r1]!
436	b	.Lcbc_done
@ ---------------- decryption ----------------
@ Up to three blocks are decrypted in parallel in q0, q1 and q10. Copies of
@ the ciphertext inputs are kept in q2, q3 and q11 because each one is
@ needed as the chaining value for the block after it; q6 holds the
@ chaining value for the first block in flight.
@ r2 is biased by a further 32 here, so blo means fewer than three blocks.
@ r6 = round count minus 6 is the counter for the round-pair loops.
437.align	5
438.Lcbc_dec:
439	vld1.8	{q10},[r0]!
440	subs	r2,r2,#32		@ bias
441	add	r6,r5,#2
442	vorr	q3,q0,q0
443	vorr	q1,q0,q0
444	vorr	q11,q10,q10
445	blo	.Lcbc_dec_tail
446
447	vorr	q1,q10,q10
448	vld1.8	{q10},[r0]!
449	vorr	q2,q0,q0
450	vorr	q3,q1,q1
451	vorr	q11,q10,q10
452
@ Leading rounds, two per pass, with round keys streamed through q8/q9
@ from r7.
453.Loop3x_cbc_dec:
454.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
455.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
456.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
457.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
458.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
459.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
460	vld1.32	{q8},[r7]!
461	subs	r6,r6,#2
462.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
463.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
464.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
465.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
466.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
467.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
468	vld1.32	{q9},[r7]!
469	bgt	.Loop3x_cbc_dec
470
471.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
472.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
473.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
474.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
475.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
476.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
@ q4, q5, q9 = chaining value xor last round key for each of the three
@ blocks, so one veor per block later applies both the final round key and
@ the CBC xor.
477	veor	q4,q6,q7
@ The flags from this subs feed the movlo below and the bhs that closes
@ the 3x loop.
478	subs	r2,r2,#0x30
479	veor	q5,q2,q7
480	movlo	r6,r2			@ r6 is zero at this point; take the negative remainder when fewer than three blocks are left
481.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
482.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
483.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
484.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
485.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
486.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
487	veor	q9,q3,q7
488	add	r0,r0,r6		@ r0 is adjusted in such way that
489					@ at exit from the loop q1-q10
490					@ are loaded with last "words"
@ q6 = last ciphertext block of this group, the next chaining value.
491	vorr	q6,q11,q11
@ Rewind the round-key pointer to the start of the schedule.
492	mov	r7,r3
493.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
494.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
495.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
496.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
497.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
498.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
@ Fetch the next three input blocks while the current ones finish.
499	vld1.8	{q2},[r0]!
500.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
501.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
502.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
503.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
504.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
505.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
506	vld1.8	{q3},[r0]!
507.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
508.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
509.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
510.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
511.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
512.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
513	vld1.8	{q11},[r0]!
514.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
515.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
516.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
517	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
518	add	r6,r5,#2
519	veor	q4,q4,q0
520	veor	q5,q5,q1
521	veor	q10,q10,q9
522	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
523	vst1.8	{q4},[r1]!
524	vorr	q0,q2,q2
525	vst1.8	{q5},[r1]!
526	vorr	q1,q3,q3
527	vst1.8	{q10},[r1]!
528	vorr	q10,q11,q11
529	bhs	.Loop3x_cbc_dec
530
@ r2 == -0x30 means the length was an exact multiple of three blocks and
@ nothing is left for the tail.
531	cmn	r2,#0x30
532	beq	.Lcbc_done
533	nop
534
@ Tail: one or two remaining blocks, processed in q1 and q10.
535.Lcbc_dec_tail:
536.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
537.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
538.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
539.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
540	vld1.32	{q8},[r7]!
541	subs	r6,r6,#2
542.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
543.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
544.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
545.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
546	vld1.32	{q9},[r7]!
547	bgt	.Lcbc_dec_tail
548
549.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
550.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
551.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
552.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
553.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
554.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
555.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
556.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
557.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
558.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
559.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
560.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
@ r2 == -0x20 means exactly one block is left; the flags are consumed by
@ the beq .Lcbc_dec_one below.
561	cmn	r2,#0x20
562.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
563.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
564.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
565.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
566	veor	q5,q6,q7
567.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
568.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
569.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
570.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
571	veor	q9,q3,q7
572.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
573.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
574	beq	.Lcbc_dec_one
@ Two blocks left: q1 and q10 both carry results.
575	veor	q5,q5,q1
576	veor	q9,q9,q10
577	vorr	q6,q11,q11
578	vst1.8	{q5},[r1]!
579	vst1.8	{q9},[r1]!
580	b	.Lcbc_done
581
@ One block left: only the result in q10 is used.
582.Lcbc_dec_one:
583	veor	q5,q5,q10
584	vorr	q6,q11,q11
585	vst1.8	{q5},[r1]!
586
@ Write the chaining value back through the IV pointer.
587.Lcbc_done:
588	vst1.8	{q6},[r4]
589.Lcbc_abort:
590	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
591	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
592.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@-----------------------------------------------------------------------
@ aes_hw_ctr32_encrypt_blocks: AES in counter mode over whole blocks. The
@ counter is the last 32-bit word of the counter block; it is byte-swapped
@ with rev before being incremented and swapped back before being inserted
@ into a block.
@
@ In:   r0 = input pointer
@       r1 = output pointer
@       r2 = number of 16-byte blocks
@       r3 = key schedule; the round count is read from byte offset 240
@       [sp] = pointer to the 16-byte initial counter block; it is only
@              read, and the advanced counter is not written back
@ Saves/restores r4-r10, lr and d8-d15; r12 is used as scratch.
@
@ NOTE(review): there is no check for r2 == 0. The tail path would still
@ load and store blocks in that case, so callers presumably must pass at
@ least one block -- confirm.
@ NOTE(review): only the first rev is guarded by __ARMEB__; the later rev
@ instructions are unconditional, so the counter handling appears to assume
@ a little-endian build -- confirm.
@
@ Register roles after setup:
@   q8,q9    round keys 0 and 1 (reloaded for every group of blocks)
@   q12-q15  four round keys preceding the last one
@   q7       last round key
@   q6       copy of the original counter block, used as a template
@   r8       running counter value in byte-swapped form
@   r5       round count minus 6
@-----------------------------------------------------------------------
593.globl	aes_hw_ctr32_encrypt_blocks
594.hidden	aes_hw_ctr32_encrypt_blocks
595.type	aes_hw_ctr32_encrypt_blocks,%function
596.align	5
597aes_hw_ctr32_encrypt_blocks:
@ ip keeps the entry sp so the stacked argument can be fetched after the
@ pushes below.
598	mov	ip,sp
599	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
600	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
601	ldr	r4, [ip]		@ load remaining arg
602	ldr	r5,[r3,#240]
603
@ r8 = counter word, q0 = whole counter block
604	ldr	r8, [r4, #12]
605	vld1.32	{q0},[r4]
606
607	vld1.32	{q8,q9},[r3]		@ load key schedule...
608	sub	r5,r5,#4
609	mov	r12,#16
@ The flags from this compare are consumed by the movlo and the bls further
@ down; nothing in between updates the flags.
610	cmp	r2,#2
611	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
612	sub	r5,r5,#2
613	vld1.32	{q12,q13},[r7]!
614	vld1.32	{q14,q15},[r7]!
615	vld1.32	{q7},[r7]
616	add	r7,r3,#32
617	mov	r6,r5
@ Fewer than two blocks: r12 = 0 so the tail does not advance r0 between its
@ two input loads.
618	movlo	r12,#0
619#ifndef __ARMEB__
620	rev	r8, r8
621#endif
@ q1 = counter block with counter+1 in its last word.
622	vorr	q1,q0,q0
623	add	r10, r8, #1
624	vorr	q10,q0,q0
625	add	r8, r8, #2
626	vorr	q6,q0,q0
627	rev	r10, r10
628	vmov.32	d3[1],r10
@ One or two blocks: go straight to the tail.
629	bls	.Lctr32_tail
@ q10 = counter block with counter+2 in its last word.
630	rev	r12, r8
631	sub	r2,r2,#3		@ bias
632	vmov.32	d21[1],r12
633	b	.Loop3x_ctr32
634
@ ---- three blocks per iteration: q0, q1, q10 are the counter blocks ----
635.align	4
636.Loop3x_ctr32:
637.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
638.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
639.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
640.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
641.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
642.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
643	vld1.32	{q8},[r7]!
644	subs	r6,r6,#2
645.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
646.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
647.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
648.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
649.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
650.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
651	vld1.32	{q9},[r7]!
652	bgt	.Loop3x_ctr32
653
@ From here the in-flight states move to q4, q5 and q9, which frees q0, q1
@ and q10 to be rebuilt from the template in q6 as the next three counter
@ blocks. q2, q3, q11 receive the three input blocks and are pre-xored with
@ the last round key q7.
654.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
655.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
656.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
657.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
658	vld1.8	{q2},[r0]!
659	vorr	q0,q6,q6
660.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
661.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
662	vld1.8	{q3},[r0]!
663	vorr	q1,q6,q6
664.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
665.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
666.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
667.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
668	vld1.8	{q11},[r0]!
@ Rewind the round-key pointer to the start of the schedule.
669	mov	r7,r3
670.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
671.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
672	vorr	q10,q6,q6
@ r9, r10, r8 = the next three counter values.
673	add	r9,r8,#1
674.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
675.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
676.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
677.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
678	veor	q2,q2,q7
679	add	r10,r8,#2
680.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
681.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
682	veor	q3,q3,q7
683	add	r8,r8,#3
684.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
685.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
686.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
687.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
688	veor	q11,q11,q7
689	rev	r9,r9
690.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
691.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
692	vmov.32	d1[1], r9
693	rev	r10,r10
694.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
695.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
696.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
697.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
698	vmov.32	d3[1], r10
699	rev	r12,r8
700.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
701.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
702	vmov.32	d21[1], r12
@ The flags from this subs are consumed by the bhs that closes the loop.
703	subs	r2,r2,#3
704.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
705.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
706.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
707
@ output = (input xor last round key) xor cipher state before its final xor
708	veor	q2,q2,q4
709	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
710	vst1.8	{q2},[r1]!
711	veor	q3,q3,q5
712	mov	r6,r5
713	vst1.8	{q3},[r1]!
714	veor	q11,q11,q9
715	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
716	vst1.8	{q11},[r1]!
717	bhs	.Loop3x_ctr32
718
@ Undo the bias: r2 = number of blocks left (0, 1 or 2).
719	adds	r2,r2,#3
720	beq	.Lctr32_done
721	cmp	r2,#1
722	mov	r12,#16
@ Exactly one block left: keep r0 in place so the second input load below
@ re-reads the same block instead of stepping past the input.
723	moveq	r12,#0
724
@ ---- tail: one or two blocks, using q0 and q1 ----
725.Lctr32_tail:
726.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
727.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
728.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
729.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
730	vld1.32	{q8},[r7]!
731	subs	r6,r6,#2
732.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
733.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
734.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
735.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
736	vld1.32	{q9},[r7]!
737	bgt	.Lctr32_tail
738
739.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
740.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
741.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
742.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
743.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
744.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
745.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
746.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
747	vld1.8	{q2},[r0],r12
748.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
749.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
750.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
751.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
752	vld1.8	{q3},[r0]
753.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
754.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
755.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
756.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
757	veor	q2,q2,q7
758.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
759.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
760.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
761.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
762	veor	q3,q3,q7
763.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
764.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
765
@ The second output block is stored only when r2 is not 1.
766	cmp	r2,#1
767	veor	q2,q2,q0
768	veor	q3,q3,q1
769	vst1.8	{q2},[r1]!
770	beq	.Lctr32_done
771	vst1.8	{q3},[r1]
772
773.Lctr32_done:
774	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
775	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
776.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
777#endif
778#endif
779#endif  // !OPENSSL_NO_ASM
780