// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

// Compilers without __has_feature get a stub that always evaluates to 0.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
// When building with memory_sanitizer, force OPENSSL_NO_ASM so that the whole
// assembly body below is compiled out.
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

// Everything up to the matching #endif is only assembled for little-endian
// ARM ELF targets with assembly enabled.
#if !defined(OPENSSL_NO_ASM) && defined(__ARMEL__) && defined(__ELF__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__
.align	5
// Key-expansion constants read by aes_hw_set_encrypt_key through "adr r3,.Lrcon":
//   row 0: initial round constant 0x01 in every lane (doubled per iteration)
//   row 1: vtbl byte-index mask that rotates the last word and copies it to
//          all four lanes
//   row 2: round constant 0x1b, loaded for the last two 128-bit expansions
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

// aes_hw_set_encrypt_key: expand a user key into an AES encryption key
// schedule using the ARMv8 AES instructions (emitted as .byte sequences).
//
// In:    r0 = pointer to the user key bytes
//        r1 = key length in bits; only 128, 192 and 256 pass the checks
//        r2 = pointer to the output key schedule
// Out:   r0 = 0 on success, -1 if r0 or r2 is zero, -2 if r1 is rejected.
//        On success the round count (10, 12 or 14) is stored 240 bytes past
//        the start of the schedule. At .Ldone r2 points at that word and r12
//        holds the round count; aes_hw_set_decrypt_key enters at .Lenc_key
//        and relies on both.
// Clobb: r1, r3, r12, q0-q3, q8-q10, flags; r0 is advanced past the key.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1			@ r3 carries the return value
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f		@ must be a multiple of 64
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags survive until the blt/beq below

	veor	q0,q0,q0		@ q0 = 0 for the rest of the function
	vld1.8	{q3},[r0]!		@ first 16 key bytes
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = rcon 0x01, q2 = rotate-n-splat mask

	blt	.Loop128
	beq	.L192
	b	.L256

	@ 128-bit key: eight looped expansions, then two unrolled ones that
	@ use the 0x1b constant row.
.align	4
.Loop128:
	vtbl.8	d20,{q3},d4		@ q10 = rotated last word of q3 in all lanes
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!		@ emit current round key
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1		@ mix in round constant
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ double the round constant
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ q1 = rcon 0x1b

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ final round key, no write-back
	add	r2,r2,#0x50		@ r2 = schedule start + 240

	mov	r12,#10
	b	.Ldone

	@ 192-bit key: q3 holds the first 16 key bytes, d16 the remaining 8.
.align	4
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1		@ flags survive until bne .Loop192

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule start + 240
	b	.Ldone

	@ 256-bit key: q3 holds the first 16 key bytes, q8 the second 16.
.align	4
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1		@ flags survive until beq .Ldone

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ after the 7th pass r2 = schedule start + 240

	vdup.32	q10,d7[1]		@ last word of q3, no rotation or rcon here
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule offset 240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

// aes_hw_set_decrypt_key: build a decryption key schedule.
//
// Runs the encryption key expansion through the internal .Lenc_key entry and,
// if that succeeds, reverses the order of the round keys in place, applying
// aesimc to every round key except the first and the last.
//
// In:    r0, r1, r2 as for aes_hw_set_encrypt_key
// Out:   r0 = 0 on success, otherwise the non-zero code from .Lenc_key.
// Saves: r4 and lr are pushed on entry; the return pops r4 and pc.
// Clobb: r2, r12, q0, q1, flags, plus whatever .Lenc_key clobbers.
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ r4 = stride for walking r0 downwards
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the first and last round keys unchanged.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ r2 walks up and r0 walks down; each pair is swapped through aesimc
	@ until the two pointers meet.
.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ Middle round key: transformed in place.
	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
// aes_hw_encrypt: encrypt one 16-byte block.
//
// In:    r0 = input block pointer
//        r1 = output block pointer
//        r2 = key schedule; the round count is read from offset 240
// Out:   16 bytes stored at r1.
// Clobb: r2 (advanced through the schedule), r3, q0-q2, flags.
//
// AARCH64_VALID_CALL_TARGET is not defined in this file; it is presumably
// supplied by <openssl/arm_arch.h> -- confirm there.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!		@ q0 = round key 0
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!		@ q1 = round key 1

	@ Two rounds per iteration; q0/q1 are reloaded with the next two keys.
.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

	@ Last two rounds: the final one has no aesmc and ends with an
	@ exclusive-or of the last round key.
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt
// aes_hw_decrypt: decrypt one 16-byte block.
//
// In:    r0 = input block pointer
//        r1 = output block pointer
//        r2 = key schedule; the round count is read from offset 240
//             (presumably one prepared by aes_hw_set_decrypt_key -- confirm
//             against callers)
// Out:   16 bytes stored at r1.
// Clobb: r2 (advanced through the schedule), r3, q0-q2, flags.
//
// AARCH64_VALID_CALL_TARGET is not defined in this file; it is presumably
// supplied by <openssl/arm_arch.h> -- confirm there.
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!		@ q0 = round key 0
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!		@ q1 = round key 1

	@ Two rounds per iteration; q0/q1 are reloaded with the next two keys.
.Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

	@ Last two rounds: the final one has no aesimc and ends with an
	@ exclusive-or of the last round key.
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt
// aes_hw_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
//
// In:    r0 = input pointer
//        r1 = output pointer
//        r2 = length in bytes; below 16 the function returns without
//             processing anything, and a trailing partial block is ignored
//        r3 = key schedule; the round count is read from offset 240
//        [sp]   = pointer to the 16-byte IV; overwritten at .Lcbc_done with
//                 q6, the chaining value left by the last processed block
//        [sp,4] = direction flag: zero selects decryption, non-zero encryption
// Out:   nothing in registers.
// Saves: r4-r8, lr and d8-d15 are pushed on entry and restored on exit.
//
// Encryption is serial, one block per iteration, with a dedicated loop for
// the 10-round case. Decryption handles three blocks per iteration and
// finishes one or two leftover blocks in .Lcbc_dec_tail.
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16			@ r8 = input stride
	blo	.Lcbc_abort
	moveq	r8,#0			@ exactly one block: do not advance r0

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]		@ q6 = IV
	vld1.8	{q0},[r0],r8		@ q0 = first input block

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2		@ r5 = round count - 8
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]		@ q7 = last round key

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from cmp r5,#0 above

	cmp	r5,#2			@ equal when the round count is 10
	veor	q0,q0,q6		@ first block ^ IV
	veor	q5,q8,q7		@ q5 = round key 0 ^ last round key
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

	@ Encryption loop for 12 and 14 rounds. On each pass q8 holds the next
	@ input block already combined with q5.
.align	4
.Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store previous ciphertext block
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ equal when the round count is 12
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags survive until bhs .Loop_cbc_enc
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ next load is the last block: stop advancing r0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ look-ahead load of the next input block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

	@ Encryption loop for 10 rounds; all round keys stay in registers.
.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store previous ciphertext block
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags survive until bhs .Loop_cbc_enc128
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ look-ahead load of the next input block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

	@ Decryption. q0, q1 and q10 are the blocks in flight; q2, q3 and q11
	@ keep copies of the corresponding input blocks for chaining.
.align	5
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail		@ fewer than three blocks in total

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7		@ chaining value ^ last round key
	subs	r2,r2,#0x30		@ flags survive until bhs .Loop3x_cbc_dec
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11		@ next chaining value = third input block
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec

	cmn	r2,#0x30		@ r2 == -0x30: nothing left over
	beq	.Lcbc_done
	nop

	@ One or two leftover blocks, carried in q1 and q10.
.Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ equal when one block is left; used by beq below
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
// aes_hw_ctr32_encrypt_blocks: CTR-mode processing with a 32-bit counter.
//
// In:    r0 = input pointer
//        r1 = output pointer
//        r2 = number of 16-byte blocks
//        r3 = key schedule; the round count is read from offset 240
//        [sp] = pointer to the 16-byte counter block. The word at byte
//               offset 12 is the counter; it is byte-reversed before being
//               incremented unless __ARMEB__ is defined. This function does
//               not store an updated counter back to that memory.
// Out:   nothing in registers.
// Saves: r4-r10, lr and d8-d15 are pushed on entry and restored on exit.
//
// Three blocks are processed per iteration of .Loop3x_ctr32; one or two
// leftover blocks are handled in .Lctr32_tail.
//
// NOTE(review): r2 == 0 is not special-cased. It takes the .Lctr32_tail path,
// which still loads input and stores output, so callers presumably must pass
// at least one block -- confirm against callers.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]		@ r8 = counter word
	vld1.32	{q0},[r4]		@ q0 = whole counter block

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16			@ r12 = input stride used by the tail
	cmp	r2,#2			@ flags survive until bls .Lctr32_tail
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]		@ q7 = last round key
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ single block: tail must not advance r0

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
	@ could write to q1 and q10 directly, but that trips these bugs.
	@ We write to q6 and copy to the final register as a workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0		@ q6 = staging copy of the counter block
	rev	r10, r10
	vmov.32	d13[1],r10
	add	r8, r8, #2
	vorr	q1,q6,q6		@ q1 = counter block + 1
	bls	.Lctr32_tail		@ two blocks or fewer
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6		@ q10 = counter block + 2
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	@ From here the three states move to q4, q5 and q9 so that q0, q1 and
	@ q10 can be refilled with the next counter values.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7		@ input ^ last round key
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	 @ Note the logic to update q0, q1, and q10 is written to work
	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	 @ 32-bit mode. See the comment above.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3		@ flags survive until bhs .Loop3x_ctr32
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0, 1 or 2)
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: tail must not advance r0

	@ One or two leftover blocks, using the counter values in q0 and q1.
.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12		@ r12 is 0 when only one block remains
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ skip the second store when r2 == 1
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif  // __ARM_MAX_ARCH__>=7
#endif  // !OPENSSL_NO_ASM && defined(__ARMEL__) && defined(__ELF__)
// On every ELF target, including builds where the code above is compiled
// out, emit an empty .note.GNU-stack section.
#if defined(__ELF__)
// See https://www.airs.com/blog/archives/518.
.section .note.GNU-stack,"",%progbits
#endif