• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__arm__)
13#if defined(BORINGSSL_PREFIX)
14#include <boringssl_prefix_symbols_asm.h>
15#endif
16#include <openssl/arm_arch.h>
17
18#if __ARM_MAX_ARCH__>=7
19.text
20.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
21.fpu	neon
22.code	32
23#undef	__thumb2__
24.align	5
@ Constants used by the key-expansion code, three rows of four words each:
@   row 0: round constant 0x01 in every lane (loaded into q1 and doubled
@          with vshl.u8 after each expansion step)
@   row 1: byte-index mask for vtbl (loaded into q2), see "rotate-n-splat"
@   row 2: round constant 0x1b in every lane, loaded into q1 by the
@          128-bit path once the eight-iteration loop has finished
25.Lrcon:
26.long	0x01,0x01,0x01,0x01
27.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
28.long	0x1b,0x1b,0x1b,0x1b
29
30.text
31
@ aes_hw_set_encrypt_key: expand a user key into an encryption key schedule.
@ In:   r0 = pointer to the key bytes
@       r1 = key length in bits
@       r2 = pointer to the output key schedule (presumably the C AES_KEY
@            structure; confirm against the C header)
@ Out:  r0 = 0 on success, -1 if r0 or r2 is zero, -2 if r1 is not one of
@            128, 192 or 256.
@       On success the round keys are written from the original r2 upward
@       and the round count (10, 12 or 14) is stored at byte offset 240.
@ Uses: r1, r3, r12, q0-q3, q8-q10; r0 and r2 are advanced.
@ The local entry point .Lenc_key is also called by aes_hw_set_decrypt_key,
@ which relies on r12 = round count and r2 = schedule + 240 after a
@ successful return.
32.globl	aes_hw_set_encrypt_key
33.hidden	aes_hw_set_encrypt_key
34.type	aes_hw_set_encrypt_key,%function
35.align	5
36aes_hw_set_encrypt_key:
37.Lenc_key:
	@ Argument checks. r3 carries the prospective return value.
38	mov	r3,#-1
39	cmp	r0,#0
40	beq	.Lenc_key_abort
41	cmp	r2,#0
42	beq	.Lenc_key_abort
43	mov	r3,#-2
44	cmp	r1,#128
45	blt	.Lenc_key_abort
46	cmp	r1,#256
47	bgt	.Lenc_key_abort
48	tst	r1,#0x3f
49	bne	.Lenc_key_abort
50
	@ The flags set by the compare against 192 are consumed by the
	@ three-way branch below; nothing in between modifies them.
51	adr	r3,.Lrcon
52	cmp	r1,#192
53
	@ q0 = 0, q3 = first 16 key bytes, q1 = round constant, q2 = vtbl mask.
54	veor	q0,q0,q0
55	vld1.8	{q3},[r0]!
56	mov	r1,#8		@ reuse r1
57	vld1.32	{q1,q2},[r3]!
58
59	blt	.Loop128
60	beq	.L192
61	b	.L256
62
	@ 128-bit key: eight loop iterations, then two unrolled steps that use
	@ the second round constant row. Each step stores the current round
	@ key (q3) and derives the next one: vtbl rotates and splats the last
	@ word, aese with the all-zero q0 applies the AES S-box, and the
	@ vext/veor chain XORs each word with all preceding words.
63.align	4
64.Loop128:
65	vtbl.8	d20,{q3},d4
66	vtbl.8	d21,{q3},d5
67	vext.8	q9,q0,q3,#12
68	vst1.32	{q3},[r2]!
69.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
70	subs	r1,r1,#1
71
72	veor	q3,q3,q9
73	vext.8	q9,q0,q9,#12
74	veor	q3,q3,q9
75	vext.8	q9,q0,q9,#12
76	veor	q10,q10,q1
77	veor	q3,q3,q9
78	vshl.u8	q1,q1,#1
79	veor	q3,q3,q10
80	bne	.Loop128
81
	@ Switch q1 to the third .Lrcon row (0x1b) for the last two steps.
82	vld1.32	{q1},[r3]
83
84	vtbl.8	d20,{q3},d4
85	vtbl.8	d21,{q3},d5
86	vext.8	q9,q0,q3,#12
87	vst1.32	{q3},[r2]!
88.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
89
90	veor	q3,q3,q9
91	vext.8	q9,q0,q9,#12
92	veor	q3,q3,q9
93	vext.8	q9,q0,q9,#12
94	veor	q10,q10,q1
95	veor	q3,q3,q9
96	vshl.u8	q1,q1,#1
97	veor	q3,q3,q10
98
99	vtbl.8	d20,{q3},d4
100	vtbl.8	d21,{q3},d5
101	vext.8	q9,q0,q3,#12
102	vst1.32	{q3},[r2]!
103.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
104
105	veor	q3,q3,q9
106	vext.8	q9,q0,q9,#12
107	veor	q3,q3,q9
108	vext.8	q9,q0,q9,#12
109	veor	q10,q10,q1
110	veor	q3,q3,q9
111	veor	q3,q3,q10
	@ Last (11th) round key is stored without post-increment at offset
	@ 160; adding 0x50 leaves r2 at offset 240 for the round-count store.
112	vst1.32	{q3},[r2]
113	add	r2,r2,#0x50
114
115	mov	r12,#10
116	b	.Ldone
117
	@ 192-bit key: d16 receives key bytes 16..23. Subtracting 8 from every
	@ mask index makes vtbl pick its bytes from d16.
118.align	4
119.L192:
120	vld1.8	{d16},[r0]!
121	vmov.i8	q10,#8			@ borrow q10
122	vst1.32	{q3},[r2]!
123	vsub.i8	q2,q2,q10	@ adjust the mask
124
	@ Eight iterations, each storing 8 + 16 bytes of schedule.
125.Loop192:
126	vtbl.8	d20,{q8},d4
127	vtbl.8	d21,{q8},d5
128	vext.8	q9,q0,q3,#12
129	vst1.32	{d16},[r2]!
130.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
131	subs	r1,r1,#1
132
133	veor	q3,q3,q9
134	vext.8	q9,q0,q9,#12
135	veor	q3,q3,q9
136	vext.8	q9,q0,q9,#12
137	veor	q3,q3,q9
138
139	vdup.32	q9,d7[1]
140	veor	q9,q9,q8
141	veor	q10,q10,q1
142	vext.8	q8,q0,q8,#12
143	vshl.u8	q1,q1,#1
144	veor	q8,q8,q9
145	veor	q3,q3,q10
146	veor	q8,q8,q10
147	vst1.32	{q3},[r2]!
148	bne	.Loop192
149
	@ r2 is at offset 208 here; adding 0x20 moves it to offset 240.
150	mov	r12,#12
151	add	r2,r2,#0x20
152	b	.Ldone
153
	@ 256-bit key: q8 receives key bytes 16..31. Seven iterations; the
	@ loop leaves through the beq in the middle of the seventh, at which
	@ point r2 is already at offset 240.
154.align	4
155.L256:
156	vld1.8	{q8},[r0]
157	mov	r1,#7
158	mov	r12,#14
159	vst1.32	{q3},[r2]!
160
161.Loop256:
162	vtbl.8	d20,{q8},d4
163	vtbl.8	d21,{q8},d5
164	vext.8	q9,q0,q3,#12
165	vst1.32	{q8},[r2]!
166.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
167	subs	r1,r1,#1
168
169	veor	q3,q3,q9
170	vext.8	q9,q0,q9,#12
171	veor	q3,q3,q9
172	vext.8	q9,q0,q9,#12
173	veor	q10,q10,q1
174	veor	q3,q3,q9
175	vshl.u8	q1,q1,#1
176	veor	q3,q3,q10
177	vst1.32	{q3},[r2]!
178	beq	.Ldone
179
	@ Second half of the step: S-box applied to the splatted last word of
	@ q3 without rotation and without a round constant, folded into q8.
180	vdup.32	q10,d7[1]
181	vext.8	q9,q0,q8,#12
182.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
183
184	veor	q8,q8,q9
185	vext.8	q9,q0,q9,#12
186	veor	q8,q8,q9
187	vext.8	q9,q0,q9,#12
188	veor	q8,q8,q9
189
190	veor	q8,q8,q10
191	b	.Loop256
192
	@ Store the round count at offset 240 and report success.
193.Ldone:
194	str	r12,[r2]
195	mov	r3,#0
196
197.Lenc_key_abort:
198	mov	r0,r3			@ return value
199
200	bx	lr
201.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
202
@ aes_hw_set_decrypt_key: build a decryption key schedule.
@ In:   r0 = pointer to the key bytes, r1 = key length in bits,
@       r2 = pointer to the output key schedule (same arguments as
@       aes_hw_set_encrypt_key).
@ Out:  r0 = 0 on success, otherwise the error code produced by .Lenc_key.
@ The encryption schedule is generated first; its round keys are then
@ reversed in place, and aesimc is applied to every round key except the
@ first and the last.
@ r4 and lr are saved on the stack; q0 and q1 are used as scratch.
203.globl	aes_hw_set_decrypt_key
204.hidden	aes_hw_set_decrypt_key
205.type	aes_hw_set_decrypt_key,%function
206.align	5
207aes_hw_set_decrypt_key:
208	stmdb	sp!,{r4,lr}
209	bl	.Lenc_key
210
211	cmp	r0,#0
212	bne	.Ldec_key_abort
213
	@ After a successful .Lenc_key, r2 = schedule + 240 and r12 = rounds.
	@ r2 walks forward from the first round key, r0 walks backward (step
	@ r4 = -16) from the last one.
214	sub	r2,r2,#240		@ restore original r2
215	mov	r4,#-16
216	add	r0,r2,r12,lsl#4	@ end of key schedule
217
	@ Swap the first and last round keys unchanged.
218	vld1.32	{q0},[r2]
219	vld1.32	{q1},[r0]
220	vst1.32	{q0},[r0],r4
221	vst1.32	{q1},[r2]!
222
	@ Swap the remaining pairs, applying aesimc to both, until the two
	@ pointers meet.
223.Loop_imc:
224	vld1.32	{q0},[r2]
225	vld1.32	{q1},[r0]
226.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
227.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
228	vst1.32	{q0},[r0],r4
229	vst1.32	{q1},[r2]!
230	cmp	r0,r2
231	bhi	.Loop_imc
232
	@ Middle round key: transformed and written back to the same slot.
233	vld1.32	{q0},[r2]
234.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
235	vst1.32	{q0},[r0]
236
237	eor	r0,r0,r0		@ return value
238.Ldec_key_abort:
239	ldmia	sp!,{r4,pc}
240.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@ aes_hw_encrypt: encrypt one 16-byte block.
@ In:   r0 = pointer to the input block, r1 = pointer to the output block,
@       r2 = pointer to the key schedule (round count read from offset 240).
@ Uses: r3 (round counter), q0/q1 (round keys), q2 (state); r2 is advanced
@       through the schedule.
@ AARCH64_VALID_CALL_TARGET is a macro defined outside this file
@ (presumably by openssl/arm_arch.h -- confirm what it expands to here).
241.globl	aes_hw_encrypt
242.hidden	aes_hw_encrypt
243.type	aes_hw_encrypt,%function
244.align	5
245aes_hw_encrypt:
246	AARCH64_VALID_CALL_TARGET
247	ldr	r3,[r2,#240]
248	vld1.32	{q0},[r2]!
249	vld1.8	{q2},[r0]
250	sub	r3,r3,#2
251	vld1.32	{q1},[r2]!
252
	@ Two rounds per iteration; the next two round keys are loaded while
	@ the current ones are in use.
253.Loop_enc:
254.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
255.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
256	vld1.32	{q0},[r2]!
257	subs	r3,r3,#2
258.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
259.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
260	vld1.32	{q1},[r2]!
261	bgt	.Loop_enc
262
	@ Last two rounds: the final aese is not followed by aesmc, and the
	@ last round key is applied with a plain XOR.
263.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
264.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
265	vld1.32	{q0},[r2]
266.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
267	veor	q2,q2,q0
268
269	vst1.8	{q2},[r1]
270	bx	lr
271.size	aes_hw_encrypt,.-aes_hw_encrypt
@ aes_hw_decrypt: decrypt one 16-byte block.
@ In:   r0 = pointer to the input block, r1 = pointer to the output block,
@       r2 = pointer to the key schedule (round count read from offset
@       240; presumably a schedule built by aes_hw_set_decrypt_key --
@       confirm against callers).
@ Uses: r3 (round counter), q0/q1 (round keys), q2 (state); r2 is advanced
@       through the schedule.
@ AARCH64_VALID_CALL_TARGET is a macro defined outside this file
@ (presumably by openssl/arm_arch.h -- confirm what it expands to here).
272.globl	aes_hw_decrypt
273.hidden	aes_hw_decrypt
274.type	aes_hw_decrypt,%function
275.align	5
276aes_hw_decrypt:
277	AARCH64_VALID_CALL_TARGET
278	ldr	r3,[r2,#240]
279	vld1.32	{q0},[r2]!
280	vld1.8	{q2},[r0]
281	sub	r3,r3,#2
282	vld1.32	{q1},[r2]!
283
	@ Two rounds per iteration, mirroring .Loop_enc with aesd/aesimc.
284.Loop_dec:
285.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
286.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
287	vld1.32	{q0},[r2]!
288	subs	r3,r3,#2
289.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
290.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
291	vld1.32	{q1},[r2]!
292	bgt	.Loop_dec
293
	@ Last two rounds: the final aesd is not followed by aesimc, and the
	@ last round key is applied with a plain XOR.
294.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
295.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
296	vld1.32	{q0},[r2]
297.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
298	veor	q2,q2,q0
299
300	vst1.8	{q2},[r1]
301	bx	lr
302.size	aes_hw_decrypt,.-aes_hw_decrypt
@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption.
@ In:   r0 = input pointer, r1 = output pointer, r2 = length in bytes,
@       r3 = key schedule (round count read from offset 240),
@       [sp]   = pointer to the 16-byte IV (loaded into r4),
@       [sp+4] = direction flag (loaded into r5): zero selects the
@                decrypt path, non-zero the encrypt path.
@ Behaviour visible in the code:
@   - a length below 16 returns immediately without touching the IV;
@   - the length is otherwise rounded down to a multiple of 16;
@   - on completion the last ciphertext block is written back to the IV.
@ r4-r8, lr and d8-d15 are saved and restored; r12 and r14 are used as
@ extra key pointers on the 192/256-bit encrypt path.
303.globl	aes_hw_cbc_encrypt
304.hidden	aes_hw_cbc_encrypt
305.type	aes_hw_cbc_encrypt,%function
306.align	5
307aes_hw_cbc_encrypt:
308	mov	ip,sp
309	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
310	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
311	ldmia	ip,{r4,r5}		@ load remaining args
	@ r8 is the post-increment applied when loading input blocks: 16
	@ normally, 0 when the block being loaded is the last one, so the
	@ look-ahead load never advances past the input.
312	subs	r2,r2,#16
313	mov	r8,#16
314	blo	.Lcbc_abort
315	moveq	r8,#0
316
	@ The flags from this compare are consumed by "beq .Lcbc_dec" below;
	@ none of the instructions in between modifies them.
317	cmp	r5,#0			@ en- or decrypting?
318	ldr	r5,[r3,#240]
319	and	r2,r2,#-16
320	vld1.8	{q6},[r4]
321	vld1.8	{q0},[r0],r8
322
	@ q8/q9 = first two round keys, q10-q15 and q7 = last seven round
	@ keys, r5 = rounds - 8, r7 = pointer to the third round key.
323	vld1.32	{q8,q9},[r3]		@ load key schedule...
324	sub	r5,r5,#6
325	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
326	sub	r5,r5,#2
327	vld1.32	{q10,q11},[r7]!
328	vld1.32	{q12,q13},[r7]!
329	vld1.32	{q14,q15},[r7]!
330	vld1.32	{q7},[r7]
331
332	add	r7,r3,#32
333	mov	r6,r5
334	beq	.Lcbc_dec
335
	@ Encrypt path. q0 = first plaintext block XOR IV. q5 = first round
	@ key XOR last round key: XORing it into the next plaintext block
	@ lets the next first-round aese consume the previous state (before
	@ its last-round-key XOR) directly as the chaining value.
	@ r5 == 2 means 10 rounds (128-bit key).
336	cmp	r5,#2
337	veor	q0,q0,q6
338	veor	q5,q8,q7
339	beq	.Lcbc_enc128
340
	@ 192/256-bit keys: q2/q3 hold round keys 2 and 3; r7, r6, r12, r14
	@ and r3 point at round keys 1, 4, 5, 6 and 7, which are reloaded
	@ into q8/q9 inside the loop.
341	vld1.32	{q2,q3},[r7]
342	add	r7,r3,#16
343	add	r6,r3,#16*4
344	add	r12,r3,#16*5
345.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
346.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
347	add	r14,r3,#16*6
348	add	r3,r3,#16*7
349	b	.Lenter_cbc_enc
350
351.align	4
352.Loop_cbc_enc:
353.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
354.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
355	vst1.8	{q6},[r1]!
356.Lenter_cbc_enc:
357.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
358.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
359.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
360.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
361	vld1.32	{q8},[r6]
	@ r5 == 4 means 12 rounds: skip the two rounds only used for 256-bit.
362	cmp	r5,#4
363.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
364.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
365	vld1.32	{q9},[r12]
366	beq	.Lcbc_enc192
367
368.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
369.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
370	vld1.32	{q8},[r14]
371.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
372.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
373	vld1.32	{q9},[r3]
374	nop
375
376.Lcbc_enc192:
377.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
378.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	@ Flags from this subs drive both the moveq below and the bhs at the
	@ end of the loop body.
379	subs	r2,r2,#16
380.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
381.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
382	moveq	r8,#0
383.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
384.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
385.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
386.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	@ Look-ahead load of the next plaintext block, pre-mixed with q5.
387	vld1.8	{q8},[r0],r8
388.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
389.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
390	veor	q8,q8,q5
391.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
392.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
393	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
394.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
395.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
396.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	@ q6 = finished ciphertext block (also the running IV).
397	veor	q6,q0,q7
398	bhs	.Loop_cbc_enc
399
400	vst1.8	{q6},[r1]!
401	b	.Lcbc_done
402
	@ 128-bit key encrypt loop: all round keys stay resident in q8, q9,
	@ q2, q3, q10-q15 and q7.
403.align	5
404.Lcbc_enc128:
405	vld1.32	{q2,q3},[r7]
406.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
407.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
408	b	.Lenter_cbc_enc128
409.Loop_cbc_enc128:
410.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
411.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
412	vst1.8	{q6},[r1]!
413.Lenter_cbc_enc128:
414.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
415.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
416	subs	r2,r2,#16
417.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
418.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
419	moveq	r8,#0
420.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
421.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
422.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
423.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
424.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
425.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
426	vld1.8	{q8},[r0],r8
427.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
428.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
429.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
430.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
431.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
432.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
433	veor	q8,q8,q5
434.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
435	veor	q6,q0,q7
436	bhs	.Loop_cbc_enc128
437
438	vst1.8	{q6},[r1]!
439	b	.Lcbc_done
	@ Decrypt path. Three blocks are processed per iteration in q0, q1
	@ and q10; q2, q3 and q11 keep copies of the corresponding ciphertext
	@ for chaining, and q6 holds the IV / previous ciphertext block.
	@ Fewer than three blocks go straight to .Lcbc_dec_tail.
440.align	5
441.Lcbc_dec:
442	vld1.8	{q10},[r0]!
443	subs	r2,r2,#32		@ bias
444	add	r6,r5,#2
445	vorr	q3,q0,q0
446	vorr	q1,q0,q0
447	vorr	q11,q10,q10
448	blo	.Lcbc_dec_tail
449
450	vorr	q1,q10,q10
451	vld1.8	{q10},[r0]!
452	vorr	q2,q0,q0
453	vorr	q3,q1,q1
454	vorr	q11,q10,q10
455
456.Loop3x_cbc_dec:
457.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
458.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
459.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
460.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
461.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
462.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
463	vld1.32	{q8},[r7]!
464	subs	r6,r6,#2
465.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
466.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
467.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
468.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
469.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
470.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
471	vld1.32	{q9},[r7]!
472	bgt	.Loop3x_cbc_dec
473
474.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
475.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
476.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
477.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
478.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
479.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	@ q4, q5, q9 = chaining value XOR last round key for each of the
	@ three blocks. The flags from this subs are consumed by the movlo
	@ below and by the bhs at the bottom of the loop.
480	veor	q4,q6,q7
481	subs	r2,r2,#0x30
482	veor	q5,q2,q7
483	movlo	r6,r2			@ r6 is zero at this point
484.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
485.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
486.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
487.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
488.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
489.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
490	veor	q9,q3,q7
491	add	r0,r0,r6		@ r0 is adjusted in such way that
492					@ at exit from the loop q1-q10
493					@ are loaded with last "words"
494	vorr	q6,q11,q11
495	mov	r7,r3
496.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
497.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
498.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
499.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
500.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
501.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
502	vld1.8	{q2},[r0]!
503.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
504.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
505.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
506.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
507.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
508.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
509	vld1.8	{q3},[r0]!
510.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
511.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
512.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
513.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
514.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
515.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
516	vld1.8	{q11},[r0]!
517.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
518.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
519.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
520	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
521	add	r6,r5,#2
	@ Final XOR yields the three plaintext blocks; the freshly loaded
	@ ciphertext (q2, q3, q11) becomes the next working set.
522	veor	q4,q4,q0
523	veor	q5,q5,q1
524	veor	q10,q10,q9
525	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
526	vst1.8	{q4},[r1]!
527	vorr	q0,q2,q2
528	vst1.8	{q5},[r1]!
529	vorr	q1,q3,q3
530	vst1.8	{q10},[r1]!
531	vorr	q10,q11,q11
532	bhs	.Loop3x_cbc_dec
533
	@ r2 == -0x30 here means no blocks remain.
534	cmn	r2,#0x30
535	beq	.Lcbc_done
536	nop
537
	@ Tail: one or two remaining blocks, processed in q1 and q10.
538.Lcbc_dec_tail:
539.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
540.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
541.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
542.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
543	vld1.32	{q8},[r7]!
544	subs	r6,r6,#2
545.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
546.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
547.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
548.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
549	vld1.32	{q9},[r7]!
550	bgt	.Lcbc_dec_tail
551
552.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
553.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
554.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
555.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
556.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
557.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
558.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
559.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
560.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
561.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
562.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
563.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	@ r2 == -0x20 means exactly one block remains; the result of this
	@ compare is consumed by "beq .Lcbc_dec_one" below.
564	cmn	r2,#0x20
565.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
566.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
567.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
568.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
569	veor	q5,q6,q7
570.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
571.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
572.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
573.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
574	veor	q9,q3,q7
575.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
576.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
577	beq	.Lcbc_dec_one
	@ Two blocks: outputs are q5 and q9; q11 (last ciphertext) -> IV.
578	veor	q5,q5,q1
579	veor	q9,q9,q10
580	vorr	q6,q11,q11
581	vst1.8	{q5},[r1]!
582	vst1.8	{q9},[r1]!
583	b	.Lcbc_done
584
	@ One block: only the q10 lane carries a meaningful result.
585.Lcbc_dec_one:
586	veor	q5,q5,q10
587	vorr	q6,q11,q11
588	vst1.8	{q5},[r1]!
589
	@ Write the updated IV back through r4, then restore saved registers.
590.Lcbc_done:
591	vst1.8	{q6},[r4]
592.Lcbc_abort:
593	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
594	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
595.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@ aes_hw_ctr32_encrypt_blocks: CTR-mode keystream XOR with a 32-bit counter.
@ In:   r0 = input pointer, r1 = output pointer, r2 = number of 16-byte
@       blocks, r3 = key schedule (round count read from offset 240),
@       [sp] = pointer to the 16-byte counter block (loaded into r4).
@ The word at offset 12 of the counter block is the counter; it is kept in
@ host byte order in r8 and byte-reversed with rev whenever it is inserted
@ into a vector lane. The counter block in memory is only read, never
@ written back.
@ Three blocks are processed per main-loop iteration (q0, q1, q10); one or
@ two leftover blocks are handled at .Lctr32_tail (q0, q1).
@ r4-r10, lr and d8-d15 are saved and restored; r12 is used as scratch.
@ NOTE(review): a block count of 0 is not special-cased; it takes the
@ tail path like a count of 2. Confirm callers never pass 0.
596.globl	aes_hw_ctr32_encrypt_blocks
597.hidden	aes_hw_ctr32_encrypt_blocks
598.type	aes_hw_ctr32_encrypt_blocks,%function
599.align	5
600aes_hw_ctr32_encrypt_blocks:
601	mov	ip,sp
602	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
603	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
604	ldr	r4, [ip]		@ load remaining arg
605	ldr	r5,[r3,#240]
606
607	ldr	r8, [r4, #12]
608	vld1.32	{q0},[r4]
609
	@ q8/q9 = first two round keys, q12-q15 and q7 = last five round
	@ keys, r5 = rounds - 6, r7 = pointer to the third round key.
	@ The flags from "cmp r2,#2" are consumed by the movlo below and by
	@ "bls .Lctr32_tail" further down; nothing in between modifies them.
	@ r12 = input stride for the tail: 0 when fewer than two blocks.
610	vld1.32	{q8,q9},[r3]		@ load key schedule...
611	sub	r5,r5,#4
612	mov	r12,#16
613	cmp	r2,#2
614	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
615	sub	r5,r5,#2
616	vld1.32	{q12,q13},[r7]!
617	vld1.32	{q14,q15},[r7]!
618	vld1.32	{q7},[r7]
619	add	r7,r3,#32
620	mov	r6,r5
621	movlo	r12,#0
622
623	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
624	@ affected by silicon errata #1742098 [0] and #1655431 [1],
625	@ respectively, where the second instruction of an aese/aesmc
626	@ instruction pair may execute twice if an interrupt is taken right
627	@ after the first instruction consumes an input register of which a
628	@ single 32-bit lane has been updated the last time it was modified.
629	@
630	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
631	@ could write to q1 and q10 directly, but that trips these bugs.
632	@ We write to q6 and copy to the final register as a workaround.
633	@
634	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
635	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
636#ifndef __ARMEB__
637	rev	r8, r8
638#endif
	@ q0 = counter block n, q1 = n+1 and (main loop only) q10 = n+2,
	@ each built in q6 and then copied with vorr.
639	add	r10, r8, #1
640	vorr	q6,q0,q0
641	rev	r10, r10
642	vmov.32	d13[1],r10
643	add	r8, r8, #2
644	vorr	q1,q6,q6
645	bls	.Lctr32_tail
646	rev	r12, r8
647	vmov.32	d13[1],r12
648	sub	r2,r2,#3		@ bias
649	vorr	q10,q6,q6
650	b	.Loop3x_ctr32
651
652.align	4
653.Loop3x_ctr32:
654.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
655.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
656.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
657.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
658.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
659.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
660	vld1.32	{q8},[r7]!
661	subs	r6,r6,#2
662.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
663.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
664.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
665.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
666.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
667.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
668	vld1.32	{q9},[r7]!
669	bgt	.Loop3x_ctr32
670
	@ Last five rounds. The three states move to q4, q5 and q9 so that
	@ q0, q1 and q10 can be refilled with the next three counter blocks
	@ while these rounds are still in flight.
671.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
672.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
673.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
674.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
675	vld1.8	{q2},[r0]!
676	add	r9,r8,#1
677.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
678.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
679	vld1.8	{q3},[r0]!
680	rev	r9,r9
681.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
682.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
683.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
684.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
685	vld1.8	{q11},[r0]!
686	mov	r7,r3
687.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
688.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
689.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
690.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
691.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
692.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	@ Input blocks are pre-XORed with the last round key (q7).
693	veor	q2,q2,q7
694	add	r10,r8,#2
695.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
696.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
697	veor	q3,q3,q7
698	add	r8,r8,#3
699.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
700.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
701.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
702.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
703	 @ Note the logic to update q0, q1, and q10 is written to work
704	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
705	 @ 32-bit mode. See the comment above.
706	veor	q11,q11,q7
707	vmov.32	d13[1], r9
708.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
709.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
710	vorr	q0,q6,q6
711	rev	r10,r10
712.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
713.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
714	vmov.32	d13[1], r10
715	rev	r12,r8
716.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
717.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
718	vorr	q1,q6,q6
719	vmov.32	d13[1], r12
720.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
721.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
722	vorr	q10,q6,q6
	@ Flags from this subs are consumed by "bhs .Loop3x_ctr32" below.
723	subs	r2,r2,#3
724.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
725.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
726.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
727
728	veor	q2,q2,q4
729	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
730	vst1.8	{q2},[r1]!
731	veor	q3,q3,q5
732	mov	r6,r5
733	vst1.8	{q3},[r1]!
734	veor	q11,q11,q9
735	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
736	vst1.8	{q11},[r1]!
737	bhs	.Loop3x_ctr32
738
	@ Undo the bias: r2 = blocks left (0, 1 or 2). r12 = input stride
	@ for the tail, 0 when only one block is left.
739	adds	r2,r2,#3
740	beq	.Lctr32_done
741	cmp	r2,#1
742	mov	r12,#16
743	moveq	r12,#0
744
745.Lctr32_tail:
746.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
747.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
748.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
749.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
750	vld1.32	{q8},[r7]!
751	subs	r6,r6,#2
752.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
753.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
754.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
755.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
756	vld1.32	{q9},[r7]!
757	bgt	.Lctr32_tail
758
759.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
760.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
761.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
762.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
763.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
764.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
765.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
766.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	@ With r12 == 0 the second load below re-reads the same block
	@ instead of reading beyond a single remaining block.
767	vld1.8	{q2},[r0],r12
768.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
769.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
770.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
771.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
772	vld1.8	{q3},[r0]
773.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
774.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
775.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
776.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
777	veor	q2,q2,q7
778.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
779.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
780.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
781.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
782	veor	q3,q3,q7
783.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
784.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
785
	@ The second output block is stored only when r2 is not 1.
786	cmp	r2,#1
787	veor	q2,q2,q0
788	veor	q3,q3,q1
789	vst1.8	{q2},[r1]!
790	beq	.Lctr32_done
791	vst1.8	{q3},[r1]
792
793.Lctr32_done:
794	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
795	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
796.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
797#endif
798#endif
799#endif  // !OPENSSL_NO_ASM
800.section	.note.GNU-stack,"",%progbits
801