• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__arm__)
13#if defined(BORINGSSL_PREFIX)
14#include <boringssl_prefix_symbols_asm.h>
15#endif
16#include <openssl/arm_arch.h>
17
18#if __ARM_MAX_ARCH__>=7
19.text
20.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
21.fpu	neon
22.code	32
23#undef	__thumb2__
24.align	5
@ Constant table for the key schedule; aes_hw_set_encrypt_key takes its
@ address with adr and reads it in this order:
@   +0   round constant 0x01 in every 32-bit lane (shifted left once per round)
@   +16  vtbl byte-index mask: picks bytes 13,14,15,12 of the source, i.e. the
@        last word rotated by one byte, and repeats it in every lane
@   +32  round constant 0x1b, loaded for the last two AES-128 rounds
25.Lrcon:
26.long	0x01,0x01,0x01,0x01
27.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
28.long	0x1b,0x1b,0x1b,0x1b
29
30.text
31
@ aes_hw_set_encrypt_key: expand a user key into an AES encryption key
@ schedule.
@
@ In:   r0 = pointer to the user key bytes
@       r1 = key length in bits (128, 192 or 256)
@       r2 = pointer to the output key schedule
@ Out:  r0 =  0 on success
@            -1 if r0 or r2 is zero
@            -2 if r1 is below 128, above 256 or not a multiple of 64
@
@ On success the round keys are stored from r2 upward and the round count
@ (10, 12 or 14, kept in r12) is stored as a word at schedule+240; r2 is left
@ pointing at that word.  .Lenc_key is a local entry point used by
@ aes_hw_set_decrypt_key, which depends on that final r2/r12 state.
@ Modifies r0-r3, r12, q0-q3, q8-q10 and the flags.
@ The AES instructions are emitted as raw .byte sequences; the trailing
@ comment on each such line gives the mnemonic.
32.globl	aes_hw_set_encrypt_key
33.hidden	aes_hw_set_encrypt_key
34.type	aes_hw_set_encrypt_key,%function
35.align	5
36aes_hw_set_encrypt_key:
37.Lenc_key:
38	mov	r3,#-1		@ provisional result: zero pointer argument
39	cmp	r0,#0
40	beq	.Lenc_key_abort
41	cmp	r2,#0
42	beq	.Lenc_key_abort
43	mov	r3,#-2		@ provisional result: unsupported key length
44	cmp	r1,#128
45	blt	.Lenc_key_abort
46	cmp	r1,#256
47	bgt	.Lenc_key_abort
48	tst	r1,#0x3f		@ length must be a multiple of 64 bits
49	bne	.Lenc_key_abort
50
51	adr	r3,.Lrcon
52	cmp	r1,#192		@ flags are consumed by blt/beq below; the
					@ instructions in between leave them alone
53
54	veor	q0,q0,q0		@ q0 = 0: zero round key and zero fill
55	vld1.8	{q3},[r0]!	@ q3 = key bytes 0..15
56	mov	r1,#8		@ reuse r1 as the loop counter
57	vld1.32	{q1,q2},[r3]!	@ q1 = rcon 0x01, q2 = rotate-n-splat mask;
					@ r3 now points at the 0x1b row
58
59	blt	.Loop128
60	beq	.L192
61	b	.L256
62
63.align	4
64.Loop128:
@ One AES-128 expansion step per pass, 8 passes counted in r1.
65	vtbl.8	d20,{q3},d4	@ q10 = last word of q3, byte-rotated and
66	vtbl.8	d21,{q3},d5	@       repeated in every lane
67	vext.8	q9,q0,q3,#12	@ q9 = q3 moved up one word, zero-filled
68	vst1.32	{q3},[r2]!	@ emit the current round key
69.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
70	subs	r1,r1,#1		@ flags consumed by bne at the loop end
71
72	veor	q3,q3,q9
73	vext.8	q9,q0,q9,#12
74	veor	q3,q3,q9
75	vext.8	q9,q0,q9,#12
76	veor	q10,q10,q1	@ fold in the round constant
77	veor	q3,q3,q9		@ q3 = running XOR of its own words
78	vshl.u8	q1,q1,#1		@ next round constant
79	veor	q3,q3,q10	@ q3 = next round key
80	bne	.Loop128
81
82	vld1.32	{q1},[r3]	@ q1 = round constant 0x1b
83
@ Ninth expansion step, unrolled; still shifts the round constant.
84	vtbl.8	d20,{q3},d4
85	vtbl.8	d21,{q3},d5
86	vext.8	q9,q0,q3,#12
87	vst1.32	{q3},[r2]!
88.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
89
90	veor	q3,q3,q9
91	vext.8	q9,q0,q9,#12
92	veor	q3,q3,q9
93	vext.8	q9,q0,q9,#12
94	veor	q10,q10,q1
95	veor	q3,q3,q9
96	vshl.u8	q1,q1,#1
97	veor	q3,q3,q10
98
@ Tenth and last expansion step; no further round constant is needed.
99	vtbl.8	d20,{q3},d4
100	vtbl.8	d21,{q3},d5
101	vext.8	q9,q0,q3,#12
102	vst1.32	{q3},[r2]!
103.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
104
105	veor	q3,q3,q9
106	vext.8	q9,q0,q9,#12
107	veor	q3,q3,q9
108	vext.8	q9,q0,q9,#12
109	veor	q10,q10,q1
110	veor	q3,q3,q9
111	veor	q3,q3,q10
112	vst1.32	{q3},[r2]	@ final round key at schedule+160, no increment
113	add	r2,r2,#0x50	@ r2 = schedule+240, the round-count slot
114
115	mov	r12,#10
116	b	.Ldone
117
118.align	4
119.L192:
120	vld1.8	{d16},[r0]!	@ low half of q8 = key bytes 16..23
121	vmov.i8	q10,#8			@ borrow q10
122	vst1.32	{q3},[r2]!
123	vsub.i8	q2,q2,q10	@ adjust the mask
124
125.Loop192:
@ Each pass stores 8 + 16 bytes of schedule; 8 passes counted in r1.
126	vtbl.8	d20,{q8},d4
127	vtbl.8	d21,{q8},d5
128	vext.8	q9,q0,q3,#12
129	vst1.32	{d16},[r2]!
130.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
131	subs	r1,r1,#1		@ flags consumed by bne at the loop end
132
133	veor	q3,q3,q9
134	vext.8	q9,q0,q9,#12
135	veor	q3,q3,q9
136	vext.8	q9,q0,q9,#12
137	veor	q3,q3,q9
138
139	vdup.32	q9,d7[1]		@ splat the top word of q3
140	veor	q9,q9,q8
141	veor	q10,q10,q1
142	vext.8	q8,q0,q8,#12
143	vshl.u8	q1,q1,#1
144	veor	q8,q8,q9
145	veor	q3,q3,q10
146	veor	q8,q8,q10
147	vst1.32	{q3},[r2]!
148	bne	.Loop192
149
150	mov	r12,#12
151	add	r2,r2,#0x20	@ r2 = schedule+240, the round-count slot
152	b	.Ldone
153
154.align	4
155.L256:
156	vld1.8	{q8},[r0]	@ q8 = key bytes 16..31
157	mov	r1,#7		@ 7 passes; the last one leaves via beq .Ldone
158	mov	r12,#14
159	vst1.32	{q3},[r2]!
160
161.Loop256:
162	vtbl.8	d20,{q8},d4
163	vtbl.8	d21,{q8},d5
164	vext.8	q9,q0,q3,#12
165	vst1.32	{q8},[r2]!
166.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
167	subs	r1,r1,#1		@ flags consumed by beq .Ldone below
168
169	veor	q3,q3,q9
170	vext.8	q9,q0,q9,#12
171	veor	q3,q3,q9
172	vext.8	q9,q0,q9,#12
173	veor	q10,q10,q1
174	veor	q3,q3,q9
175	vshl.u8	q1,q1,#1
176	veor	q3,q3,q10
177	vst1.32	{q3},[r2]!
178	beq	.Ldone		@ after the 7th pass r2 = schedule+240
179
@ Second half of the pass: update q8 from the top word of the new q3,
@ without byte rotation and without a round constant.
180	vdup.32	q10,d7[1]
181	vext.8	q9,q0,q8,#12
182.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
183
184	veor	q8,q8,q9
185	vext.8	q9,q0,q9,#12
186	veor	q8,q8,q9
187	vext.8	q9,q0,q9,#12
188	veor	q8,q8,q9
189
190	veor	q8,q8,q10
191	b	.Loop256
192
193.Ldone:
194	str	r12,[r2]		@ round count at schedule+240
195	mov	r3,#0		@ success
196
197.Lenc_key_abort:
198	mov	r0,r3			@ return value
199
200	bx	lr
201.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
202
@ aes_hw_set_decrypt_key: build an AES decryption key schedule.
@
@ Takes the same r0/r1/r2 arguments as aes_hw_set_encrypt_key and returns the
@ same codes in r0 (0 on success, otherwise the error from .Lenc_key).
@ It first runs .Lenc_key, then reverses the order of the round keys in place
@ and applies aesimc to every round key except the first and last.
@ Depends on .Lenc_key leaving r2 = schedule+240 and r12 = round count.
@ Saves and restores r4; returns by popping the saved lr into pc.
203.globl	aes_hw_set_decrypt_key
204.hidden	aes_hw_set_decrypt_key
205.type	aes_hw_set_decrypt_key,%function
206.align	5
207aes_hw_set_decrypt_key:
208	stmdb	sp!,{r4,lr}
209	bl	.Lenc_key		@ build the encryption schedule first
210
211	cmp	r0,#0
212	bne	.Ldec_key_abort	@ pass the error code in r0 through
213
214	sub	r2,r2,#240		@ restore original r2
215	mov	r4,#-16		@ post-index step for the descending pointer
216	add	r0,r2,r12,lsl#4	@ end of key schedule
217
@ Swap the first and last round keys unchanged.
218	vld1.32	{q0},[r2]
219	vld1.32	{q1},[r0]
220	vst1.32	{q0},[r0],r4
221	vst1.32	{q1},[r2]!
222
223.Loop_imc:
@ Walk inward from both ends: transform each pair and store it swapped.
224	vld1.32	{q0},[r2]
225	vld1.32	{q1},[r0]
226.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
227.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
228	vst1.32	{q0},[r0],r4
229	vst1.32	{q1},[r2]!
230	cmp	r0,r2
231	bhi	.Loop_imc		@ continue while the pointers have not met
232
@ The pointers now meet at the middle round key: transform it in place.
233	vld1.32	{q0},[r2]
234.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
235	vst1.32	{q0},[r0]
236
237	eor	r0,r0,r0		@ return value
238.Ldec_key_abort:
239	ldmia	sp!,{r4,pc}
240.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@ aes_hw_encrypt: encrypt one 16-byte block.
@
@ In:   r0 = pointer to the input block
@       r1 = pointer to the output block
@       r2 = key schedule; the round count is read from offset 240
@ Modifies r2, r3, q0-q2 and the flags.  Leaf function, returns with bx lr.
@ Round keys are consumed two per loop pass while the next pair is loaded.
241.globl	aes_hw_encrypt
242.hidden	aes_hw_encrypt
243.type	aes_hw_encrypt,%function
244.align	5
245aes_hw_encrypt:
246	ldr	r3,[r2,#240]	@ r3 = number of rounds
247	vld1.32	{q0},[r2]!	@ q0 = round key 0
248	vld1.8	{q2},[r0]	@ q2 = input block
249	sub	r3,r3,#2		@ the last two rounds run after the loop
250	vld1.32	{q1},[r2]!	@ q1 = round key 1
251
252.Loop_enc:
253.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
254.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
255	vld1.32	{q0},[r2]!
256	subs	r3,r3,#2		@ flags consumed by bgt below
257.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
258.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
259	vld1.32	{q1},[r2]!
260	bgt	.Loop_enc
261
262.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
263.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
264	vld1.32	{q0},[r2]	@ q0 = last round key
265.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
266	veor	q2,q2,q0		@ final round: XOR with the last key, no aesmc
267
268	vst1.8	{q2},[r1]
269	bx	lr
270.size	aes_hw_encrypt,.-aes_hw_encrypt
@ aes_hw_decrypt: decrypt one 16-byte block.
@
@ In:   r0 = pointer to the input block
@       r1 = pointer to the output block
@       r2 = key schedule; the round count is read from offset 240
@ Modifies r2, r3, q0-q2 and the flags.  Leaf function, returns with bx lr.
@ Same structure as aes_hw_encrypt, using aesd/aesimc in place of aese/aesmc.
271.globl	aes_hw_decrypt
272.hidden	aes_hw_decrypt
273.type	aes_hw_decrypt,%function
274.align	5
275aes_hw_decrypt:
276	ldr	r3,[r2,#240]	@ r3 = number of rounds
277	vld1.32	{q0},[r2]!	@ q0 = round key 0
278	vld1.8	{q2},[r0]	@ q2 = input block
279	sub	r3,r3,#2		@ the last two rounds run after the loop
280	vld1.32	{q1},[r2]!	@ q1 = round key 1
281
282.Loop_dec:
283.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
284.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
285	vld1.32	{q0},[r2]!
286	subs	r3,r3,#2		@ flags consumed by bgt below
287.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
288.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
289	vld1.32	{q1},[r2]!
290	bgt	.Loop_dec
291
292.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
293.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
294	vld1.32	{q0},[r2]	@ q0 = last round key
295.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
296	veor	q2,q2,q0		@ final round: XOR with the last key, no aesimc
297
298	vst1.8	{q2},[r1]
299	bx	lr
300.size	aes_hw_decrypt,.-aes_hw_decrypt
@ aes_hw_cbc_encrypt: AES-CBC encrypt or decrypt whole 16-byte blocks.
@
@ In:   r0      = input pointer
@       r1      = output pointer
@       r2      = length in bytes; below 16 the function returns without
@                 writing output or IV, otherwise it is rounded down to a
@                 multiple of 16
@       r3      = key schedule; the round count is read from offset 240
@       [sp]    = pointer to the 16-byte IV, overwritten on exit with the
@                 last ciphertext block
@       [sp,#4] = zero selects decryption, non-zero selects encryption
@
@ Saves and restores r4-r8, lr and d8-d15; returns by popping lr into pc.
@ Encryption handles one block at a time; decryption handles three blocks
@ per pass with a one- or two-block tail.
@ r8 is the stride of the lookahead input load; it is set to 0 once the last
@ block has been fetched so that load never moves past the end of the input.
301.globl	aes_hw_cbc_encrypt
302.hidden	aes_hw_cbc_encrypt
303.type	aes_hw_cbc_encrypt,%function
304.align	5
305aes_hw_cbc_encrypt:
306	mov	ip,sp
307	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
308	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
309	ldmia	ip,{r4,r5}		@ load remaining args
310	subs	r2,r2,#16
311	mov	r8,#16
312	blo	.Lcbc_abort		@ fewer than 16 bytes: nothing to do
313	moveq	r8,#0			@ exactly one block: do not advance r0
314
315	cmp	r5,#0			@ en- or decrypting?
					@ (flags stay live until beq .Lcbc_dec)
316	ldr	r5,[r3,#240]		@ r5 = number of rounds
317	and	r2,r2,#-16
318	vld1.8	{q6},[r4]		@ q6 = IV
319	vld1.8	{q0},[r0],r8		@ q0 = first input block
320
321	vld1.32	{q8,q9},[r3]		@ load key schedule...
322	sub	r5,r5,#6
323	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
324	sub	r5,r5,#2		@ r5 = rounds-8: 2, 4 or 6
325	vld1.32	{q10,q11},[r7]!
326	vld1.32	{q12,q13},[r7]!
327	vld1.32	{q14,q15},[r7]!
328	vld1.32	{q7},[r7]		@ q7 = last round key
329
330	add	r7,r3,#32		@ r7 -> round key 2
331	mov	r6,r5
332	beq	.Lcbc_dec
333
@ ---- encryption ----
334	cmp	r5,#2			@ 10 rounds? then take the 128-bit path
335	veor	q0,q0,q6		@ first plaintext block ^ IV
336	veor	q5,q8,q7		@ q5 = round key 0 ^ last round key
337	beq	.Lcbc_enc128
338
339	vld1.32	{q2,q3},[r7]		@ q2,q3 = round keys 2 and 3
340	add	r7,r3,#16		@ r7  -> round key 1
341	add	r6,r3,#16*4		@ r6  -> round key 4
342	add	r12,r3,#16*5		@ r12 -> round key 5
343.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
344.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
345	add	r14,r3,#16*6		@ r14 -> round key 6 (lr is saved on the stack)
346	add	r3,r3,#16*7		@ r3  -> round key 7
347	b	.Lenter_cbc_enc
348
349.align	4
350.Loop_cbc_enc:
351.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
352.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
353	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
354.Lenter_cbc_enc:
355.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
356.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
357.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
358.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
359	vld1.32	{q8},[r6]
360	cmp	r5,#4			@ 12 rounds? then skip two rounds below
361.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
362.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
363	vld1.32	{q9},[r12]
364	beq	.Lcbc_enc192
365
366.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
367.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
368	vld1.32	{q8},[r14]
369.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
370.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
371	vld1.32	{q9},[r3]
372	nop
373
374.Lcbc_enc192:
375.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
376.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
377	subs	r2,r2,#16		@ flags consumed by moveq and bhs below
378.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
379.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
380	moveq	r8,#0			@ last block reached: stop advancing r0
381.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
382.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
383.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
384.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
385	vld1.8	{q8},[r0],r8		@ lookahead: next plaintext block
386.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
387.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
388	veor	q8,q8,q5		@ next block ^ round key 0 ^ last round key
389.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
390.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
391	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
392.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
393.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
394.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
395	veor	q6,q0,q7		@ q6 = ciphertext block; q0 stays without the
					@ last-key XOR, q8 already accounts for it
396	bhs	.Loop_cbc_enc
397
398	vst1.8	{q6},[r1]!		@ store the final ciphertext block
399	b	.Lcbc_done
400
401.align	5
402.Lcbc_enc128:
@ 10-round variant: every round key stays in registers.
403	vld1.32	{q2,q3},[r7]		@ q2,q3 = round keys 2 and 3
404.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
405.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
406	b	.Lenter_cbc_enc128
407.Loop_cbc_enc128:
408.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
409.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
410	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
411.Lenter_cbc_enc128:
412.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
413.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
414	subs	r2,r2,#16		@ flags consumed by moveq and bhs below
415.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
416.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
417	moveq	r8,#0			@ last block reached: stop advancing r0
418.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
419.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
420.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
421.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
422.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
423.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
424	vld1.8	{q8},[r0],r8		@ lookahead: next plaintext block
425.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
426.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
427.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
428.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
429.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
430.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
431	veor	q8,q8,q5		@ next block ^ round key 0 ^ last round key
432.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
433	veor	q6,q0,q7		@ q6 = ciphertext block
434	bhs	.Loop_cbc_enc128
435
436	vst1.8	{q6},[r1]!		@ store the final ciphertext block
437	b	.Lcbc_done
438.align	5
439.Lcbc_dec:
@ ---- decryption ----
@ q0, q1, q10 carry the blocks being decrypted; q2, q3, q11 keep copies of
@ the ciphertext for chaining and q6 holds the chaining value of the
@ first block of the group.
440	vld1.8	{q10},[r0]!
441	subs	r2,r2,#32		@ bias
442	add	r6,r5,#2		@ r6 = inner round-loop counter (rounds-6)
443	vorr	q3,q0,q0
444	vorr	q1,q0,q0
445	vorr	q11,q10,q10
446	blo	.Lcbc_dec_tail		@ fewer than three blocks in total
447
448	vorr	q1,q10,q10
449	vld1.8	{q10},[r0]!
450	vorr	q2,q0,q0
451	vorr	q3,q1,q1
452	vorr	q11,q10,q10
453
454.Loop3x_cbc_dec:
455.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
456.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
457.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
458.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
459.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
460.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
461	vld1.32	{q8},[r7]!
462	subs	r6,r6,#2		@ flags consumed by bgt below
463.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
464.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
465.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
466.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
467.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
468.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
469	vld1.32	{q9},[r7]!
470	bgt	.Loop3x_cbc_dec
471
472.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
473.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
474.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
475.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
476.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
477.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
478	veor	q4,q6,q7		@ chaining value ^ last round key, block 0
479	subs	r2,r2,#0x30		@ flags consumed by movlo and the bhs
					@ that closes the 3x loop
480	veor	q5,q2,q7		@ same for block 1
481	movlo	r6,r2			@ r6 is zero at this point; it becomes the
					@ negative remainder when fewer than three
					@ blocks are left
482.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
483.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
484.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
485.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
486.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
487.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
488	veor	q9,q3,q7		@ same for block 2
489	add	r0,r0,r6		@ r0 is adjusted in such way that
490					@ at exit from the loop q1-q10
491					@ are loaded with last "words"
492	vorr	q6,q11,q11		@ chaining value for the next group
493	mov	r7,r3			@ rewind the round-key pointer
494.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
495.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
496.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
497.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
498.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
499.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
500	vld1.8	{q2},[r0]!		@ lookahead: next three input blocks
501.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
502.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
503.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
504.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
505.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
506.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
507	vld1.8	{q3},[r0]!
508.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
509.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
510.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
511.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
512.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
513.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
514	vld1.8	{q11},[r0]!
515.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
516.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
517.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
518	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
519	add	r6,r5,#2		@ reset the inner round-loop counter
520	veor	q4,q4,q0		@ plaintext blocks 0..2 of this group
521	veor	q5,q5,q1
522	veor	q10,q10,q9
523	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
524	vst1.8	{q4},[r1]!
525	vorr	q0,q2,q2
526	vst1.8	{q5},[r1]!
527	vorr	q1,q3,q3
528	vst1.8	{q10},[r1]!
529	vorr	q10,q11,q11
530	bhs	.Loop3x_cbc_dec
531
532	cmn	r2,#0x30		@ r2 == -48: nothing left over
533	beq	.Lcbc_done
534	nop
535
536.Lcbc_dec_tail:
@ One or two blocks remain, held in q1 and q10.
537.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
538.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
539.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
540.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
541	vld1.32	{q8},[r7]!
542	subs	r6,r6,#2		@ flags consumed by bgt below
543.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
544.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
545.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
546.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
547	vld1.32	{q9},[r7]!
548	bgt	.Lcbc_dec_tail
549
550.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
551.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
552.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
553.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
554.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
555.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
556.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
557.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
558.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
559.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
560.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
561.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
562	cmn	r2,#0x20		@ r2 == -32: exactly one block left
					@ (flags consumed by beq .Lcbc_dec_one)
563.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
564.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
565.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
566.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
567	veor	q5,q6,q7
568.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
569.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
570.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
571.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
572	veor	q9,q3,q7
573.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
574.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
575	beq	.Lcbc_dec_one
576	veor	q5,q5,q1		@ two blocks: q1 then q10
577	veor	q9,q9,q10
578	vorr	q6,q11,q11		@ new IV = last ciphertext block
579	vst1.8	{q5},[r1]!
580	vst1.8	{q9},[r1]!
581	b	.Lcbc_done
582
583.Lcbc_dec_one:
584	veor	q5,q5,q10		@ single block: result comes from q10
585	vorr	q6,q11,q11		@ new IV = last ciphertext block
586	vst1.8	{q5},[r1]!
587
588.Lcbc_done:
589	vst1.8	{q6},[r4]		@ write the updated IV back
590.Lcbc_abort:
591	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
592	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
593.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@ aes_hw_ctr32_encrypt_blocks: AES-CTR over whole blocks with a 32-bit counter.
@
@ In:   r0   = input pointer
@       r1   = output pointer
@       r2   = number of 16-byte blocks
@       r3   = key schedule; the round count is read from offset 240
@       [sp] = pointer to the 16-byte counter block; only its last 32-bit
@              word is incremented, once per block.  The block in memory is
@              read but never written back.
@
@ Saves and restores r4-r10, lr and d8-d15; returns by popping lr into pc.
@ Three blocks are processed per pass of .Loop3x_ctr32; one or two blocks go
@ through .Lctr32_tail.  q6 keeps the unmodified counter block, r8 the
@ running counter value, and q0/q1/q10 the counter blocks being encrypted.
@ NOTE(review): r2 == 0 is not special-cased; it takes the .Lctr32_tail path,
@ which reads input and stores two output blocks.  Presumably callers never
@ pass 0 - confirm against the call sites.
594.globl	aes_hw_ctr32_encrypt_blocks
595.hidden	aes_hw_ctr32_encrypt_blocks
596.type	aes_hw_ctr32_encrypt_blocks,%function
597.align	5
598aes_hw_ctr32_encrypt_blocks:
599	mov	ip,sp
600	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
601	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
602	ldr	r4, [ip]		@ load remaining arg
603	ldr	r5,[r3,#240]		@ r5 = number of rounds
604
605	ldr	r8, [r4, #12]		@ r8 = counter word as stored in memory
606	vld1.32	{q0},[r4]		@ q0 = full counter block
607
608	vld1.32	{q8,q9},[r3]		@ load key schedule...
609	sub	r5,r5,#4
610	mov	r12,#16			@ r12 = input stride used by the tail
611	cmp	r2,#2			@ flags consumed by movlo and bls below
612	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
613	sub	r5,r5,#2		@ r5 = rounds-6, the inner loop count
614	vld1.32	{q12,q13},[r7]!
615	vld1.32	{q14,q15},[r7]!
616	vld1.32	{q7},[r7]		@ q7 = last round key
617	add	r7,r3,#32		@ r7 -> round key 2
618	mov	r6,r5
619	movlo	r12,#0			@ fewer than two blocks: stride 0
620#ifndef __ARMEB__
621	rev	r8, r8
622#endif
623	vorr	q1,q0,q0
624	add	r10, r8, #1
625	vorr	q10,q0,q0
626	add	r8, r8, #2
627	vorr	q6,q0,q0		@ q6 = pristine copy of the counter block
628	rev	r10, r10
629	vmov.32	d3[1],r10		@ q1 = counter block with counter+1
630	bls	.Lctr32_tail		@ at most two blocks in total
631	rev	r12, r8
632	sub	r2,r2,#3		@ bias
633	vmov.32	d21[1],r12		@ q10 = counter block with counter+2
634	b	.Loop3x_ctr32
635
636.align	4
637.Loop3x_ctr32:
638.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
639.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
640.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
641.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
642.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
643.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
644	vld1.32	{q8},[r7]!
645	subs	r6,r6,#2		@ flags consumed by bgt below
646.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
647.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
648.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
649.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
650.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
651.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
652	vld1.32	{q9},[r7]!
653	bgt	.Loop3x_ctr32
654
@ Remaining rounds.  The keystream moves into q4/q5/q9 so that q0/q1/q10 can
@ be reloaded from q6 and given the next three counter values in parallel.
655.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
656.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
657.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
658.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
659	vld1.8	{q2},[r0]!		@ q2, q3, q11 = three input blocks
660	vorr	q0,q6,q6
661.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
662.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
663	vld1.8	{q3},[r0]!
664	vorr	q1,q6,q6
665.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
666.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
667.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
668.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
669	vld1.8	{q11},[r0]!
670	mov	r7,r3			@ rewind the round-key pointer
671.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
672.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
673	vorr	q10,q6,q6
674	add	r9,r8,#1
675.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
676.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
677.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
678.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
679	veor	q2,q2,q7		@ fold the last round key into the input
680	add	r10,r8,#2
681.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
682.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
683	veor	q3,q3,q7
684	add	r8,r8,#3		@ advance the running counter by three
685.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
686.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
687.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
688.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
689	veor	q11,q11,q7
690	rev	r9,r9
691.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
692.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
693	vmov.32	d1[1], r9		@ next counter value into q0
694	rev	r10,r10
695.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
696.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
697.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
698.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
699	vmov.32	d3[1], r10		@ next counter value into q1
700	rev	r12,r8
701.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
702.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
703	vmov.32	d21[1], r12		@ next counter value into q10
704	subs	r2,r2,#3		@ flags consumed by bhs at the loop end
705.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
706.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
707.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
708
709	veor	q2,q2,q4		@ output = input ^ keystream
710	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
711	vst1.8	{q2},[r1]!
712	veor	q3,q3,q5
713	mov	r6,r5			@ reset the inner round-loop counter
714	vst1.8	{q3},[r1]!
715	veor	q11,q11,q9
716	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
717	vst1.8	{q11},[r1]!
718	bhs	.Loop3x_ctr32
719
720	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0..2)
721	beq	.Lctr32_done
722	cmp	r2,#1
723	mov	r12,#16
724	moveq	r12,#0			@ one block left: input stride 0
725
726.Lctr32_tail:
@ One or two blocks remain; q0 and q1 hold their counter blocks.
727.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
728.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
729.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
730.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
731	vld1.32	{q8},[r7]!
732	subs	r6,r6,#2		@ flags consumed by bgt below
733.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
734.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
735.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
736.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
737	vld1.32	{q9},[r7]!
738	bgt	.Lctr32_tail
739
740.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
741.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
742.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
743.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
744.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
745.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
746.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
747.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
748	vld1.8	{q2},[r0],r12		@ with stride 0 the next load re-reads
					@ the same block instead of going further
749.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
750.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
751.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
752.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
753	vld1.8	{q3},[r0]
754.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
755.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
756.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
757.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
758	veor	q2,q2,q7		@ fold the last round key into the input
759.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
760.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
761.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
762.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
763	veor	q3,q3,q7
764.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
765.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
766
767	cmp	r2,#1			@ flags consumed by beq below
768	veor	q2,q2,q0
769	veor	q3,q3,q1
770	vst1.8	{q2},[r1]!
771	beq	.Lctr32_done		@ only one block was left
772	vst1.8	{q3},[r1]
773
774.Lctr32_done:
775	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
776	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
777.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
778#endif
779#endif
780#endif  // !OPENSSL_NO_ASM
781.section	.note.GNU-stack,"",%progbits
782