• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__APPLE__)
7#include <openssl/arm_arch.h>
8
9#if __ARM_MAX_ARCH__>=7
10.text
11
12
13.code	32
14#undef	__thumb2__
15.align	5
	@ Constant table for the key-expansion code below, addressed through r3
	@ after the adr in the set-encrypt-key routine. Each row is 16 bytes:
	@   row 0: 0x01 in every 32-bit lane. Loaded into q1 as the first round
	@          constant and doubled with vshl.u8 after each use.
	@   row 1: byte-index mask for vtbl.8, loaded into q2. With the words
	@          stored little-endian the indices are 13,14,15,12 in every
	@          lane, i.e. the last word of the source, rotated and splatted.
	@   row 2: 0x1b in every lane. Reloaded into q1 by the 128-bit path once
	@          the eight passes of its loop have used up row 0.
16Lrcon:
17.long	0x01,0x01,0x01,0x01
18.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
19.long	0x1b,0x1b,0x1b,0x1b
20
21.text
22
23.globl	_aes_hw_set_encrypt_key
24.private_extern	_aes_hw_set_encrypt_key
25#ifdef __thumb2__
26.thumb_func	_aes_hw_set_encrypt_key
27#endif
28.align	5
	@ aes_hw_set_encrypt_key - expand a user key into an AES encryption key
	@ schedule using the AES instructions (emitted below as .byte words).
	@
	@ In:   r0 = pointer to the user key bytes (must be non-zero)
	@       r1 = key length in bits; 128, 192 and 256 are accepted
	@       r2 = pointer to the output key schedule (must be non-zero)
	@ Out:  r0 =  0 on success
	@            -1 if r0 or r2 is zero
	@            -2 if r1 is below 128, above 256 or not a multiple of 64
	@       On success the round keys are written from offset 0 and the
	@       round count (10, 12 or 14) is stored at byte offset 240.
	@ Clobbers r0-r3, r12, q0-q3, q8-q10 and the flags. No stack is used.
	@
	@ Lenc_key is a second entry label: the set-decrypt-key routine calls
	@ this body with bl and, on success, relies on r12 = round count and
	@ r2 = schedule base + 240 at return.
29_aes_hw_set_encrypt_key:
30Lenc_key:
	@ r3 carries the pending return code; -1 reports a zero pointer.
31	mov	r3,#-1
32	cmp	r0,#0
33	beq	Lenc_key_abort
34	cmp	r2,#0
35	beq	Lenc_key_abort
	@ -2 reports an unsupported bit length (signed range check, then a
	@ test that the low six bits are clear).
36	mov	r3,#-2
37	cmp	r1,#128
38	blt	Lenc_key_abort
39	cmp	r1,#256
40	bgt	Lenc_key_abort
41	tst	r1,#0x3f
42	bne	Lenc_key_abort
43
	@ r3 is reused as the constant-table pointer. The flags set by the
	@ compare against 192 are consumed by the blt/beq further down; none of
	@ the instructions in between modify them.
44	adr	r3,Lrcon
45	cmp	r1,#192
46
	@ q0 = 0 (zero operand for aese and zero fill for vext)
	@ q3 = first 16 key bytes, q1 = round constant, q2 = vtbl mask
47	veor	q0,q0,q0
48	vld1.8	{q3},[r0]!
49	mov	r1,#8		@ reuse r1
50	vld1.32	{q1,q2},[r3]!
51
52	blt	Loop128
53	beq	L192
54	b	L256
55
56.align	4
	@ 128-bit key: eight passes counted in r1. Each pass stores the current
	@ round key q3 and derives the next one:
	@   - vtbl.8 gathers the rotated last word of q3 into every lane of q10
	@   - aese against the all-zero q0 applies the S-box to it (the four
	@     columns are equal, so the row shift changes nothing)
	@   - the round constant q1 is xor-ed in and then doubled
	@   - the vext/veor pairs xor q3 with itself shifted up by one, two and
	@     three words (zero filled from q0)
57Loop128:
58	vtbl.8	d20,{q3},d4
59	vtbl.8	d21,{q3},d5
60	vext.8	q9,q0,q3,#12
61	vst1.32	{q3},[r2]!
62.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
63	subs	r1,r1,#1
64
65	veor	q3,q3,q9
66	vext.8	q9,q0,q9,#12
67	veor	q3,q3,q9
68	vext.8	q9,q0,q9,#12
69	veor	q10,q10,q1
70	veor	q3,q3,q9
71	vshl.u8	q1,q1,#1
72	veor	q3,q3,q10
73	bne	Loop128
74
	@ r3 was advanced past rows 0 and 1 by the post-incrementing load, so
	@ this picks up row 2 (0x1b) as the round constant for the two
	@ remaining, unrolled steps.
75	vld1.32	{q1},[r3]
76
77	vtbl.8	d20,{q3},d4
78	vtbl.8	d21,{q3},d5
79	vext.8	q9,q0,q3,#12
80	vst1.32	{q3},[r2]!
81.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
82
83	veor	q3,q3,q9
84	vext.8	q9,q0,q9,#12
85	veor	q3,q3,q9
86	vext.8	q9,q0,q9,#12
87	veor	q10,q10,q1
88	veor	q3,q3,q9
89	vshl.u8	q1,q1,#1
90	veor	q3,q3,q10
91
92	vtbl.8	d20,{q3},d4
93	vtbl.8	d21,{q3},d5
94	vext.8	q9,q0,q3,#12
95	vst1.32	{q3},[r2]!
96.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
97
98	veor	q3,q3,q9
99	vext.8	q9,q0,q9,#12
100	veor	q3,q3,q9
101	vext.8	q9,q0,q9,#12
102	veor	q10,q10,q1
103	veor	q3,q3,q9
104	veor	q3,q3,q10
	@ Last round key is stored without writeback: r2 is at base + 160 here
	@ (ten 16-byte stores so far), so adding 0x50 leaves it at base + 240.
105	vst1.32	{q3},[r2]
106	add	r2,r2,#0x50
107
108	mov	r12,#10
109	b	Ldone
110
111.align	4
	@ 192-bit key: the remaining 8 key bytes go into d16 (low half of q8).
	@ Subtracting 8 from every mask byte makes vtbl index into d16 instead
	@ of the upper half of a full 16-byte source.
112L192:
113	vld1.8	{d16},[r0]!
114	vmov.i8	q10,#8			@ borrow q10
115	vst1.32	{q3},[r2]!
116	vsub.i8	q2,q2,q10	@ adjust the mask
117
	@ Eight passes counted in r1; each pass stores 8 bytes (d16) followed by
	@ 16 bytes (q3), i.e. 24 bytes of schedule per pass.
118Loop192:
119	vtbl.8	d20,{q8},d4
120	vtbl.8	d21,{q8},d5
121	vext.8	q9,q0,q3,#12
122	vst1.32	{d16},[r2]!
123.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
124	subs	r1,r1,#1
125
126	veor	q3,q3,q9
127	vext.8	q9,q0,q9,#12
128	veor	q3,q3,q9
129	vext.8	q9,q0,q9,#12
130	veor	q3,q3,q9
131
132	vdup.32	q9,d7[1]
133	veor	q9,q9,q8
134	veor	q10,q10,q1
135	vext.8	q8,q0,q8,#12
136	vshl.u8	q1,q1,#1
137	veor	q8,q8,q9
138	veor	q3,q3,q10
139	veor	q8,q8,q10
140	vst1.32	{q3},[r2]!
141	bne	Loop192
142
	@ r2 is at base + 16 + 8*24 = base + 208; 0x20 more gives base + 240.
143	mov	r12,#12
144	add	r2,r2,#0x20
145	b	Ldone
146
147.align	4
	@ 256-bit key: q8 takes the second 16 key bytes; seven passes in r1.
148L256:
149	vld1.8	{q8},[r0]
150	mov	r1,#7
151	mov	r12,#14
152	vst1.32	{q3},[r2]!
153
	@ Each pass stores q8, derives and stores the next q3 exactly as in the
	@ 128-bit loop (but keyed from q8), and leaves through beq on the
	@ seventh pass with r2 at base + 240.
154Loop256:
155	vtbl.8	d20,{q8},d4
156	vtbl.8	d21,{q8},d5
157	vext.8	q9,q0,q3,#12
158	vst1.32	{q8},[r2]!
159.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
160	subs	r1,r1,#1
161
162	veor	q3,q3,q9
163	vext.8	q9,q0,q9,#12
164	veor	q3,q3,q9
165	vext.8	q9,q0,q9,#12
166	veor	q10,q10,q1
167	veor	q3,q3,q9
168	vshl.u8	q1,q1,#1
169	veor	q3,q3,q10
170	vst1.32	{q3},[r2]!
171	beq	Ldone
172
	@ Second half of the pass: derive the next q8 from the last word of the
	@ new q3, splatted without rotation and with no round constant.
173	vdup.32	q10,d7[1]
174	vext.8	q9,q0,q8,#12
175.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
176
177	veor	q8,q8,q9
178	vext.8	q9,q0,q9,#12
179	veor	q8,q8,q9
180	vext.8	q9,q0,q9,#12
181	veor	q8,q8,q9
182
183	veor	q8,q8,q10
184	b	Loop256
185
	@ Common success exit: r2 = base + 240 on every path, so this stores the
	@ round count right behind the round keys.
186Ldone:
187	str	r12,[r2]
188	mov	r3,#0
189
190Lenc_key_abort:
191	mov	r0,r3			@ return value
192
193	bx	lr
194
195
196.globl	_aes_hw_set_decrypt_key
197.private_extern	_aes_hw_set_decrypt_key
198#ifdef __thumb2__
199.thumb_func	_aes_hw_set_decrypt_key
200#endif
201.align	5
	@ aes_hw_set_decrypt_key - build an AES decryption key schedule.
	@
	@ Takes the same r0/r1/r2 arguments as the set-encrypt-key routine,
	@ which it calls through the Lenc_key entry. A non-zero result from that
	@ call is returned unchanged. On success the schedule is converted in
	@ place: the order of the round keys is reversed and aesimc is applied
	@ to every key except the first and the last; r0 is then 0.
	@ r4 and lr are saved on the stack (the bl overwrites lr).
202_aes_hw_set_decrypt_key:
203	stmdb	sp!,{r4,lr}
204	bl	Lenc_key
205
206	cmp	r0,#0
207	bne	Ldec_key_abort
208
	@ The callee leaves r2 = base + 240 and r12 = round count.
	@ r2 walks up from the first key, r0 walks down from the last key,
	@ r4 = -16 is the post-index step for r0.
209	sub	r2,r2,#240		@ restore original r2
210	mov	r4,#-16
211	add	r0,r2,r12,lsl#4	@ end of key schedule
212
	@ Swap the outermost pair as is (no aesimc on first and last key).
213	vld1.32	{q0},[r2]
214	vld1.32	{q1},[r0]
215	vst1.32	{q0},[r0],r4
216	vst1.32	{q1},[r2]!
217
	@ Swap the inner pairs, applying aesimc to both, until the two
	@ pointers meet.
218Loop_imc:
219	vld1.32	{q0},[r2]
220	vld1.32	{q1},[r0]
221.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
222.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
223	vst1.32	{q0},[r0],r4
224	vst1.32	{q1},[r2]!
225	cmp	r0,r2
226	bhi	Loop_imc
227
	@ The middle key is transformed in place.
228	vld1.32	{q0},[r2]
229.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
230	vst1.32	{q0},[r0]
231
232	eor	r0,r0,r0		@ return value
233Ldec_key_abort:
234	ldmia	sp!,{r4,pc}
235
236.globl	_aes_hw_encrypt
237.private_extern	_aes_hw_encrypt
238#ifdef __thumb2__
239.thumb_func	_aes_hw_encrypt
240#endif
241.align	5
	@ aes_hw_encrypt - encrypt a single 16-byte block.
	@
	@ In:   r0 = pointer to the 16-byte input block
	@       r1 = pointer to the 16-byte output block
	@       r2 = pointer to the key schedule; the round count is read from
	@            byte offset 240
	@ Clobbers r2 (advanced through the schedule), r3, q0-q2 and the flags.
	@ r0 is not written, so no return value is produced.
	@ NOTE(review): the first line of the body is a macro that is not
	@ defined in this file; presumably it comes from the included arch
	@ header - confirm what it expands to for 32-bit ARM.
242_aes_hw_encrypt:
243	AARCH64_VALID_CALL_TARGET
244	ldr	r3,[r2,#240]
245	vld1.32	{q0},[r2]!
246	vld1.8	{q2},[r0]
247	sub	r3,r3,#2
248	vld1.32	{q1},[r2]!
249
	@ Two rounds per pass: q0 and q1 hold the current pair of round keys
	@ and are refilled with the next pair while the rounds execute.
	@ r3 = rounds - 2 on entry and drops by 2 per pass.
250Loop_enc:
251.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
252.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
253	vld1.32	{q0},[r2]!
254	subs	r3,r3,#2
255.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
256.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
257	vld1.32	{q1},[r2]!
258	bgt	Loop_enc
259
	@ Final two rounds: the last aese is not followed by aesmc and the
	@ last round key (loaded into q0) is applied with a plain xor.
260.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
261.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
262	vld1.32	{q0},[r2]
263.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
264	veor	q2,q2,q0
265
266	vst1.8	{q2},[r1]
267	bx	lr
268
269.globl	_aes_hw_decrypt
270.private_extern	_aes_hw_decrypt
271#ifdef __thumb2__
272.thumb_func	_aes_hw_decrypt
273#endif
274.align	5
	@ aes_hw_decrypt - decrypt a single 16-byte block.
	@
	@ In:   r0 = pointer to the 16-byte input block
	@       r1 = pointer to the 16-byte output block
	@       r2 = pointer to the key schedule; the round count is read from
	@            byte offset 240
	@ Clobbers r2 (advanced through the schedule), r3, q0-q2 and the flags.
	@ r0 is not written, so no return value is produced.
	@ The round keys are consumed in ascending address order, so the
	@ schedule is presumably one prepared by the set-decrypt-key routine -
	@ confirm against the callers.
	@ NOTE(review): the first line of the body is a macro that is not
	@ defined in this file; presumably it comes from the included arch
	@ header - confirm what it expands to for 32-bit ARM.
275_aes_hw_decrypt:
276	AARCH64_VALID_CALL_TARGET
277	ldr	r3,[r2,#240]
278	vld1.32	{q0},[r2]!
279	vld1.8	{q2},[r0]
280	sub	r3,r3,#2
281	vld1.32	{q1},[r2]!
282
	@ Two rounds per pass: q0 and q1 hold the current pair of round keys
	@ and are refilled with the next pair while the rounds execute.
	@ r3 = rounds - 2 on entry and drops by 2 per pass.
283Loop_dec:
284.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
285.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
286	vld1.32	{q0},[r2]!
287	subs	r3,r3,#2
288.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
289.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
290	vld1.32	{q1},[r2]!
291	bgt	Loop_dec
292
	@ Final two rounds: the last aesd is not followed by aesimc and the
	@ last round key (loaded into q0) is applied with a plain xor.
293.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
294.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
295	vld1.32	{q0},[r2]
296.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
297	veor	q2,q2,q0
298
299	vst1.8	{q2},[r1]
300	bx	lr
301
302.globl	_aes_hw_cbc_encrypt
303.private_extern	_aes_hw_cbc_encrypt
304#ifdef __thumb2__
305.thumb_func	_aes_hw_cbc_encrypt
306#endif
307.align	5
	@ aes_hw_cbc_encrypt - CBC-mode encryption or decryption.
	@
	@ In:   r0     = input pointer
	@       r1     = output pointer
	@       r2     = length in bytes; a length below 16 returns at once and
	@                a remainder beyond a multiple of 16 is not processed
	@       r3     = key schedule (round count read from byte offset 240)
	@       [sp]   = pointer to the 16-byte IV; on exit it receives the
	@                last ciphertext block handled
	@       [sp+4] = direction flag: non-zero encrypts, zero decrypts
	@ Saves and restores r4-r8, lr and d8-d15 (q4-q7 are used as scratch).
	@
	@ Register roles after the common setup:
	@   r8       input post-increment, 16 or 0; it is zeroed when the block
	@            being fetched is the last one so the look-ahead load does
	@            not advance past the end of the input
	@   r5       rounds - 8, r6 = copy of r5, r7 = schedule + 32
	@   q6       IV / previous ciphertext block
	@   q8,q9    round keys 0 and 1
	@   q10-q15  six of the last seven round keys, q7 = the very last one
308_aes_hw_cbc_encrypt:
309	mov	ip,sp
310	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
311	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
312	ldmia	ip,{r4,r5}		@ load remaining args
313	subs	r2,r2,#16
314	mov	r8,#16
315	blo	Lcbc_abort
316	moveq	r8,#0
317
	@ The flags from this compare are consumed by the beq to Lcbc_dec
	@ further down; nothing in between modifies them.
318	cmp	r5,#0			@ en- or decrypting?
319	ldr	r5,[r3,#240]
320	and	r2,r2,#-16
321	vld1.8	{q6},[r4]
322	vld1.8	{q0},[r0],r8
323
324	vld1.32	{q8,q9},[r3]		@ load key schedule...
325	sub	r5,r5,#6
326	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
327	sub	r5,r5,#2
328	vld1.32	{q10,q11},[r7]!
329	vld1.32	{q12,q13},[r7]!
330	vld1.32	{q14,q15},[r7]!
331	vld1.32	{q7},[r7]
332
333	add	r7,r3,#32
334	mov	r6,r5
335	beq	Lcbc_dec
336
	@ Encryption. q0 = first plaintext block xor IV. q5 = round key 0 xor
	@ last round key; it is xor-ed into every following plaintext block so
	@ that feeding the result to the first aese of the next block both
	@ completes the previous block (last-key xor) and chains it in.
	@ r5 = 2 means 10 rounds, which has its own loop.
337	cmp	r5,#2
338	veor	q0,q0,q6
339	veor	q5,q8,q7
340	beq	Lcbc_enc128
341
	@ 12- and 14-round path: q2,q3 = round keys 2 and 3. The pointers
	@ r7, r6, r12, r14 and r3 address round keys 1, 4, 5, 6 and 7, which are
	@ reloaded into q8/q9 on every pass because those registers also carry
	@ the next input block.
342	vld1.32	{q2,q3},[r7]
343	add	r7,r3,#16
344	add	r6,r3,#16*4
345	add	r12,r3,#16*5
346.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
347.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
348	add	r14,r3,#16*6
349	add	r3,r3,#16*7
350	b	Lenter_cbc_enc
351
352.align	4
	@ Loop head: q8 holds the pre-xored next plaintext block; q6 (the
	@ ciphertext finished on the previous pass) is written out here.
353Loop_cbc_enc:
354.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
355.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
356	vst1.8	{q6},[r1]!
357Lenter_cbc_enc:
358.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
359.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
360.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
361.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
362	vld1.32	{q8},[r6]
	@ r5 = 4 means 12 rounds: skip the two extra rounds below.
363	cmp	r5,#4
364.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
365.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
366	vld1.32	{q9},[r12]
367	beq	Lcbc_enc192
368
369.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
370.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
371	vld1.32	{q8},[r14]
372.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
373.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
374	vld1.32	{q9},[r3]
375	nop
376
377Lcbc_enc192:
378.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
379.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	@ Length bookkeeping: the flags set here drive both the moveq and
	@ the bhs at the bottom of the loop.
380	subs	r2,r2,#16
381.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
382.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
383	moveq	r8,#0
384.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
385.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
386.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
387.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
388	vld1.8	{q8},[r0],r8
389.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
390.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
391	veor	q8,q8,q5
392.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
393.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
394	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
395.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
396.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
397.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	@ q6 = finished ciphertext block; q0 stays without the last-key xor
	@ because q8 already contains it for the next pass.
398	veor	q6,q0,q7
399	bhs	Loop_cbc_enc
400
401	vst1.8	{q6},[r1]!
402	b	Lcbc_done
403
404.align	5
	@ 10-round encryption loop: same scheme as above, but all round keys
	@ stay resident (q8,q9 are only shared with the next input block).
405Lcbc_enc128:
406	vld1.32	{q2,q3},[r7]
407.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
408.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
409	b	Lenter_cbc_enc128
410Loop_cbc_enc128:
411.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
412.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
413	vst1.8	{q6},[r1]!
414Lenter_cbc_enc128:
415.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
416.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
417	subs	r2,r2,#16
418.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
419.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
420	moveq	r8,#0
421.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
422.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
423.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
424.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
425.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
426.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
427	vld1.8	{q8},[r0],r8
428.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
429.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
430.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
431.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
432.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
433.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
434	veor	q8,q8,q5
435.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
436	veor	q6,q0,q7
437	bhs	Loop_cbc_enc128
438
439	vst1.8	{q6},[r1]!
440	b	Lcbc_done
441.align	5
	@ Decryption, three blocks per pass where possible. q0 already holds
	@ the first ciphertext block; a second one is fetched into q10. r2 is
	@ biased by a further 32 so that a borrow means fewer than three blocks
	@ are available. r6 = rounds - 6 is the inner round counter.
	@ Untouched copies of the ciphertext are kept (q2, q3, q11) because
	@ each one is the chaining value for the block after it.
442Lcbc_dec:
443	vld1.8	{q10},[r0]!
444	subs	r2,r2,#32		@ bias
445	add	r6,r5,#2
446	vorr	q3,q0,q0
447	vorr	q1,q0,q0
448	vorr	q11,q10,q10
449	blo	Lcbc_dec_tail
450
451	vorr	q1,q10,q10
452	vld1.8	{q10},[r0]!
453	vorr	q2,q0,q0
454	vorr	q3,q1,q1
455	vorr	q11,q10,q10
456
	@ Inner loop: two rounds per pass on q0, q1 and q10, streaming the
	@ middle round keys through q8/q9 via r7.
457Loop3x_cbc_dec:
458.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
459.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
460.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
461.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
462.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
463.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
464	vld1.32	{q8},[r7]!
465	subs	r6,r6,#2
466.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
467.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
468.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
469.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
470.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
471.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
472	vld1.32	{q9},[r7]!
473	bgt	Loop3x_cbc_dec
474
	@ Remaining rounds, interleaved with: building the chaining values
	@ pre-xored with the last round key (q4, q5, q9), fetching the next
	@ three input blocks (q2, q3, q11) and rewinding r7 to the schedule.
	@ The flags from the subs below survive to the bhs at the bottom.
475.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
476.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
477.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
478.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
479.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
480.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
481	veor	q4,q6,q7
482	subs	r2,r2,#0x30
483	veor	q5,q2,q7
484	movlo	r6,r2			@ r6 is zero at this point
485.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
486.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
487.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
488.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
489.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
490.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
491	veor	q9,q3,q7
492	add	r0,r0,r6		@ r0 is adjusted in such way that
493					@ at exit from the loop q1-q10
494					@ are loaded with last "words"
495	vorr	q6,q11,q11
496	mov	r7,r3
497.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
498.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
499.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
500.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
501.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
502.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
503	vld1.8	{q2},[r0]!
504.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
505.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
506.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
507.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
508.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
509.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
510	vld1.8	{q3},[r0]!
511.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
512.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
513.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
514.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
515.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
516.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
517	vld1.8	{q11},[r0]!
518.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
519.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
520.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
521	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
522	add	r6,r5,#2
523	veor	q4,q4,q0
524	veor	q5,q5,q1
525	veor	q10,q10,q9
526	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
527	vst1.8	{q4},[r1]!
528	vorr	q0,q2,q2
529	vst1.8	{q5},[r1]!
530	vorr	q1,q3,q3
531	vst1.8	{q10},[r1]!
532	vorr	q10,q11,q11
533	bhs	Loop3x_cbc_dec
534
	@ r2 = -0x30 here means the input was consumed exactly.
535	cmn	r2,#0x30
536	beq	Lcbc_done
537	nop
538
	@ Tail: one or two blocks are left. Both q1 and q10 are run through
	@ the cipher; for a single block only the q10 result is written.
539Lcbc_dec_tail:
540.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
541.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
542.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
543.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
544	vld1.32	{q8},[r7]!
545	subs	r6,r6,#2
546.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
547.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
548.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
549.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
550	vld1.32	{q9},[r7]!
551	bgt	Lcbc_dec_tail
552
553.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
554.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
555.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
556.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
557.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
558.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
559.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
560.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
561.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
562.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
563.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
564.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	@ r2 = -0x20 means exactly one block is left; the result of this
	@ compare is consumed by the beq to Lcbc_dec_one below.
565	cmn	r2,#0x20
566.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
567.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
568.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
569.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
570	veor	q5,q6,q7
571.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
572.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
573.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
574.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
575	veor	q9,q3,q7
576.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
577.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
578	beq	Lcbc_dec_one
579	veor	q5,q5,q1
580	veor	q9,q9,q10
581	vorr	q6,q11,q11
582	vst1.8	{q5},[r1]!
583	vst1.8	{q9},[r1]!
584	b	Lcbc_done
585
586Lcbc_dec_one:
587	veor	q5,q5,q10
588	vorr	q6,q11,q11
589	vst1.8	{q5},[r1]!
590
	@ q6 holds the last ciphertext block on every path reaching this
	@ point; it is written back through the IV pointer.
591Lcbc_done:
592	vst1.8	{q6},[r4]
593Lcbc_abort:
594	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
595	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
596
597.globl	_aes_hw_ctr32_encrypt_blocks
598.private_extern	_aes_hw_ctr32_encrypt_blocks
599#ifdef __thumb2__
600.thumb_func	_aes_hw_ctr32_encrypt_blocks
601#endif
602.align	5
	@ aes_hw_ctr32_encrypt_blocks - CTR-mode processing of whole blocks
	@ with a 32-bit counter held in the last word of the counter block.
	@
	@ In:   r0   = input pointer
	@       r1   = output pointer
	@       r2   = number of 16-byte blocks
	@       r3   = key schedule (round count read from byte offset 240)
	@       [sp] = pointer to the 16-byte counter block; this routine only
	@              reads it and never stores an updated counter back
	@ Saves and restores r4-r10, lr and d8-d15 (q4-q7 are used as scratch).
	@
	@ NOTE(review): a block count of zero is not special-cased. The tail
	@ path would still load input and store two output blocks, so callers
	@ presumably guarantee at least one block - confirm against callers.
	@
	@ Register roles after setup:
	@   r8       counter value (host byte order) of the highest block in
	@            flight
	@   r12      step between the two tail loads: 16, or 0 when only one
	@            block is left so the second load re-reads the first
	@   r5       rounds - 6, r6 = inner round counter, r7 = schedule + 32
	@   q8,q9    round keys 0 and 1
	@   q12-q15  four of the last five round keys, q7 = the very last one
	@   q6       staging register for the counter block (see errata note)
603_aes_hw_ctr32_encrypt_blocks:
604	mov	ip,sp
605	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
606	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
607	ldr	r4, [ip]		@ load remaining arg
608	ldr	r5,[r3,#240]
609
610	ldr	r8, [r4, #12]
611	vld1.32	{q0},[r4]
612
613	vld1.32	{q8,q9},[r3]		@ load key schedule...
614	sub	r5,r5,#4
615	mov	r12,#16
	@ The flags from this compare feed the movlo and the bls to the tail
	@ below; the instructions in between leave them untouched.
616	cmp	r2,#2
617	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
618	sub	r5,r5,#2
619	vld1.32	{q12,q13},[r7]!
620	vld1.32	{q14,q15},[r7]!
621	vld1.32	{q7},[r7]
622	add	r7,r3,#32
623	mov	r6,r5
624	movlo	r12,#0
625
626	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
627	@ affected by silicon errata #1742098 [0] and #1655431 [1],
628	@ respectively, where the second instruction of an aese/aesmc
629	@ instruction pair may execute twice if an interrupt is taken right
630	@ after the first instruction consumes an input register of which a
631	@ single 32-bit lane has been updated the last time it was modified.
632	@
633	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
634	@ could write to q1 and q10 directly, but that trips these bugs.
635	@ We write to q6 and copy to the final register as a workaround.
636	@
637	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
638	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
639#ifndef __ARMEB__
640	rev	r8, r8
641#endif
	@ q0 = counter block as loaded, q1 = counter + 1, and (three-block
	@ path only) q10 = counter + 2. Each new value is byte-reversed,
	@ inserted into the top lane of q6 and then copied as a whole register.
642	add	r10, r8, #1
643	vorr	q6,q0,q0
644	rev	r10, r10
645	vmov.32	d13[1],r10
646	add	r8, r8, #2
647	vorr	q1,q6,q6
648	bls	Lctr32_tail
649	rev	r12, r8
650	vmov.32	d13[1],r12
651	sub	r2,r2,#3		@ bias
652	vorr	q10,q6,q6
653	b	Loop3x_ctr32
654
655.align	4
	@ Three counter blocks (q0, q1, q10) are encrypted in parallel. The
	@ inner loop runs two rounds per pass, streaming the middle round keys
	@ through q8/q9 via r7.
656Loop3x_ctr32:
657.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
658.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
659.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
660.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
661.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
662.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
663	vld1.32	{q8},[r7]!
664	subs	r6,r6,#2
665.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
666.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
667.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
668.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
669.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
670.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
671	vld1.32	{q9},[r7]!
672	bgt	Loop3x_ctr32
673
	@ Remaining rounds. The aesmc results are redirected into q4, q5 and
	@ q9 so that q0, q1 and q10 become free for the next three counter
	@ blocks, which are prepared here together with the input loads
	@ (q2, q3, q11) and their pre-xor with the last round key q7.
674.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
675.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
676.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
677.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
678	vld1.8	{q2},[r0]!
679	add	r9,r8,#1
680.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
681.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
682	vld1.8	{q3},[r0]!
683	rev	r9,r9
684.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
685.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
686.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
687.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
688	vld1.8	{q11},[r0]!
689	mov	r7,r3
690.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
691.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
692.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
693.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
694.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
695.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
696	veor	q2,q2,q7
697	add	r10,r8,#2
698.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
699.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
700	veor	q3,q3,q7
701	add	r8,r8,#3
702.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
703.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
704.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
705.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
706	 @ Note the logic to update q0, q1, and q10 is written to work
707	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
708	 @ 32-bit mode. See the comment above.
709	veor	q11,q11,q7
710	vmov.32	d13[1], r9
711.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
712.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
713	vorr	q0,q6,q6
714	rev	r10,r10
715.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
716.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
717	vmov.32	d13[1], r10
718	rev	r12,r8
719.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
720.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
721	vorr	q1,q6,q6
722	vmov.32	d13[1], r12
723.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
724.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
725	vorr	q10,q6,q6
	@ Block bookkeeping: the flags set here are consumed by the bhs at
	@ the bottom of the loop.
726	subs	r2,r2,#3
727.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
728.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
729.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
730
731	veor	q2,q2,q4
732	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
733	vst1.8	{q2},[r1]!
734	veor	q3,q3,q5
735	mov	r6,r5
736	vst1.8	{q3},[r1]!
737	veor	q11,q11,q9
738	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
739	vst1.8	{q11},[r1]!
740	bhs	Loop3x_ctr32
741
	@ Undo the bias: r2 = blocks still to do (0, 1 or 2). r12 becomes the
	@ step between the two tail loads (0 when a single block is left).
742	adds	r2,r2,#3
743	beq	Lctr32_done
744	cmp	r2,#1
745	mov	r12,#16
746	moveq	r12,#0
747
	@ Tail: q0 and q1 hold the next two counter blocks; both are encrypted
	@ and the second result is stored only when two blocks remain.
748Lctr32_tail:
749.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
750.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
751.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
752.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
753	vld1.32	{q8},[r7]!
754	subs	r6,r6,#2
755.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
756.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
757.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
758.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
759	vld1.32	{q9},[r7]!
760	bgt	Lctr32_tail
761
762.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
763.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
764.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
765.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
766.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
767.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
768.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
769.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
770	vld1.8	{q2},[r0],r12
771.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
772.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
773.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
774.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
775	vld1.8	{q3},[r0]
776.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
777.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
778.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
779.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
780	veor	q2,q2,q7
781.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
782.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
783.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
784.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
785	veor	q3,q3,q7
786.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
787.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
788
789	cmp	r2,#1
790	veor	q2,q2,q0
791	veor	q3,q3,q1
792	vst1.8	{q2},[r1]!
793	beq	Lctr32_done
794	vst1.8	{q3},[r1]
795
796Lctr32_done:
797	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
798	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
799
800#endif
801#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__APPLE__)
802