• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if !defined(OPENSSL_NO_ASM)
11#if defined(BORINGSSL_PREFIX)
12#include <boringssl_prefix_symbols_asm.h>
13#endif
14#include <openssl/arm_arch.h>
15
16#if __ARM_MAX_ARCH__>=7
17.text
18
19.section	__TEXT,__const
20.align	5
// Key-expansion constants. aes_hw_set_encrypt_key loads the first two rows
// into v1 and v2 and, once its eight-pass 128-bit loop has finished, reloads
// v1 from the third row.
21Lrcon:
// Row 0: starting round constant, 0x01 in each 32-bit lane. The expansion
// loops XOR it into the schedule and then shift every byte left by one bit.
22.long	0x01,0x01,0x01,0x01
// Row 1: byte-index vector used as the tbl selector (indices 13,14,15,12 in
// every lane).
23.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
// Row 2: round constant 0x1b, used (and shifted once) by the two unrolled
// rounds that finish the 128-bit schedule.
24.long	0x1b,0x1b,0x1b,0x1b
25
26.text
27
28.globl	_aes_hw_set_encrypt_key
29.private_extern	_aes_hw_set_encrypt_key
30
31.align	5
// aes_hw_set_encrypt_key: expand a user key into an encryption key schedule.
//
// In:   x0 = pointer to the user key bytes
//       w1 = key length in bits; only 128, 192 and 256 pass the checks
//       x2 = pointer to the output key schedule
// Out:  x0 = 0 on success,
//            -1 if x0 or x2 is null,
//            -2 if w1 is below 128, above 256 or not a multiple of 64
//
// On success the round keys are written upward from [x2] and the round count
// (10, 12 or 14) is stored as a 32-bit word 240 bytes past the original x2.
// The routine also exits with x2 pointing at that word and the round count
// in w12; aes_hw_set_decrypt_key relies on both after its "bl Lenc_key".
//
// Touches x0-x3, w12, v0-v6 and the condition flags.
32_aes_hw_set_encrypt_key:
// Lenc_key is the local entry used by aes_hw_set_decrypt_key.
33Lenc_key:
// x30 is pushed but never reloaded: nothing below overwrites it, so both
// exits only pop x29.
34	stp	x29,x30,[sp,#-16]!
35	add	x29,sp,#0
// x3 carries the pending return code until the exit path copies it to x0.
36	mov	x3,#-1
37	cmp	x0,#0
38	b.eq	Lenc_key_abort
39	cmp	x2,#0
40	b.eq	Lenc_key_abort
41	mov	x3,#-2
42	cmp	w1,#128
43	b.lt	Lenc_key_abort
44	cmp	w1,#256
45	b.gt	Lenc_key_abort
46	tst	w1,#0x3f
47	b.ne	Lenc_key_abort
48
// Arguments are valid; x3 is recycled as the pointer to the Lrcon table.
49	adrp	x3,Lrcon@PAGE
50	add	x3,x3,Lrcon@PAGEOFF
// The flags from this compare pick the key-size path at the three branches
// below; the vector ops, loads and mov in between do not alter them.
51	cmp	w1,#192
52
// v0 = all zeroes: used as the zero fill for ext and as a null round key for
// aese.  v3 = first 16 key bytes.  v1 = round constant, v2 = tbl selector.
53	eor	v0.16b,v0.16b,v0.16b
54	ld1	{v3.16b},[x0],#16
55	mov	w1,#8		// reuse w1
56	ld1	{v1.4s,v2.4s},[x3],#32
57
58	b.lt	Loop128
59	b.eq	L192
60	b	L256
61
62.align	4
// 128-bit keys. Each pass stores the current round key (v3) and derives the
// next one:
//   - tbl picks the rotated last word of v3 into every lane of v6 and aese
//     (with the zero key in v0) substitutes its bytes;
//   - the ext/eor chain XORs v3 with copies of itself shifted up by one, two
//     and three words (zero filled from v0);
//   - the round constant in v1 is XORed into v6, then shifted for next time.
// w1 counts eight passes.
63Loop128:
64	tbl	v6.16b,{v3.16b},v2.16b
65	ext	v5.16b,v0.16b,v3.16b,#12
66	st1	{v3.4s},[x2],#16
67	aese	v6.16b,v0.16b
68	subs	w1,w1,#1
69
70	eor	v3.16b,v3.16b,v5.16b
71	ext	v5.16b,v0.16b,v5.16b,#12
72	eor	v3.16b,v3.16b,v5.16b
73	ext	v5.16b,v0.16b,v5.16b,#12
74	eor	v6.16b,v6.16b,v1.16b
75	eor	v3.16b,v3.16b,v5.16b
76	shl	v1.16b,v1.16b,#1
77	eor	v3.16b,v3.16b,v6.16b
78	b.ne	Loop128
79
// x3 was advanced past the first two table rows, so this fetches the third
// row (0x1b) as the round constant for the two unrolled rounds below.
80	ld1	{v1.4s},[x3]
81
82	tbl	v6.16b,{v3.16b},v2.16b
83	ext	v5.16b,v0.16b,v3.16b,#12
84	st1	{v3.4s},[x2],#16
85	aese	v6.16b,v0.16b
86
87	eor	v3.16b,v3.16b,v5.16b
88	ext	v5.16b,v0.16b,v5.16b,#12
89	eor	v3.16b,v3.16b,v5.16b
90	ext	v5.16b,v0.16b,v5.16b,#12
91	eor	v6.16b,v6.16b,v1.16b
92	eor	v3.16b,v3.16b,v5.16b
93	shl	v1.16b,v1.16b,#1
94	eor	v3.16b,v3.16b,v6.16b
95
96	tbl	v6.16b,{v3.16b},v2.16b
97	ext	v5.16b,v0.16b,v3.16b,#12
98	st1	{v3.4s},[x2],#16
99	aese	v6.16b,v0.16b
100
101	eor	v3.16b,v3.16b,v5.16b
102	ext	v5.16b,v0.16b,v5.16b,#12
103	eor	v3.16b,v3.16b,v5.16b
104	ext	v5.16b,v0.16b,v5.16b,#12
105	eor	v6.16b,v6.16b,v1.16b
106	eor	v3.16b,v3.16b,v5.16b
107	eor	v3.16b,v3.16b,v6.16b
// The last round key is stored without post-increment. Ten earlier stores
// moved x2 by 160 bytes, so adding 0x50 leaves x2 at offset 240.
108	st1	{v3.4s},[x2]
109	add	x2,x2,#0x50
110
111	mov	w12,#10
112	b	Ldone
113
114.align	4
// 192-bit keys. v4 receives the remaining 8 key bytes, and the tbl selector
// is lowered by 8 so that it picks from the second word of v4 rather than
// the fourth.
115L192:
116	ld1	{v4.8b},[x0],#8
117	movi	v6.16b,#8			// borrow v6.16b
118	st1	{v3.4s},[x2],#16
119	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
120
// Each of the eight passes (w1 is still 8) writes 24 bytes of schedule:
// 8 bytes from v4, then the 16 bytes of the updated v3.
121Loop192:
122	tbl	v6.16b,{v4.16b},v2.16b
123	ext	v5.16b,v0.16b,v3.16b,#12
124	st1	{v4.8b},[x2],#8
125	aese	v6.16b,v0.16b
126	subs	w1,w1,#1
127
128	eor	v3.16b,v3.16b,v5.16b
129	ext	v5.16b,v0.16b,v5.16b,#12
130	eor	v3.16b,v3.16b,v5.16b
131	ext	v5.16b,v0.16b,v5.16b,#12
132	eor	v3.16b,v3.16b,v5.16b
133
// Carry the last word of the (not yet finalised) v3 into the update of v4.
134	dup	v5.4s,v3.s[3]
135	eor	v5.16b,v5.16b,v4.16b
136	eor	v6.16b,v6.16b,v1.16b
137	ext	v4.16b,v0.16b,v4.16b,#12
138	shl	v1.16b,v1.16b,#1
139	eor	v4.16b,v4.16b,v5.16b
140	eor	v3.16b,v3.16b,v6.16b
141	eor	v4.16b,v4.16b,v6.16b
142	st1	{v3.4s},[x2],#16
143	b.ne	Loop192
144
// 16 + 8*24 = 208 bytes have been written; 0x20 more puts x2 at offset 240.
145	mov	w12,#12
146	add	x2,x2,#0x20
147	b	Ldone
148
149.align	4
// 256-bit keys. v4 receives the second 16 key bytes; w1 is reset to seven
// passes and the loop leaves through the b.eq in its middle.
150L256:
151	ld1	{v4.16b},[x0]
152	mov	w1,#7
153	mov	w12,#14
154	st1	{v3.4s},[x2],#16
155
156Loop256:
// First half: store v4 and derive the next v3 from it, exactly as in the
// 128-bit loop but with v4 feeding tbl.
157	tbl	v6.16b,{v4.16b},v2.16b
158	ext	v5.16b,v0.16b,v3.16b,#12
159	st1	{v4.4s},[x2],#16
160	aese	v6.16b,v0.16b
161	subs	w1,w1,#1
162
163	eor	v3.16b,v3.16b,v5.16b
164	ext	v5.16b,v0.16b,v5.16b,#12
165	eor	v3.16b,v3.16b,v5.16b
166	ext	v5.16b,v0.16b,v5.16b,#12
167	eor	v6.16b,v6.16b,v1.16b
168	eor	v3.16b,v3.16b,v5.16b
169	shl	v1.16b,v1.16b,#1
170	eor	v3.16b,v3.16b,v6.16b
171	st1	{v3.4s},[x2],#16
// Flags still come from the subs above. On the seventh pass x2 has advanced
// 16 + 7*32 = 240 bytes, so no pointer fix-up is needed before Ldone.
172	b.eq	Ldone
173
// Second half: derive the next v4 from the new v3. The last word is splatted
// without rotation and no round constant is mixed in.
174	dup	v6.4s,v3.s[3]		// just splat
175	ext	v5.16b,v0.16b,v4.16b,#12
176	aese	v6.16b,v0.16b
177
178	eor	v4.16b,v4.16b,v5.16b
179	ext	v5.16b,v0.16b,v5.16b,#12
180	eor	v4.16b,v4.16b,v5.16b
181	ext	v5.16b,v0.16b,v5.16b,#12
182	eor	v4.16b,v4.16b,v5.16b
183
184	eor	v4.16b,v4.16b,v6.16b
185	b	Loop256
186
// Common success exit: x2 is at offset 240 on every path; record the round
// count there and set the return code to 0.
187Ldone:
188	str	w12,[x2]
189	mov	x3,#0
190
191Lenc_key_abort:
192	mov	x0,x3			// return value
193	ldr	x29,[sp],#16
194	ret
195
196
197.globl	_aes_hw_set_decrypt_key
198.private_extern	_aes_hw_set_decrypt_key
199
200.align	5
// aes_hw_set_decrypt_key: build a decryption key schedule.
//
// Takes the same x0/w1/x2 arguments as aes_hw_set_encrypt_key and passes
// them untouched to Lenc_key. A non-zero result from that call is returned
// unchanged. Otherwise the schedule just written is converted in place: the
// round keys are reversed end to end, and every key except the two outermost
// ones is run through aesimc. Returns 0 in x0 on success.
//
// Touches x4, v0, v1 and the flags on top of what the key expansion touches.
201_aes_hw_set_decrypt_key:
202	stp	x29,x30,[sp,#-16]!
203	add	x29,sp,#0
204	bl	Lenc_key
205
206	cmp	x0,#0
207	b.ne	Ldec_key_abort
208
// Lenc_key returns with x2 advanced by 240 and the round count in w12.
// x4 is the negative stride for the pointer that walks down from the end.
209	sub	x2,x2,#240		// restore original x2
210	mov	x4,#-16
211	add	x0,x2,x12,lsl#4	// end of key schedule
212
// Swap the first and last round keys as they are (no aesimc).
213	ld1	{v0.4s},[x2]
214	ld1	{v1.4s},[x0]
215	st1	{v0.4s},[x0],x4
216	st1	{v1.4s},[x2],#16
217
// Swap the remaining pairs from the outside in, applying aesimc to both
// keys, until the two pointers meet.
218Loop_imc:
219	ld1	{v0.4s},[x2]
220	ld1	{v1.4s},[x0]
221	aesimc	v0.16b,v0.16b
222	aesimc	v1.16b,v1.16b
223	st1	{v0.4s},[x0],x4
224	st1	{v1.4s},[x2],#16
225	cmp	x0,x2
226	b.hi	Loop_imc
227
// The round count is even, so the pointers meet on one middle key; it is
// transformed in place.
228	ld1	{v0.4s},[x2]
229	aesimc	v0.16b,v0.16b
230	st1	{v0.4s},[x0]
231
232	eor	x0,x0,x0		// return value
233Ldec_key_abort:
234	ldp	x29,x30,[sp],#16
235	ret
236
237.globl	_aes_hw_encrypt
238.private_extern	_aes_hw_encrypt
239
240.align	5
// aes_hw_encrypt: encrypt a single 16-byte block.
//
// In:   x0 = input block
//       x1 = output block
//       x2 = key schedule; the round count is read from offset 240
// Leaf routine with no stack use. Touches x2, w3, v0-v2 and the flags.
241_aes_hw_encrypt:
242	ldr	w3,[x2,#240]
243	ld1	{v0.4s},[x2],#16
244	ld1	{v2.16b},[x0]
// w3 = rounds - 2: the loop below covers all but the last two rounds.
245	sub	w3,w3,#2
246	ld1	{v1.4s},[x2],#16
247
// Two rounds per pass. v0 and v1 hold the current pair of round keys and are
// refilled with the next pair while the AES instructions execute.
248Loop_enc:
249	aese	v2.16b,v0.16b
250	aesmc	v2.16b,v2.16b
251	ld1	{v0.4s},[x2],#16
252	subs	w3,w3,#2
253	aese	v2.16b,v1.16b
254	aesmc	v2.16b,v2.16b
255	ld1	{v1.4s},[x2],#16
256	b.gt	Loop_enc
257
// Last two rounds: the final round has no aesmc and ends with a plain XOR of
// the last round key.
258	aese	v2.16b,v0.16b
259	aesmc	v2.16b,v2.16b
260	ld1	{v0.4s},[x2]
261	aese	v2.16b,v1.16b
262	eor	v2.16b,v2.16b,v0.16b
263
264	st1	{v2.16b},[x1]
265	ret
266
267.globl	_aes_hw_decrypt
268.private_extern	_aes_hw_decrypt
269
270.align	5
// aes_hw_decrypt: decrypt a single 16-byte block.
//
// In:   x0 = input block
//       x1 = output block
//       x2 = key schedule; the round count is read from offset 240
// Same structure as aes_hw_encrypt with aesd/aesimc in place of aese/aesmc;
// the schedule is consumed from [x2] upward, so it presumably has to be one
// produced by aes_hw_set_decrypt_key.
// Leaf routine with no stack use. Touches x2, w3, v0-v2 and the flags.
271_aes_hw_decrypt:
272	ldr	w3,[x2,#240]
273	ld1	{v0.4s},[x2],#16
274	ld1	{v2.16b},[x0]
// w3 = rounds - 2: the loop below covers all but the last two rounds.
275	sub	w3,w3,#2
276	ld1	{v1.4s},[x2],#16
277
// Two rounds per pass, with v0/v1 refilled with the next pair of round keys.
278Loop_dec:
279	aesd	v2.16b,v0.16b
280	aesimc	v2.16b,v2.16b
281	ld1	{v0.4s},[x2],#16
282	subs	w3,w3,#2
283	aesd	v2.16b,v1.16b
284	aesimc	v2.16b,v2.16b
285	ld1	{v1.4s},[x2],#16
286	b.gt	Loop_dec
287
// Last two rounds: the final round has no aesimc and ends with a plain XOR
// of the last round key.
288	aesd	v2.16b,v0.16b
289	aesimc	v2.16b,v2.16b
290	ld1	{v0.4s},[x2]
291	aesd	v2.16b,v1.16b
292	eor	v2.16b,v2.16b,v0.16b
293
294	st1	{v2.16b},[x1]
295	ret
296
297.globl	_aes_hw_cbc_encrypt
298.private_extern	_aes_hw_cbc_encrypt
299
300.align	5
// aes_hw_cbc_encrypt: CBC-mode encryption or decryption.
//
// In:   x0 = input
//       x1 = output
//       x2 = length in bytes
//       x3 = key schedule; the round count is read from offset 240
//       x4 = 16-byte IV, read on entry and overwritten on exit
//       w5 = direction: zero selects decryption, non-zero encryption
//
// A length below 16 returns at once without touching the output or the IV.
// Otherwise only whole 16-byte blocks are processed (the count is rounded
// down), and before returning the IV buffer receives the last ciphertext
// block: the last block produced when encrypting, the last block consumed
// when decrypting.
//
// Encryption is serial, one block at a time, with separate loops for 128-bit
// and longer keys. Decryption handles three blocks per pass and finishes a
// remainder of one or two blocks in Lcbc_dec_tail.
//
// Touches x0-x3, w5, x6-x8, x12, x14, v0-v7, v16-v23 and the flags.
// x30 is pushed but not reloaded; nothing in the routine overwrites it.
301_aes_hw_cbc_encrypt:
302	stp	x29,x30,[sp,#-16]!
303	add	x29,sp,#0
// x2 = length - 16; a borrow means less than one full block was supplied.
304	subs	x2,x2,#16
305	mov	x8,#16
306	b.lo	Lcbc_abort
// x8 is the post-increment applied after loading an input block: 16, or 0
// when the length was exactly 16 so that x0 stays on the only block.
307	csel	x8,xzr,x8,eq
308
// The flags from this compare are consumed by "b.eq Lcbc_dec" below; none of
// the loads or non-flag-setting arithmetic in between disturbs them.
309	cmp	w5,#0			// en- or decrypting?
310	ldr	w5,[x3,#240]
311	and	x2,x2,#-16
// v6 = IV, v0 = first input block.
312	ld1	{v6.16b},[x4]
313	ld1	{v0.16b},[x0],x8
314
// v16/v17 = rndkey[0]/[1]. The last seven round keys go to v18-v23 and v7
// (v7 is the final key). w5 ends up as rounds - 8, i.e. 2, 4 or 6.
315	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
316	sub	w5,w5,#6
317	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
318	sub	w5,w5,#2
319	ld1	{v18.4s,v19.4s},[x7],#32
320	ld1	{v20.4s,v21.4s},[x7],#32
321	ld1	{v22.4s,v23.4s},[x7],#32
322	ld1	{v7.4s},[x7]
323
324	add	x7,x3,#32
325	mov	w6,w5
326	b.eq	Lcbc_dec
327
// ---- Encryption ----
// v0 = first plaintext block ^ IV. v5 = rndkey[0] ^ last round key: XORing
// it into the next plaintext block lets the first aese of the next block
// fold in both the chaining value and rndkey[0], because v0 still lacks the
// last round key at that point.
328	cmp	w5,#2
329	eor	v0.16b,v0.16b,v6.16b
330	eor	v5.16b,v16.16b,v7.16b
331	b.eq	Lcbc_enc128
332
// 192/256-bit keys: v2/v3 = rndkey[2]/[3]; x7, x6, x12, x14 and x3 are
// pointed at rndkey[1], [4], [5], [6] and [7] for reloading inside the loop.
333	ld1	{v2.4s,v3.4s},[x7]
334	add	x7,x3,#16
335	add	x6,x3,#16*4
336	add	x12,x3,#16*5
337	aese	v0.16b,v16.16b
338	aesmc	v0.16b,v0.16b
339	add	x14,x3,#16*6
340	add	x3,x3,#16*7
341	b	Lenter_cbc_enc
342
343.align	4
// At the top of each pass v16 holds the next plaintext block ^ v5 and v6 the
// ciphertext block finished by the previous pass, which is stored here.
344Loop_cbc_enc:
345	aese	v0.16b,v16.16b
346	aesmc	v0.16b,v0.16b
347	st1	{v6.16b},[x1],#16
348Lenter_cbc_enc:
349	aese	v0.16b,v17.16b
350	aesmc	v0.16b,v0.16b
351	aese	v0.16b,v2.16b
352	aesmc	v0.16b,v0.16b
353	ld1	{v16.4s},[x6]
// w5 == 4 identifies a 12-round key, which skips the two extra rounds.
354	cmp	w5,#4
355	aese	v0.16b,v3.16b
356	aesmc	v0.16b,v0.16b
357	ld1	{v17.4s},[x12]
358	b.eq	Lcbc_enc192
359
360	aese	v0.16b,v16.16b
361	aesmc	v0.16b,v0.16b
362	ld1	{v16.4s},[x14]
363	aese	v0.16b,v17.16b
364	aesmc	v0.16b,v0.16b
365	ld1	{v17.4s},[x3]
366	nop
367
368Lcbc_enc192:
369	aese	v0.16b,v16.16b
370	aesmc	v0.16b,v0.16b
// Count down the remaining bytes. Once the count reaches zero the input
// post-increment becomes 0, so later loads re-read the final block instead
// of advancing x0. The b.hs at the bottom still uses these flags.
371	subs	x2,x2,#16
372	aese	v0.16b,v17.16b
373	aesmc	v0.16b,v0.16b
374	csel	x8,xzr,x8,eq
375	aese	v0.16b,v18.16b
376	aesmc	v0.16b,v0.16b
377	aese	v0.16b,v19.16b
378	aesmc	v0.16b,v0.16b
// v16 is reused for the next plaintext block.
379	ld1	{v16.16b},[x0],x8
380	aese	v0.16b,v20.16b
381	aesmc	v0.16b,v0.16b
382	eor	v16.16b,v16.16b,v5.16b
383	aese	v0.16b,v21.16b
384	aesmc	v0.16b,v0.16b
385	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
386	aese	v0.16b,v22.16b
387	aesmc	v0.16b,v0.16b
388	aese	v0.16b,v23.16b
// v6 = finished ciphertext block; v0 is deliberately left without the last
// round key (see the note on v5 above).
389	eor	v6.16b,v0.16b,v7.16b
390	b.hs	Loop_cbc_enc
391
392	st1	{v6.16b},[x1],#16
393	b	Lcbc_done
394
395.align	5
// 128-bit keys: all eleven round keys stay resident (v16, v17, v2, v3,
// v18-v23, v7), so the loop reloads nothing but the input. v16 is
// overwritten with the next plaintext block ^ v5 after its first use.
396Lcbc_enc128:
397	ld1	{v2.4s,v3.4s},[x7]
398	aese	v0.16b,v16.16b
399	aesmc	v0.16b,v0.16b
400	b	Lenter_cbc_enc128
401Loop_cbc_enc128:
402	aese	v0.16b,v16.16b
403	aesmc	v0.16b,v0.16b
404	st1	{v6.16b},[x1],#16
405Lenter_cbc_enc128:
406	aese	v0.16b,v17.16b
407	aesmc	v0.16b,v0.16b
// Same remaining-byte bookkeeping as in the 192/256-bit loop.
408	subs	x2,x2,#16
409	aese	v0.16b,v2.16b
410	aesmc	v0.16b,v0.16b
411	csel	x8,xzr,x8,eq
412	aese	v0.16b,v3.16b
413	aesmc	v0.16b,v0.16b
414	aese	v0.16b,v18.16b
415	aesmc	v0.16b,v0.16b
416	aese	v0.16b,v19.16b
417	aesmc	v0.16b,v0.16b
418	ld1	{v16.16b},[x0],x8
419	aese	v0.16b,v20.16b
420	aesmc	v0.16b,v0.16b
421	aese	v0.16b,v21.16b
422	aesmc	v0.16b,v0.16b
423	aese	v0.16b,v22.16b
424	aesmc	v0.16b,v0.16b
425	eor	v16.16b,v16.16b,v5.16b
426	aese	v0.16b,v23.16b
427	eor	v6.16b,v0.16b,v7.16b
428	b.hs	Loop_cbc_enc128
429
430	st1	{v6.16b},[x1],#16
431	b	Lcbc_done
432.align	5
// ---- Decryption ----
// v18/v19 are reused for data here, so only v20-v23 and v7 keep round keys;
// the earlier keys are streamed through v16/v17 from x7 (which starts at
// rndkey[2]). w6 = rounds - 6 counts the streamed rounds.
// x2 becomes length - 48; a borrow means fewer than three blocks in total.
433Lcbc_dec:
434	ld1	{v18.16b},[x0],#16
435	subs	x2,x2,#32		// bias
436	add	w6,w5,#2
// Keep copies of the ciphertext blocks: they are needed as chaining values
// after the originals have been decrypted in place.
437	orr	v3.16b,v0.16b,v0.16b
438	orr	v1.16b,v0.16b,v0.16b
439	orr	v19.16b,v18.16b,v18.16b
440	b.lo	Lcbc_dec_tail
441
// Three or more blocks: v0, v1, v18 = blocks 0, 1, 2 with copies in
// v2, v3, v19.
442	orr	v1.16b,v18.16b,v18.16b
443	ld1	{v18.16b},[x0],#16
444	orr	v2.16b,v0.16b,v0.16b
445	orr	v3.16b,v1.16b,v1.16b
446	orr	v19.16b,v18.16b,v18.16b
447
// Streamed rounds for three blocks in parallel, two rounds per pass.
448Loop3x_cbc_dec:
449	aesd	v0.16b,v16.16b
450	aesimc	v0.16b,v0.16b
451	aesd	v1.16b,v16.16b
452	aesimc	v1.16b,v1.16b
453	aesd	v18.16b,v16.16b
454	aesimc	v18.16b,v18.16b
455	ld1	{v16.4s},[x7],#16
456	subs	w6,w6,#2
457	aesd	v0.16b,v17.16b
458	aesimc	v0.16b,v0.16b
459	aesd	v1.16b,v17.16b
460	aesimc	v1.16b,v1.16b
461	aesd	v18.16b,v17.16b
462	aesimc	v18.16b,v18.16b
463	ld1	{v17.4s},[x7],#16
464	b.gt	Loop3x_cbc_dec
465
// Remaining rounds. Each block's chaining value is pre-XORed with the last
// round key (v4, v5, v17) so a single eor finishes the block afterwards.
466	aesd	v0.16b,v16.16b
467	aesimc	v0.16b,v0.16b
468	aesd	v1.16b,v16.16b
469	aesimc	v1.16b,v1.16b
470	aesd	v18.16b,v16.16b
471	aesimc	v18.16b,v18.16b
472	eor	v4.16b,v6.16b,v7.16b
// Flags from this subs drive the csel just below and the b.hs that closes
// the pass; nothing in between sets flags.
473	subs	x2,x2,#0x30
474	eor	v5.16b,v2.16b,v7.16b
475	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
476	aesd	v0.16b,v17.16b
477	aesimc	v0.16b,v0.16b
478	aesd	v1.16b,v17.16b
479	aesimc	v1.16b,v1.16b
480	aesd	v18.16b,v17.16b
481	aesimc	v18.16b,v18.16b
482	eor	v17.16b,v3.16b,v7.16b
// When fewer than three blocks remain, x6 is the negative x2 and pulls x0
// back, so the three loads below stay inside the input.
483	add	x0,x0,x6		// x0 is adjusted in such way that
484					// at exit from the loop v1.16b-v18.16b
485					// are loaded with last "words"
// v6 = last ciphertext block of this group: the chaining value for the next
// group, and the IV written back if this was the end.
486	orr	v6.16b,v19.16b,v19.16b
487	mov	x7,x3
488	aesd	v0.16b,v20.16b
489	aesimc	v0.16b,v0.16b
490	aesd	v1.16b,v20.16b
491	aesimc	v1.16b,v1.16b
492	aesd	v18.16b,v20.16b
493	aesimc	v18.16b,v18.16b
494	ld1	{v2.16b},[x0],#16
495	aesd	v0.16b,v21.16b
496	aesimc	v0.16b,v0.16b
497	aesd	v1.16b,v21.16b
498	aesimc	v1.16b,v1.16b
499	aesd	v18.16b,v21.16b
500	aesimc	v18.16b,v18.16b
501	ld1	{v3.16b},[x0],#16
502	aesd	v0.16b,v22.16b
503	aesimc	v0.16b,v0.16b
504	aesd	v1.16b,v22.16b
505	aesimc	v1.16b,v1.16b
506	aesd	v18.16b,v22.16b
507	aesimc	v18.16b,v18.16b
508	ld1	{v19.16b},[x0],#16
509	aesd	v0.16b,v23.16b
510	aesd	v1.16b,v23.16b
511	aesd	v18.16b,v23.16b
512	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
513	add	w6,w5,#2
514	eor	v4.16b,v4.16b,v0.16b
515	eor	v5.16b,v5.16b,v1.16b
516	eor	v18.16b,v18.16b,v17.16b
517	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
// Store the three plaintext blocks and move the freshly loaded ciphertext
// into the working registers.
518	st1	{v4.16b},[x1],#16
519	orr	v0.16b,v2.16b,v2.16b
520	st1	{v5.16b},[x1],#16
521	orr	v1.16b,v3.16b,v3.16b
522	st1	{v18.16b},[x1],#16
523	orr	v18.16b,v19.16b,v19.16b
524	b.hs	Loop3x_cbc_dec
525
// x2 == -0x30 means the input ended exactly on a group of three.
526	cmn	x2,#0x30
527	b.eq	Lcbc_done
528	nop
529
// One or two blocks remain. Both lanes are always computed: v1 is the
// second-to-last block and v18 the last one.
530Lcbc_dec_tail:
531	aesd	v1.16b,v16.16b
532	aesimc	v1.16b,v1.16b
533	aesd	v18.16b,v16.16b
534	aesimc	v18.16b,v18.16b
535	ld1	{v16.4s},[x7],#16
536	subs	w6,w6,#2
537	aesd	v1.16b,v17.16b
538	aesimc	v1.16b,v1.16b
539	aesd	v18.16b,v17.16b
540	aesimc	v18.16b,v18.16b
541	ld1	{v17.4s},[x7],#16
542	b.gt	Lcbc_dec_tail
543
544	aesd	v1.16b,v16.16b
545	aesimc	v1.16b,v1.16b
546	aesd	v18.16b,v16.16b
547	aesimc	v18.16b,v18.16b
548	aesd	v1.16b,v17.16b
549	aesimc	v1.16b,v1.16b
550	aesd	v18.16b,v17.16b
551	aesimc	v18.16b,v18.16b
552	aesd	v1.16b,v20.16b
553	aesimc	v1.16b,v1.16b
554	aesd	v18.16b,v20.16b
555	aesimc	v18.16b,v18.16b
// x2 == -0x20 means exactly one block is left; the b.eq below uses this.
556	cmn	x2,#0x20
557	aesd	v1.16b,v21.16b
558	aesimc	v1.16b,v1.16b
559	aesd	v18.16b,v21.16b
560	aesimc	v18.16b,v18.16b
561	eor	v5.16b,v6.16b,v7.16b
562	aesd	v1.16b,v22.16b
563	aesimc	v1.16b,v1.16b
564	aesd	v18.16b,v22.16b
565	aesimc	v18.16b,v18.16b
566	eor	v17.16b,v3.16b,v7.16b
567	aesd	v1.16b,v23.16b
568	aesd	v18.16b,v23.16b
569	b.eq	Lcbc_dec_one
// Two blocks: the first chains from v6, the second from the saved
// ciphertext in v3.
570	eor	v5.16b,v5.16b,v1.16b
571	eor	v17.16b,v17.16b,v18.16b
572	orr	v6.16b,v19.16b,v19.16b
573	st1	{v5.16b},[x1],#16
574	st1	{v17.16b},[x1],#16
575	b	Lcbc_done
576
// One block: only the v18 lane is used, chained from v6.
577Lcbc_dec_one:
578	eor	v5.16b,v5.16b,v18.16b
579	orr	v6.16b,v19.16b,v19.16b
580	st1	{v5.16b},[x1],#16
581
// Write the updated chaining value back to the caller's IV buffer.
582Lcbc_done:
583	st1	{v6.16b},[x4]
584Lcbc_abort:
585	ldr	x29,[sp],#16
586	ret
587
588.globl	_aes_hw_ctr32_encrypt_blocks
589.private_extern	_aes_hw_ctr32_encrypt_blocks
590
591.align	5
// aes_hw_ctr32_encrypt_blocks: CTR-mode processing of whole blocks with a
// 32-bit counter.
//
// In:   x0 = input
//       x1 = output
//       x2 = number of 16-byte blocks
//       x3 = key schedule; the round count is read from offset 240
//       x4 = 16-byte counter block
//
// The last four bytes of the counter block are treated as a big-endian
// 32-bit counter and incremented with 32-bit wrap-around; the other twelve
// bytes are used unchanged. The buffer at [x4] is only read, never updated.
//
// Blocks are handled three at a time; one or two leftover blocks (or a call
// with at most two blocks) go through Lctr32_tail.
// NOTE(review): a block count of 0 is not special-cased; it takes the tail
// path, which then stores two blocks. Callers presumably always pass
// x2 >= 1 - confirm.
//
// Touches x0-x2, w5, w6, x7-x10, x12, v0-v7, v16-v23 and the flags.
// x30 is pushed but not reloaded; nothing in the routine overwrites it.
592_aes_hw_ctr32_encrypt_blocks:
593	stp	x29,x30,[sp,#-16]!
594	add	x29,sp,#0
595	ldr	w5,[x3,#240]
596
// w8 = raw counter word, v0 = whole counter block.
597	ldr	w8, [x4, #12]
598	ld1	{v0.4s},[x4]
599
// v16/v17 = rndkey[0]/[1]; the last five round keys go to v20-v23 and v7.
// w5 ends up as rounds - 6, the count of rounds streamed through v16/v17.
600	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
601	sub	w5,w5,#4
602	mov	x12,#16
// Flags from this compare feed both the csel on x12 and "b.ls Lctr32_tail"
// below; nothing in between sets flags.
603	cmp	x2,#2
604	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
605	sub	w5,w5,#2
606	ld1	{v20.4s,v21.4s},[x7],#32
607	ld1	{v22.4s,v23.4s},[x7],#32
608	ld1	{v7.4s},[x7]
609	add	x7,x3,#32
610	mov	w6,w5
// x12 = input step used by the tail: 0 when fewer than two blocks were
// requested, so its second load re-reads the same block.
611	csel	x12,xzr,x12,lo
612#ifndef __ARMEB__
613	rev	w8, w8
614#endif
// v0 = counter n, v1 = n+1, v18 = n+2 (lane 3 patched below); v6 keeps an
// unmodified copy of the counter block as a template. w8 tracks the counter
// in native byte order and is byte-reversed before being inserted.
615	orr	v1.16b,v0.16b,v0.16b
616	add	w10, w8, #1
617	orr	v18.16b,v0.16b,v0.16b
618	add	w8, w8, #2
619	orr	v6.16b,v0.16b,v0.16b
620	rev	w10, w10
621	mov	v1.s[3],w10
622	b.ls	Lctr32_tail
623	rev	w12, w8
624	sub	x2,x2,#3		// bias
625	mov	v18.s[3],w12
626	b	Loop3x_ctr32
627
628.align	4
// Streamed rounds for three counter blocks in parallel, two rounds per pass.
629Loop3x_ctr32:
630	aese	v0.16b,v16.16b
631	aesmc	v0.16b,v0.16b
632	aese	v1.16b,v16.16b
633	aesmc	v1.16b,v1.16b
634	aese	v18.16b,v16.16b
635	aesmc	v18.16b,v18.16b
636	ld1	{v16.4s},[x7],#16
637	subs	w6,w6,#2
638	aese	v0.16b,v17.16b
639	aesmc	v0.16b,v0.16b
640	aese	v1.16b,v17.16b
641	aesmc	v1.16b,v1.16b
642	aese	v18.16b,v17.16b
643	aesmc	v18.16b,v18.16b
644	ld1	{v17.4s},[x7],#16
645	b.gt	Loop3x_ctr32
646
// Remaining rounds. The in-flight states move to v4, v5 and v17 so that
// v0, v1 and v18 can be rebuilt from the template in v6 as the next three
// counter blocks while these rounds complete.
647	aese	v0.16b,v16.16b
648	aesmc	v4.16b,v0.16b
649	aese	v1.16b,v16.16b
650	aesmc	v5.16b,v1.16b
651	ld1	{v2.16b},[x0],#16
652	orr	v0.16b,v6.16b,v6.16b
653	aese	v18.16b,v16.16b
654	aesmc	v18.16b,v18.16b
655	ld1	{v3.16b},[x0],#16
656	orr	v1.16b,v6.16b,v6.16b
657	aese	v4.16b,v17.16b
658	aesmc	v4.16b,v4.16b
659	aese	v5.16b,v17.16b
660	aesmc	v5.16b,v5.16b
661	ld1	{v19.16b},[x0],#16
662	mov	x7,x3
663	aese	v18.16b,v17.16b
664	aesmc	v17.16b,v18.16b
665	orr	v18.16b,v6.16b,v6.16b
666	add	w9,w8,#1
667	aese	v4.16b,v20.16b
668	aesmc	v4.16b,v4.16b
669	aese	v5.16b,v20.16b
670	aesmc	v5.16b,v5.16b
// The last round key is XORed into the input blocks up front, so one eor
// with the cipher state later yields the output.
671	eor	v2.16b,v2.16b,v7.16b
672	add	w10,w8,#2
673	aese	v17.16b,v20.16b
674	aesmc	v17.16b,v17.16b
675	eor	v3.16b,v3.16b,v7.16b
676	add	w8,w8,#3
677	aese	v4.16b,v21.16b
678	aesmc	v4.16b,v4.16b
679	aese	v5.16b,v21.16b
680	aesmc	v5.16b,v5.16b
681	eor	v19.16b,v19.16b,v7.16b
682	rev	w9,w9
683	aese	v17.16b,v21.16b
684	aesmc	v17.16b,v17.16b
685	mov	v0.s[3], w9
686	rev	w10,w10
687	aese	v4.16b,v22.16b
688	aesmc	v4.16b,v4.16b
689	aese	v5.16b,v22.16b
690	aesmc	v5.16b,v5.16b
691	mov	v1.s[3], w10
692	rev	w12,w8
693	aese	v17.16b,v22.16b
694	aesmc	v17.16b,v17.16b
695	mov	v18.s[3], w12
// Flags from this subs decide the b.hs that closes the pass.
696	subs	x2,x2,#3
697	aese	v4.16b,v23.16b
698	aese	v5.16b,v23.16b
699	aese	v17.16b,v23.16b
700
701	eor	v2.16b,v2.16b,v4.16b
702	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
703	st1	{v2.16b},[x1],#16
704	eor	v3.16b,v3.16b,v5.16b
705	mov	w6,w5
706	st1	{v3.16b},[x1],#16
707	eor	v19.16b,v19.16b,v17.16b
708	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
709	st1	{v19.16b},[x1],#16
710	b.hs	Loop3x_ctr32
711
// Undo the bias: x2 is now the number of leftover blocks (0, 1 or 2).
// x12 was used as scratch inside the loop, so the tail's input step is set
// up again: 0 for one leftover block, 16 for two.
712	adds	x2,x2,#3
713	b.eq	Lctr32_done
714	cmp	x2,#1
715	mov	x12,#16
716	csel	x12,xzr,x12,eq
717
// Tail: v0 and v1 already hold the next two counter blocks. Both lanes are
// always computed; the second result is stored only when x2 is not 1.
718Lctr32_tail:
719	aese	v0.16b,v16.16b
720	aesmc	v0.16b,v0.16b
721	aese	v1.16b,v16.16b
722	aesmc	v1.16b,v1.16b
723	ld1	{v16.4s},[x7],#16
724	subs	w6,w6,#2
725	aese	v0.16b,v17.16b
726	aesmc	v0.16b,v0.16b
727	aese	v1.16b,v17.16b
728	aesmc	v1.16b,v1.16b
729	ld1	{v17.4s},[x7],#16
730	b.gt	Lctr32_tail
731
732	aese	v0.16b,v16.16b
733	aesmc	v0.16b,v0.16b
734	aese	v1.16b,v16.16b
735	aesmc	v1.16b,v1.16b
736	aese	v0.16b,v17.16b
737	aesmc	v0.16b,v0.16b
738	aese	v1.16b,v17.16b
739	aesmc	v1.16b,v1.16b
// With x12 == 0 the second load below reads the same block again rather
// than the 16 bytes after it.
740	ld1	{v2.16b},[x0],x12
741	aese	v0.16b,v20.16b
742	aesmc	v0.16b,v0.16b
743	aese	v1.16b,v20.16b
744	aesmc	v1.16b,v1.16b
745	ld1	{v3.16b},[x0]
746	aese	v0.16b,v21.16b
747	aesmc	v0.16b,v0.16b
748	aese	v1.16b,v21.16b
749	aesmc	v1.16b,v1.16b
750	eor	v2.16b,v2.16b,v7.16b
751	aese	v0.16b,v22.16b
752	aesmc	v0.16b,v0.16b
753	aese	v1.16b,v22.16b
754	aesmc	v1.16b,v1.16b
755	eor	v3.16b,v3.16b,v7.16b
756	aese	v0.16b,v23.16b
757	aese	v1.16b,v23.16b
758
759	cmp	x2,#1
760	eor	v2.16b,v2.16b,v0.16b
761	eor	v3.16b,v3.16b,v1.16b
762	st1	{v2.16b},[x1],#16
763	b.eq	Lctr32_done
764	st1	{v3.16b},[x1]
765
766Lctr32_done:
767	ldr	x29,[sp],#16
768	ret
769
770#endif
771#endif  // !OPENSSL_NO_ASM
772