// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "absolute"
@ terms is ~2250 cycles per 64-byte block or ~35 cycles per byte
@ [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but there it is 50% faster than
@ integer-only code (meaning the latter performs sub-optimally;
@ nothing was done about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch	armv7-a

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
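@ K256 above is the standard FIPS 180-4 round-constant table: the first
@ 32 bits of the fractional parts of the cube roots of the first 64
@ primes, consumed sequentially via r14 in the rounds below.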
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

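@ Entry point of the portable integer implementation. A hedged sketch of
@ the C-level contract, inferred from the register use below (r0, r1, r2
@ per the AAPCS) rather than quoted from a header:
@
@   void sha256_block_data_order(uint32_t state[8],    @ r0: H0..H7
@                                const uint8_t *data,  @ r1: input
@                                size_t num);          @ r2: 64-byte blocks
@
@ It hashes num 64-byte blocks from data into state.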
.globl	sha256_block_data_order
.hidden	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
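@ Runtime dispatch: OPENSSL_armcap_P is read PC-relatively and tested for
@ CPU features.  ARMV8_SHA256 routes to the .LARMv8 path (the hardware
@ SHA-256 code path mentioned in the header), ARMV7_NEON to .LNEON;
@ otherwise execution falls through to the integer-only code below.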
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
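@ Frame layout from here on (offsets from sp, as used by the epilogue):
@   sp+#0*4 .. sp+#15*4   X[0..15], the rolling message schedule
@   sp+#16*4              saved r0 (ctx), pulled at the end of each block
@   sp+#17*4              saved r1 (inp)
@   sp+#18*4              saved r2 (inp+len)
@ r4-r11 hold the working variables a..h; r14 walks the K256 table.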
.Loop:
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
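@ Two tricks recur throughout the unrolled rounds below (a reading of the
@ code, not an upstream note):
@  * Deferred Maj: Maj(a,b,c) = b ^ ((a^b) & (b^c)).  Each round keeps the
@    next round's (b^c) in r3/r12 ("magic") and folds the finished Maj
@    into h one round late ("h+=Maj(a,b,c) from the past").
@  * Factored rotates: Sigma1(e) = (e ^ (e ror 5) ^ (e ror 19)) ror 6,
@    i.e. ror 6/11/25, and Sigma0(a) = (a ^ (a ror 11) ^ (a ror 20)) ror 2,
@    i.e. ror 2/13/22, saving one rotate per Sigma.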
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
.Lrounds_16_xx:
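@ Rounds 16..63.  X[] is extended in place, 16 words at a time, per
@ FIPS 180-4 (visible below as the ror 7/18, lsr 3 and ror 17/19, lsr 10
@ sequences):
@   sigma0(x) = (x ror 7) ^ (x ror 18) ^ (x >> 3)
@   sigma1(x) = (x ror 17) ^ (x ror 19) ^ (x >> 10)
@   X[i] = X[i-16] + sigma0(X[i-15]) + X[i-7] + sigma1(X[i-2])
@ The "done?" test spots the last K256 word (0xc67178f2) by its low byte
@ instead of keeping a separate round counter.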
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
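@ (0xe12fff1e is the ARM encoding of "bx lr"; it is emitted as a raw
@ .word so that pre-ARMv5 assemblers accept the file while ARMv5+ cores
@ still get Thumb interworking on return.)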
.size	sha256_block_data_order,.-sha256_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.globl	sha256_block_data_order_neon
.hidden	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
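@ NEON strategy (a reading of the code below): each 64-byte block is
@ loaded as q0-q3 (16 message words), byte-swapped with vrev32.8 (the
@ words are big-endian), pre-added to the next four K256 vectors, and
@ the K+X sums are parked on the stack.  The scalar rounds then consume
@ them with plain ldr while the vector unit extends the schedule.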

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48

.align	4
.L_00_48:
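@ .L_00_48 runs rounds 0..47 four at a time: the interleaved NEON
@ instructions (vext/vshr/vsli/veor/vadd) compute sigma0/sigma1 for the
@ next four schedule words while the scalar eor/add sequence retires the
@ current four rounds from the precomputed K+X values on the stack.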
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
2274	eor	r3,r7,r8
2275	eor	r0,r0,r7,ror#20
2276	vadd.i32	q3,q3,q9
2277	add	r6,r6,r2
2278	ldr	r2,[sp,#56]
2279	veor	d25,d25,d24
2280	and	r12,r12,r3
2281	add	r10,r10,r6
2282	vshr.u32	d24,d5,#19
2283	add	r6,r6,r0,ror#2
2284	eor	r12,r12,r8
2285	vsli.32	d24,d5,#13
2286	add	r5,r5,r2
2287	eor	r2,r11,r4
2288	veor	d25,d25,d24
2289	eor	r0,r10,r10,ror#5
2290	add	r6,r6,r12
2291	vadd.i32	d6,d6,d25
2292	and	r2,r2,r10
2293	eor	r12,r0,r10,ror#19
2294	vshr.u32	d24,d6,#17
2295	eor	r0,r6,r6,ror#11
2296	eor	r2,r2,r4
2297	vsli.32	d24,d6,#15
2298	add	r5,r5,r12,ror#6
2299	eor	r12,r6,r7
2300	vshr.u32	d25,d6,#10
2301	eor	r0,r0,r6,ror#20
2302	add	r5,r5,r2
2303	veor	d25,d25,d24
2304	ldr	r2,[sp,#60]
2305	and	r3,r3,r12
2306	vshr.u32	d24,d6,#19
2307	add	r9,r9,r5
2308	add	r5,r5,r0,ror#2
2309	eor	r3,r3,r7
2310	vld1.32	{q8},[r14,:128]!
2311	add	r4,r4,r2
2312	vsli.32	d24,d6,#13
2313	eor	r2,r10,r11
2314	eor	r0,r9,r9,ror#5
2315	veor	d25,d25,d24
2316	add	r5,r5,r3
2317	and	r2,r2,r9
2318	vadd.i32	d7,d7,d25
2319	eor	r3,r0,r9,ror#19
2320	eor	r0,r5,r5,ror#11
2321	vadd.i32	q8,q8,q3
2322	eor	r2,r2,r11
2323	add	r4,r4,r3,ror#6
2324	eor	r3,r5,r6
2325	eor	r0,r0,r5,ror#20
2326	add	r4,r4,r2
2327	ldr	r2,[r14]
2328	and	r12,r12,r3
2329	add	r8,r8,r4
2330	vst1.32	{q8},[r1,:128]!
2331	add	r4,r4,r0,ror#2
2332	eor	r12,r12,r6
2333	teq	r2,#0				@ check for K256 terminator
2334	ldr	r2,[sp,#0]
2335	sub	r1,r1,#64
2336	bne	.L_00_48
2337
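@ Each pass through .L_00_48 above retires sixteen rounds: the scalar
@ instructions carry the compression function while the interleaved
@ NEON instructions expand the next sixteen message-schedule words.
@ As a sketch of what the vshr.u32/vsli.32/veor sequences implement
@ (pseudocode, not part of the generated output):
@   sigma0(x) = ror(x,7) ^ ror(x,18) ^ (x >> 3)
@   sigma1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10)
@   W[t] = W[t-16] + sigma0(W[t-15]) + W[t-7] + sigma1(W[t-2])
@ The loop exits once the zero word placed after K256 is loaded.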
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
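@ Block-boundary bookkeeping: r1 is reloaded from the saved input
@ pointer and compared against the end-of-input pointer.  When they
@ match, the pointer is stepped back 64 bytes so the speculative
@ vld1.8 loads above stay inside the buffer (the "avoid SEGV" case)
@ and the saved pointer is left as-is, so the ne-conditional epilogue
@ below falls through instead of looping.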
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

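@ The sixteen rounds above fold in the byte-swapped (vrev32.8) next
@ block while finishing the current one; r2 then points at the hash
@ state, the deferred "h+=Maj(a,b,c) from the past" term is closed
@ out, and the working variables are accumulated into the state.
@ The conditional block below either resets r1 to the stack-based
@ schedule and loops for the next block, or restores the caller's sp
@ and returns.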
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
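@ INST emits each ARMv8 SHA-256 instruction as raw bytes, so the file
@ assembles even with toolchains that lack crypto-extension support.
@ The Thumb-2 variant reorders the halfwords (and ORs 0xc into the
@ top byte) to produce the equivalent Thumb-2 encoding of the same
@ instruction.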

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

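@ .Loop_v8 processes one 64-byte block per iteration.  Each group of
@ four rounds loads a quad of K256 constants from r3, adds it to a
@ message quad, and uses sha256h/sha256h2 (emitted via INST) to update
@ the state halves in q0/q1 while sha256su0/sha256su1 advance the
@ message schedule; q14/q15 keep a copy of the input state for the
@ final feed-forward.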
.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

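@ Davies-Meyer feed-forward: add the state that was offloaded to
@ q14/q15 at the top of the loop back into the working state before
@ looping for the next block or storing the result.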
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
.hidden	OPENSSL_armcap_P
#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
