// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__APPLE__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in
@ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
@ byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was
@ done about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

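@ Calling convention: the entry points below appear to follow the usual
@ OpenSSL "block_data_order" contract. The typedefs in this sketch are
@ an assumption; only the register usage is taken from the code itself
@ (r0 = pointer to the eight 32-bit state words, r1 = input pointer,
@ r2 = number of 64-byte blocks, converted into an end pointer on entry
@ by "add r2,r1,r2,lsl#6"):
@
@   void sha256_block_data_order(uint32_t state[8], const uint8_t *data,
@                                size_t num_blocks);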
#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)


.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif


.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
LOPENSSL_armcap:
.word	OPENSSL_armcap_P-Lsha256_block_data_order
#endif
.align	5

.globl	_sha256_block_data_order
.private_extern	_sha256_block_data_order
#ifdef __thumb2__
.thumb_func	_sha256_block_data_order
#endif
_sha256_block_data_order:
Lsha256_block_data_order:
#if __ARM_ARCH<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ _sha256_block_data_order
#else
	adr	r3,Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	LARMv8
	tst	r12,#ARMV7_NEON
	bne	LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
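@ Frame layout from here on (inferred from the loads/stores below, not
@ stated by the generator): sp+0*4..15*4 hold the message schedule
@ X[0..15]; sp+16*4 holds ctx (saved r0), sp+17*4 holds inp (saved r1),
@ and sp+18*4 holds inp+len (saved r2) from the stmdb above.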
Loop:
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
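@ "Magic": r3 (alternating with r12 in odd/even rounds) caches b^c for
@ the next round, and each round folds the previous round's Maj(a,b,c)
@ into h one round late (the "h+=Maj(a,b,c) from the past" lines
@ below). This looks like a scheduling trick to shorten the critical
@ path; the description here is inferred from the round code, not from
@ generator comments.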
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
Lrounds_16_xx:
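@ Rounds 16..63: X[i] is reconstructed in place via the SHA-256 message
@ schedule, visible in the shifts below:
@   sigma0(x) = (x ror 7) ^ (x ror 18) ^ (x lsr 3)
@   sigma1(x) = (x ror 17) ^ (x ror 19) ^ (x lsr 10)
@   X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9]
@ (indices taken mod 16 into the on-stack window), after which each
@ round proceeds exactly as BODY_00_15 above.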
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	Lrounds_16_xx
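@ Loop control is implicit in the constants: r3 still holds the K256
@ word consumed by the final round above, and only the last constant
@ (0xc67178f2) has low byte 0xf2, so the "cmp ...,#0xf2" sets eq
@ exactly after round 63. On eq the ctx pointer is pulled from the
@ frame; otherwise the 16..31 round block runs again.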

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif

#if __ARM_MAX_ARCH__>=7



.globl	_sha256_block_data_order_neon
.private_extern	_sha256_block_data_order_neon
#ifdef __thumb2__
.thumb_func	_sha256_block_data_order_neon
#endif
.align	5
.skip	16
_sha256_block_data_order_neon:
LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
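@ The NEON path keeps X[] in a freshly allocated, 16-byte-aligned stack
@ area (the bic above) so the vst1.32/vld1.32 accesses below can carry
@ the ",:128" alignment hint. The original sp is preserved at [sp,#76],
@ with ctx/inp/end at [sp,#64..72], per the str instructions below.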

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	L_00_48

.align	4
L_00_48:
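@ Each L_00_48 iteration interleaves the NEON message-schedule update
@ (vext/vshr/vsli/veor computing sigma0/sigma1 for four X[] words at a
@ time, plus the K256 pre-add via vadd.i32) with scalar rounds, so the
@ integer and NEON pipelines run in parallel. The round structure
@ mirrors the scalar code above; this note is inferred from the code,
@ not a generator comment.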
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
2280	vadd.i32	q3,q3,q9
2281	add	r6,r6,r2
2282	ldr	r2,[sp,#56]
2283	veor	d25,d25,d24
2284	and	r12,r12,r3
2285	add	r10,r10,r6
2286	vshr.u32	d24,d5,#19
2287	add	r6,r6,r0,ror#2
2288	eor	r12,r12,r8
2289	vsli.32	d24,d5,#13
2290	add	r5,r5,r2
2291	eor	r2,r11,r4
2292	veor	d25,d25,d24
2293	eor	r0,r10,r10,ror#5
2294	add	r6,r6,r12
2295	vadd.i32	d6,d6,d25
2296	and	r2,r2,r10
2297	eor	r12,r0,r10,ror#19
2298	vshr.u32	d24,d6,#17
2299	eor	r0,r6,r6,ror#11
2300	eor	r2,r2,r4
2301	vsli.32	d24,d6,#15
2302	add	r5,r5,r12,ror#6
2303	eor	r12,r6,r7
2304	vshr.u32	d25,d6,#10
2305	eor	r0,r0,r6,ror#20
2306	add	r5,r5,r2
2307	veor	d25,d25,d24
2308	ldr	r2,[sp,#60]
2309	and	r3,r3,r12
2310	vshr.u32	d24,d6,#19
2311	add	r9,r9,r5
2312	add	r5,r5,r0,ror#2
2313	eor	r3,r3,r7
2314	vld1.32	{q8},[r14,:128]!
2315	add	r4,r4,r2
2316	vsli.32	d24,d6,#13
2317	eor	r2,r10,r11
2318	eor	r0,r9,r9,ror#5
2319	veor	d25,d25,d24
2320	add	r5,r5,r3
2321	and	r2,r2,r9
2322	vadd.i32	d7,d7,d25
2323	eor	r3,r0,r9,ror#19
2324	eor	r0,r5,r5,ror#11
2325	vadd.i32	q8,q8,q3
2326	eor	r2,r2,r11
2327	add	r4,r4,r3,ror#6
2328	eor	r3,r5,r6
2329	eor	r0,r0,r5,ror#20
2330	add	r4,r4,r2
2331	ldr	r2,[r14]
2332	and	r12,r12,r3
2333	add	r8,r8,r4
2334	vst1.32	{q8},[r1,:128]!
2335	add	r4,r4,r0,ror#2
2336	eor	r12,r12,r6
2337	teq	r2,#0				@ check for K256 terminator
2338	ldr	r2,[sp,#0]
2339	sub	r1,r1,#64
2340	bne	L_00_48
2341
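@ Last 16 rounds: the expanded message schedule for the current block,
@ with K256 already added, sits on the stack, so the scalar rounds below
@ simply read it via sp. The next 64-byte input block is loaded here so
@ its byte-swap and schedule setup overlap with the remaining rounds;
@ [sp,#68] holds the input pointer and [sp,#72] the end-of-input limit.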
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

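@ Flags still hold the result of "teq r1,r0" above: NE means more input
@ remains, so reset the schedule pointer to the stack, reload X[0] and
@ clear the deferred-Maj register before looping; EQ restores the sp
@ saved at [sp,#76] and falls through to the epilogue.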
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}

#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
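@ INST emits the ARMv8 crypto-extension instructions used below
@ (sha256h, sha256h2, sha256su0, sha256su1) as raw bytes, so the file
@ assembles even with toolchains that do not know these opcodes. In
@ Thumb-2 mode the two 16-bit halves of the encoding are emitted in the
@ opposite order and the leading byte is adjusted (0xf3 becomes 0xff).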

#ifdef __thumb2__
.thumb_func	sha256_block_data_order_armv8
#endif
.align	5
sha256_block_data_order_armv8:
LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	Loop_v8

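@ Each Loop_v8 iteration hashes one 64-byte block entirely in NEON: the
@ input is loaded into q8-q11 and byte-swapped, the state in q0/q1 is
@ saved to q14/q15, and sixteen 4-round sha256h/sha256h2 groups follow,
@ with sha256su0/sha256su1 extending the message schedule through the
@ first twelve. r3 walks the K256 table and is rewound for the next block.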
.align	4
Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
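@ The message schedule is exhausted; the final four 4-round groups
@ below only compress.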
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

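@ Feed-forward: add the input state saved in q14/q15 back into the result.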
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ bx lr

#endif
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
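@ The .byte string above reads "SHA256 block transform for
@ ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>".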
.align	2
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	_OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol	_OPENSSL_armcap_P
.long	0
.private_extern	_OPENSSL_armcap_P
#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__APPLE__)
