// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#include "ring_core_generated/prefix_symbols_asm.h"
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in
@ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
@ per byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was
@ done about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
#ifndef __KERNEL__
# include <ring-core/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch	armv7-a

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

.hidden	OPENSSL_armcap_P
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

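@ A sketch of the C-level contract, assuming the usual OpenSSL perlasm
@ convention for this routine (the prototype below is an assumption;
@ it is not spelled out in this generated file):
@
@   void sha256_block_data_order(uint32_t state[8],
@                                const uint8_t *data, size_t num);
@
@ r0 = pointer to the eight 32-bit chaining values a..h,
@ r1 = pointer to the input, r2 = number of 64-byte blocks
@ (note the "r2,lsl#6" below converting a block count into an
@ end-of-input pointer).
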
.globl	sha256_block_data_order
.hidden	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
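@ Runtime dispatch: consult OPENSSL_armcap_P and branch to the
@ ARMv8-instruction (.LARMv8) or NEON (.LNEON) code path when the CPU
@ advertises the corresponding capability bit; otherwise fall through
@ to the integer-only ARMv4 code.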
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
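@ Stack frame from here on (as used by the "pull ctx"/"pull inp"
@ loads at the end of the loop): X[0..15] live at sp+0..sp+15*4,
@ the saved ctx pointer at sp+16*4, inp at sp+17*4 and the
@ end-of-input pointer at sp+18*4.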
.Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
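@ The "magic" above seeds the cross-round Maj() trick used by every
@ round below: the previous round leaves b^c in r3 (or r12), the current
@ round ANDs it with a^b and XORs with b to form Maj(a,b,c), and the
@ result is only folded into h at the start of the *next* round
@ ("h+=Maj(a,b,c) from the past"), hiding its latency.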
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
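@ Rounds 16..63 below prepend the message-schedule expansion to the
@ same round function: X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9],
@ with sigma0(x) = ror(x,7)^ror(x,18)^(x>>3) and
@ sigma1(x) = ror(x,17)^ror(x,19)^(x>>10), reading from and writing back
@ to the 16-word X[] ring buffer on the stack.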
.Lrounds_16_xx:
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
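@ Loop control: K256 ends in a zero-word terminator, and the "done?"
@ check in round 31 of each 16-round batch compares the low byte of the
@ K value just fetched against 0xf2, the low byte of 0xc67178f2 (the
@ last K256 entry). Equality means all 64 rounds have run, so the
@ conditional load below pulls the ctx pointer; otherwise the branch
@ takes another 16 rounds. The ite makes the pair valid in Thumb-2.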
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!
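@ The four vadd/vst1 pairs above precompute K[i]+X[i] for the first 16
@ rounds into the aligned stack area; the scalar round code below then
@ consumes those sums with plain ldr while NEON works ahead on the
@ message schedule.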

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48

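@ Each quarter of .L_00_48 interleaves one NEON schedule update, which
@ computes sigma0/sigma1 on four message words at a time (vext to shift
@ the window, vshr/vsli pairs standing in for rotates, veor to combine),
@ with four scalar rounds; every fresh K+X quad is stored back to the
@ stack for the rounds that consume it.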
1946.align	4
1947.L_00_48:
1948	vext.8	q8,q0,q1,#4
1949	add	r11,r11,r2
1950	eor	r2,r9,r10
1951	eor	r0,r8,r8,ror#5
1952	vext.8	q9,q2,q3,#4
1953	add	r4,r4,r12
1954	and	r2,r2,r8
1955	eor	r12,r0,r8,ror#19
1956	vshr.u32	q10,q8,#7
1957	eor	r0,r4,r4,ror#11
1958	eor	r2,r2,r10
1959	vadd.i32	q0,q0,q9
1960	add	r11,r11,r12,ror#6
1961	eor	r12,r4,r5
1962	vshr.u32	q9,q8,#3
1963	eor	r0,r0,r4,ror#20
1964	add	r11,r11,r2
1965	vsli.32	q10,q8,#25
1966	ldr	r2,[sp,#4]
1967	and	r3,r3,r12
1968	vshr.u32	q11,q8,#18
1969	add	r7,r7,r11
1970	add	r11,r11,r0,ror#2
1971	eor	r3,r3,r5
1972	veor	q9,q9,q10
1973	add	r10,r10,r2
1974	vsli.32	q11,q8,#14
1975	eor	r2,r8,r9
1976	eor	r0,r7,r7,ror#5
1977	vshr.u32	d24,d7,#17
1978	add	r11,r11,r3
1979	and	r2,r2,r7
1980	veor	q9,q9,q11
1981	eor	r3,r0,r7,ror#19
1982	eor	r0,r11,r11,ror#11
1983	vsli.32	d24,d7,#15
1984	eor	r2,r2,r9
1985	add	r10,r10,r3,ror#6
1986	vshr.u32	d25,d7,#10
1987	eor	r3,r11,r4
1988	eor	r0,r0,r11,ror#20
1989	vadd.i32	q0,q0,q9
1990	add	r10,r10,r2
1991	ldr	r2,[sp,#8]
1992	veor	d25,d25,d24
1993	and	r12,r12,r3
1994	add	r6,r6,r10
1995	vshr.u32	d24,d7,#19
1996	add	r10,r10,r0,ror#2
1997	eor	r12,r12,r4
1998	vsli.32	d24,d7,#13
1999	add	r9,r9,r2
2000	eor	r2,r7,r8
2001	veor	d25,d25,d24
2002	eor	r0,r6,r6,ror#5
2003	add	r10,r10,r12
2004	vadd.i32	d0,d0,d25
2005	and	r2,r2,r6
2006	eor	r12,r0,r6,ror#19
2007	vshr.u32	d24,d0,#17
2008	eor	r0,r10,r10,ror#11
2009	eor	r2,r2,r8
2010	vsli.32	d24,d0,#15
2011	add	r9,r9,r12,ror#6
2012	eor	r12,r10,r11
2013	vshr.u32	d25,d0,#10
2014	eor	r0,r0,r10,ror#20
2015	add	r9,r9,r2
2016	veor	d25,d25,d24
2017	ldr	r2,[sp,#12]
2018	and	r3,r3,r12
2019	vshr.u32	d24,d0,#19
2020	add	r5,r5,r9
2021	add	r9,r9,r0,ror#2
2022	eor	r3,r3,r11
2023	vld1.32	{q8},[r14,:128]!
2024	add	r8,r8,r2
2025	vsli.32	d24,d0,#13
2026	eor	r2,r6,r7
2027	eor	r0,r5,r5,ror#5
2028	veor	d25,d25,d24
2029	add	r9,r9,r3
2030	and	r2,r2,r5
2031	vadd.i32	d1,d1,d25
2032	eor	r3,r0,r5,ror#19
2033	eor	r0,r9,r9,ror#11
2034	vadd.i32	q8,q8,q0
2035	eor	r2,r2,r7
2036	add	r8,r8,r3,ror#6
2037	eor	r3,r9,r10
2038	eor	r0,r0,r9,ror#20
2039	add	r8,r8,r2
2040	ldr	r2,[sp,#16]
2041	and	r12,r12,r3
2042	add	r4,r4,r8
2043	vst1.32	{q8},[r1,:128]!
2044	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d6,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d7,d7,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q3
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[r14]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64
	bne	.L_00_48

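@ The schedule loop above runs three times (rounds 0-47); the zero
@ word stored after K256 acts as its terminator, so no round counter
@ is needed. Below are the final 16 rounds, interleaved with loading
@ and byte-swapping (vrev32.8) the next input block and precomputing
@ its first 16 values of W[t]+K[t], which is why r14 is rewound to
@ the start of K256. If this was the last block, the input pointer is
@ first stepped back by 64 bytes ("avoid SEGV") so the loads below
@ stay inside the buffer; the data they fetch is simply never used.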
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
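@ The 64 rounds are done: r2 (reloaded above from [sp,#64]) points at
@ the hash context, so add the working variables a-h into H[0..7] and
@ store the updated state back.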
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

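@ The SHA-256 extension instructions below are emitted as raw bytes
@ through INST because older assemblers do not know their mnemonics.
@ The four arguments are the bytes of the little-endian A32 encoding;
@ for Thumb-2 the two halfwords are swapped and the leading byte gets
@ |0xc, which maps the A32 NEON prefix 0xf3 to its T32 counterpart
@ 0xff. For example, INST(0x68,0x0c,0x02,0xf3), i.e. sha256h q0,q1,q12,
@ assembles to the word 0xf3020c68 in ARM mode and to the halfword
@ pair 0xff02,0x0c68 in Thumb-2 mode.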
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

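@ Each pass through .Loop_v8 hashes one 64-byte block entirely in
@ hardware: q8-q11 hold the byte-swapped message words,
@ sha256su0/sha256su1 extend the schedule four words at a time, and
@ each sha256h/sha256h2 pair advances the state in q0/q1 by four
@ rounds (16 pairs in all); q14/q15 keep the previous state so it
@ can be re-added at the end of the block.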
.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
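@ ASCII: "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"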
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
#endif
#endif  // !OPENSSL_NO_ASM
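@ An empty .note.GNU-stack section tells the linker that this object
@ does not require an executable stack.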
.section	.note.GNU-stack,"",%progbits